[clang] [libc] [llvm] [AMDGPU] Implement variadic functions by IR lowering (PR #93362)

Pierre van Houtryve via cfe-commits cfe-commits at lists.llvm.org
Wed Jun 5 06:55:58 PDT 2024


================
@@ -0,0 +1,1037 @@
+//===-- ExpandVariadicsPass.cpp --------------------------------*- C++ -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This is an optimization pass for variadic functions. If called from codegen,
+// it can serve as the implementation of variadic functions for a given target.
+//
+// The strategy is to turn the ... part of a variadic function into a va_list
+// and fix up the call sites. The majority of the pass is target independent.
+// The exceptions are the va_list type itself and the rules for where to store
+// variables in memory such that va_arg can iterate over them given a va_list.
+//
+// The majority of the plumbing is splitting the variadic function into a
+// single basic block that packs the variadic arguments into a va_list and
+// a second function that does the work of the original. That packing is
+// exactly what is done by va_start. Further, the transform from ... to va_list
+// replaced va_start with an operation to copy a va_list from the new argument,
+// which is exactly a va_copy. This is useful for reducing target-dependence.
+//
+// A va_list instance is a forward iterator, where the primary operation va_arg
+// is dereference-then-increment. This interface forces significant convergent
+// evolution between target specific implementations. The variation in runtime
+// data layout is limited to that representable by the iterator, parameterised
+// by the type passed to the va_arg instruction.
+//
+// Therefore the majority of the target specific subtlety is packing arguments
+// into a stack allocated buffer such that a va_list can be initialised with it
+// and the va_arg expansion for the target will find the arguments at runtime.
+//
+// The aggregate effect is to unblock other transforms, most critically the
+// general purpose inliner. Known calls to variadic functions become zero cost.
+//
+// Consistency with clang is primarily tested by emitting va_arg using clang
+// then expanding the variadic functions using this pass, followed by trying
+// to constant fold the functions to no-ops.
+//
+// Target specific behaviour is tested in IR - mainly checking that values are
+// put into positions in call frames that make sense for that particular target.
+//
+// There is one "clever" invariant in use. va_start intrinsics that are not
+// within a varidic functions are an error in the IR verifier. When this
+// transform moves blocks from a variadic function into a fixed arity one, it
+// moves va_start intrinsics along with everything else. That means that the
+// va_start intrinsics that need to be rewritten to use the trailing argument
+// are exactly those that are in non-variadic functions so no further state
+// is needed to distinguish those that need to be rewritten.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/IPO/ExpandVariadics.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Passes/OptimizationLevel.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/TargetParser/Triple.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
+
+#define DEBUG_TYPE "expand-variadics"
+
+using namespace llvm;
+
+namespace {
+
+cl::opt<ExpandVariadicsMode> ExpandVariadicsModeOption(
+    DEBUG_TYPE "-override", cl::desc("Override the behaviour of " DEBUG_TYPE),
+    cl::init(ExpandVariadicsMode::Unspecified),
+    cl::values(clEnumValN(ExpandVariadicsMode::Unspecified, "unspecified",
+                          "Use the implementation defaults"),
+               clEnumValN(ExpandVariadicsMode::Disable, "disable",
+                          "Disable the pass entirely"),
+               clEnumValN(ExpandVariadicsMode::Optimize, "optimize",
+                          "Optimise without changing ABI"),
+               clEnumValN(ExpandVariadicsMode::Lowering, "lowering",
+                          "Change variadic calling convention")));
+
+bool commandLineOverride() {
+  return ExpandVariadicsModeOption != ExpandVariadicsMode::Unspecified;
+}
+
+// Instances of this class encapsulate the target-dependant behaviour as a
+// function of triple. Implementing a new ABI is adding a case to the switch
+// in create(llvm::Triple) at the end of this file.
+class VariadicABIInfo {
+protected:
+  VariadicABIInfo() {}
+
+public:
+  static std::unique_ptr<VariadicABIInfo> create(llvm::Triple const &Triple);
+
+  // Allow overriding whether the pass runs on a per-target basis
+  virtual bool enableForTarget() = 0;
+
+  // Whether a valist instance is passed by value or by address
+  // I.e. does it need to be alloca'ed and stored into, or can
+  // it be passed directly in a SSA register
+  virtual bool vaListPassedInSSARegister() = 0;
+
+  // The type of a va_list iterator object
+  virtual Type *vaListType(LLVMContext &Ctx) = 0;
+
+  // The type of a va_list as a function argument as lowered by C
+  virtual Type *vaListParameterType(Module &M) = 0;
+
+  // Initialize an allocated va_list object to point to an already
+  // initialized contiguous memory region.
+  // Return the value to pass as the va_list argument
+  virtual Value *initializeVaList(Module &M, LLVMContext &Ctx,
+                                  IRBuilder<> &Builder, AllocaInst *VaList,
+                                  Value *Buffer) = 0;
+
+  struct VAArgSlotInfo {
+    Align DataAlign; // With respect to the call frame
+    bool Indirect;   // Passed via a pointer
+  };
+  virtual VAArgSlotInfo slotInfo(const DataLayout &DL, Type *Parameter) = 0;
+
+  // Targets implemented so far all have the same trivial lowering for these
+  bool vaEndIsNop() { return true; }
+  bool vaCopyIsMemcpy() { return true; }
+
+  virtual ~VariadicABIInfo() {}
+};
+
+// Module implements getFunction() which returns nullptr on missing declaration
+// and getOrInsertFunction which creates one when absent. Intrinsics.h only
+// implements getDeclaration which creates one when missing. Checking whether
+// an intrinsic exists thus inserts it in the module and it then needs to be
+// deleted again to clean up.
+// The right name for the two functions on intrinsics would match Module::,
+// but doing that in a single change would introduce nullptr dereferences
+// where currently there are none. The minimal collateral damage approach
+// would split the change over a release to help downstream branches. As it
+// is unclear what approach will be preferred, implementing the trivial
+// function here in the meantime to decouple from that discussion.
+Function *getPreexistingDeclaration(Module *M, Intrinsic::ID Id,
+                                    ArrayRef<Type *> Tys = std::nullopt) {
+  auto *FT = Intrinsic::getType(M->getContext(), Id, Tys);
+  return M->getFunction(Tys.empty() ? Intrinsic::getName(Id)
+                                    : Intrinsic::getName(Id, Tys, M, FT));
+}
+
+class ExpandVariadics : public ModulePass {
+
+  // The pass construction sets the default to optimize when called from middle
+  // end and lowering when called from the backend. The command line variable
+  // overrides that. This is useful for testing and debugging. It also allows
+  // building an applications with variadic functions wholly removed if one
+  // has sufficient control over the dependencies, e.g. a statically linked
+  // clang that has no variadic function calls remaining in the binary.
+
+public:
+  static char ID;
+  const ExpandVariadicsMode Mode;
+  std::unique_ptr<VariadicABIInfo> ABI;
+
+  ExpandVariadics(ExpandVariadicsMode Mode)
+      : ModulePass(ID),
+        Mode(commandLineOverride() ? ExpandVariadicsModeOption : Mode) {}
+
+  StringRef getPassName() const override { return "Expand variadic functions"; }
+
+  bool rewriteABI() { return Mode == ExpandVariadicsMode::Lowering; }
+
+  bool runOnModule(Module &M) override;
+
+  bool runOnFunction(Module &M, IRBuilder<> &Builder, Function *F);
+
+  Function *replaceAllUsesWithNewDeclaration(Module &M,
+                                             Function *OriginalFunction);
+
+  Function *deriveFixedArityReplacement(Module &M, IRBuilder<> &Builder,
+                                        Function *OriginalFunction);
+
+  Function *defineVariadicWrapper(Module &M, IRBuilder<> &Builder,
+                                  Function *VariadicWrapper,
+                                  Function *FixedArityReplacement);
+
+  bool expandCall(Module &M, IRBuilder<> &Builder, CallBase *CB, FunctionType *,
+                  Function *NF);
+
+  // The intrinsic functions va_copy and va_end are removed unconditionally.
+  // They correspond to a memcpy and a no-op on all implemented targets.
+  // The va_start intrinsic is removed from basic blocks that were not created
+  // by this pass, some may remain if needed to maintain the external ABI.
+
+  template <Intrinsic::ID ID, typename InstructionType>
+  bool expandIntrinsicUsers(Module &M, IRBuilder<> &Builder,
+                            PointerType *IntrinsicArgType) {
+    bool Changed = false;
+    const DataLayout &DL = M.getDataLayout();
+    if (Function *Intrinsic =
+            getPreexistingDeclaration(&M, ID, {IntrinsicArgType})) {
+      for (User *U : llvm::make_early_inc_range(Intrinsic->users())) {
+        if (auto *I = dyn_cast<InstructionType>(U)) {
+          Changed |= expandVAIntrinsicCall(Builder, DL, I);
+        }
+      }
+      if (Intrinsic->use_empty())
+        Intrinsic->eraseFromParent();
+    }
+    return Changed;
+  }
+
+  bool expandVAIntrinsicUsersWithAddrspace(Module &M, IRBuilder<> &Builder,
+                                           unsigned Addrspace) {
+    auto &Ctx = M.getContext();
+    PointerType *IntrinsicArgType = PointerType::get(Ctx, Addrspace);
+    bool Changed = false;
+
+    // expand vastart before vacopy as vastart may introduce a vacopy
+    Changed |= expandIntrinsicUsers<Intrinsic::vastart, VAStartInst>(
+        M, Builder, IntrinsicArgType);
+    Changed |= expandIntrinsicUsers<Intrinsic::vaend, VAEndInst>(
+        M, Builder, IntrinsicArgType);
+    Changed |= expandIntrinsicUsers<Intrinsic::vacopy, VACopyInst>(
+        M, Builder, IntrinsicArgType);
+    return Changed;
+  }
+
+  bool expandVAIntrinsicCall(IRBuilder<> &Builder, const DataLayout &DL,
+                             VAStartInst *Inst);
+
+  bool expandVAIntrinsicCall(IRBuilder<> &, const DataLayout &,
+                             VAEndInst *Inst);
+
+  bool expandVAIntrinsicCall(IRBuilder<> &Builder, const DataLayout &DL,
+                             VACopyInst *Inst);
+
+  FunctionType *inlinableVariadicFunctionType(Module &M, FunctionType *FTy) {
+    // The type of "FTy" with the ... removed and a va_list appended
+    SmallVector<Type *> ArgTypes(FTy->param_begin(), FTy->param_end());
+    ArgTypes.push_back(ABI->vaListParameterType(M));
+    return FunctionType::get(FTy->getReturnType(), ArgTypes,
+                             /*IsVarArgs=*/false);
+  }
+
+  static ConstantInt *sizeOfAlloca(LLVMContext &Ctx, const DataLayout &DL,
+                                   AllocaInst *Alloced) {
+    std::optional<TypeSize> AllocaTypeSize = Alloced->getAllocationSize(DL);
+    uint64_t AsInt = AllocaTypeSize ? AllocaTypeSize->getFixedValue() : 0;
+    return ConstantInt::get(Type::getInt64Ty(Ctx), AsInt);
+  }
+
+  bool expansionApplicableToFunction(Module &M, Function *F) {
+    if (F->isIntrinsic() || !F->isVarArg() ||
+        F->hasFnAttribute(Attribute::Naked)) {
+      return false;
+    }
+
+    if (F->getCallingConv() != CallingConv::C)
+      return false;
+
+    if (rewriteABI())
+      return true;
+
+    if (!F->hasExactDefinition())
+      return false;
+
+    return true;
+  }
+
+  bool expansionApplicableToFunctionCall(CallBase *CB) {
+    if (CallInst *CI = dyn_cast<CallInst>(CB)) {
+      if (CI->isMustTailCall()) {
+        // Cannot expand musttail calls
+        return false;
+      }
+
+      if (CI->getCallingConv() != CallingConv::C)
+        return false;
+
+      return true;
+    }
+
+    if (isa<InvokeInst>(CB)) {
+      // Invoke not implemented in initial implementation of pass
+      return false;
+    }
+
+    // Other unimplemented derivative of CallBase
+    return false;
+  }
+
+  class ExpandedCallFrame {
+    // Helper for constructing an alloca instance containing the arguments bound
+    // to the variadic ... parameter, rearranged to allow indexing through a
+    // va_list iterator
+    enum { N = 4 };
+    SmallVector<Type *, N> FieldTypes;
+    enum Tag { Store, Memcpy, Padding };
+    SmallVector<std::tuple<Value *, uint64_t, Tag>, N> Source;
+
+    template <Tag tag> void append(Type *FieldType, Value *V, uint64_t Bytes) {
+      FieldTypes.push_back(FieldType);
+      Source.push_back({V, Bytes, tag});
+    }
+
+  public:
+    void store(LLVMContext &Ctx, Type *T, Value *V) { append<Store>(T, V, 0); }
+
+    void memcpy(LLVMContext &Ctx, Type *T, Value *V, uint64_t Bytes) {
+      append<Memcpy>(T, V, Bytes);
+    }
+
+    void padding(LLVMContext &Ctx, uint64_t By) {
+      append<Padding>(ArrayType::get(Type::getInt8Ty(Ctx), By), nullptr, 0);
+    }
+
+    size_t size() const { return FieldTypes.size(); }
+    bool empty() const { return FieldTypes.empty(); }
+
+    StructType *asStruct(LLVMContext &Ctx, StringRef Name) {
+      const bool IsPacked = true;
+      return StructType::create(Ctx, FieldTypes,
+                                (Twine(Name) + ".vararg").str(), IsPacked);
+    }
+
+    void initializeStructAlloca(const DataLayout &DL, IRBuilder<> &Builder,
+                                AllocaInst *Alloced) {
+
+      StructType *VarargsTy = cast<StructType>(Alloced->getAllocatedType());
+
+      for (size_t I = 0; I < size(); I++) {
+
+        auto [V, bytes, tag] = Source[I];
+
+        if (tag == Padding) {
+          assert(V == nullptr);
+          continue;
+        }
+
+        auto Dst = Builder.CreateStructGEP(VarargsTy, Alloced, I);
+
+        assert(V != nullptr);
+
+        if (tag == Store) {
+          Builder.CreateStore(V, Dst);
+        }
+
+        if (tag == Memcpy) {
+          Builder.CreateMemCpy(Dst, {}, V, {}, bytes);
+        }
----------------
Pierre-vh wrote:

```suggestion
        if (tag == Store) 
          Builder.CreateStore(V, Dst);
        else
          Builder.CreateMemCpy(Dst, {}, V, {}, bytes);
```

https://github.com/llvm/llvm-project/pull/93362


More information about the cfe-commits mailing list