[clang] [llvm] [WIP] Expand variadic functions in IR (PR #89007)

Jon Chesterfield via cfe-commits cfe-commits at lists.llvm.org
Wed Apr 17 04:17:10 PDT 2024


================
@@ -0,0 +1,1056 @@
+//===-- ExpandVariadicsPass.cpp --------------------------------*- C++ -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This is an optimization pass for variadic functions. If called from codegen,
+// it can serve as the implementation of variadic functions for a given target.
+//
+// The strategy is to turn the ... part of a varidic function into a va_list
+// and fix up the call sites. This is completely effective if the calling
+// convention can declare that to be the right thing, e.g. on GPUs or where
+// the application is wholly statically linked. In the usual case, it will
+// replace known calls to known variadic functions with calls that are amenable
+// to inlining and other optimisations.
+//
+// The target-dependent parts are in class VariadicABIInfo. Enabling a new
+// target means adding a case to VariadicABIInfo::create() along with tests.
+// This will be especially simple if the va_list representation is a char*.
+//
+// The majority of the plumbing is splitting the variadic function into a
+// single basic block that packs the variadic arguments into a va_list and
+// a second function that does the work of the original. The target specific
+// part is packing arguments into a contiguous buffer that the clang expansion
+// of va_arg will do the right thing with.
+//
+// The aggregate effect is to unblock other transforms, most critically the
+// general purpose inliner. Known calls to variadic functions become zero cost.
+//
+// Consistency with clang is primarily tested by emitting va_arg using clang
+// then expanding the variadic functions using this pass, followed by trying
+// to constant fold the functions to no-ops.
+//
+// Target specific behaviour is tested in IR - mainly checking that values are
+// put into positions in call frames that make sense for that particular target.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/IPO/ExpandVariadics.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Passes/OptimizationLevel.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/TargetParser/Triple.h"
+
+#include <cstdio>
+
+#define DEBUG_TYPE "expand-variadics"
+
+using namespace llvm;
+
+cl::opt<ExpandVariadicsMode> ExpandVariadicsModeOption(
+    DEBUG_TYPE "-override", cl::desc("Override the behaviour of " DEBUG_TYPE),
+    cl::init(ExpandVariadicsMode::unspecified),
+    cl::values(clEnumValN(ExpandVariadicsMode::unspecified, "unspecified",
+                          "Use the implementation defaults"),
+               clEnumValN(ExpandVariadicsMode::disable, "disable",
+                          "Disable the pass entirely"),
+               clEnumValN(ExpandVariadicsMode::optimize, "optimize",
+                          "Optimise without changing ABI"),
+               clEnumValN(ExpandVariadicsMode::lowering, "lowering",
+                          "Change variadic calling convention")));
+
+namespace {
+
+// Module implements getFunction() which returns nullptr on missing declaration
+// and getOrInsertFunction which creates one when absent. Intrinsics.h
+// implements getDeclaration which creates one when missing. This should be
+// changed to be consistent with Module()'s naming. Implementing as a local
+// function here in the meantime to decouple from that process.
+Function *getPreexistingDeclaration(Module *M, Intrinsic::ID id,
+                                    ArrayRef<Type *> Tys = std::nullopt) {
+  auto *FT = Intrinsic::getType(M->getContext(), id, Tys);
+  return M->getFunction(Tys.empty() ? Intrinsic::getName(id)
+                                    : Intrinsic::getName(id, Tys, M, FT));
+}
+
+// Lots of targets use a void* pointed at a buffer for va_list.
+// Some use more complicated iterator constructs. Type erase that
+// so the rest of the pass can operation on either.
+// Virtual functions where different targets want different behaviour,
+// normal where all implemented targets presently have the same.
+struct VAListInterface {
+  virtual ~VAListInterface() {}
+
+  // Whether a valist instance is passed by value or by address
+  // I.e. does it need to be alloca'ed and stored into, or can
+  // it be passed directly in a SSA register
+  virtual bool passedInSSARegister() = 0;
+
+  // The type of a va_list iterator object
+  virtual Type *vaListType(LLVMContext &Ctx) = 0;
+
+  // The type of a va_list as a function argument as lowered by C
+  virtual Type *vaListParameterType(Module &M) = 0;
+
+  // Initialise an allocated va_list object to point to an already
+  // initialised contiguous memory region.
+  // Return the value to pass as the va_list argument
+  virtual Value *initializeVAList(LLVMContext &Ctx, IRBuilder<> &Builder,
+                                  AllocaInst *, Value * /*buffer*/) = 0;
+
+  // Simple lowering suffices for va_end, va_copy for current targets
+  bool vaEndIsNop() { return true; }
+  bool vaCopyIsMemcpy() { return true; }
+};
+
+// The majority case - a void* into an alloca
+struct VoidPtr final : public VAListInterface {
+  bool passedInSSARegister() override { return true; }
+
+  Type *vaListType(LLVMContext &Ctx) override {
+    return PointerType::getUnqual(Ctx);
+  }
+
+  Type *vaListParameterType(Module &M) override {
+    return PointerType::getUnqual(M.getContext());
+  }
+
+  Value *initializeVAList(LLVMContext &Ctx, IRBuilder<> &Builder,
+                          AllocaInst * /*va_list*/, Value *buffer) override {
+    return buffer;
+  }
+};
+
+struct VoidPtrAllocaAddrspace final : public VAListInterface {
+
+  bool passedInSSARegister() override { return true; }
+
+  Type *vaListType(LLVMContext &Ctx) override {
+    return PointerType::getUnqual(Ctx);
+  }
+
+  Type *vaListParameterType(Module &M) override {
+    const DataLayout &DL = M.getDataLayout();
+    return DL.getAllocaPtrType(M.getContext());
+  }
+
+  Value *initializeVAList(LLVMContext &Ctx, IRBuilder<> &Builder,
+                          AllocaInst * /*va_list*/, Value *buffer) override {
+    return buffer;
+  }
+};
+
+// SystemV as used by X64 Linux and others
+struct SystemV final : public VAListInterface {
+  bool passedInSSARegister() override { return false; }
+
+  Type *vaListType(LLVMContext &Ctx) override {
+    auto I32 = Type::getInt32Ty(Ctx);
+    auto Ptr = PointerType::getUnqual(Ctx);
+    return ArrayType::get(StructType::get(Ctx, {I32, I32, Ptr, Ptr}), 1);
+  }
+
+  Type *vaListParameterType(Module &M) override {
+    return PointerType::getUnqual(M.getContext());
+  }
+
+  Value *initializeVAList(LLVMContext &Ctx, IRBuilder<> &Builder,
+                          AllocaInst *VaList, Value *VoidBuffer) override {
+    assert(VaList->getAllocatedType() == vaListType(Ctx));
+
+    Type *VaListTy = vaListType(Ctx);
+
+    Type *I32 = Type::getInt32Ty(Ctx);
+    Type *I64 = Type::getInt64Ty(Ctx);
+
+    Value *Idxs[3] = {
+        ConstantInt::get(I64, 0),
+        ConstantInt::get(I32, 0),
+        nullptr,
+    };
+
+    Idxs[2] = ConstantInt::get(I32, 0);
+    Builder.CreateStore(
+        ConstantInt::get(I32, 48),
+        Builder.CreateInBoundsGEP(VaListTy, VaList, Idxs, "gp_offset"));
+
+    Idxs[2] = ConstantInt::get(I32, 1);
+    Builder.CreateStore(
+        ConstantInt::get(I32, 6 * 8 + 8 * 16),
----------------
JonChesterfield wrote:

Oh right. This needs a whole comment block on why this collection of stores is known to create a va_list which is correct on x64 since that's not very obvious. I'll think about how to express that and probably write down the aarch64 one as well, it's the same sort of construct.

https://github.com/llvm/llvm-project/pull/89007


More information about the cfe-commits mailing list