[llvm] Port Swift's merge function pass to llvm: merging functions that differ in constants (PR #68235)
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Fri Oct 27 06:01:51 PDT 2023
================
@@ -0,0 +1,1416 @@
+//===--- MergeFunctionsIgnoringConst.cpp - Merge functions ----------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass looks for similar functions that are mergeable and folds them.
+// The implementation is similar to LLVM's MergeFunctions pass. Instead of
+// merging identical functions, it merges functions which only differ by a few
+// constants in certain instructions.
+// This is copied from Swift's implementation.
+//
+// This pass should run after LLVM's MergeFunctions pass, because it works best
+// if there are no _identical_ functions in the module.
+// Note: it would also work for identical functions but could produce more
+// code overhead than the LLVM pass.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/IPO/MergeFunctionsIgnoringConst.h"
+// #include "llvm/Transforms/Utils/GlobalMergeFunctions.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/Hashing.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Transforms/Utils/FunctionComparatorIgnoringConst.h"
+// #include "llvm/ADT/Triple.h"
+#include "llvm/Analysis/ObjCARCUtil.h"
+#include "llvm/ADT/StableHashing.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+// #include "llvm/IR/GlobalPtrAuthInfo.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/IR/StructuralHash.h"
+#include "llvm/IR/ValueHandle.h"
+#include "llvm/IR/ValueMap.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/Regex.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/IPO.h"
+#include <vector>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "mergefunc-ignoring-const"
+
+STATISTIC(NumFunctionsMergedIgnoringConst, "Number of functions merged"); // pass statistic; it is updated outside this excerpt
+STATISTIC(NumThunksWrittenIgnoringConst, "Number of thunks generated"); // pass statistic; it is updated outside this excerpt
+
+static cl::opt<bool>
+ EnableMergeFunc2("enable-merge-func2", cl::init(false), cl::Hidden, // hidden flag, off by default
+ cl::desc("Enable more aggressive function merger")); // when set, invoke instructions pass the opcode filter in isEligibleInstrunctionForConstantSharing
+
+static cl::opt<unsigned> NumFunctionsIgnoringConstForSanityCheck( // debugging aid; the code that reads it is not part of this excerpt
+ "mergefunc-ignoringconst-sanity",
+ cl::desc("How many functions in module could be used for "
+ "MergeFunctionsIgnoringConst pass sanity check. "
+ "'0' disables this check. Works only with '-debug' key."),
+ cl::init(0), cl::Hidden); // defaults to 0, which the help text defines as "check disabled"
+
+// Size threshold that gates which functions are considered for merging
+// (default 15; 0 turns merging off, per the help text). The two help-text
+// literals are concatenated by the compiler, so the first one has to end in a
+// space; without it the help output reads "...for merging.'0' disables...".
+static cl::opt<unsigned> IgnoringConstMergeThreshold(
+ "mergefunc-ignoringconst-threshold",
+ cl::desc("Functions larger than the threshold are considered for merging. "
+ "'0' disables function merging at all."),
+ cl::init(15), cl::Hidden);
+
+// Hidden opt-in switch, off by default. The help text names the linkage kind,
+// so it must spell it "LinkOnceODR".
+// NOTE(review): this option is not static, so it has external linkage;
+// presumably it is referenced from another translation unit - confirm,
+// otherwise make it static like the neighbouring options.
+cl::opt<bool> UseLinkOnceODRLinkageMerging(
+ "use-linkonceodr-linkage-merging", cl::init(false), cl::Hidden,
+ cl::desc(
+ "Use LinkOnceODR linkage to deduplicate the identical merged function "
+ "(default = off)"));
+
+cl::opt<bool> NoInlineForMergedFunction( // not static, so it has external linkage; NOTE(review): confirm another file really needs it
+ "no-inline-merged-function", cl::init(false), cl::Hidden,
+ cl::desc("set noinline for merged function (default = off)"));
+
+static cl::opt<bool>
+ CastArrayType("merge-cast-array-type", cl::init(false), cl::Hidden, // hidden flag, off by default; read outside this excerpt
+ cl::desc("support for casting array type (default = off)"));
+
+static cl::opt<bool> IgnoreMusttailFunction( // hidden flag, off by default; read outside this excerpt
+ "ignore-musttail-function", cl::init(false), cl::Hidden,
+ cl::desc(
+ "ignore functions containing callsites with musttail (default = off)"));
+
+static cl::opt<bool> AlwaysCallThunk( // hidden flag, off by default; read outside this excerpt
+ "merge-always-call-thunk", cl::init(false), cl::Hidden,
+ cl::desc(
+ "do not replace callsites and always emit a thunk (default = off)"));
+
+static cl::list<std::string> MergeBlockRegexFilters( // list of regex strings; how they are matched is outside this excerpt
+ "merge-block-regex", cl::Optional, // NOTE(review): cl::Optional and cl::ZeroOrMore (below) are both occurrence flags - confirm which one is intended and drop the other
+ cl::desc("Block functions from merging if they match the given "
+ "regular expression"),
+ cl::ZeroOrMore);
+
+static cl::list<std::string> MergeAllowRegexFilters( // counterpart of the block list above; how the two interact is outside this excerpt
+ "merge-allow-regex", cl::Optional, // NOTE(review): same cl::Optional / cl::ZeroOrMore overlap as above
+ cl::desc("Allow functions from merging if they match the given "
+ "regular expression"),
+ cl::ZeroOrMore);
+
+bool isEligibleInstrunctionForConstantSharing(const Instruction *I) { // opcode filter: true only for the opcodes accepted below
+ switch (I->getOpcode()) {
+ case Instruction::Load:
+ case Instruction::Store:
+ case Instruction::Call:
+ return true; // loads, stores and calls are always accepted
+ default: {
+ if (EnableMergeFunc2 && I->getOpcode() == Instruction::Invoke) // invokes are accepted only under -enable-merge-func2
+ return true;
+ return false; // every other opcode is rejected
+ }
+ }
+} // NOTE(review): "Instrunction" in the name is a typo for "Instruction"; renaming it means updating the declaration and all callers as well
+
+/// Returns true if the operand \p opIdx of \p CI is the callee operand.
+static bool isCalleeOperand(const CallBase *CI, unsigned opIdx) {
+ return &CI->getCalledOperandUse() == &CI->getOperandUse(opIdx); // compares the addresses of the Use slots, not the operand values
+}
+
+static bool canParameterizeCallOperand(const CallBase *CI, unsigned opIdx) { // decides whether operand opIdx of call CI may be parameterized; each check below vetoes one case
+ if (CI->isInlineAsm()) // inline-asm calls are rejected for every operand
+ return false;
+ Function *Callee = CI->getCalledOperand() // ends up null when there is no called operand or it is not a Function after stripping pointer casts
+ ? dyn_cast_or_null<Function>(
+ CI->getCalledOperand()->stripPointerCasts())
+ : nullptr;
+ if (Callee) {
+ if (Callee->isIntrinsic()) // rejects every operand of an intrinsic call, not just the callee slot
+ return false;
+ // objc_msgSend stubs must be called, and can't have their address taken.
+ if (Callee->getName().startswith("objc_msgSend$"))
+ return false;
+ }
+ if (isCalleeOperand(CI, opIdx) &&
+ CI->getOperandBundle(LLVMContext::OB_ptrauth).has_value()) {
+ // The operand is the callee and it has already been signed. Ignore this
+ // because we cannot add another ptrauth bundle to the call instruction.
+ return false;
+ }
+ return true; // no veto applied
+}
+
+bool isEligibleOperandForConstantSharing(const Instruction *I, unsigned OpIdx) { // true iff operand OpIdx of I passes every check below; OpIdx must be a valid operand index
+ assert(OpIdx < I->getNumOperands() && "Invalid operand index");
+
+ if (!isEligibleInstrunctionForConstantSharing(I)) // opcode filter comes first
+ return false;
+
+ auto Opnd = I->getOperand(OpIdx);
+ if (!isa<Constant>(Opnd)) // only operands that are Constants qualify
+ return false;
+
+ if (const auto *CI = dyn_cast<CallBase>(I)) // call-like instructions get the extra per-operand checks
+ return canParameterizeCallOperand(CI, OpIdx);
+
+ return true; // reached only for a constant operand of a load or store
+}
+
+namespace {
+
+/// MergeFuncIgnoringConst finds functions which only differ by constants in
+/// certain instructions, e.g. resulting from specialized functions of layout
+/// compatible types.
+/// Such functions are merged by replacing the differing constants by a
+/// parameter. The original functions are replaced by thunks which call the
+/// merged function with the specific argument constants.
+///
+class MergeFuncIgnoringConstImpl { // : public ModulePass {
+public:
+ MergeFuncIgnoringConstImpl(bool ptrAuthEnabled, unsigned ptrAuthKey, std::string suffix)
+ : FnTree(FunctionNodeCmp(&GlobalNumbers)), ptrAuthOptionsSet(true),
+ ptrAuthEnabled(ptrAuthEnabled), ptrAuthKey(ptrAuthKey), mergeFuncSuffix(suffix) {}
+
+ bool runImpl(Module &M);
+
+private:
+ struct FunctionEntry;
+
+ /// Describes the set of functions which are considered as "equivalent" (i.e.
+ /// only differing by some constants).
+ struct EquivalenceClass {
+ /// Head of the singly-linked list of all functions which are members of
+ /// this equivalence class.
+ FunctionEntry *First;
+
+ /// A very cheap hash, used to early exit if functions do not match.
+ llvm::IRHash Hash;
+
+ public: // redundant in a struct, whose members are public by default
+ // Note the hash is recalculated potentially multiple times, but it is
+ // cheap.
+ EquivalenceClass(FunctionEntry *First)
+ : First(First), Hash(StructuralHash(*First->F)) { // the hash is computed from the function of the first entry
+ assert(!First->Next); // a new class must start from an entry that is not yet linked to another one
+ }
+ };
+
+ /// The function comparison operator is provided here so that FunctionNodes do
+ /// not need to become larger with another pointer.
+ class FunctionNodeCmp {
+ GlobalNumberState *GlobalNumbers; // handed to every FunctionComparatorIgnoringConst created in operator()
+
+ public:
+ FunctionNodeCmp(GlobalNumberState *GN) : GlobalNumbers(GN) {}
+ bool operator()(const EquivalenceClass &LHS,
+ const EquivalenceClass &RHS) const {
+ // Order first by hashes, then full function comparison.
+ if (LHS.Hash != RHS.Hash)
+ return LHS.Hash < RHS.Hash;
+ FunctionComparatorIgnoringConst FCmp(LHS.First->F, RHS.First->F, // only the first function of each class takes part in the comparison
+ GlobalNumbers);
+ return FCmp.compareIgnoringConsts() == -1; // "less than" holds only when the comparator returns exactly -1
+ }
+ };
+ using FnTreeType = std::set<EquivalenceClass, FunctionNodeCmp>;
+
+ /// Per-function bookkeeping record that links a function into its equivalence class.
+ struct FunctionEntry {
+ FunctionEntry(Function *F, FnTreeType::iterator I)
+ : F(F), Next(nullptr), numUnhandledCallees(0), TreeIter(I), // starts unlinked and with a zero callee count
+ isMerged(false) {}
+
+ /// Back-link to the function.
+ AssertingVH<Function> F;
+
+ /// The next function in its equivalence class.
+ FunctionEntry *Next;
+
+ /// The number of not-yet merged callees. Used to process the merging in
+ /// bottom-up call order.
+ /// This is only valid in the first entry of an equivalence class. The
+ /// counts of all functions in an equivalence class are accumulated in the
+ /// first entry.
+ int numUnhandledCallees;
+
+ /// The iterator of the function's equivalence class in the FnTree.
+ /// It's FnTree.end() if the function is not in an equivalence class.
+ FnTreeType::iterator TreeIter;
+
+ /// True if this function is already a thunk, calling the merged function.
+ bool isMerged;
+ };
+
+ /// Describes an operand of a specific instruction.
+ struct OpLocation {
+ Instruction *I; // the instruction that holds the operand
+ unsigned OpIndex; // presumably the operand's index within I - confirm against the code that fills it in
+ };
+
+ /// Information for a function. Used during merging.
+ struct FunctionInfo {
+
+ FunctionInfo(Function *F)
+ : F(F), CurrentInst(nullptr), NumParamsNeeded(0) {} // there is no current instruction until init() runs
+
+ void init() { // (re)starts the walk at the first instruction of the first block and clears the parameter count
+ CurrentInst = &*F->begin()->begin(); // dereferences unconditionally, so F must have a non-empty first block
+ NumParamsNeeded = 0;
+ }
+
+ /// Advances the current instruction to the next instruction; it becomes null after the last one.
+ void nextInst() {
+ assert(CurrentInst);
+ if (CurrentInst->isTerminator()) { // end of a block: continue with the following block, if there is one
+ auto BlockIter = std::next(CurrentInst->getParent()->getIterator());
+ if (BlockIter == F->end()) {
+ CurrentInst = nullptr; // past the last block, so the walk is finished
+ return;
+ }
+ CurrentInst = &*BlockIter->begin();
+ return;
+ }
+ CurrentInst = &*std::next(CurrentInst->getIterator()); // not a terminator: step to the next instruction in the same block
+ }
+
+ /// Returns true if the operand \p OpIdx of the current instruction is the
+ /// callee of a call, which needs to be signed if passed as a parameter.
+ bool needsPointerSigning(unsigned OpIdx) const {
+ if (auto *CI = dyn_cast<CallInst>(CurrentInst)) // NOTE(review): CallInst only, so an invoke's callee yields false here - confirm that is intended
+ return isCalleeOperand(CI, OpIdx);
+ return false;
+ }
+
+ Function *F;
+
+ /// The current instruction while iterating over all instructions.
+ Instruction *CurrentInst;
+
+ /// Roughly the number of parameters needed if this function would be
+ /// merged with the first function of the equivalence class.
+ int NumParamsNeeded;
+ };
+
+ using FunctionInfos = SmallVector<FunctionInfo, 8>;
+
+ /// Describes a parameter which we create to parameterize the merged function.
+ struct ParamInfo {
+ /// The value of the parameter for all the functions in the equivalence
+ /// class.
+ SmallVector<Constant *, 8> Values;
+
+ /// All uses of the parameter in the merged function.
+ SmallVector<OpLocation, 16> Uses;
+
+ /// The discriminator for pointer signing.
+ /// Only not null if needsPointerSigning is true.
+ ConstantInt *discriminator = nullptr;
+
+ /// True if the value is a callee function, which needs to be signed if
+ /// passed as a parameter.
+ bool needsPointerSigning = false;
----------------
fhahn wrote:
throughout the patch: LLVM style guide uses capitalized variable names.
https://github.com/llvm/llvm-project/pull/68235
More information about the llvm-commits
mailing list