[llvm] [CGData] Global Merge Functions (PR #112671)
Kyungwoo Lee via llvm-commits
llvm-commits at lists.llvm.org
Mon Nov 4 21:11:57 PST 2024
https://github.com/kyulee-com updated https://github.com/llvm/llvm-project/pull/112671
>From 4e2d83d8675799da1a5c6b57d285ecefc0285c71 Mon Sep 17 00:00:00 2001
From: Kyungwoo Lee <kyulee at meta.com>
Date: Fri, 30 Aug 2024 00:09:09 -0700
Subject: [PATCH 1/2] [CGData] Global Merge Functions
---
llvm/include/llvm/CGData/CodeGenData.h | 11 +
llvm/include/llvm/InitializePasses.h | 1 +
llvm/include/llvm/LinkAllPasses.h | 1 +
llvm/include/llvm/Transforms/IPO.h | 2 +
.../Transforms/IPO/GlobalMergeFunctions.h | 77 ++
llvm/lib/CodeGen/TargetPassConfig.cpp | 3 +
llvm/lib/LTO/LTO.cpp | 1 +
llvm/lib/Transforms/IPO/CMakeLists.txt | 2 +
.../Transforms/IPO/GlobalMergeFunctions.cpp | 747 ++++++++++++++++++
.../ThinLTO/AArch64/cgdata-merge-local.ll | 62 ++
.../test/ThinLTO/AArch64/cgdata-merge-read.ll | 82 ++
.../AArch64/cgdata-merge-two-rounds.ll | 68 ++
.../ThinLTO/AArch64/cgdata-merge-write.ll | 97 +++
llvm/tools/llvm-lto2/CMakeLists.txt | 1 +
llvm/tools/llvm-lto2/llvm-lto2.cpp | 6 +
15 files changed, 1161 insertions(+)
create mode 100644 llvm/include/llvm/Transforms/IPO/GlobalMergeFunctions.h
create mode 100644 llvm/lib/Transforms/IPO/GlobalMergeFunctions.cpp
create mode 100644 llvm/test/ThinLTO/AArch64/cgdata-merge-local.ll
create mode 100644 llvm/test/ThinLTO/AArch64/cgdata-merge-read.ll
create mode 100644 llvm/test/ThinLTO/AArch64/cgdata-merge-two-rounds.ll
create mode 100644 llvm/test/ThinLTO/AArch64/cgdata-merge-write.ll
diff --git a/llvm/include/llvm/CGData/CodeGenData.h b/llvm/include/llvm/CGData/CodeGenData.h
index 5d7c74725ccef1..da0e412f2a0e03 100644
--- a/llvm/include/llvm/CGData/CodeGenData.h
+++ b/llvm/include/llvm/CGData/CodeGenData.h
@@ -145,6 +145,9 @@ class CodeGenData {
const OutlinedHashTree *getOutlinedHashTree() {
return PublishedHashTree.get();
}
+ const StableFunctionMap *getStableFunctionMap() {
+ return PublishedStableFunctionMap.get();
+ }
/// Returns true if we should write codegen data.
bool emitCGData() { return EmitCGData; }
@@ -169,10 +172,18 @@ inline bool hasOutlinedHashTree() {
return CodeGenData::getInstance().hasOutlinedHashTree();
}
+inline bool hasStableFunctionMap() {
+ return CodeGenData::getInstance().hasStableFunctionMap();
+}
+
inline const OutlinedHashTree *getOutlinedHashTree() {
return CodeGenData::getInstance().getOutlinedHashTree();
}
+inline const StableFunctionMap *getStableFunctionMap() {
+ return CodeGenData::getInstance().getStableFunctionMap();
+}
+
inline bool emitCGData() { return CodeGenData::getInstance().emitCGData(); }
inline void
diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h
index 54c070401ec8a4..a43a96e90543cc 100644
--- a/llvm/include/llvm/InitializePasses.h
+++ b/llvm/include/llvm/InitializePasses.h
@@ -123,6 +123,7 @@ void initializeGCEmptyBasicBlocksPass(PassRegistry &);
void initializeGCMachineCodeAnalysisPass(PassRegistry &);
void initializeGCModuleInfoPass(PassRegistry &);
void initializeGVNLegacyPassPass(PassRegistry &);
+void initializeGlobalMergeFuncPass(PassRegistry &);
void initializeGlobalMergePass(PassRegistry &);
void initializeGlobalsAAWrapperPassPass(PassRegistry &);
void initializeHardwareLoopsLegacyPass(PassRegistry &);
diff --git a/llvm/include/llvm/LinkAllPasses.h b/llvm/include/llvm/LinkAllPasses.h
index 3516b47d29ef36..d1219d6ee2a13e 100644
--- a/llvm/include/llvm/LinkAllPasses.h
+++ b/llvm/include/llvm/LinkAllPasses.h
@@ -79,6 +79,7 @@ struct ForcePassLinking {
(void)llvm::createDomOnlyViewerWrapperPassPass();
(void)llvm::createDomViewerWrapperPassPass();
(void)llvm::createAlwaysInlinerLegacyPass();
+ (void)llvm::createGlobalMergeFuncPass();
(void)llvm::createGlobalsAAWrapperPass();
(void)llvm::createInstSimplifyLegacyPass();
(void)llvm::createInstructionCombiningPass();
diff --git a/llvm/include/llvm/Transforms/IPO.h b/llvm/include/llvm/Transforms/IPO.h
index ee0e35aa618325..86a8654f56997c 100644
--- a/llvm/include/llvm/Transforms/IPO.h
+++ b/llvm/include/llvm/Transforms/IPO.h
@@ -55,6 +55,8 @@ enum class PassSummaryAction {
Export, ///< Export information to summary.
};
+Pass *createGlobalMergeFuncPass();
+
} // End llvm namespace
#endif
diff --git a/llvm/include/llvm/Transforms/IPO/GlobalMergeFunctions.h b/llvm/include/llvm/Transforms/IPO/GlobalMergeFunctions.h
new file mode 100644
index 00000000000000..565be54f89e882
--- /dev/null
+++ b/llvm/include/llvm/Transforms/IPO/GlobalMergeFunctions.h
@@ -0,0 +1,77 @@
+//===------ GlobalMergeFunctions.h - Global merge functions -----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// This file defines global merge functions pass and related data structure.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef PIKA_TRANSFORMS_UTILS_GLOBALMERGEFUNCTIONS_H
+#define PIKA_TRANSFORMS_UTILS_GLOBALMERGEFUNCTIONS_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/StableHashing.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CGData/StableFunctionMap.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include <map>
+#include <mutex>
+
+enum class HashFunctionMode {
+ Local,
+ BuildingHashFuncion,
+ UsingHashFunction,
+};
+
+namespace llvm {
+
+// A vector of locations (the pair of (instruction, operand) indices) reachable
+// from a parameter.
+using ParamLocs = SmallVector<IndexPair, 4>;
+// A vector of parameters
+using ParamLocsVecTy = SmallVector<ParamLocs, 8>;
+// A map of stable hash to a vector of stable functions
+
+/// GlobalMergeFunc finds functions which only differ by constants in
+/// certain instructions, e.g. resulting from specialized functions of layout
+/// compatible types.
+/// Unlike PikaMergeFunc that directly compares IRs, this uses stable function
+/// hash to find the merge candidate. Similar to the global outliner, we can run
+/// codegen twice to collect function merge candidate in the first round, and
+/// merge functions globally in the second round.
+class GlobalMergeFunc : public ModulePass {
+ HashFunctionMode MergerMode = HashFunctionMode::Local;
+
+ std::unique_ptr<StableFunctionMap> LocalFunctionMap;
+
+public:
+ static char ID;
+
+ GlobalMergeFunc();
+
+ StringRef getPassName() const override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+ void initializeMergerMode(const Module &M);
+
+ bool runOnModule(Module &M) override;
+
+ /// Analyze module to create stable function into LocalFunctionMap.
+ void analyze(Module &M);
+
+ /// Emit LocalFunctionMap into __llvm_merge section.
+ void emitFunctionMap(Module &M);
+
+ /// Merge functions in the module using the global function map.
+ bool merge(Module &M, const StableFunctionMap *FunctionMap);
+};
+
+} // end namespace llvm
+#endif // PIKA_TRANSFORMS_UTILS_GLOBALMERGEFUNCTIONS_H
diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp
index aff74104006e5a..beeb8ea2d078d9 100644
--- a/llvm/lib/CodeGen/TargetPassConfig.cpp
+++ b/llvm/lib/CodeGen/TargetPassConfig.cpp
@@ -141,6 +141,9 @@ static cl::opt<RunOutliner> EnableMachineOutliner(
"Disable all outlining"),
// Sentinel value for unspecified option.
clEnumValN(RunOutliner::AlwaysOutline, "", "")));
+cl::opt<bool> EnableGlobalMergeFunc(
+ "enable-global-merge-func", cl::Hidden,
+ cl::desc("Enable global merge functions that are based on hash function"));
// Disable the pass to fix unwind information. Whether the pass is included in
// the pipeline is controlled via the target options, this option serves as
// manual override.
diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp
index 0f53c608512171..3f88c6c475d4d8 100644
--- a/llvm/lib/LTO/LTO.cpp
+++ b/llvm/lib/LTO/LTO.cpp
@@ -53,6 +53,7 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Transforms/IPO.h"
+#include "llvm/Transforms/IPO/GlobalMergeFunctions.h"
#include "llvm/Transforms/IPO/MemProfContextDisambiguation.h"
#include "llvm/Transforms/IPO/WholeProgramDevirt.h"
#include "llvm/Transforms/Utils/FunctionImportUtils.h"
diff --git a/llvm/lib/Transforms/IPO/CMakeLists.txt b/llvm/lib/Transforms/IPO/CMakeLists.txt
index 15cb57399d2460..6a36ed64149f93 100644
--- a/llvm/lib/Transforms/IPO/CMakeLists.txt
+++ b/llvm/lib/Transforms/IPO/CMakeLists.txt
@@ -21,6 +21,7 @@ add_llvm_component_library(LLVMipo
GlobalDCE.cpp
GlobalOpt.cpp
GlobalSplit.cpp
+ GlobalMergeFunctions.cpp
HotColdSplitting.cpp
IPO.cpp
IROutliner.cpp
@@ -60,6 +61,7 @@ add_llvm_component_library(LLVMipo
Analysis
BitReader
BitWriter
+ CGData
Core
FrontendOpenMP
InstCombine
diff --git a/llvm/lib/Transforms/IPO/GlobalMergeFunctions.cpp b/llvm/lib/Transforms/IPO/GlobalMergeFunctions.cpp
new file mode 100644
index 00000000000000..6fd1a66b5a197c
--- /dev/null
+++ b/llvm/lib/Transforms/IPO/GlobalMergeFunctions.cpp
@@ -0,0 +1,747 @@
+//===---- GlobalMergeFunctions.cpp - Global merge functions -------*- C++ -===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// TODO: This implements a function merge using function hash while tracking
+// differences in Constants. This uses stable function hash to find potential
+// merge candidates. The first codegen round collects stable function hashes,
+// and determines the merge candidates that match the stable function hashes.
+// The set of parameters pointing to different Constants are also computed
+// during the stable function merge. The second codegen round uses this global
+// function info to optimistically create a merged function in each module
+// context to guarantee correct transformation. Similar to the global outliner,
+// the linker's deduplication (ICF) folds the identical merged functions to save
+// the final binary size.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/IPO/GlobalMergeFunctions.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/ModuleSummaryAnalysis.h"
+#include "llvm/CGData/CodeGenData.h"
+#include "llvm/CGData/StableFunctionMap.h"
+#include "llvm/CodeGen/MachineStableHash.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/StructuralHash.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
+
+#define DEBUG_TYPE "global-merge-func"
+
+using namespace llvm;
+using namespace llvm::support;
+
+static cl::opt<bool>
+ DisableGlobalMerging("disable-global-merging", cl::Hidden,
+ cl::desc("Disable global merging only by ignoring "
+ "the codegen data generation or use. Local "
+ "merging is still enabled within a module."),
+ cl::init(false));
+static cl::opt<unsigned> GlobalMergingMinInstrs(
+ "global-merging-min-instrs",
+ cl::desc("The minimum instruction count required when merging functions."),
+ cl::init(1), cl::Hidden);
+static cl::opt<unsigned> GlobalMergingMaxParams(
+ "global-merging-max-params",
+ cl::desc(
+ "The maximum number of parameters allowed when merging functions."),
+ cl::init(std::numeric_limits<unsigned>::max()), cl::Hidden);
+static cl::opt<unsigned> GlobalMergingParamOverhead(
+ "global-merging-param-overhead",
+ cl::desc("The overhead cost associated with each parameter when merging "
+ "functions."),
+ cl::init(2), cl::Hidden);
+static cl::opt<unsigned>
+ GlobalMergingCallOverhead("global-merging-call-overhead",
+ cl::desc("The overhead cost associated with each "
+ "function call when merging functions."),
+ cl::init(1), cl::Hidden);
+static cl::opt<unsigned> GlobalMergingExtraThreshold(
+ "global-merging-extra-threshold",
+ cl::desc("An additional cost threshold that must be exceeded for merging "
+ "to be considered beneficial."),
+ cl::init(0), cl::Hidden);
+
+extern cl::opt<bool> EnableGlobalMergeFunc;
+
+STATISTIC(NumMismatchedFunctionHashGlobalMergeFunction,
+ "Number of mismatched function hash for global merge function");
+STATISTIC(NumMismatchedInstCountGlobalMergeFunction,
+ "Number of mismatched instruction count for global merge function");
+STATISTIC(NumMismatchedConstHashGlobalMergeFunction,
+ "Number of mismatched const hash for global merge function");
+STATISTIC(NumMismatchedModuleIdGlobalMergeFunction,
+ "Number of mismatched Module Id for global merge function");
+STATISTIC(NumGlobalMergeFunctions,
+ "Number of functions that are actually merged using function hash");
+STATISTIC(NumAnalyzedModues, "Number of modules that are analyzed");
+STATISTIC(NumAnalyzedFunctions, "Number of functions that are analyzed");
+STATISTIC(NumEligibleFunctions, "Number of functions that are eligible");
+
+/// Returns true if the \opIdx operand of \p CI is the callee operand.
+static bool isCalleeOperand(const CallBase *CI, unsigned OpIdx) {
+ return &CI->getCalledOperandUse() == &CI->getOperandUse(OpIdx);
+}
+
+static bool canParameterizeCallOperand(const CallBase *CI, unsigned OpIdx) {
+ if (CI->isInlineAsm())
+ return false;
+ Function *Callee = CI->getCalledOperand()
+ ? dyn_cast_or_null<Function>(
+ CI->getCalledOperand()->stripPointerCasts())
+ : nullptr;
+ if (Callee) {
+ if (Callee->isIntrinsic())
+ return false;
+ // objc_msgSend stubs must be called, and can't have their address taken.
+ if (Callee->getName().starts_with("objc_msgSend$"))
+ return false;
+ }
+ if (isCalleeOperand(CI, OpIdx) &&
+ CI->getOperandBundle(LLVMContext::OB_ptrauth).has_value()) {
+ // The operand is the callee and it has already been signed. Ignore this
+ // because we cannot add another ptrauth bundle to the call instruction.
+ return false;
+ }
+ return true;
+}
+
+bool isEligibleInstrunctionForConstantSharing(const Instruction *I) {
+ switch (I->getOpcode()) {
+ case Instruction::Load:
+ case Instruction::Store:
+ case Instruction::Call:
+ case Instruction::Invoke:
+ return true;
+ default:
+ return false;
+ }
+}
+
+bool isEligibleOperandForConstantSharing(const Instruction *I, unsigned OpIdx) {
+ assert(OpIdx < I->getNumOperands() && "Invalid operand index");
+
+ if (!isEligibleInstrunctionForConstantSharing(I))
+ return false;
+
+ auto Opnd = I->getOperand(OpIdx);
+ if (!isa<Constant>(Opnd))
+ return false;
+
+ if (const auto *CI = dyn_cast<CallBase>(I))
+ return canParameterizeCallOperand(CI, OpIdx);
+
+ return true;
+}
+
+/// Returns true if function \p F is eligible for merging.
+bool isEligibleFunction(Function *F) {
+ if (F->isDeclaration())
+ return false;
+
+ if (F->hasFnAttribute(llvm::Attribute::NoMerge))
+ return false;
+
+ if (F->hasAvailableExternallyLinkage()) {
+ return false;
+ }
+
+ if (F->getFunctionType()->isVarArg()) {
+ return false;
+ }
+
+ if (F->getCallingConv() == CallingConv::SwiftTail)
+ return false;
+
+ // if function contains callsites with musttail, if we merge
+ // it, the merged function will have the musttail callsite, but
+ // the number of parameters can change, thus the parameter count
+ // of the callsite will mismatch with the function itself.
+ // if (IgnoreMusttailFunction) {
+ for (const BasicBlock &BB : *F) {
+ for (const Instruction &I : BB) {
+ const auto *CB = dyn_cast<CallBase>(&I);
+ if (CB && CB->isMustTailCall())
+ return false;
+ }
+ }
+
+ return true;
+}
+
+static bool
+isEligibleInstrunctionForConstantSharingLocal(const Instruction *I) {
+ switch (I->getOpcode()) {
+ case Instruction::Load:
+ case Instruction::Store:
+ case Instruction::Call:
+ case Instruction::Invoke:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static bool ignoreOp(const Instruction *I, unsigned OpIdx) {
+ assert(OpIdx < I->getNumOperands() && "Invalid operand index");
+
+ if (!isEligibleInstrunctionForConstantSharingLocal(I))
+ return false;
+
+ if (!isa<Constant>(I->getOperand(OpIdx)))
+ return false;
+
+ if (const auto *CI = dyn_cast<CallBase>(I))
+ return canParameterizeCallOperand(CI, OpIdx);
+
+ return true;
+}
+
+// copy from merge functions.cpp
+static Value *createCast(IRBuilder<> &Builder, Value *V, Type *DestTy) {
+ Type *SrcTy = V->getType();
+ if (SrcTy->isStructTy()) {
+ assert(DestTy->isStructTy());
+ assert(SrcTy->getStructNumElements() == DestTy->getStructNumElements());
+ Value *Result = PoisonValue::get(DestTy);
+ for (unsigned int I = 0, E = SrcTy->getStructNumElements(); I < E; ++I) {
+ Value *Element =
+ createCast(Builder, Builder.CreateExtractValue(V, ArrayRef(I)),
+ DestTy->getStructElementType(I));
+
+ Result = Builder.CreateInsertValue(Result, Element, ArrayRef(I));
+ }
+ return Result;
+ }
+ assert(!DestTy->isStructTy());
+ if (auto *SrcAT = dyn_cast<ArrayType>(SrcTy)) {
+ auto *DestAT = dyn_cast<ArrayType>(DestTy);
+ assert(DestAT);
+ assert(SrcAT->getNumElements() == DestAT->getNumElements());
+ Value *Result = UndefValue::get(DestTy);
+ for (unsigned int I = 0, E = SrcAT->getNumElements(); I < E; ++I) {
+ Value *Element =
+ createCast(Builder, Builder.CreateExtractValue(V, ArrayRef(I)),
+ DestAT->getElementType());
+
+ Result = Builder.CreateInsertValue(Result, Element, ArrayRef(I));
+ }
+ return Result;
+ }
+ assert(!DestTy->isArrayTy());
+ if (SrcTy->isIntegerTy() && DestTy->isPointerTy())
+ return Builder.CreateIntToPtr(V, DestTy);
+ else if (SrcTy->isPointerTy() && DestTy->isIntegerTy())
+ return Builder.CreatePtrToInt(V, DestTy);
+ else
+ return Builder.CreateBitCast(V, DestTy);
+}
+
+void GlobalMergeFunc::analyze(Module &M) {
+ ++NumAnalyzedModues;
+ for (Function &Func : M) {
+ ++NumAnalyzedFunctions;
+ if (isEligibleFunction(&Func)) {
+ ++NumEligibleFunctions;
+
+ auto FI = llvm::StructuralHashWithDifferences(Func, ignoreOp);
+
+ // Convert the map to a vector for a serialization-friendly format.
+ IndexOperandHashVecType IndexOperandHashes;
+ for (auto &Pair : *FI.IndexOperandHashMap)
+ IndexOperandHashes.emplace_back(Pair);
+
+ StableFunction SF(FI.FunctionHash, get_stable_name(Func.getName()).str(),
+ M.getModuleIdentifier(), FI.IndexInstruction->size(),
+ std::move(IndexOperandHashes));
+
+ LocalFunctionMap->insert(SF);
+ }
+ }
+}
+
+/// Tuple to hold function info to process merging.
+struct FuncMergeInfo {
+ StableFunctionMap::StableFunctionEntry *SF;
+ Function *F;
+ std::unique_ptr<IndexInstrMap> IndexInstruction;
+};
+
+// Given the func info, and the parameterized locations, create and return
+// a new merged function by replacing the original constants with the new
+// parameters.
+static Function *createMergedFunction(FuncMergeInfo &FI,
+ ArrayRef<Type *> ConstParamTypes,
+ const ParamLocsVecTy &ParamLocsVec) {
+ // Synthesize a new merged function name by appending ".Tgm" to the root
+ // function's name.
+ auto *MergedFunc = FI.F;
+ auto NewFunctionName = MergedFunc->getName().str() + ".Tgm";
+ auto *M = MergedFunc->getParent();
+ assert(!M->getFunction(NewFunctionName));
+
+ FunctionType *OrigTy = MergedFunc->getFunctionType();
+ // Get the original params' types.
+ SmallVector<Type *> ParamTypes(OrigTy->param_begin(), OrigTy->param_end());
+ // Append const parameter types that are passed in.
+ ParamTypes.append(ConstParamTypes.begin(), ConstParamTypes.end());
+ FunctionType *FuncType =
+ FunctionType::get(OrigTy->getReturnType(), ParamTypes, false);
+
+ // Declare a new function
+ Function *NewFunction =
+ Function::Create(FuncType, MergedFunc->getLinkage(), NewFunctionName);
+ if (auto *SP = MergedFunc->getSubprogram())
+ NewFunction->setSubprogram(SP);
+ NewFunction->copyAttributesFrom(MergedFunc);
+ NewFunction->setDLLStorageClass(GlobalValue::DefaultStorageClass);
+
+ NewFunction->setLinkage(GlobalValue::InternalLinkage);
+ NewFunction->addFnAttr(Attribute::NoInline);
+
+ // Add the new function before the root function.
+ M->getFunctionList().insert(MergedFunc->getIterator(), NewFunction);
+
+ // Move the body of MergedFunc into the NewFunction.
+ NewFunction->splice(NewFunction->begin(), MergedFunc);
+
+ // Update the original args by the new args.
+ auto NewArgIter = NewFunction->arg_begin();
+ for (Argument &OrigArg : MergedFunc->args()) {
+ Argument &NewArg = *NewArgIter++;
+ OrigArg.replaceAllUsesWith(&NewArg);
+ }
+
+ // Replace the original Constants by the new args.
+ unsigned NumOrigArgs = MergedFunc->arg_size();
+ for (unsigned ParamIdx = 0; ParamIdx < ParamLocsVec.size(); ++ParamIdx) {
+ Argument *NewArg = NewFunction->getArg(NumOrigArgs + ParamIdx);
+ for (auto [InstIndex, OpndIndex] : ParamLocsVec[ParamIdx]) {
+ auto *Inst = FI.IndexInstruction->lookup(InstIndex);
+ auto *OrigC = Inst->getOperand(OpndIndex);
+ if (OrigC->getType() != NewArg->getType()) {
+ IRBuilder<> Builder(Inst->getParent(), Inst->getIterator());
+ Inst->setOperand(OpndIndex,
+ createCast(Builder, NewArg, OrigC->getType()));
+ } else
+ Inst->setOperand(OpndIndex, NewArg);
+ }
+ }
+
+ return NewFunction;
+}
+
+// Given the original function (Thunk) and the merged function (ToFunc), create
+// a thunk to the merged function.
+
+static void createThunk(FuncMergeInfo &FI, ArrayRef<Constant *> Params,
+ Function *ToFunc) {
+ auto *Thunk = FI.F;
+
+ assert(Thunk->arg_size() + Params.size() ==
+ ToFunc->getFunctionType()->getNumParams());
+ Thunk->dropAllReferences();
+
+ BasicBlock *BB = BasicBlock::Create(Thunk->getContext(), "", Thunk);
+ IRBuilder<> Builder(BB);
+
+ SmallVector<Value *> Args;
+ unsigned ParamIdx = 0;
+ FunctionType *ToFuncTy = ToFunc->getFunctionType();
+
+ // Add arguments which are passed through Thunk.
+ for (Argument &AI : Thunk->args()) {
+ Args.push_back(createCast(Builder, &AI, ToFuncTy->getParamType(ParamIdx)));
+ ++ParamIdx;
+ }
+
+ // Add new arguments defined by Params.
+ for (auto *Param : Params) {
+ assert(ParamIdx < ToFuncTy->getNumParams());
+ // FIXME: do not support signing
+ Args.push_back(
+ createCast(Builder, Param, ToFuncTy->getParamType(ParamIdx)));
+ ++ParamIdx;
+ }
+
+ CallInst *CI = Builder.CreateCall(ToFunc, Args);
+ bool isSwiftTailCall = ToFunc->getCallingConv() == CallingConv::SwiftTail &&
+ Thunk->getCallingConv() == CallingConv::SwiftTail;
+ CI->setTailCallKind(isSwiftTailCall ? llvm::CallInst::TCK_MustTail
+ : llvm::CallInst::TCK_Tail);
+ CI->setCallingConv(ToFunc->getCallingConv());
+ CI->setAttributes(ToFunc->getAttributes());
+ if (Thunk->getReturnType()->isVoidTy())
+ Builder.CreateRetVoid();
+ else
+ Builder.CreateRet(createCast(Builder, CI, Thunk->getReturnType()));
+}
+
+// Check if the old merged/optimized IndexOperandHashMap is compatible with
+// the current IndexOperandHashMap. An operand hash may not be stable across
+// different builds due to varying modules combined. To address this, we relax
+// the hash check condition by comparing Const hash patterns instead of absolute
+// hash values. For example, let's assume we have three Consts located at idx1,
+// idx3, and idx6, where their corresponding hashes are hash1, hash2, and hash1
+// in the old merged map below:
+// Old (Merged): [(idx1, hash1), (idx3, hash2), (idx6, hash1)]
+// Current: [(idx1, hash1'), (idx3, hash2'), (idx6, hash1')]
+// If the current function also has three Consts in the same locations,
+// with hash sequences hash1', hash2', and hash1' where the first and third
+// are the same as the old hash sequences, we consider them matched.
+static bool checkConstHashCompatible(
+ const DenseMap<IndexPair, stable_hash> &OldInstOpndIndexToConstHash,
+ const DenseMap<IndexPair, stable_hash> &CurrInstOpndIndexToConstHash) {
+
+ DenseMap<stable_hash, stable_hash> OldHashToCurrHash;
+ for (const auto &[Index, OldHash] : OldInstOpndIndexToConstHash) {
+ auto It = CurrInstOpndIndexToConstHash.find(Index);
+ if (It == CurrInstOpndIndexToConstHash.end())
+ return false;
+
+ auto CurrHash = It->second;
+ auto J = OldHashToCurrHash.find(OldHash);
+ if (J == OldHashToCurrHash.end())
+ OldHashToCurrHash.insert({OldHash, CurrHash});
+ else if (J->second != CurrHash)
+ return false;
+ }
+
+ return true;
+}
+
+// Validate the locations pointed by a param has the same hash and Constant.
+static bool
+checkConstLocationCompatible(const StableFunctionMap::StableFunctionEntry &SF,
+ const IndexInstrMap &IndexInstruction,
+ const ParamLocsVecTy &ParamLocsVec) {
+ for (auto &ParamLocs : ParamLocsVec) {
+ std::optional<stable_hash> OldHash;
+ std::optional<Constant *> OldConst;
+ for (auto &Loc : ParamLocs) {
+ assert(SF.IndexOperandHashMap->count(Loc));
+ auto CurrHash = SF.IndexOperandHashMap.get()->at(Loc);
+ auto [InstIndex, OpndIndex] = Loc;
+ assert(InstIndex < IndexInstruction.size());
+ const auto *Inst = IndexInstruction.lookup(InstIndex);
+ auto *CurrConst = cast<Constant>(Inst->getOperand(OpndIndex));
+ if (!OldHash) {
+ OldHash = CurrHash;
+ OldConst = CurrConst;
+ } else if (CurrConst != *OldConst || CurrHash != *OldHash)
+ return false;
+ }
+ }
+ return true;
+}
+
+static ParamLocsVecTy computeParamInfo(
+ const SmallVector<std::unique_ptr<StableFunctionMap::StableFunctionEntry>>
+ &SFS) {
+ std::map<std::vector<stable_hash>, ParamLocs> HashSeqToLocs;
+ auto &RSF = *SFS[0];
+ unsigned StableFunctionCount = SFS.size();
+
+ for (auto &[IndexPair, Hash] : *RSF.IndexOperandHashMap) {
+ // Const hash sequence across stable functions.
+ // We will allocate a parameter per unique hash squence.
+ // can't use SmallVector as key
+ std::vector<stable_hash> ConstHashSeq;
+ ConstHashSeq.push_back(Hash);
+ bool Identical = true;
+ for (unsigned J = 1; J < StableFunctionCount; ++J) {
+ auto &SF = SFS[J];
+ assert(SF->IndexOperandHashMap->count(IndexPair));
+ auto SHash = (*SF->IndexOperandHashMap)[IndexPair];
+ if (Hash != SHash)
+ Identical = false;
+ ConstHashSeq.push_back(SHash);
+ }
+
+ if (Identical)
+ continue;
+
+ // For each unique Const hash sequence (parameter), add the locations.
+ HashSeqToLocs[ConstHashSeq].push_back(IndexPair);
+ }
+
+ ParamLocsVecTy ParamLocsVec;
+ for (auto &[HashSeq, Locs] : HashSeqToLocs) {
+ ParamLocsVec.push_back(std::move(Locs));
+ std::sort(
+ ParamLocsVec.begin(), ParamLocsVec.end(),
+ [&](const ParamLocs &L, const ParamLocs &R) { return L[0] < R[0]; });
+ }
+ return ParamLocsVec;
+}
+
+static bool isProfitable(
+ const SmallVector<std::unique_ptr<StableFunctionMap::StableFunctionEntry>>
+ &SFS,
+ const Function *F) {
+ // No interest if the number of candidates are less than 2.
+ unsigned StableFunctionCount = SFS.size();
+ if (StableFunctionCount < 2)
+ return false;
+
+ unsigned InstCount = SFS[0]->InstCount;
+ if (InstCount < GlobalMergingMinInstrs)
+ return false;
+
+ unsigned ParamCount = SFS[0]->IndexOperandHashMap->size();
+ unsigned TotalParamCount = ParamCount + F->getFunctionType()->getNumParams();
+ if (TotalParamCount > GlobalMergingMaxParams)
+ return false;
+
+ unsigned Benefit = InstCount * (StableFunctionCount - 1);
+ unsigned Cost =
+ (GlobalMergingParamOverhead * ParamCount + GlobalMergingCallOverhead) *
+ StableFunctionCount +
+ GlobalMergingExtraThreshold;
+
+ bool Result = Benefit > Cost;
+ LLVM_DEBUG(dbgs() << "isProfitable: Function = " << F->getName() << ", "
+ << "StableFunctionCount = " << StableFunctionCount
+ << ", InstCount = " << InstCount
+ << ", ParamCount = " << ParamCount
+ << ", Benefit = " << Benefit << ", Cost = " << Cost
+ << ", Result = " << (Result ? "true" : "false") << "\n");
+ return Result;
+}
+
+bool GlobalMergeFunc::merge(Module &M, const StableFunctionMap *FunctionMap) {
+ bool Changed = false;
+
+ // Build a map from stable function name to function.
+ StringMap<Function *> StableNameToFuncMap;
+ for (auto &F : M)
+ StableNameToFuncMap[get_stable_name(F.getName())] = &F;
+ // Track merged functions
+ DenseSet<Function *> MergedFunctions;
+
+ auto ModId = M.getModuleIdentifier();
+ for (auto &[Hash, SFS] : FunctionMap->getFunctionMap()) {
+ // Compute the parameter locations based on the unique hash sequences
+ // across the candidates.
+ auto ParamLocsVec = computeParamInfo(SFS);
+ LLVM_DEBUG({
+ dbgs() << "[GlobalMergeFunc] Merging hash: " << Hash << " with Params "
+ << ParamLocsVec.size() << "\n";
+ });
+
+ Function *MergedFunc = nullptr;
+ std::string MergedModId;
+ SmallVector<FuncMergeInfo> FuncMergeInfos;
+ for (auto &SF : SFS) {
+ // Get the function from the stable name.
+ auto I = StableNameToFuncMap.find(
+ *FunctionMap->getNameForId(SF->FunctionNameId));
+ if (I == StableNameToFuncMap.end())
+ continue;
+ Function *F = I->second;
+ assert(F);
+ // Skip if the function has been merged before.
+ if (MergedFunctions.count(F))
+ continue;
+ // Consider the function if it is eligible for merging.
+ if (!isEligibleFunction(F))
+ continue;
+
+ auto FI = llvm::StructuralHashWithDifferences(*F, ignoreOp);
+ uint64_t FuncHash = FI.FunctionHash;
+ if (Hash != FuncHash) {
+ ++NumMismatchedFunctionHashGlobalMergeFunction;
+ continue;
+ }
+
+ if (SF->InstCount != FI.IndexInstruction->size()) {
+ ++NumMismatchedInstCountGlobalMergeFunction;
+ continue;
+ }
+ bool HasValidSharedConst = true;
+ for (auto &[Index, Hash] : *SF->IndexOperandHashMap) {
+ auto [InstIndex, OpndIndex] = Index;
+ assert(InstIndex < FI.IndexInstruction->size());
+ auto *Inst = FI.IndexInstruction->lookup(InstIndex);
+ if (!isEligibleOperandForConstantSharing(Inst, OpndIndex)) {
+ HasValidSharedConst = false;
+ break;
+ }
+ }
+ if (!HasValidSharedConst) {
+ ++NumMismatchedConstHashGlobalMergeFunction;
+ continue;
+ }
+ if (!checkConstHashCompatible(*SF->IndexOperandHashMap,
+ *FI.IndexOperandHashMap)) {
+ ++NumMismatchedConstHashGlobalMergeFunction;
+ continue;
+ }
+ if (!checkConstLocationCompatible(*SF, *FI.IndexInstruction,
+ ParamLocsVec)) {
+ ++NumMismatchedConstHashGlobalMergeFunction;
+ continue;
+ }
+
+ if (!isProfitable(SFS, F))
+ break;
+
+ if (MergedFunc) {
+ // Check if the matched functions fall into the same (first) module.
+ // This module check is not strictly necessary as the functions can move
+ // around. We just want to avoid merging functions from different
+ // modules than the first one in the functon map, as they may not end up
+ // with not being ICFed.
+ if (MergedModId != *FunctionMap->getNameForId(SF->ModuleNameId)) {
+ ++NumMismatchedModuleIdGlobalMergeFunction;
+ continue;
+ }
+ } else {
+ MergedFunc = F;
+ MergedModId = *FunctionMap->getNameForId(SF->ModuleNameId);
+ }
+
+ FuncMergeInfos.push_back({SF.get(), F, std::move(FI.IndexInstruction)});
+ MergedFunctions.insert(F);
+ }
+ unsigned FuncMergeInfoSize = FuncMergeInfos.size();
+ if (FuncMergeInfoSize == 0)
+ continue;
+
+ LLVM_DEBUG({
+ dbgs() << "[GlobalMergeFunc] Merging function count " << FuncMergeInfoSize
+ << " in " << ModId << "\n";
+ });
+ for (auto &FMI : FuncMergeInfos) {
+ Changed = true;
+
+ // We've already validated all locations of constant operands pointed by
+ // the parameters. Just use the first one to bookkeep the original
+ // constants for each parameter
+ SmallVector<Constant *> Params;
+ SmallVector<Type *> ParamTypes;
+ for (auto &ParamLocs : ParamLocsVec) {
+ assert(!ParamLocs.empty());
+ auto &[InstIndex, OpndIndex] = ParamLocs[0];
+ auto *Inst = FMI.IndexInstruction->lookup(InstIndex);
+ auto *Opnd = cast<Constant>(Inst->getOperand(OpndIndex));
+ Params.push_back(Opnd);
+ ParamTypes.push_back(Opnd->getType());
+ }
+
+ // Create a merged function derived from the first function in the current
+ // module context.
+ Function *MergedFunc =
+ createMergedFunction(FMI, ParamTypes, ParamLocsVec);
+
+ LLVM_DEBUG({
+ dbgs() << "[GlobalMergeFunc] Merged function (hash:" << FMI.SF->Hash
+ << ") " << MergedFunc->getName() << " generated from "
+ << FMI.F->getName() << ":\n";
+ MergedFunc->dump();
+ });
+
+ // Create a thunk to the merged function.
+ createThunk(FMI, Params, MergedFunc);
+ LLVM_DEBUG({
+ dbgs() << "[GlobalMergeFunc] Thunk generated: \n";
+ FMI.F->dump();
+ });
+ ++NumGlobalMergeFunctions;
+ }
+ }
+
+ return Changed;
+}
+
+char GlobalMergeFunc::ID = 0;
+INITIALIZE_PASS_BEGIN(GlobalMergeFunc, "global-merge-func",
+ "Global merge function pass", false, false)
+INITIALIZE_PASS_END(GlobalMergeFunc, "global-merge-func",
+ "Global merge function pass", false, false)
+
+StringRef GlobalMergeFunc::getPassName() const {
+ return "Global Merge Functions";
+}
+
+void GlobalMergeFunc::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addUsedIfAvailable<ImmutableModuleSummaryIndexWrapperPass>();
+ AU.setPreservesAll();
+ ModulePass::getAnalysisUsage(AU);
+}
+
+GlobalMergeFunc::GlobalMergeFunc() : ModulePass(ID) {
+ initializeGlobalMergeFuncPass(*llvm::PassRegistry::getPassRegistry());
+}
+
+namespace llvm {
+Pass *createGlobalMergeFuncPass() { return new GlobalMergeFunc(); }
+} // namespace llvm
+
+void GlobalMergeFunc::initializeMergerMode(const Module &M) {
+ LocalFunctionMap = std::make_unique<StableFunctionMap>();
+
+ if (DisableGlobalMerging)
+ return;
+
+ if (auto *IndexWrapperPass =
+ getAnalysisIfAvailable<ImmutableModuleSummaryIndexWrapperPass>()) {
+ auto *TheIndex = IndexWrapperPass->getIndex();
+ // (Full)LTO module does not have functions added to the index.
+ // In this case, we run a local merger without using codegen data.
+ if (TheIndex && !TheIndex->hasExportedFunctions(M))
+ return;
+ }
+
+ if (cgdata::emitCGData())
+ MergerMode = HashFunctionMode::BuildingHashFuncion;
+ else if (cgdata::hasStableFunctionMap())
+ MergerMode = HashFunctionMode::UsingHashFunction;
+}
+
+void GlobalMergeFunc::emitFunctionMap(Module &M) {
+ LLVM_DEBUG({
+ dbgs() << "Emit function map. Size: " << LocalFunctionMap->size() << "\n";
+ });
+ // No need to emit the function map if it is empty.
+ if (LocalFunctionMap->empty())
+ return;
+ SmallVector<char> Buf;
+ raw_svector_ostream OS(Buf);
+
+ StableFunctionMapRecord::serialize(OS, LocalFunctionMap.get());
+
+ std::unique_ptr<MemoryBuffer> Buffer = MemoryBuffer::getMemBuffer(
+ OS.str(), "in-memory stable function map", false);
+
+ Triple TT(M.getTargetTriple());
+ embedBufferInModule(M, *Buffer.get(),
+ getCodeGenDataSectionName(CG_merge, TT.getObjectFormat()),
+ Align(4));
+}
+
+bool GlobalMergeFunc::runOnModule(Module &M) {
+ initializeMergerMode(M);
+
+ const StableFunctionMap *FuncMap;
+ if (MergerMode == HashFunctionMode::UsingHashFunction) {
+ // Use the prior CG data to optimistically create global merge candidates.
+ FuncMap = cgdata::getStableFunctionMap();
+ } else {
+ analyze(M);
+ // Emit the local function map to the custom section, __llvm_merge before
+ // finalizing it.
+ if (MergerMode == HashFunctionMode::BuildingHashFuncion)
+ emitFunctionMap(M);
+ LocalFunctionMap->finalize();
+ FuncMap = LocalFunctionMap.get();
+ }
+
+ return merge(M, FuncMap);
+}
diff --git a/llvm/test/ThinLTO/AArch64/cgdata-merge-local.ll b/llvm/test/ThinLTO/AArch64/cgdata-merge-local.ll
new file mode 100644
index 00000000000000..4f5f5c0b5b26d9
--- /dev/null
+++ b/llvm/test/ThinLTO/AArch64/cgdata-merge-local.ll
@@ -0,0 +1,62 @@
+; This test checks if two similar functions, f1 and f2, can be merged locally within a single module
+; while parameterizing a difference in their global variables, g1 and g2.
+; To achieve this, we create two instances of the global merging function, f1.Tgm and f2.Tgm,
+; which are tail-called from thunks g1 and g2 respectively.
+; These identical functions, f1.Tgm and f2.Tgm, will be folded by the linker via Identical Code Folding (IFC).
+
+; RUN: opt -module-summary -module-hash %s -o %t
+
+; RUN: llvm-lto2 run -enable-global-merge-func=false %t -o %tout-nomerge \
+; RUN: -r %t,_f1,px \
+; RUN: -r %t,_f2,px \
+; RUN: -r %t,_g,l -r %t,_g1,l -r %t,_g2,l
+; RUN: llvm-nm %tout-nomerge.1 | FileCheck %s --check-prefix=NOMERGE
+; RUN: llvm-lto2 run -enable-global-merge-func=true %t -o %tout-merge \
+; RUN: -r %t,_f1,px \
+; RUN: -r %t,_f2,px \
+; RUN: -r %t,_g,l -r %t,_g1,l -r %t,_g2,l
+; RUN: llvm-nm %tout-merge.1 | FileCheck %s --check-prefix=GLOBALMERGE
+; RUN: llvm-objdump -d %tout-merge.1 | FileCheck %s --check-prefix=THUNK
+
+; NOMERGE-NOT: _f1.Tgm
+; GLOBALMERGE: _f1.Tgm
+; GLOBALMERGE: _f2.Tgm
+
+; THUNK: <_f1>:
+; THUNK-NEXT: adrp x1,
+; THUNK-NEXT: ldr x1, [x1]
+; THUNK-NEXT: b
+
+; THUNK: <_f2>:
+; THUNK-NEXT: adrp x1,
+; THUNK-NEXT: ldr x1, [x1]
+; THUNK-NEXT: b
+
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+target triple = "arm64-unknown-ios12.0.0"
+
+ at g = external local_unnamed_addr global [0 x i32], align 4
+ at g1 = external global i32, align 4
+ at g2 = external global i32, align 4
+
+define i32 @f1(i32 %a) {
+entry:
+ %idxprom = sext i32 %a to i64
+ %arrayidx = getelementptr inbounds [0 x i32], [0 x i32]* @g, i64 0, i64 %idxprom
+ %0 = load i32, i32* %arrayidx, align 4
+ %1 = load volatile i32, i32* @g1, align 4
+ %mul = mul nsw i32 %1, %0
+ %add = add nsw i32 %mul, 1
+ ret i32 %add
+}
+
+define i32 @f2(i32 %a) {
+entry:
+ %idxprom = sext i32 %a to i64
+ %arrayidx = getelementptr inbounds [0 x i32], [0 x i32]* @g, i64 0, i64 %idxprom
+ %0 = load i32, i32* %arrayidx, align 4
+ %1 = load volatile i32, i32* @g2, align 4
+ %mul = mul nsw i32 %1, %0
+ %add = add nsw i32 %mul, 1
+ ret i32 %add
+}
diff --git a/llvm/test/ThinLTO/AArch64/cgdata-merge-read.ll b/llvm/test/ThinLTO/AArch64/cgdata-merge-read.ll
new file mode 100644
index 00000000000000..da756e7f15e68e
--- /dev/null
+++ b/llvm/test/ThinLTO/AArch64/cgdata-merge-read.ll
@@ -0,0 +1,82 @@
+; This test demonstrates how similar functions are handled during global outlining.
+; Currently, we do not attempt to share an merged function for identical sequences.
+; Instead, each merging instance is created uniquely.
+
+; RUN: rm -rf %t; split-file %s %t
+
+; RUN: opt -module-summary -module-hash %t/foo.ll -o %t-foo.bc
+; RUN: opt -module-summary -module-hash %t/goo.ll -o %t-goo.bc
+
+; First, run with -codegen-data-generate=true to generate the cgdata in the object files.
+; Using llvm-cgdata, merge the cg data.
+; RUN: llvm-lto2 run -enable-global-merge-func=true -codegen-data-generate=true %t-foo.bc %t-goo.bc -o %tout-write \
+; RUN: -r %t-foo.bc,_f1,px \
+; RUN: -r %t-goo.bc,_f2,px \
+; RUN: -r %t-foo.bc,_g,l -r %t-foo.bc,_g1,l -r %t-foo.bc,_g2,l \
+; RUN: -r %t-goo.bc,_g,l -r %t-goo.bc,_g1,l -r %t-goo.bc,_g2,l
+; RUN: llvm-cgdata --merge -o %tout.cgdata %tout-write.1 %tout-write.2
+
+; Now run with -codegen-data-use-path=%tout.cgdata to optimize the binary.
+; Each module has its own merging instance as it is matched against the merged cgdata.
+; RUN: llvm-lto2 run -enable-global-merge-func=true \
+; RUN: -codegen-data-use-path=%tout.cgdata \
+; RUN: %t-foo.bc %t-goo.bc -o %tout-read \
+; RUN: -r %t-foo.bc,_f1,px \
+; RUN: -r %t-goo.bc,_f2,px \
+; RUN: -r %t-foo.bc,_g,l -r %t-foo.bc,_g1,l -r %t-foo.bc,_g2,l \
+; RUN: -r %t-goo.bc,_g,l -r %t-goo.bc,_g1,l -r %t-goo.bc,_g2,l
+; RUN: llvm-nm %tout-read.1 | FileCheck %s --check-prefix=READ1
+; RUN: llvm-nm %tout-read.2 | FileCheck %s --check-prefix=READ2
+; RUN: llvm-objdump -d %tout-read.1 | FileCheck %s --check-prefix=THUNK1
+; RUN: llvm-objdump -d %tout-read.2 | FileCheck %s --check-prefix=THUNK2
+
+; READ1: _f1.Tgm
+; READ2: _f2.Tgm
+
+; THUNK1: <_f1>:
+; THUNK1-NEXT: adrp x1,
+; THUNK1-NEXT: ldr x1, [x1]
+; THUNK1-NEXT: b
+
+; THUNK2: <_f2>:
+; THUNK2-NEXT: adrp x1,
+; THUNK2-NEXT: ldr x1, [x1]
+; THUNK2-NEXT: b
+
+;--- foo.ll
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+target triple = "arm64-unknown-ios12.0.0"
+
+ at g = external local_unnamed_addr global [0 x i32], align 4
+ at g1 = external global i32, align 4
+ at g2 = external global i32, align 4
+
+define i32 @f1(i32 %a) {
+entry:
+ %idxprom = sext i32 %a to i64
+ %arrayidx = getelementptr inbounds [0 x i32], [0 x i32]* @g, i64 0, i64 %idxprom
+ %0 = load i32, i32* %arrayidx, align 4
+ %1 = load volatile i32, i32* @g1, align 4
+ %mul = mul nsw i32 %1, %0
+ %add = add nsw i32 %mul, 1
+ ret i32 %add
+}
+
+;--- goo.ll
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+target triple = "arm64-unknown-ios12.0.0"
+
+ at g = external local_unnamed_addr global [0 x i32], align 4
+ at g1 = external global i32, align 4
+ at g2 = external global i32, align 4
+
+define i32 @f2(i32 %a) {
+entry:
+ %idxprom = sext i32 %a to i64
+ %arrayidx = getelementptr inbounds [0 x i32], [0 x i32]* @g, i64 0, i64 %idxprom
+ %0 = load i32, i32* %arrayidx, align 4
+ %1 = load volatile i32, i32* @g2, align 4
+ %mul = mul nsw i32 %1, %0
+ %add = add nsw i32 %mul, 1
+ ret i32 %add
+}
diff --git a/llvm/test/ThinLTO/AArch64/cgdata-merge-two-rounds.ll b/llvm/test/ThinLTO/AArch64/cgdata-merge-two-rounds.ll
new file mode 100644
index 00000000000000..06880e3d268189
--- /dev/null
+++ b/llvm/test/ThinLTO/AArch64/cgdata-merge-two-rounds.ll
@@ -0,0 +1,68 @@
+; TODO: This test checks if the how similar functions are handled during global outlining
+; by repeating the codegen via -codegen-data-thinlto-two-rounds=true.
+
+; RUN: rm -rf %t; split-file %s %t
+
+; RUN: opt -module-summary -module-hash %t/foo.ll -o %t-foo.bc
+; RUN: opt -module-summary -module-hash %t/goo.ll -o %t-goo.bc
+
+; RUN: llvm-lto2 run -enable-global-merge-func=true -codegen-data-thinlto-two-rounds=true %t-foo.bc %t-goo.bc -o %tout \
+; RUN: -r %t-foo.bc,_f1,px \
+; RUN: -r %t-goo.bc,_f2,px \
+; RUN: -r %t-foo.bc,_g,l -r %t-foo.bc,_g1,l -r %t-foo.bc,_g2,l \
+; RUN: -r %t-goo.bc,_g,l -r %t-goo.bc,_g1,l -r %t-goo.bc,_g2,l
+; RUN: llvm-nm %tout.1 | FileCheck %s --check-prefix=OUT1
+; RUN: llvm-nm %tout.2 | FileCheck %s --check-prefix=OUT2
+; RUN: llvm-objdump -d %tout.1 | FileCheck %s --check-prefix=THUNK1
+; RUN: llvm-objdump -d %tout.2 | FileCheck %s --check-prefix=THUNK2
+
+; OUT1: _f1.Tgm
+; OUT2: _f2.Tgm
+
+; THUNK1: <_f1>:
+; THUNK1-NEXT: adrp x1,
+; THUNK1-NEXT: ldr x1, [x1]
+; THUNK1-NEXT: b
+
+; THUNK2: <_f2>:
+; THUNK2-NEXT: adrp x1,
+; THUNK2-NEXT: ldr x1, [x1]
+; THUNK2-NEXT: b
+
+;--- foo.ll
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+target triple = "arm64-unknown-ios12.0.0"
+
+ at g = external local_unnamed_addr global [0 x i32], align 4
+ at g1 = external global i32, align 4
+ at g2 = external global i32, align 4
+
+define i32 @f1(i32 %a) {
+entry:
+ %idxprom = sext i32 %a to i64
+ %arrayidx = getelementptr inbounds [0 x i32], [0 x i32]* @g, i64 0, i64 %idxprom
+ %0 = load i32, i32* %arrayidx, align 4
+ %1 = load volatile i32, i32* @g1, align 4
+ %mul = mul nsw i32 %1, %0
+ %add = add nsw i32 %mul, 1
+ ret i32 %add
+}
+
+;--- goo.ll
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+target triple = "arm64-unknown-ios12.0.0"
+
+ at g = external local_unnamed_addr global [0 x i32], align 4
+ at g1 = external global i32, align 4
+ at g2 = external global i32, align 4
+
+define i32 @f2(i32 %a) {
+entry:
+ %idxprom = sext i32 %a to i64
+ %arrayidx = getelementptr inbounds [0 x i32], [0 x i32]* @g, i64 0, i64 %idxprom
+ %0 = load i32, i32* %arrayidx, align 4
+ %1 = load volatile i32, i32* @g2, align 4
+ %mul = mul nsw i32 %1, %0
+ %add = add nsw i32 %mul, 1
+ ret i32 %add
+}
diff --git a/llvm/test/ThinLTO/AArch64/cgdata-merge-write.ll b/llvm/test/ThinLTO/AArch64/cgdata-merge-write.ll
new file mode 100644
index 00000000000000..a4022eb885b43f
--- /dev/null
+++ b/llvm/test/ThinLTO/AArch64/cgdata-merge-write.ll
@@ -0,0 +1,97 @@
+; This test verifies whether a stable function is encoded into the __llvm_merge section
+; when the -codegen-data-generate flag is used under -enable-global-merge-func=true.
+
+; RUN: rm -rf %t; split-file %s %t
+
+; RUN: opt -module-summary -module-hash %t/foo.ll -o %t-foo.bc
+; RUN: opt -module-summary -module-hash %t/goo.ll -o %t-goo.bc
+
+; RUN: llvm-lto2 run -enable-global-merge-func=true -codegen-data-generate=false %t-foo.bc %t-goo.bc -o %tout-nowrite \
+; RUN: -r %t-foo.bc,_f1,px \
+; RUN: -r %t-goo.bc,_f2,px \
+; RUN: -r %t-foo.bc,_g,l -r %t-foo.bc,_g1,l -r %t-foo.bc,_g2,l \
+; RUN: -r %t-goo.bc,_g,l -r %t-goo.bc,_g1,l -r %t-goo.bc,_g2,l
+; RUN: llvm-nm %tout-nowrite.1 | FileCheck %s --check-prefix=NOWRITE
+; RUN: llvm-nm %tout-nowrite.2 | FileCheck %s --check-prefix=NOWRITE
+
+; No merge instance is locally created as each module has a singltone function.
+; NOWRITE-NOT: _f1.Tgm
+; NOWRITE-NOT: _f2.Tgm
+
+; RUN: llvm-lto2 run -enable-global-merge-func=true -codegen-data-generate=true %t-foo.bc %t-goo.bc -o %tout-nowrite \
+; RUN: -r %t-foo.bc,_f1,px \
+; RUN: -r %t-goo.bc,_f2,px \
+; RUN: -r %t-foo.bc,_g,l -r %t-foo.bc,_g1,l -r %t-foo.bc,_g2,l \
+; RUN: -r %t-goo.bc,_g,l -r %t-goo.bc,_g1,l -r %t-goo.bc,_g2,l
+; RUN: llvm-nm %tout-nowrite.1 | FileCheck %s --check-prefix=WRITE
+; RUN: llvm-nm %tout-nowrite.2 | FileCheck %s --check-prefix=WRITE
+; RUN: llvm-objdump -h %tout-nowrite.1 | FileCheck %s --check-prefix=SECTNAME
+; RUN: llvm-objdump -h %tout-nowrite.2 | FileCheck %s --check-prefix=SECTNAME
+
+; On a write mode, no merging happens yet for each module.
+; We only create stable functions and publish them into __llvm_merge section for each object.
+; WRITE-NOT: _f1.Tgm
+; WRITE-NOT: _f2.Tgm
+; SECTNAME: __llvm_merge
+
+; Merge the cgdata using llvm-cgdata.
+; We now validate the content of the merged cgdata.
+; Two functions have the same hash with only one different constnat at a same location.
+; RUN: llvm-cgdata --merge -o %tout.cgdata %tout-nowrite.1 %tout-nowrite.2
+; RUN: llvm-cgdata --convert %tout.cgdata -o - | FileCheck %s
+
+; CHECK: - Hash: [[#%d,HASH:]]
+; CHECK-NEXT: FunctionName: f1
+; CHECK-NEXT: ModuleName: {{.*}}
+; CHECK-NEXT: InstCount: [[#%d,INSTCOUNT:]]
+; CHECK-NEXT: IndexOperandHashes:
+; CHECK-NEXT: - InstIndex: [[#%d,INSTINDEX:]]
+; CHECK-NEXT: OpndIndex: [[#%d,OPNDINDEX:]]
+; CHECK-NEXT: OpndHash: {{.*}}
+
+; CHECK: - Hash: [[#%d,HASH]]
+; CHECK-NEXT: FunctionName: f2
+; CHECK-NEXT: ModuleName: {{.*}}
+; CHECK-NEXT: InstCount: [[#%d,INSTCOUNT]]
+; CHECK-NEXT: IndexOperandHashes:
+; CHECK-NEXT: - InstIndex: [[#%d,INSTINDEX]]
+; CHECK-NEXT: OpndIndex: [[#%d,OPNDINDEX]]
+; CHECK-NEXT: OpndHash: {{.*}}
+
+;--- foo.ll
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+target triple = "arm64-unknown-ios12.0.0"
+
+ at g = external local_unnamed_addr global [0 x i32], align 4
+ at g1 = external global i32, align 4
+ at g2 = external global i32, align 4
+
+define i32 @f1(i32 %a) {
+entry:
+ %idxprom = sext i32 %a to i64
+ %arrayidx = getelementptr inbounds [0 x i32], [0 x i32]* @g, i64 0, i64 %idxprom
+ %0 = load i32, i32* %arrayidx, align 4
+ %1 = load volatile i32, i32* @g1, align 4
+ %mul = mul nsw i32 %1, %0
+ %add = add nsw i32 %mul, 1
+ ret i32 %add
+}
+
+;--- goo.ll
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+target triple = "arm64-unknown-ios12.0.0"
+
+ at g = external local_unnamed_addr global [0 x i32], align 4
+ at g1 = external global i32, align 4
+ at g2 = external global i32, align 4
+
+define i32 @f2(i32 %a) {
+entry:
+ %idxprom = sext i32 %a to i64
+ %arrayidx = getelementptr inbounds [0 x i32], [0 x i32]* @g, i64 0, i64 %idxprom
+ %0 = load i32, i32* %arrayidx, align 4
+ %1 = load volatile i32, i32* @g2, align 4
+ %mul = mul nsw i32 %1, %0
+ %add = add nsw i32 %mul, 1
+ ret i32 %add
+}
diff --git a/llvm/tools/llvm-lto2/CMakeLists.txt b/llvm/tools/llvm-lto2/CMakeLists.txt
index 3b4644d6e27715..6ddccc7f17442f 100644
--- a/llvm/tools/llvm-lto2/CMakeLists.txt
+++ b/llvm/tools/llvm-lto2/CMakeLists.txt
@@ -6,6 +6,7 @@ set(LLVM_LINK_COMPONENTS
BitReader
CodeGen
Core
+ IPO
Linker
LTO
MC
diff --git a/llvm/tools/llvm-lto2/llvm-lto2.cpp b/llvm/tools/llvm-lto2/llvm-lto2.cpp
index d4f022ef021a44..0dc6623552a4bd 100644
--- a/llvm/tools/llvm-lto2/llvm-lto2.cpp
+++ b/llvm/tools/llvm-lto2/llvm-lto2.cpp
@@ -28,6 +28,7 @@
#include "llvm/Support/PluginLoader.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Support/Threading.h"
+#include "llvm/Transforms/IPO.h"
#include <atomic>
using namespace llvm;
@@ -195,6 +196,7 @@ static cl::opt<bool> TryUseNewDbgInfoFormat(
extern cl::opt<bool> UseNewDbgInfoFormat;
extern cl::opt<cl::boolOrDefault> LoadBitcodeIntoNewDbgInfoFormat;
extern cl::opt<cl::boolOrDefault> PreserveInputDbgFormat;
+extern cl::opt<bool> EnableGlobalMergeFunc;
static void check(Error E, std::string Msg) {
if (!E)
@@ -374,6 +376,10 @@ static int run(int argc, char **argv) {
if (DI.getSeverity() == DS_Error)
HasErrors = true;
};
+ Conf.PreCodeGenPassesHook = [](legacy::PassManager &pm) {
+ if (EnableGlobalMergeFunc)
+ pm.add(createGlobalMergeFuncPass());
+ };
LTO::LTOKind LTOMode = LTO::LTOK_Default;
>From e86d78bfa0961a1de88569e18ef5707aeaf680ea Mon Sep 17 00:00:00 2001
From: Kyungwoo Lee <kyulee at meta.com>
Date: Fri, 18 Oct 2024 10:12:21 -0700
Subject: [PATCH 2/2] Address comments from tschuett
---
llvm/include/llvm/Transforms/IPO.h | 3 ++
.../Transforms/IPO/GlobalMergeFunctions.h | 45 ++++++++--------
.../Transforms/IPO/GlobalMergeFunctions.cpp | 53 +++++++++----------
3 files changed, 51 insertions(+), 50 deletions(-)
diff --git a/llvm/include/llvm/Transforms/IPO.h b/llvm/include/llvm/Transforms/IPO.h
index 86a8654f56997c..28cf330ad20812 100644
--- a/llvm/include/llvm/Transforms/IPO.h
+++ b/llvm/include/llvm/Transforms/IPO.h
@@ -55,6 +55,9 @@ enum class PassSummaryAction {
Export, ///< Export information to summary.
};
+/// createGlobalMergeFuncPass - This pass generates merged instances by
+/// parameterizing distinct constants across similar functions, utilizing stable
+/// function hash information.
Pass *createGlobalMergeFuncPass();
} // End llvm namespace
diff --git a/llvm/include/llvm/Transforms/IPO/GlobalMergeFunctions.h b/llvm/include/llvm/Transforms/IPO/GlobalMergeFunctions.h
index 565be54f89e882..395aeda73b2d0f 100644
--- a/llvm/include/llvm/Transforms/IPO/GlobalMergeFunctions.h
+++ b/llvm/include/llvm/Transforms/IPO/GlobalMergeFunctions.h
@@ -5,23 +5,29 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-///
-/// This file defines global merge functions pass and related data structure.
-///
+//
+// This pass defines the implementation of a function merging mechanism
+// that utilizes a stable function hash to track differences in constants and
+// identify potential merge candidates. The process involves two rounds:
+// 1. The first round collects stable function hashes and identifies merge
+// candidates with matching hashes. It also computes the set of parameters
+// that point to different constants during the stable function merge.
+// 2. The second round leverages this collected global function information to
+// optimistically create a merged function in each module context, ensuring
+// correct transformation.
+// Similar to the global outliner, this approach uses the linker's deduplication
+// (ICF) to fold identical merged functions, thereby reducing the final binary
+// size. The work is inspired by the concepts discussed in the following paper:
+// https://dl.acm.org/doi/pdf/10.1145/3652032.3657575.
+//
//===----------------------------------------------------------------------===//
-#ifndef PIKA_TRANSFORMS_UTILS_GLOBALMERGEFUNCTIONS_H
-#define PIKA_TRANSFORMS_UTILS_GLOBALMERGEFUNCTIONS_H
+#ifndef LLVM_TRANSFORMS_IPO_GLOBALMERGEFUNCTIONS_H
+#define LLVM_TRANSFORMS_IPO_GLOBALMERGEFUNCTIONS_H
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/StableHashing.h"
-#include "llvm/ADT/StringRef.h"
#include "llvm/CGData/StableFunctionMap.h"
-#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
-#include <map>
-#include <mutex>
enum class HashFunctionMode {
Local,
@@ -36,15 +42,10 @@ namespace llvm {
using ParamLocs = SmallVector<IndexPair, 4>;
// A vector of parameters
using ParamLocsVecTy = SmallVector<ParamLocs, 8>;
-// A map of stable hash to a vector of stable functions
-
-/// GlobalMergeFunc finds functions which only differ by constants in
-/// certain instructions, e.g. resulting from specialized functions of layout
-/// compatible types.
-/// Unlike PikaMergeFunc that directly compares IRs, this uses stable function
-/// hash to find the merge candidate. Similar to the global outliner, we can run
-/// codegen twice to collect function merge candidate in the first round, and
-/// merge functions globally in the second round.
+
+/// GlobalMergeFunc is a ModulePass that implements a function merging mechanism
+/// using stable function hashes. It identifies and merges functions with
+/// matching hashes across modules to optimize binary size.
class GlobalMergeFunc : public ModulePass {
HashFunctionMode MergerMode = HashFunctionMode::Local;
@@ -69,9 +70,9 @@ class GlobalMergeFunc : public ModulePass {
/// Emit LocalFunctionMap into __llvm_merge section.
void emitFunctionMap(Module &M);
- /// Merge functions in the module using the global function map.
+ /// Merge functions in the module using the given function map.
bool merge(Module &M, const StableFunctionMap *FunctionMap);
};
} // end namespace llvm
-#endif // PIKA_TRANSFORMS_UTILS_GLOBALMERGEFUNCTIONS_H
+#endif // LLVM_TRANSFORMS_IPO_GLOBALMERGEFUNCTIONS_H
diff --git a/llvm/lib/Transforms/IPO/GlobalMergeFunctions.cpp b/llvm/lib/Transforms/IPO/GlobalMergeFunctions.cpp
index 6fd1a66b5a197c..5a52c8f3304186 100644
--- a/llvm/lib/Transforms/IPO/GlobalMergeFunctions.cpp
+++ b/llvm/lib/Transforms/IPO/GlobalMergeFunctions.cpp
@@ -6,16 +6,19 @@
//
//===----------------------------------------------------------------------===//
//
-// TODO: This implements a function merge using function hash while tracking
-// differences in Constants. This uses stable function hash to find potential
-// merge candidates. The first codegen round collects stable function hashes,
-// and determines the merge candidates that match the stable function hashes.
-// The set of parameters pointing to different Constants are also computed
-// during the stable function merge. The second codegen round uses this global
-// function info to optimistically create a merged function in each module
-// context to guarantee correct transformation. Similar to the global outliner,
-// the linker's deduplication (ICF) folds the identical merged functions to save
-// the final binary size.
+// This pass defines the implementation of a function merging mechanism
+// that utilizes a stable function hash to track differences in constants and
+// create potential merge candidates. The process involves two rounds:
+// 1. The first round collects stable function hashes and identifies merge
+// candidates with matching hashes. It also computes the set of parameters
+// that point to different constants during the stable function merge.
+// 2. The second round leverages this collected global function information to
+// optimistically create a merged function in each module context, ensuring
+// correct transformation.
+// Similar to the global outliner, this approach uses the linker's deduplication
+// (ICF) to fold identical merged functions, thereby reducing the final binary
+// size. The work is inspired by the concepts discussed in the following paper:
+// https://dl.acm.org/doi/pdf/10.1145/3652032.3657575.
//
//===----------------------------------------------------------------------===//
@@ -23,9 +26,6 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ModuleSummaryAnalysis.h"
#include "llvm/CGData/CodeGenData.h"
-#include "llvm/CGData/StableFunctionMap.h"
-#include "llvm/CodeGen/MachineStableHash.h"
-#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/StructuralHash.h"
#include "llvm/InitializePasses.h"
@@ -84,7 +84,7 @@ STATISTIC(NumAnalyzedModues, "Number of modules that are analyzed");
STATISTIC(NumAnalyzedFunctions, "Number of functions that are analyzed");
STATISTIC(NumEligibleFunctions, "Number of functions that are eligible");
-/// Returns true if the \opIdx operand of \p CI is the callee operand.
+/// Returns true if the \OpIdx operand of \p CI is the callee operand.
static bool isCalleeOperand(const CallBase *CI, unsigned OpIdx) {
return &CI->getCalledOperandUse() == &CI->getOperandUse(OpIdx);
}
@@ -148,22 +148,19 @@ bool isEligibleFunction(Function *F) {
if (F->hasFnAttribute(llvm::Attribute::NoMerge))
return false;
- if (F->hasAvailableExternallyLinkage()) {
+ if (F->hasAvailableExternallyLinkage())
return false;
- }
- if (F->getFunctionType()->isVarArg()) {
+ if (F->getFunctionType()->isVarArg())
return false;
- }
if (F->getCallingConv() == CallingConv::SwiftTail)
return false;
- // if function contains callsites with musttail, if we merge
+ // If function contains callsites with musttail, if we merge
// it, the merged function will have the musttail callsite, but
// the number of parameters can change, thus the parameter count
// of the callsite will mismatch with the function itself.
- // if (IgnoreMusttailFunction) {
for (const BasicBlock &BB : *F) {
for (const Instruction &I : BB) {
const auto *CB = dyn_cast<CallBase>(&I);
@@ -203,7 +200,6 @@ static bool ignoreOp(const Instruction *I, unsigned OpIdx) {
return true;
}
-// copy from merge functions.cpp
static Value *createCast(IRBuilder<> &Builder, Value *V, Type *DestTy) {
Type *SrcTy = V->getType();
if (SrcTy->isStructTy()) {
@@ -252,7 +248,8 @@ void GlobalMergeFunc::analyze(Module &M) {
auto FI = llvm::StructuralHashWithDifferences(Func, ignoreOp);
- // Convert the map to a vector for a serialization-friendly format.
+ // Convert the operand map to a vector for a serialization-friendly
+ // format.
IndexOperandHashVecType IndexOperandHashes;
for (auto &Pair : *FI.IndexOperandHashMap)
IndexOperandHashes.emplace_back(Pair);
@@ -597,7 +594,7 @@ bool GlobalMergeFunc::merge(Module &M, const StableFunctionMap *FunctionMap) {
// This module check is not strictly necessary as the functions can move
// around. We just want to avoid merging functions from different
// modules than the first one in the functon map, as they may not end up
- // with not being ICFed.
+ // with not being ICFed by the linker.
if (MergedModId != *FunctionMap->getNameForId(SF->ModuleNameId)) {
++NumMismatchedModuleIdGlobalMergeFunction;
continue;
@@ -618,12 +615,12 @@ bool GlobalMergeFunc::merge(Module &M, const StableFunctionMap *FunctionMap) {
dbgs() << "[GlobalMergeFunc] Merging function count " << FuncMergeInfoSize
<< " in " << ModId << "\n";
});
+
for (auto &FMI : FuncMergeInfos) {
Changed = true;
// We've already validated all locations of constant operands pointed by
- // the parameters. Just use the first one to bookkeep the original
- // constants for each parameter
+ // the parameters. Populate parameters pointing to the original constants.
SmallVector<Constant *> Params;
SmallVector<Type *> ParamTypes;
for (auto &ParamLocs : ParamLocsVec) {
@@ -635,8 +632,7 @@ bool GlobalMergeFunc::merge(Module &M, const StableFunctionMap *FunctionMap) {
ParamTypes.push_back(Opnd->getType());
}
- // Create a merged function derived from the first function in the current
- // module context.
+ // Create a merged function derived from the current function.
Function *MergedFunc =
createMergedFunction(FMI, ParamTypes, ParamLocsVec);
@@ -647,7 +643,8 @@ bool GlobalMergeFunc::merge(Module &M, const StableFunctionMap *FunctionMap) {
MergedFunc->dump();
});
- // Create a thunk to the merged function.
+ // Transform the current function into a thunk that calls the merged
+ // function.
createThunk(FMI, Params, MergedFunc);
LLVM_DEBUG({
dbgs() << "[GlobalMergeFunc] Thunk generated: \n";
More information about the llvm-commits
mailing list