[llvm] [CGData] Refactor Global Merge Functions (PR #115750)
Kyungwoo Lee via llvm-commits
llvm-commits at lists.llvm.org
Wed Nov 13 17:38:07 PST 2024
https://github.com/kyulee-com updated https://github.com/llvm/llvm-project/pull/115750
>From 3358b7cfc7389b8e75d3043465846027ee319fb4 Mon Sep 17 00:00:00 2001
From: Kyungwoo Lee <kyulee at meta.com>
Date: Mon, 11 Nov 2024 10:06:56 -0800
Subject: [PATCH 1/3] [CGData] Refactor Global Merge Functions
---
llvm/lib/CodeGen/GlobalMergeFunctions.cpp | 148 +++++++++-------------
1 file changed, 59 insertions(+), 89 deletions(-)
diff --git a/llvm/lib/CodeGen/GlobalMergeFunctions.cpp b/llvm/lib/CodeGen/GlobalMergeFunctions.cpp
index 2b367ca87d9008..df8dbb8a73b95d 100644
--- a/llvm/lib/CodeGen/GlobalMergeFunctions.cpp
+++ b/llvm/lib/CodeGen/GlobalMergeFunctions.cpp
@@ -31,14 +31,6 @@ static cl::opt<bool> DisableCGDataForMerging(
"merging is still enabled within a module."),
cl::init(false));
-STATISTIC(NumMismatchedFunctionHash,
- "Number of mismatched function hash for global merge function");
-STATISTIC(NumMismatchedInstCount,
- "Number of mismatched instruction count for global merge function");
-STATISTIC(NumMismatchedConstHash,
- "Number of mismatched const hash for global merge function");
-STATISTIC(NumMismatchedModuleId,
- "Number of mismatched Module Id for global merge function");
STATISTIC(NumMergedFunctions,
"Number of functions that are actually merged using function hash");
STATISTIC(NumAnalyzedModues, "Number of modules that are analyzed");
@@ -203,9 +195,9 @@ void GlobalMergeFunc::analyze(Module &M) {
struct FuncMergeInfo {
StableFunctionMap::StableFunctionEntry *SF;
Function *F;
- std::unique_ptr<IndexInstrMap> IndexInstruction;
+ IndexInstrMap *IndexInstruction;
FuncMergeInfo(StableFunctionMap::StableFunctionEntry *SF, Function *F,
- std::unique_ptr<IndexInstrMap> IndexInstruction)
+ IndexInstrMap *IndexInstruction)
: SF(SF), F(F), IndexInstruction(std::move(IndexInstruction)) {}
};
@@ -420,101 +412,79 @@ static ParamLocsVecTy computeParamInfo(
bool GlobalMergeFunc::merge(Module &M, const StableFunctionMap *FunctionMap) {
bool Changed = false;
- // Build a map from stable function name to function.
- StringMap<Function *> StableNameToFuncMap;
- for (auto &F : M)
- StableNameToFuncMap[get_stable_name(F.getName())] = &F;
- // Track merged functions
- DenseSet<Function *> MergedFunctions;
-
- auto ModId = M.getModuleIdentifier();
- for (auto &[Hash, SFS] : FunctionMap->getFunctionMap()) {
- // Parameter locations based on the unique hash sequences
- // across the candidates.
+ // Collect stable functions related to the current module.
+ DenseMap<stable_hash, SmallVector<Function *>> HashToFuncs;
+ DenseMap<Function *, FunctionHashInfo> FuncToFI;
+ auto &Maps = FunctionMap->getFunctionMap();
+ for (auto &F : M) {
+ if (!isEligibleFunction(&F))
+ continue;
+ auto FI = llvm::StructuralHashWithDifferences(F, ignoreOp);
+ if (Maps.contains(FI.FunctionHash)) {
+ HashToFuncs[FI.FunctionHash].push_back(&F);
+ FuncToFI.try_emplace(&F, std::move(FI));
+ }
+ }
+
+ for (auto &[Hash, Funcs] : HashToFuncs) {
std::optional<ParamLocsVecTy> ParamLocsVec;
- Function *MergedFunc = nullptr;
- std::string MergedModId;
SmallVector<FuncMergeInfo> FuncMergeInfos;
- for (auto &SF : SFS) {
- // Get the function from the stable name.
- auto I = StableNameToFuncMap.find(
- *FunctionMap->getNameForId(SF->FunctionNameId));
- if (I == StableNameToFuncMap.end())
- continue;
- Function *F = I->second;
- assert(F);
- // Skip if the function has been merged before.
- if (MergedFunctions.count(F))
- continue;
- // Consider the function if it is eligible for merging.
- if (!isEligibleFunction(F))
- continue;
- auto FI = llvm::StructuralHashWithDifferences(*F, ignoreOp);
- uint64_t FuncHash = FI.FunctionHash;
- if (Hash != FuncHash) {
- ++NumMismatchedFunctionHash;
- continue;
- }
+ // Iterate functions with the same hash.
+ for (auto &F : Funcs) {
+ auto &SFS = Maps.at(Hash);
+ auto &FI = FuncToFI.at(F);
- if (SF->InstCount != FI.IndexInstruction->size()) {
- ++NumMismatchedInstCount;
+ // Check if the function is compatible with any stable function
+ // in terms of the number of instructions and ignored operands.
+ assert(!SFS.empty());
+ auto &RFS = SFS[0];
+ if (RFS->InstCount != FI.IndexInstruction->size())
continue;
- }
- bool HasValidSharedConst = true;
- for (auto &[Index, Hash] : *SF->IndexOperandHashMap) {
- auto [InstIndex, OpndIndex] = Index;
- assert(InstIndex < FI.IndexInstruction->size());
- auto *Inst = FI.IndexInstruction->lookup(InstIndex);
- if (!ignoreOp(Inst, OpndIndex)) {
- HasValidSharedConst = false;
- break;
- }
- }
- if (!HasValidSharedConst) {
- ++NumMismatchedConstHash;
- continue;
- }
- if (!checkConstHashCompatible(*SF->IndexOperandHashMap,
- *FI.IndexOperandHashMap)) {
- ++NumMismatchedConstHash;
- continue;
- }
- if (!ParamLocsVec.has_value()) {
- ParamLocsVec = computeParamInfo(SFS);
- LLVM_DEBUG(dbgs() << "[GlobalMergeFunc] Merging hash: " << Hash
- << " with Params " << ParamLocsVec->size() << "\n");
- }
- if (!checkConstLocationCompatible(*SF, *FI.IndexInstruction,
- *ParamLocsVec)) {
- ++NumMismatchedConstHash;
+
+ auto hasValidSharedConst =
+ [&](StableFunctionMap::StableFunctionEntry *SF) {
+ for (auto &[Index, Hash] : *SF->IndexOperandHashMap) {
+ auto [InstIndex, OpndIndex] = Index;
+ assert(InstIndex < FI.IndexInstruction->size());
+ auto *Inst = FI.IndexInstruction->lookup(InstIndex);
+ if (!ignoreOp(Inst, OpndIndex))
+ return false;
+ }
+ return true;
+ };
+ if (!hasValidSharedConst(RFS.get()))
continue;
- }
- if (MergedFunc) {
- // Check if the matched functions fall into the same (first) module.
- // This module check is not strictly necessary as the functions can move
- // around. We just want to avoid merging functions from different
- // modules than the first one in the function map, as they may not end
- // up with being ICFed by the linker.
- if (MergedModId != *FunctionMap->getNameForId(SF->ModuleNameId)) {
- ++NumMismatchedModuleId;
+ for (auto &SF : SFS) {
+ assert(SF->InstCount == FI.IndexInstruction->size());
+ assert(hasValidSharedConst(SF.get()));
+ // Check if there is any stable function that is compatiable with the
+ // current one.
+ if (!checkConstHashCompatible(*SF->IndexOperandHashMap,
+ *FI.IndexOperandHashMap))
continue;
+ if (!ParamLocsVec.has_value()) {
+ ParamLocsVec = computeParamInfo(SFS);
+ LLVM_DEBUG(dbgs() << "[GlobalMergeFunc] Merging hash: " << Hash
+ << " with Params " << ParamLocsVec->size() << "\n");
}
- } else {
- MergedFunc = F;
- MergedModId = *FunctionMap->getNameForId(SF->ModuleNameId);
- }
+ if (!checkConstLocationCompatible(*SF, *FI.IndexInstruction,
+ *ParamLocsVec))
+ continue;
- FuncMergeInfos.emplace_back(SF.get(), F, std::move(FI.IndexInstruction));
- MergedFunctions.insert(F);
+ // As long as we found one stable function matching the current one,
+ // we create a candidate for merging and move on to the next function.
+ FuncMergeInfos.emplace_back(SF.get(), F, FI.IndexInstruction.get());
+ break;
+ }
}
unsigned FuncMergeInfoSize = FuncMergeInfos.size();
if (FuncMergeInfoSize == 0)
continue;
LLVM_DEBUG(dbgs() << "[GlobalMergeFunc] Merging function count "
- << FuncMergeInfoSize << " in " << ModId << "\n");
+ << FuncMergeInfoSize << " for hash: " << Hash << "\n");
for (auto &FMI : FuncMergeInfos) {
Changed = true;
>From dc6f20e083c6e8332dd601c84f97e220514c2c33 Mon Sep 17 00:00:00 2001
From: Kyungwoo Lee <kyulee at meta.com>
Date: Mon, 11 Nov 2024 21:23:58 -0800
Subject: [PATCH 2/3] Address comments from nocchijiang
---
llvm/lib/CodeGen/GlobalMergeFunctions.cpp | 24 ++++++++++-------------
1 file changed, 10 insertions(+), 14 deletions(-)
diff --git a/llvm/lib/CodeGen/GlobalMergeFunctions.cpp b/llvm/lib/CodeGen/GlobalMergeFunctions.cpp
index df8dbb8a73b95d..a0b3bdf8efc575 100644
--- a/llvm/lib/CodeGen/GlobalMergeFunctions.cpp
+++ b/llvm/lib/CodeGen/GlobalMergeFunctions.cpp
@@ -413,32 +413,28 @@ bool GlobalMergeFunc::merge(Module &M, const StableFunctionMap *FunctionMap) {
bool Changed = false;
// Collect stable functions related to the current module.
- DenseMap<stable_hash, SmallVector<Function *>> HashToFuncs;
- DenseMap<Function *, FunctionHashInfo> FuncToFI;
+ DenseMap<stable_hash, SmallVector<std::pair<Function *, FunctionHashInfo>>>
+ HashToFuncs;
auto &Maps = FunctionMap->getFunctionMap();
for (auto &F : M) {
if (!isEligibleFunction(&F))
continue;
auto FI = llvm::StructuralHashWithDifferences(F, ignoreOp);
- if (Maps.contains(FI.FunctionHash)) {
- HashToFuncs[FI.FunctionHash].push_back(&F);
- FuncToFI.try_emplace(&F, std::move(FI));
- }
+ if (Maps.contains(FI.FunctionHash))
+ HashToFuncs[FI.FunctionHash].emplace_back(&F, std::move(FI));
}
for (auto &[Hash, Funcs] : HashToFuncs) {
std::optional<ParamLocsVecTy> ParamLocsVec;
SmallVector<FuncMergeInfo> FuncMergeInfos;
+ auto &SFS = Maps.at(Hash);
+ assert(!SFS.empty());
+ auto &RFS = SFS[0];
// Iterate functions with the same hash.
- for (auto &F : Funcs) {
- auto &SFS = Maps.at(Hash);
- auto &FI = FuncToFI.at(F);
-
+ for (auto &[F, FI] : Funcs) {
// Check if the function is compatible with any stable function
// in terms of the number of instructions and ignored operands.
- assert(!SFS.empty());
- auto &RFS = SFS[0];
if (RFS->InstCount != FI.IndexInstruction->size())
continue;
@@ -473,8 +469,8 @@ bool GlobalMergeFunc::merge(Module &M, const StableFunctionMap *FunctionMap) {
*ParamLocsVec))
continue;
- // As long as we found one stable function matching the current one,
- // we create a candidate for merging and move on to the next function.
+ // If a stable function matching the current one is found,
+ // create a candidate for merging and proceed to the next function.
FuncMergeInfos.emplace_back(SF.get(), F, FI.IndexInstruction.get());
break;
}
>From 5edbc305d03f0d74afe4a22a1bf5154b5931870f Mon Sep 17 00:00:00 2001
From: Kyungwoo Lee <kyulee at meta.com>
Date: Tue, 12 Nov 2024 06:25:00 -0800
Subject: [PATCH 3/3] Address 2nd comments from nocchijiang
---
llvm/lib/CodeGen/GlobalMergeFunctions.cpp | 11 ++++++++---
1 file changed, 8 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/CodeGen/GlobalMergeFunctions.cpp b/llvm/lib/CodeGen/GlobalMergeFunctions.cpp
index a0b3bdf8efc575..502e1a1f274354 100644
--- a/llvm/lib/CodeGen/GlobalMergeFunctions.cpp
+++ b/llvm/lib/CodeGen/GlobalMergeFunctions.cpp
@@ -102,7 +102,7 @@ bool isEligibleFunction(Function *F) {
return true;
}
-static bool isEligibleInstrunctionForConstantSharing(const Instruction *I) {
+static bool isEligibleInstructionForConstantSharing(const Instruction *I) {
switch (I->getOpcode()) {
case Instruction::Load:
case Instruction::Store:
@@ -114,10 +114,15 @@ static bool isEligibleInstrunctionForConstantSharing(const Instruction *I) {
}
}
+// This function takes an instruction, \p I, and an operand index, \p OpIdx.
+// It returns true if the operand should be ignored in the hash computation.
+// If \p OpIdx is out of range based on the other instruction context, it cannot
+// be ignored.
static bool ignoreOp(const Instruction *I, unsigned OpIdx) {
- assert(OpIdx < I->getNumOperands() && "Invalid operand index");
+ if (OpIdx >= I->getNumOperands())
+ return false;
- if (!isEligibleInstrunctionForConstantSharing(I))
+ if (!isEligibleInstructionForConstantSharing(I))
return false;
if (!isa<Constant>(I->getOperand(OpIdx)))
More information about the llvm-commits
mailing list