[llvm] d3da788 - [CGData] Refactor Global Merge Functions (#115750)

via llvm-commits llvm-commits at lists.llvm.org
Wed Nov 13 21:15:22 PST 2024


Author: Kyungwoo Lee
Date: 2024-11-13T21:15:19-08:00
New Revision: d3da78863c7021fa2447a168dc03ad791db69dc6

URL: https://github.com/llvm/llvm-project/commit/d3da78863c7021fa2447a168dc03ad791db69dc6
DIFF: https://github.com/llvm/llvm-project/commit/d3da78863c7021fa2447a168dc03ad791db69dc6.diff

LOG: [CGData] Refactor Global Merge Functions (#115750)

This is a follow-up PR to refactor the initial global merge function
pass implemented in #112671.

It first collects stable functions relevant to the current module and
iterates over those only, instead of iterating through all stable
functions in the stable function map.

This is a patch for
https://discourse.llvm.org/t/rfc-global-function-merging/82608.

Added: 
    

Modified: 
    llvm/lib/CodeGen/GlobalMergeFunctions.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/CodeGen/GlobalMergeFunctions.cpp b/llvm/lib/CodeGen/GlobalMergeFunctions.cpp
index 2b367ca87d9008..502e1a1f274354 100644
--- a/llvm/lib/CodeGen/GlobalMergeFunctions.cpp
+++ b/llvm/lib/CodeGen/GlobalMergeFunctions.cpp
@@ -31,14 +31,6 @@ static cl::opt<bool> DisableCGDataForMerging(
              "merging is still enabled within a module."),
     cl::init(false));
 
-STATISTIC(NumMismatchedFunctionHash,
-          "Number of mismatched function hash for global merge function");
-STATISTIC(NumMismatchedInstCount,
-          "Number of mismatched instruction count for global merge function");
-STATISTIC(NumMismatchedConstHash,
-          "Number of mismatched const hash for global merge function");
-STATISTIC(NumMismatchedModuleId,
-          "Number of mismatched Module Id for global merge function");
 STATISTIC(NumMergedFunctions,
           "Number of functions that are actually merged using function hash");
 STATISTIC(NumAnalyzedModues, "Number of modules that are analyzed");
@@ -110,7 +102,7 @@ bool isEligibleFunction(Function *F) {
   return true;
 }
 
-static bool isEligibleInstrunctionForConstantSharing(const Instruction *I) {
+static bool isEligibleInstructionForConstantSharing(const Instruction *I) {
   switch (I->getOpcode()) {
   case Instruction::Load:
   case Instruction::Store:
@@ -122,10 +114,15 @@ static bool isEligibleInstrunctionForConstantSharing(const Instruction *I) {
   }
 }
 
+// This function takes an instruction, \p I, and an operand index, \p OpIdx.
+// It returns true if the operand should be ignored in the hash computation.
+// If \p OpIdx is out of range based on the other instruction context, it cannot
+// be ignored.
 static bool ignoreOp(const Instruction *I, unsigned OpIdx) {
-  assert(OpIdx < I->getNumOperands() && "Invalid operand index");
+  if (OpIdx >= I->getNumOperands())
+    return false;
 
-  if (!isEligibleInstrunctionForConstantSharing(I))
+  if (!isEligibleInstructionForConstantSharing(I))
     return false;
 
   if (!isa<Constant>(I->getOperand(OpIdx)))
@@ -203,9 +200,9 @@ void GlobalMergeFunc::analyze(Module &M) {
 struct FuncMergeInfo {
   StableFunctionMap::StableFunctionEntry *SF;
   Function *F;
-  std::unique_ptr<IndexInstrMap> IndexInstruction;
+  IndexInstrMap *IndexInstruction;
   FuncMergeInfo(StableFunctionMap::StableFunctionEntry *SF, Function *F,
-                std::unique_ptr<IndexInstrMap> IndexInstruction)
+                IndexInstrMap *IndexInstruction)
       : SF(SF), F(F), IndexInstruction(std::move(IndexInstruction)) {}
 };
 
@@ -420,101 +417,75 @@ static ParamLocsVecTy computeParamInfo(
 bool GlobalMergeFunc::merge(Module &M, const StableFunctionMap *FunctionMap) {
   bool Changed = false;
 
-  // Build a map from stable function name to function.
-  StringMap<Function *> StableNameToFuncMap;
-  for (auto &F : M)
-    StableNameToFuncMap[get_stable_name(F.getName())] = &F;
-  // Track merged functions
-  DenseSet<Function *> MergedFunctions;
-
-  auto ModId = M.getModuleIdentifier();
-  for (auto &[Hash, SFS] : FunctionMap->getFunctionMap()) {
-    // Parameter locations based on the unique hash sequences
-    // across the candidates.
+  // Collect stable functions related to the current module.
+  DenseMap<stable_hash, SmallVector<std::pair<Function *, FunctionHashInfo>>>
+      HashToFuncs;
+  auto &Maps = FunctionMap->getFunctionMap();
+  for (auto &F : M) {
+    if (!isEligibleFunction(&F))
+      continue;
+    auto FI = llvm::StructuralHashWithDifferences(F, ignoreOp);
+    if (Maps.contains(FI.FunctionHash))
+      HashToFuncs[FI.FunctionHash].emplace_back(&F, std::move(FI));
+  }
+
+  for (auto &[Hash, Funcs] : HashToFuncs) {
     std::optional<ParamLocsVecTy> ParamLocsVec;
-    Function *MergedFunc = nullptr;
-    std::string MergedModId;
     SmallVector<FuncMergeInfo> FuncMergeInfos;
-    for (auto &SF : SFS) {
-      // Get the function from the stable name.
-      auto I = StableNameToFuncMap.find(
-          *FunctionMap->getNameForId(SF->FunctionNameId));
-      if (I == StableNameToFuncMap.end())
-        continue;
-      Function *F = I->second;
-      assert(F);
-      // Skip if the function has been merged before.
-      if (MergedFunctions.count(F))
-        continue;
-      // Consider the function if it is eligible for merging.
-      if (!isEligibleFunction(F))
+    auto &SFS = Maps.at(Hash);
+    assert(!SFS.empty());
+    auto &RFS = SFS[0];
+
+    // Iterate functions with the same hash.
+    for (auto &[F, FI] : Funcs) {
+      // Check if the function is compatible with any stable function
+      // in terms of the number of instructions and ignored operands.
+      if (RFS->InstCount != FI.IndexInstruction->size())
         continue;
 
-      auto FI = llvm::StructuralHashWithDifferences(*F, ignoreOp);
-      uint64_t FuncHash = FI.FunctionHash;
-      if (Hash != FuncHash) {
-        ++NumMismatchedFunctionHash;
+      auto hasValidSharedConst =
+          [&](StableFunctionMap::StableFunctionEntry *SF) {
+            for (auto &[Index, Hash] : *SF->IndexOperandHashMap) {
+              auto [InstIndex, OpndIndex] = Index;
+              assert(InstIndex < FI.IndexInstruction->size());
+              auto *Inst = FI.IndexInstruction->lookup(InstIndex);
+              if (!ignoreOp(Inst, OpndIndex))
+                return false;
+            }
+            return true;
+          };
+      if (!hasValidSharedConst(RFS.get()))
         continue;
-      }
 
-      if (SF->InstCount != FI.IndexInstruction->size()) {
-        ++NumMismatchedInstCount;
-        continue;
-      }
-      bool HasValidSharedConst = true;
-      for (auto &[Index, Hash] : *SF->IndexOperandHashMap) {
-        auto [InstIndex, OpndIndex] = Index;
-        assert(InstIndex < FI.IndexInstruction->size());
-        auto *Inst = FI.IndexInstruction->lookup(InstIndex);
-        if (!ignoreOp(Inst, OpndIndex)) {
-          HasValidSharedConst = false;
-          break;
-        }
-      }
-      if (!HasValidSharedConst) {
-        ++NumMismatchedConstHash;
-        continue;
-      }
-      if (!checkConstHashCompatible(*SF->IndexOperandHashMap,
-                                    *FI.IndexOperandHashMap)) {
-        ++NumMismatchedConstHash;
-        continue;
-      }
-      if (!ParamLocsVec.has_value()) {
-        ParamLocsVec = computeParamInfo(SFS);
-        LLVM_DEBUG(dbgs() << "[GlobalMergeFunc] Merging hash: " << Hash
-                          << " with Params " << ParamLocsVec->size() << "\n");
-      }
-      if (!checkConstLocationCompatible(*SF, *FI.IndexInstruction,
-                                        *ParamLocsVec)) {
-        ++NumMismatchedConstHash;
-        continue;
-      }
-
-      if (MergedFunc) {
-        // Check if the matched functions fall into the same (first) module.
-        // This module check is not strictly necessary as the functions can move
-        // around. We just want to avoid merging functions from 
diff erent
-        // modules than the first one in the function map, as they may not end
-        // up with being ICFed by the linker.
-        if (MergedModId != *FunctionMap->getNameForId(SF->ModuleNameId)) {
-          ++NumMismatchedModuleId;
+      for (auto &SF : SFS) {
+        assert(SF->InstCount == FI.IndexInstruction->size());
+        assert(hasValidSharedConst(SF.get()));
+        // Check if there is any stable function that is compatiable with the
+        // current one.
+        if (!checkConstHashCompatible(*SF->IndexOperandHashMap,
+                                      *FI.IndexOperandHashMap))
           continue;
+        if (!ParamLocsVec.has_value()) {
+          ParamLocsVec = computeParamInfo(SFS);
+          LLVM_DEBUG(dbgs() << "[GlobalMergeFunc] Merging hash: " << Hash
+                            << " with Params " << ParamLocsVec->size() << "\n");
         }
-      } else {
-        MergedFunc = F;
-        MergedModId = *FunctionMap->getNameForId(SF->ModuleNameId);
-      }
+        if (!checkConstLocationCompatible(*SF, *FI.IndexInstruction,
+                                          *ParamLocsVec))
+          continue;
 
-      FuncMergeInfos.emplace_back(SF.get(), F, std::move(FI.IndexInstruction));
-      MergedFunctions.insert(F);
+        // If a stable function matching the current one is found,
+        // create a candidate for merging and proceed to the next function.
+        FuncMergeInfos.emplace_back(SF.get(), F, FI.IndexInstruction.get());
+        break;
+      }
     }
     unsigned FuncMergeInfoSize = FuncMergeInfos.size();
     if (FuncMergeInfoSize == 0)
       continue;
 
     LLVM_DEBUG(dbgs() << "[GlobalMergeFunc] Merging function count "
-                      << FuncMergeInfoSize << " in  " << ModId << "\n");
+                      << FuncMergeInfoSize << " for hash:  " << Hash << "\n");
 
     for (auto &FMI : FuncMergeInfos) {
       Changed = true;


        


More information about the llvm-commits mailing list