[llvm] [CGData] Refactor Global Merge Functions (PR #115750)

Kyungwoo Lee via llvm-commits llvm-commits at lists.llvm.org
Wed Nov 13 17:38:07 PST 2024


https://github.com/kyulee-com updated https://github.com/llvm/llvm-project/pull/115750

>From 3358b7cfc7389b8e75d3043465846027ee319fb4 Mon Sep 17 00:00:00 2001
From: Kyungwoo Lee <kyulee at meta.com>
Date: Mon, 11 Nov 2024 10:06:56 -0800
Subject: [PATCH 1/3] [CGData] Refactor Global Merge Functions

---
 llvm/lib/CodeGen/GlobalMergeFunctions.cpp | 148 +++++++++-------------
 1 file changed, 59 insertions(+), 89 deletions(-)

diff --git a/llvm/lib/CodeGen/GlobalMergeFunctions.cpp b/llvm/lib/CodeGen/GlobalMergeFunctions.cpp
index 2b367ca87d9008..df8dbb8a73b95d 100644
--- a/llvm/lib/CodeGen/GlobalMergeFunctions.cpp
+++ b/llvm/lib/CodeGen/GlobalMergeFunctions.cpp
@@ -31,14 +31,6 @@ static cl::opt<bool> DisableCGDataForMerging(
              "merging is still enabled within a module."),
     cl::init(false));
 
-STATISTIC(NumMismatchedFunctionHash,
-          "Number of mismatched function hash for global merge function");
-STATISTIC(NumMismatchedInstCount,
-          "Number of mismatched instruction count for global merge function");
-STATISTIC(NumMismatchedConstHash,
-          "Number of mismatched const hash for global merge function");
-STATISTIC(NumMismatchedModuleId,
-          "Number of mismatched Module Id for global merge function");
 STATISTIC(NumMergedFunctions,
           "Number of functions that are actually merged using function hash");
 STATISTIC(NumAnalyzedModues, "Number of modules that are analyzed");
@@ -203,9 +195,9 @@ void GlobalMergeFunc::analyze(Module &M) {
 struct FuncMergeInfo {
   StableFunctionMap::StableFunctionEntry *SF;
   Function *F;
-  std::unique_ptr<IndexInstrMap> IndexInstruction;
+  IndexInstrMap *IndexInstruction;
   FuncMergeInfo(StableFunctionMap::StableFunctionEntry *SF, Function *F,
-                std::unique_ptr<IndexInstrMap> IndexInstruction)
+                IndexInstrMap *IndexInstruction)
       : SF(SF), F(F), IndexInstruction(std::move(IndexInstruction)) {}
 };
 
@@ -420,101 +412,79 @@ static ParamLocsVecTy computeParamInfo(
 bool GlobalMergeFunc::merge(Module &M, const StableFunctionMap *FunctionMap) {
   bool Changed = false;
 
-  // Build a map from stable function name to function.
-  StringMap<Function *> StableNameToFuncMap;
-  for (auto &F : M)
-    StableNameToFuncMap[get_stable_name(F.getName())] = &F;
-  // Track merged functions
-  DenseSet<Function *> MergedFunctions;
-
-  auto ModId = M.getModuleIdentifier();
-  for (auto &[Hash, SFS] : FunctionMap->getFunctionMap()) {
-    // Parameter locations based on the unique hash sequences
-    // across the candidates.
+  // Collect stable functions related to the current module.
+  DenseMap<stable_hash, SmallVector<Function *>> HashToFuncs;
+  DenseMap<Function *, FunctionHashInfo> FuncToFI;
+  auto &Maps = FunctionMap->getFunctionMap();
+  for (auto &F : M) {
+    if (!isEligibleFunction(&F))
+      continue;
+    auto FI = llvm::StructuralHashWithDifferences(F, ignoreOp);
+    if (Maps.contains(FI.FunctionHash)) {
+      HashToFuncs[FI.FunctionHash].push_back(&F);
+      FuncToFI.try_emplace(&F, std::move(FI));
+    }
+  }
+
+  for (auto &[Hash, Funcs] : HashToFuncs) {
     std::optional<ParamLocsVecTy> ParamLocsVec;
-    Function *MergedFunc = nullptr;
-    std::string MergedModId;
     SmallVector<FuncMergeInfo> FuncMergeInfos;
-    for (auto &SF : SFS) {
-      // Get the function from the stable name.
-      auto I = StableNameToFuncMap.find(
-          *FunctionMap->getNameForId(SF->FunctionNameId));
-      if (I == StableNameToFuncMap.end())
-        continue;
-      Function *F = I->second;
-      assert(F);
-      // Skip if the function has been merged before.
-      if (MergedFunctions.count(F))
-        continue;
-      // Consider the function if it is eligible for merging.
-      if (!isEligibleFunction(F))
-        continue;
 
-      auto FI = llvm::StructuralHashWithDifferences(*F, ignoreOp);
-      uint64_t FuncHash = FI.FunctionHash;
-      if (Hash != FuncHash) {
-        ++NumMismatchedFunctionHash;
-        continue;
-      }
+    // Iterate functions with the same hash.
+    for (auto &F : Funcs) {
+      auto &SFS = Maps.at(Hash);
+      auto &FI = FuncToFI.at(F);
 
-      if (SF->InstCount != FI.IndexInstruction->size()) {
-        ++NumMismatchedInstCount;
+      // Check if the function is compatible with any stable function
+      // in terms of the number of instructions and ignored operands.
+      assert(!SFS.empty());
+      auto &RFS = SFS[0];
+      if (RFS->InstCount != FI.IndexInstruction->size())
         continue;
-      }
-      bool HasValidSharedConst = true;
-      for (auto &[Index, Hash] : *SF->IndexOperandHashMap) {
-        auto [InstIndex, OpndIndex] = Index;
-        assert(InstIndex < FI.IndexInstruction->size());
-        auto *Inst = FI.IndexInstruction->lookup(InstIndex);
-        if (!ignoreOp(Inst, OpndIndex)) {
-          HasValidSharedConst = false;
-          break;
-        }
-      }
-      if (!HasValidSharedConst) {
-        ++NumMismatchedConstHash;
-        continue;
-      }
-      if (!checkConstHashCompatible(*SF->IndexOperandHashMap,
-                                    *FI.IndexOperandHashMap)) {
-        ++NumMismatchedConstHash;
-        continue;
-      }
-      if (!ParamLocsVec.has_value()) {
-        ParamLocsVec = computeParamInfo(SFS);
-        LLVM_DEBUG(dbgs() << "[GlobalMergeFunc] Merging hash: " << Hash
-                          << " with Params " << ParamLocsVec->size() << "\n");
-      }
-      if (!checkConstLocationCompatible(*SF, *FI.IndexInstruction,
-                                        *ParamLocsVec)) {
-        ++NumMismatchedConstHash;
+
+      auto hasValidSharedConst =
+          [&](StableFunctionMap::StableFunctionEntry *SF) {
+            for (auto &[Index, Hash] : *SF->IndexOperandHashMap) {
+              auto [InstIndex, OpndIndex] = Index;
+              assert(InstIndex < FI.IndexInstruction->size());
+              auto *Inst = FI.IndexInstruction->lookup(InstIndex);
+              if (!ignoreOp(Inst, OpndIndex))
+                return false;
+            }
+            return true;
+          };
+      if (!hasValidSharedConst(RFS.get()))
         continue;
-      }
 
-      if (MergedFunc) {
-        // Check if the matched functions fall into the same (first) module.
-        // This module check is not strictly necessary as the functions can move
-        // around. We just want to avoid merging functions from different
-        // modules than the first one in the function map, as they may not end
-        // up with being ICFed by the linker.
-        if (MergedModId != *FunctionMap->getNameForId(SF->ModuleNameId)) {
-          ++NumMismatchedModuleId;
+      for (auto &SF : SFS) {
+        assert(SF->InstCount == FI.IndexInstruction->size());
+        assert(hasValidSharedConst(SF.get()));
+        // Check if there is any stable function that is compatiable with the
+        // current one.
+        if (!checkConstHashCompatible(*SF->IndexOperandHashMap,
+                                      *FI.IndexOperandHashMap))
           continue;
+        if (!ParamLocsVec.has_value()) {
+          ParamLocsVec = computeParamInfo(SFS);
+          LLVM_DEBUG(dbgs() << "[GlobalMergeFunc] Merging hash: " << Hash
+                            << " with Params " << ParamLocsVec->size() << "\n");
         }
-      } else {
-        MergedFunc = F;
-        MergedModId = *FunctionMap->getNameForId(SF->ModuleNameId);
-      }
+        if (!checkConstLocationCompatible(*SF, *FI.IndexInstruction,
+                                          *ParamLocsVec))
+          continue;
 
-      FuncMergeInfos.emplace_back(SF.get(), F, std::move(FI.IndexInstruction));
-      MergedFunctions.insert(F);
+        // As long as we found one stable function matching the current one,
+        // we create a candidate for merging and move on to the next function.
+        FuncMergeInfos.emplace_back(SF.get(), F, FI.IndexInstruction.get());
+        break;
+      }
     }
     unsigned FuncMergeInfoSize = FuncMergeInfos.size();
     if (FuncMergeInfoSize == 0)
       continue;
 
     LLVM_DEBUG(dbgs() << "[GlobalMergeFunc] Merging function count "
-                      << FuncMergeInfoSize << " in  " << ModId << "\n");
+                      << FuncMergeInfoSize << " for hash:  " << Hash << "\n");
 
     for (auto &FMI : FuncMergeInfos) {
       Changed = true;

>From dc6f20e083c6e8332dd601c84f97e220514c2c33 Mon Sep 17 00:00:00 2001
From: Kyungwoo Lee <kyulee at meta.com>
Date: Mon, 11 Nov 2024 21:23:58 -0800
Subject: [PATCH 2/3] Address comments from nocchijiang

---
 llvm/lib/CodeGen/GlobalMergeFunctions.cpp | 24 ++++++++++-------------
 1 file changed, 10 insertions(+), 14 deletions(-)

diff --git a/llvm/lib/CodeGen/GlobalMergeFunctions.cpp b/llvm/lib/CodeGen/GlobalMergeFunctions.cpp
index df8dbb8a73b95d..a0b3bdf8efc575 100644
--- a/llvm/lib/CodeGen/GlobalMergeFunctions.cpp
+++ b/llvm/lib/CodeGen/GlobalMergeFunctions.cpp
@@ -413,32 +413,28 @@ bool GlobalMergeFunc::merge(Module &M, const StableFunctionMap *FunctionMap) {
   bool Changed = false;
 
   // Collect stable functions related to the current module.
-  DenseMap<stable_hash, SmallVector<Function *>> HashToFuncs;
-  DenseMap<Function *, FunctionHashInfo> FuncToFI;
+  DenseMap<stable_hash, SmallVector<std::pair<Function *, FunctionHashInfo>>>
+      HashToFuncs;
   auto &Maps = FunctionMap->getFunctionMap();
   for (auto &F : M) {
     if (!isEligibleFunction(&F))
       continue;
     auto FI = llvm::StructuralHashWithDifferences(F, ignoreOp);
-    if (Maps.contains(FI.FunctionHash)) {
-      HashToFuncs[FI.FunctionHash].push_back(&F);
-      FuncToFI.try_emplace(&F, std::move(FI));
-    }
+    if (Maps.contains(FI.FunctionHash))
+      HashToFuncs[FI.FunctionHash].emplace_back(&F, std::move(FI));
   }
 
   for (auto &[Hash, Funcs] : HashToFuncs) {
     std::optional<ParamLocsVecTy> ParamLocsVec;
     SmallVector<FuncMergeInfo> FuncMergeInfos;
+    auto &SFS = Maps.at(Hash);
+    assert(!SFS.empty());
+    auto &RFS = SFS[0];
 
     // Iterate functions with the same hash.
-    for (auto &F : Funcs) {
-      auto &SFS = Maps.at(Hash);
-      auto &FI = FuncToFI.at(F);
-
+    for (auto &[F, FI] : Funcs) {
       // Check if the function is compatible with any stable function
       // in terms of the number of instructions and ignored operands.
-      assert(!SFS.empty());
-      auto &RFS = SFS[0];
       if (RFS->InstCount != FI.IndexInstruction->size())
         continue;
 
@@ -473,8 +469,8 @@ bool GlobalMergeFunc::merge(Module &M, const StableFunctionMap *FunctionMap) {
                                           *ParamLocsVec))
           continue;
 
-        // As long as we found one stable function matching the current one,
-        // we create a candidate for merging and move on to the next function.
+        // If a stable function matching the current one is found,
+        // create a candidate for merging and proceed to the next function.
         FuncMergeInfos.emplace_back(SF.get(), F, FI.IndexInstruction.get());
         break;
       }

>From 5edbc305d03f0d74afe4a22a1bf5154b5931870f Mon Sep 17 00:00:00 2001
From: Kyungwoo Lee <kyulee at meta.com>
Date: Tue, 12 Nov 2024 06:25:00 -0800
Subject: [PATCH 3/3] Address 2nd comments from nocchijiang

---
 llvm/lib/CodeGen/GlobalMergeFunctions.cpp | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/CodeGen/GlobalMergeFunctions.cpp b/llvm/lib/CodeGen/GlobalMergeFunctions.cpp
index a0b3bdf8efc575..502e1a1f274354 100644
--- a/llvm/lib/CodeGen/GlobalMergeFunctions.cpp
+++ b/llvm/lib/CodeGen/GlobalMergeFunctions.cpp
@@ -102,7 +102,7 @@ bool isEligibleFunction(Function *F) {
   return true;
 }
 
-static bool isEligibleInstrunctionForConstantSharing(const Instruction *I) {
+static bool isEligibleInstructionForConstantSharing(const Instruction *I) {
   switch (I->getOpcode()) {
   case Instruction::Load:
   case Instruction::Store:
@@ -114,10 +114,15 @@ static bool isEligibleInstrunctionForConstantSharing(const Instruction *I) {
   }
 }
 
+// This function takes an instruction, \p I, and an operand index, \p OpIdx.
+// It returns true if the operand should be ignored in the hash computation.
+// If \p OpIdx is out of range based on the other instruction context, it cannot
+// be ignored.
 static bool ignoreOp(const Instruction *I, unsigned OpIdx) {
-  assert(OpIdx < I->getNumOperands() && "Invalid operand index");
+  if (OpIdx >= I->getNumOperands())
+    return false;
 
-  if (!isEligibleInstrunctionForConstantSharing(I))
+  if (!isEligibleInstructionForConstantSharing(I))
     return false;
 
   if (!isa<Constant>(I->getOperand(OpIdx)))



More information about the llvm-commits mailing list