[llvm-branch-commits] [llvm] [CGData] Refactor Global Merge Functions (PR #115750)

Mon Nov 11 21:31:19 PST 2024

================
@@ -420,101 +412,79 @@ static ParamLocsVecTy computeParamInfo(
 bool GlobalMergeFunc::merge(Module &M, const StableFunctionMap *FunctionMap) {
   bool Changed = false;
 
-  // Build a map from stable function name to function.
-  StringMap<Function *> StableNameToFuncMap;
-  for (auto &F : M)
-    StableNameToFuncMap[get_stable_name(F.getName())] = &F;
-  // Track merged functions
-  DenseSet<Function *> MergedFunctions;
-
-  auto ModId = M.getModuleIdentifier();
-  for (auto &[Hash, SFS] : FunctionMap->getFunctionMap()) {
-    // Parameter locations based on the unique hash sequences
-    // across the candidates.
+  // Collect stable functions related to the current module.
+  DenseMap<stable_hash, SmallVector<Function *>> HashToFuncs;
+  DenseMap<Function *, FunctionHashInfo> FuncToFI;
+  auto &Maps = FunctionMap->getFunctionMap();
+  for (auto &F : M) {
+    if (!isEligibleFunction(&F))
+      continue;
+    auto FI = llvm::StructuralHashWithDifferences(F, ignoreOp);
+    if (Maps.contains(FI.FunctionHash)) {
+      HashToFuncs[FI.FunctionHash].push_back(&F);
+      FuncToFI.try_emplace(&F, std::move(FI));
+    }
+  }
+
+  for (auto &[Hash, Funcs] : HashToFuncs) {
     std::optional<ParamLocsVecTy> ParamLocsVec;
-    Function *MergedFunc = nullptr;
-    std::string MergedModId;
     SmallVector<FuncMergeInfo> FuncMergeInfos;
-    for (auto &SF : SFS) {
-      // Get the function from the stable name.
-      auto I = StableNameToFuncMap.find(
-          *FunctionMap->getNameForId(SF->FunctionNameId));
-      if (I == StableNameToFuncMap.end())
-        continue;
-      Function *F = I->second;
-      assert(F);
-      // Skip if the function has been merged before.
-      if (MergedFunctions.count(F))
-        continue;
-      // Consider the function if it is eligible for merging.
-      if (!isEligibleFunction(F))
-        continue;
 
-      auto FI = llvm::StructuralHashWithDifferences(*F, ignoreOp);
-      uint64_t FuncHash = FI.FunctionHash;
-      if (Hash != FuncHash) {
-        ++NumMismatchedFunctionHash;
-        continue;
-      }
+    // Iterate functions with the same hash.
+    for (auto &F : Funcs) {
+      auto &SFS = Maps.at(Hash);
+      auto &FI = FuncToFI.at(F);
 
-      if (SF->InstCount != FI.IndexInstruction->size()) {
-        ++NumMismatchedInstCount;
+      // Check if the function is compatible with any stable function
+      // in terms of the number of instructions and ignored operands.
+      assert(!SFS.empty());
+      auto &RFS = SFS[0];
----------------
kyulee-com wrote:

If the codegen data is not stale —meaning there has been no source change in the first or second pass— the size regression is less of a concern. My main worry was about the stability of optimization when the codegen data becomes outdated.

https://github.com/llvm/llvm-project/pull/115750