[llvm] fdf1f69 - [CGData][GMF] Skip No Params (#116548)

via llvm-commits llvm-commits at lists.llvm.org
Mon Nov 25 10:57:45 PST 2024


Author: Kyungwoo Lee
Date: 2024-11-25T10:57:41-08:00
New Revision: fdf1f69c57ac3667d27c35e097040284edb1f574

URL: https://github.com/llvm/llvm-project/commit/fdf1f69c57ac3667d27c35e097040284edb1f574
DIFF: https://github.com/llvm/llvm-project/commit/fdf1f69c57ac3667d27c35e097040284edb1f574.diff

LOG: [CGData][GMF] Skip No Params (#116548)

This update follows up on change #112671 and is mostly a NFC, with the following exceptions:
  - Introduced `-global-merging-skip-no-params` to bypass merging when no parameters are required.
  - Parameter count is now calculated based on the unique hash count.
  - Added `-global-merging-inst-overhead` to adjust the instruction overhead, reflecting the machine instruction size.
  - Costs and benefits are now computed using the double data type. Since the finalization process occurs offline, this should not significantly impact build time.
  - Moved a sorting operation outside of the loop.

This is a patch for
https://discourse.llvm.org/t/rfc-global-function-merging/82608.

Added: 
    llvm/test/CodeGen/Generic/cgdata-merge-local.ll
    llvm/test/CodeGen/Generic/cgdata-merge-no-params.ll

Modified: 
    llvm/lib/CGData/StableFunctionMap.cpp
    llvm/lib/CodeGen/GlobalMergeFunctions.cpp

Removed: 
    llvm/test/ThinLTO/AArch64/cgdata-merge-local.ll


################################################################################
diff  --git a/llvm/lib/CGData/StableFunctionMap.cpp b/llvm/lib/CGData/StableFunctionMap.cpp
index fe7be0c0b6e7b8..4afe77d78a4fe5 100644
--- a/llvm/lib/CGData/StableFunctionMap.cpp
+++ b/llvm/lib/CGData/StableFunctionMap.cpp
@@ -14,6 +14,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/CGData/StableFunctionMap.h"
+#include "llvm/ADT/SmallSet.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 
@@ -35,21 +36,30 @@ static cl::opt<unsigned> GlobalMergingMaxParams(
     cl::desc(
         "The maximum number of parameters allowed when merging functions."),
     cl::init(std::numeric_limits<unsigned>::max()), cl::Hidden);
-static cl::opt<unsigned> GlobalMergingParamOverhead(
+static cl::opt<bool> GlobalMergingSkipNoParams(
+    "global-merging-skip-no-params",
+    cl::desc("Skip merging functions with no parameters."), cl::init(true),
+    cl::Hidden);
+static cl::opt<double> GlobalMergingInstOverhead(
+    "global-merging-inst-overhead",
+    cl::desc("The overhead cost associated with each instruction when lowering "
+             "to machine instruction."),
+    cl::init(1.2), cl::Hidden);
+static cl::opt<double> GlobalMergingParamOverhead(
     "global-merging-param-overhead",
     cl::desc("The overhead cost associated with each parameter when merging "
              "functions."),
-    cl::init(2), cl::Hidden);
-static cl::opt<unsigned>
+    cl::init(2.0), cl::Hidden);
+static cl::opt<double>
     GlobalMergingCallOverhead("global-merging-call-overhead",
                               cl::desc("The overhead cost associated with each "
                                        "function call when merging functions."),
-                              cl::init(1), cl::Hidden);
-static cl::opt<unsigned> GlobalMergingExtraThreshold(
+                              cl::init(1.0), cl::Hidden);
+static cl::opt<double> GlobalMergingExtraThreshold(
     "global-merging-extra-threshold",
     cl::desc("An additional cost threshold that must be exceeded for merging "
              "to be considered beneficial."),
-    cl::init(0), cl::Hidden);
+    cl::init(0.0), cl::Hidden);
 
 unsigned StableFunctionMap::getIdOrCreateForName(StringRef Name) {
   auto It = NameToId.find(Name);
@@ -160,21 +170,32 @@ static bool isProfitable(
   if (InstCount < GlobalMergingMinInstrs)
     return false;
 
-  unsigned ParamCount = SFS[0]->IndexOperandHashMap->size();
-  if (ParamCount > GlobalMergingMaxParams)
-    return false;
-
-  unsigned Benefit = InstCount * (StableFunctionCount - 1);
-  unsigned Cost =
-      (GlobalMergingParamOverhead * ParamCount + GlobalMergingCallOverhead) *
-          StableFunctionCount +
-      GlobalMergingExtraThreshold;
+  double Cost = 0.0;
+  SmallSet<stable_hash, 8> UniqueHashVals;
+  for (auto &SF : SFS) {
+    UniqueHashVals.clear();
+    for (auto &[IndexPair, Hash] : *SF->IndexOperandHashMap)
+      UniqueHashVals.insert(Hash);
+    unsigned ParamCount = UniqueHashVals.size();
+    if (ParamCount > GlobalMergingMaxParams)
+      return false;
+    // Theoretically, if ParamCount is 0, it results in identical code folding
+    // (ICF), which we can skip merging here since the linker already handles
+    // ICF. This pass would otherwise introduce unnecessary thunks that are
+    // merely direct jumps. However, enabling this could be beneficial depending
+    // on downstream passes, so we provide an option for it.
+    if (GlobalMergingSkipNoParams && ParamCount == 0)
+      return false;
+    Cost += ParamCount * GlobalMergingParamOverhead + GlobalMergingCallOverhead;
+  }
+  Cost += GlobalMergingExtraThreshold;
 
+  double Benefit =
+      InstCount * (StableFunctionCount - 1) * GlobalMergingInstOverhead;
   bool Result = Benefit > Cost;
   LLVM_DEBUG(dbgs() << "isProfitable: Hash = " << SFS[0]->Hash << ", "
                     << "StableFunctionCount = " << StableFunctionCount
                     << ", InstCount = " << InstCount
-                    << ", ParamCount = " << ParamCount
                     << ", Benefit = " << Benefit << ", Cost = " << Cost
                     << ", Result = " << (Result ? "true" : "false") << "\n");
   return Result;

diff  --git a/llvm/lib/CodeGen/GlobalMergeFunctions.cpp b/llvm/lib/CodeGen/GlobalMergeFunctions.cpp
index c8f1b98c9a18e9..470582885fab0c 100644
--- a/llvm/lib/CodeGen/GlobalMergeFunctions.cpp
+++ b/llvm/lib/CodeGen/GlobalMergeFunctions.cpp
@@ -405,12 +405,13 @@ static ParamLocsVecTy computeParamInfo(
   }
 
   ParamLocsVecTy ParamLocsVec;
-  for (auto &[HashSeq, Locs] : HashSeqToLocs) {
+  for (auto &[HashSeq, Locs] : HashSeqToLocs)
     ParamLocsVec.push_back(std::move(Locs));
-    llvm::sort(ParamLocsVec, [&](const ParamLocs &L, const ParamLocs &R) {
-      return L[0] < R[0];
-    });
-  }
+
+  llvm::sort(ParamLocsVec, [&](const ParamLocs &L, const ParamLocs &R) {
+    return L[0] < R[0];
+  });
+
   return ParamLocsVec;
 }
 

diff  --git a/llvm/test/ThinLTO/AArch64/cgdata-merge-local.ll b/llvm/test/CodeGen/Generic/cgdata-merge-local.ll
similarity index 98%
rename from llvm/test/ThinLTO/AArch64/cgdata-merge-local.ll
rename to llvm/test/CodeGen/Generic/cgdata-merge-local.ll
index 660ffe6109948f..3517fbd3ae8a26 100644
--- a/llvm/test/ThinLTO/AArch64/cgdata-merge-local.ll
+++ b/llvm/test/CodeGen/Generic/cgdata-merge-local.ll
@@ -2,7 +2,7 @@
 ; while parameterizing a 
diff erence in their global variables, g1 and g2.
 ; To achieve this, we create two instances of the global merging function, f1.Tgm and f2.Tgm,
 ; which are tail-called from thunks f1 and f2 respectively.
-; These identical functions, f1.Tgm and f2.Tgm, will be folded by the linker via Identical Code Folding (IFC).
+; These identical functions, f1.Tgm and f2.Tgm, will be folded by the linker via Identical Code Folding (ICF).
 
 ; RUN: opt -S --passes=global-merge-func %s | FileCheck %s
 

diff  --git a/llvm/test/CodeGen/Generic/cgdata-merge-no-params.ll b/llvm/test/CodeGen/Generic/cgdata-merge-no-params.ll
new file mode 100644
index 00000000000000..30d6a45c03a0c3
--- /dev/null
+++ b/llvm/test/CodeGen/Generic/cgdata-merge-no-params.ll
@@ -0,0 +1,42 @@
+; This test verifies whether two identical functions, f1 and f2, can be merged
+; locally using the global merge function.
+; The functions, f1.Tgm and f2.Tgm, will be folded by the linker through
+; Identical Code Folding (ICF).
+; While identical functions can already be folded by the linker, creating this
+; canonical form can be beneficial in downstream passes. This merging process
+; can be controlled by the -global-merging-skip-no-params option.
+
+; RUN: llc -enable-global-merge-func=true -global-merging-skip-no-params=false < %s | FileCheck %s --check-prefix=MERGE
+; RUN: llc -enable-global-merge-func=true -global-merging-skip-no-params=true < %s | FileCheck %s --implicit-check-not=".Tgm"
+
+; MERGE: _f1.Tgm
+; MERGE: _f2.Tgm
+
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+target triple = "arm64-unknown-ios12.0.0"
+
+ at g = external local_unnamed_addr global [0 x i32], align 4
+ at g1 = external global i32, align 4
+ at g2 = external global i32, align 4
+
+define i32 @f1(i32 %a) {
+entry:
+  %idxprom = sext i32 %a to i64
+  %arrayidx = getelementptr inbounds [0 x i32], [0 x i32]* @g, i64 0, i64 %idxprom
+  %0 = load i32, i32* %arrayidx, align 4
+  %1 = load volatile i32, i32* @g1, align 4
+  %mul = mul nsw i32 %1, %0
+  %add = add nsw i32 %mul, 1
+  ret i32 %add
+}
+
+define i32 @f2(i32 %a) {
+entry:
+  %idxprom = sext i32 %a to i64
+  %arrayidx = getelementptr inbounds [0 x i32], [0 x i32]* @g, i64 0, i64 %idxprom
+  %0 = load i32, i32* %arrayidx, align 4
+  %1 = load volatile i32, i32* @g1, align 4
+  %mul = mul nsw i32 %1, %0
+  %add = add nsw i32 %mul, 1
+  ret i32 %add
+}


        


More information about the llvm-commits mailing list