[llvm] 386aa2a - [FuncSpec] Increase the maximum number of times the specializer can run.

Tue Aug 22 01:40:56 PDT 2023

Author: Alexandros Lamprineas
Date: 2023-08-22T09:36:12+01:00
New Revision: 386aa2ab9d19c783deb9fbfb3b9be14754ba8789

URL: https://github.com/llvm/llvm-project/commit/386aa2ab9d19c783deb9fbfb3b9be14754ba8789
DIFF: https://github.com/llvm/llvm-project/commit/386aa2ab9d19c783deb9fbfb3b9be14754ba8789.diff

LOG: [FuncSpec] Increase the maximum number of times the specializer can run.

* Changes the default value of FuncSpecMaxIters from 1 to 10.
  This allows specialization of recursive functions.
* Adds an option to control the maximum codesize growth per function.
* Measured ~45% performance uplift for SPEC2017:548.exchange2_r on
  AWS Graviton3.

Differential Revision: https://reviews.llvm.org/D145819

Added: 
    llvm/test/Transforms/FunctionSpecialization/recursive-penalty.ll

Modified: 
    llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
    llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
    llvm/lib/Transforms/IPO/SCCP.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
index f87dccba0eda0a..04d0e21e4f8f00 100644

--- a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
+++ b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
@@ -225,6 +225,7 @@ class FunctionSpecializer {
   SmallPtrSet<Function *, 32> Specializations;
   SmallPtrSet<Function *, 32> FullySpecialized;
   DenseMap<Function *, CodeMetrics> FunctionMetrics;
+  DenseMap<Function *, unsigned> FunctionGrowth;
 
 public:
   FunctionSpecializer(

diff  --git a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
index 42ae6f5cd693b7..1206d0a574fe9f 100644
--- a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
@@ -93,6 +93,10 @@ static cl::opt<unsigned> MinFunctionSize(
     "Don't specialize functions that have less than this number of "
     "instructions"));
 
+static cl::opt<unsigned> MaxCodeSizeGrowth(
+    "funcspec-max-codesize-growth", cl::init(3), cl::Hidden, cl::desc(
+    "Maximum codesize growth allowed per function"));
+
 static cl::opt<unsigned> MinCodeSizeSavings(
     "funcspec-min-codesize-savings", cl::init(20), cl::Hidden, cl::desc(
     "Reject specializations whose codesize savings are less than this"
@@ -841,7 +845,10 @@ bool FunctionSpecializer::findSpecializations(Function *F, unsigned FuncSize,
                         << B.CodeSize << ", Latency = " << B.Latency
                         << ", Inlining = " << Score << "}\n");
 
-      auto IsProfitable = [&FuncSize](Bonus &B, unsigned Score) -> bool {
+      FunctionGrowth[F] += FuncSize - B.CodeSize;
+
+      auto IsProfitable = [](Bonus &B, unsigned Score, unsigned FuncSize,
+                             unsigned FuncGrowth) -> bool {
         // No check required.
         if (ForceSpecialization)
           return true;
@@ -854,11 +861,14 @@ bool FunctionSpecializer::findSpecializations(Function *F, unsigned FuncSize,
         // Minimum latency savings.
         if (B.Latency < MinLatencySavings * FuncSize / 100)
           return false;
+        // Maximum codesize growth.
+        if (FuncGrowth / FuncSize > MaxCodeSizeGrowth)
+          return false;
         return true;
       };
 
       // Discard unprofitable specialisations.
-      if (!IsProfitable(B, Score))
+      if (!IsProfitable(B, Score, FuncSize, FunctionGrowth[F]))
         continue;
 
       // Create a new specialisation entry.

diff  --git a/llvm/lib/Transforms/IPO/SCCP.cpp b/llvm/lib/Transforms/IPO/SCCP.cpp
index e2e6364df9065a..84f5bbf7039416 100644
--- a/llvm/lib/Transforms/IPO/SCCP.cpp
+++ b/llvm/lib/Transforms/IPO/SCCP.cpp
@@ -43,7 +43,7 @@ STATISTIC(NumInstReplaced,
           "Number of instructions replaced with (simpler) instruction");
 
 static cl::opt<unsigned> FuncSpecMaxIters(
-    "funcspec-max-iters", cl::init(1), cl::Hidden, cl::desc(
+    "funcspec-max-iters", cl::init(10), cl::Hidden, cl::desc(
     "The maximum number of iterations function specialization is run"));
 
 static void findReturnsToZap(Function &F,

diff  --git a/llvm/test/Transforms/FunctionSpecialization/recursive-penalty.ll b/llvm/test/Transforms/FunctionSpecialization/recursive-penalty.ll
new file mode 100644
index 00000000000000..fc17387dec94dd
--- /dev/null
+++ b/llvm/test/Transforms/FunctionSpecialization/recursive-penalty.ll
@@ -0,0 +1,64 @@
+; REQUIRES: asserts
+; RUN: opt -passes="ipsccp<func-spec>,inline,instcombine,simplifycfg" -S \
+; RUN:     -funcspec-min-function-size=23 -funcspec-max-iters=100 \
+; RUN:     -debug-only=function-specialization < %s 2>&1 | FileCheck %s
+
+; Make sure the number of specializations created are not
+; linear to the number of iterations (funcspec-max-iters).
+
+; CHECK: FnSpecialization: Created 4 specializations in module
+
+@Global = internal constant i32 1, align 4
+
+define internal void @recursiveFunc(ptr readonly %arg) {
+  %temp = alloca i32, align 4
+  %arg.load = load i32, ptr %arg, align 4
+  %arg.cmp = icmp slt i32 %arg.load, 10000
+  br i1 %arg.cmp, label %loop1, label %ret.block
+
+loop1:
+  br label %loop2
+
+loop2:
+  br label %loop3
+
+loop3:
+  br label %loop4
+
+loop4:
+  br label %block6
+
+block6:
+  call void @print_val(i32 %arg.load)
+  %arg.add = add nsw i32 %arg.load, 1
+  store i32 %arg.add, ptr %temp, align 4
+  call void @recursiveFunc(ptr %temp)
+  br label %loop4.end
+
+loop4.end:
+  %exit_cond1 = call i1 @exit_cond()
+  br i1 %exit_cond1, label %loop4, label %loop3.end
+
+loop3.end:
+  %exit_cond2 = call i1 @exit_cond()
+  br i1 %exit_cond2, label %loop3, label %loop2.end
+
+loop2.end:
+  %exit_cond3 = call i1 @exit_cond()
+  br i1 %exit_cond3, label %loop2, label %loop1.end
+
+loop1.end:
+  %exit_cond4 = call i1 @exit_cond()
+  br i1 %exit_cond4, label %loop1, label %ret.block
+
+ret.block:
+  ret void
+}
+
+define i32 @main() {
+  call void @recursiveFunc(ptr @Global)
+  ret i32 0
+}
+
+declare dso_local void @print_val(i32)
+declare dso_local i1 @exit_cond()