[llvm] 386aa2a - [FuncSpec] Increase the maximum number of times the specializer can run.
Alexandros Lamprineas via llvm-commits
llvm-commits at lists.llvm.org
Tue Aug 22 01:40:56 PDT 2023
Author: Alexandros Lamprineas
Date: 2023-08-22T09:36:12+01:00
New Revision: 386aa2ab9d19c783deb9fbfb3b9be14754ba8789
URL: https://github.com/llvm/llvm-project/commit/386aa2ab9d19c783deb9fbfb3b9be14754ba8789
DIFF: https://github.com/llvm/llvm-project/commit/386aa2ab9d19c783deb9fbfb3b9be14754ba8789.diff
LOG: [FuncSpec] Increase the maximum number of times the specializer can run.
* Changes the default value of FuncSpecMaxIters from 1 to 10.
  This allows specialization of recursive functions.
* Adds an option to control the maximum codesize growth per function.
* Measured ~45% performance uplift for SPEC2017:548.exchange2_r on
  AWS Graviton3.
Differential Revision: https://reviews.llvm.org/D145819
Added:
llvm/test/Transforms/FunctionSpecialization/recursive-penalty.ll
Modified:
llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
llvm/lib/Transforms/IPO/SCCP.cpp
Removed:
################################################################################
diff --git a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
index f87dccba0eda0a..04d0e21e4f8f00 100644
--- a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
+++ b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
@@ -225,6 +225,7 @@ class FunctionSpecializer {
SmallPtrSet<Function *, 32> Specializations;
SmallPtrSet<Function *, 32> FullySpecialized;
DenseMap<Function *, CodeMetrics> FunctionMetrics;
+ DenseMap<Function *, unsigned> FunctionGrowth;
public:
FunctionSpecializer(
diff --git a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
index 42ae6f5cd693b7..1206d0a574fe9f 100644
--- a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
@@ -93,6 +93,10 @@ static cl::opt<unsigned> MinFunctionSize(
"Don't specialize functions that have less than this number of "
"instructions"));
+static cl::opt<unsigned> MaxCodeSizeGrowth(
+ "funcspec-max-codesize-growth", cl::init(3), cl::Hidden, cl::desc(
+ "Maximum codesize growth allowed per function"));
+
static cl::opt<unsigned> MinCodeSizeSavings(
"funcspec-min-codesize-savings", cl::init(20), cl::Hidden, cl::desc(
"Reject specializations whose codesize savings are less than this"
@@ -841,7 +845,10 @@ bool FunctionSpecializer::findSpecializations(Function *F, unsigned FuncSize,
<< B.CodeSize << ", Latency = " << B.Latency
<< ", Inlining = " << Score << "}\n");
- auto IsProfitable = [&FuncSize](Bonus &B, unsigned Score) -> bool {
+ FunctionGrowth[F] += FuncSize - B.CodeSize;
+
+ auto IsProfitable = [](Bonus &B, unsigned Score, unsigned FuncSize,
+ unsigned FuncGrowth) -> bool {
// No check required.
if (ForceSpecialization)
return true;
@@ -854,11 +861,14 @@ bool FunctionSpecializer::findSpecializations(Function *F, unsigned FuncSize,
// Minimum latency savings.
if (B.Latency < MinLatencySavings * FuncSize / 100)
return false;
+ // Maximum codesize growth.
+ if (FuncGrowth / FuncSize > MaxCodeSizeGrowth)
+ return false;
return true;
};
// Discard unprofitable specialisations.
- if (!IsProfitable(B, Score))
+ if (!IsProfitable(B, Score, FuncSize, FunctionGrowth[F]))
continue;
// Create a new specialisation entry.
diff --git a/llvm/lib/Transforms/IPO/SCCP.cpp b/llvm/lib/Transforms/IPO/SCCP.cpp
index e2e6364df9065a..84f5bbf7039416 100644
--- a/llvm/lib/Transforms/IPO/SCCP.cpp
+++ b/llvm/lib/Transforms/IPO/SCCP.cpp
@@ -43,7 +43,7 @@ STATISTIC(NumInstReplaced,
"Number of instructions replaced with (simpler) instruction");
static cl::opt<unsigned> FuncSpecMaxIters(
- "funcspec-max-iters", cl::init(1), cl::Hidden, cl::desc(
+ "funcspec-max-iters", cl::init(10), cl::Hidden, cl::desc(
"The maximum number of iterations function specialization is run"));
static void findReturnsToZap(Function &F,
diff --git a/llvm/test/Transforms/FunctionSpecialization/recursive-penalty.ll b/llvm/test/Transforms/FunctionSpecialization/recursive-penalty.ll
new file mode 100644
index 00000000000000..fc17387dec94dd
--- /dev/null
+++ b/llvm/test/Transforms/FunctionSpecialization/recursive-penalty.ll
@@ -0,0 +1,64 @@
+; REQUIRES: asserts
+; RUN: opt -passes="ipsccp<func-spec>,inline,instcombine,simplifycfg" -S \
+; RUN: -funcspec-min-function-size=23 -funcspec-max-iters=100 \
+; RUN: -debug-only=function-specialization < %s 2>&1 | FileCheck %s
+
+; Make sure the number of specializations created does not grow
+; linearly with the number of iterations (funcspec-max-iters).
+
+; CHECK: FnSpecialization: Created 4 specializations in module
+
+@Global = internal constant i32 1, align 4
+
+define internal void @recursiveFunc(ptr readonly %arg) {
+ %temp = alloca i32, align 4
+ %arg.load = load i32, ptr %arg, align 4
+ %arg.cmp = icmp slt i32 %arg.load, 10000
+ br i1 %arg.cmp, label %loop1, label %ret.block
+
+loop1:
+ br label %loop2
+
+loop2:
+ br label %loop3
+
+loop3:
+ br label %loop4
+
+loop4:
+ br label %block6
+
+block6:
+ call void @print_val(i32 %arg.load)
+ %arg.add = add nsw i32 %arg.load, 1
+ store i32 %arg.add, ptr %temp, align 4
+ call void @recursiveFunc(ptr %temp)
+ br label %loop4.end
+
+loop4.end:
+ %exit_cond1 = call i1 @exit_cond()
+ br i1 %exit_cond1, label %loop4, label %loop3.end
+
+loop3.end:
+ %exit_cond2 = call i1 @exit_cond()
+ br i1 %exit_cond2, label %loop3, label %loop2.end
+
+loop2.end:
+ %exit_cond3 = call i1 @exit_cond()
+ br i1 %exit_cond3, label %loop2, label %loop1.end
+
+loop1.end:
+ %exit_cond4 = call i1 @exit_cond()
+ br i1 %exit_cond4, label %loop1, label %ret.block
+
+ret.block:
+ ret void
+}
+
+define i32 @main() {
+ call void @recursiveFunc(ptr @Global)
+ ret i32 0
+}
+
+declare dso_local void @print_val(i32)
+declare dso_local i1 @exit_cond()
More information about the llvm-commits
mailing list