[llvm] [FuncSpec] Improve accounting of specialization codesize growth (PR #113448)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Oct 23 05:01:37 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-function-specialization
Author: Hari Limaye (hazzlim)
<details>
<summary>Changes</summary>
Only accumulate the codesize increase for specializations that are
actually created, rather than for every candidate specialization that
is analysed.
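This fixes a subtle bug where prior analysis of candidate
specializations that were deemed unprofitable could prevent subsequent
profitable candidates from being recognised: the codesize of every
analysed candidate was added to the function's accumulated growth, so
rejected candidates could push it past the maximum codesize growth
threshold.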
---
Full diff: https://github.com/llvm/llvm-project/pull/113448.diff
3 Files Affected:
- (modified) llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h (+7-4)
- (modified) llvm/lib/Transforms/IPO/FunctionSpecialization.cpp (+25-19)
- (added) llvm/test/Transforms/FunctionSpecialization/function-specialization-maxgrowth.ll (+97)
``````````diff
diff --git a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
index 5920dde9d77dfd..4d0be553aa6ede 100644
--- a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
+++ b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
@@ -131,13 +131,16 @@ struct Spec {
// Profitability of the specialization.
unsigned Score;
+ // Cost of the specialization, in terms of codesize.
+ unsigned CodeSizeCost;
+
// List of call sites, matching this specialization.
SmallVector<CallBase *> CallSites;
- Spec(Function *F, const SpecSig &S, unsigned Score)
- : F(F), Sig(S), Score(Score) {}
- Spec(Function *F, const SpecSig &&S, unsigned Score)
- : F(F), Sig(S), Score(Score) {}
+ Spec(Function *F, const SpecSig &S, unsigned Score, unsigned CodeSizeCost)
+ : F(F), Sig(S), Score(Score), CodeSizeCost(CodeSizeCost) {}
+ Spec(Function *F, const SpecSig &&S, unsigned Score, unsigned CodeSizeCost)
+ : F(F), Sig(S), Score(Score), CodeSizeCost(CodeSizeCost) {}
};
class InstCostVisitor : public InstVisitor<InstCostVisitor, Constant *> {
diff --git a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
index 20249a20a37e41..35865d7213acf4 100644
--- a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
@@ -646,6 +646,18 @@ FunctionSpecializer::~FunctionSpecializer() {
cleanUpSSA();
}
+/// Get the unsigned Value of given Cost object. Assumes the Cost is always
+/// non-negative, which is true for both TCK_CodeSize and TCK_Latency, and
+/// always Valid.
+static unsigned getCostValue(const Cost &C) {
+ int64_t Value = *C.getValue();
+
+ assert(Value >= 0 && "CodeSize and Latency cannot be negative");
+ // It is safe to down cast since we know the arguments cannot be negative and
+ // Cost is of type int64_t.
+ return static_cast<unsigned>(Value);
+}
+
/// Attempt to specialize functions in the module to enable constant
/// propagation across function boundaries.
///
@@ -759,6 +771,14 @@ bool FunctionSpecializer::run() {
SmallVector<Function *> Clones;
for (unsigned I = 0; I < NSpecs; ++I) {
Spec &S = AllSpecs[BestSpecs[I]];
+
+ // Check that creating this specialization doesn't exceed the maximum
+ // codesize growth.
+ unsigned FuncSize = getCostValue(FunctionMetrics[S.F].NumInsts);
+ if ((FunctionGrowth[S.F] + S.CodeSizeCost) / FuncSize > MaxCodeSizeGrowth)
+ continue;
+ FunctionGrowth[S.F] += S.CodeSizeCost;
+
S.Clone = createSpecialization(S.F, S.Sig);
// Update the known call sites to call the clone.
@@ -837,18 +857,6 @@ static Function *cloneCandidateFunction(Function *F, unsigned NSpecs) {
return Clone;
}
-/// Get the unsigned Value of given Cost object. Assumes the Cost is always
-/// non-negative, which is true for both TCK_CodeSize and TCK_Latency, and
-/// always Valid.
-static unsigned getCostValue(const Cost &C) {
- int64_t Value = *C.getValue();
-
- assert(Value >= 0 && "CodeSize and Latency cannot be negative");
- // It is safe to down cast since we know the arguments cannot be negative and
- // Cost is of type int64_t.
- return static_cast<unsigned>(Value);
-}
-
bool FunctionSpecializer::findSpecializations(Function *F, unsigned FuncSize,
SmallVectorImpl<Spec> &AllSpecs,
SpecMap &SM) {
@@ -924,16 +932,14 @@ bool FunctionSpecializer::findSpecializations(Function *F, unsigned FuncSize,
}
CodeSize += Visitor.getCodeSizeSavingsFromPendingPHIs();
+ unsigned CodeSizeSavings = getCostValue(CodeSize);
+ unsigned CodeSizeCost = FuncSize - CodeSizeSavings;
+
auto IsProfitable = [&]() -> bool {
// No check required.
if (ForceSpecialization)
return true;
- unsigned CodeSizeSavings = getCostValue(CodeSize);
- // TODO: We should only accumulate codesize increase of specializations
- // that are actually created.
- FunctionGrowth[F] += FuncSize - CodeSizeSavings;
-
LLVM_DEBUG(
dbgs() << "FnSpecialization: Specialization bonus {Inlining = "
<< Score << " (" << (Score * 100 / FuncSize) << "%)}\n");
@@ -964,7 +970,7 @@ bool FunctionSpecializer::findSpecializations(Function *F, unsigned FuncSize,
if (LatencySavings < MinLatencySavings * FuncSize / 100)
return false;
// Maximum codesize growth.
- if (FunctionGrowth[F] / FuncSize > MaxCodeSizeGrowth)
+ if ((FunctionGrowth[F] + CodeSizeCost) / FuncSize > MaxCodeSizeGrowth)
return false;
Score += std::max(CodeSizeSavings, LatencySavings);
@@ -976,7 +982,7 @@ bool FunctionSpecializer::findSpecializations(Function *F, unsigned FuncSize,
continue;
// Create a new specialisation entry.
- auto &Spec = AllSpecs.emplace_back(F, S, Score);
+ auto &Spec = AllSpecs.emplace_back(F, S, Score, CodeSizeCost);
if (CS.getFunction() != F)
Spec.CallSites.push_back(&CS);
const unsigned Index = AllSpecs.size() - 1;
diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization-maxgrowth.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization-maxgrowth.ll
new file mode 100644
index 00000000000000..b4aea30f6b34d3
--- /dev/null
+++ b/llvm/test/Transforms/FunctionSpecialization/function-specialization-maxgrowth.ll
@@ -0,0 +1,97 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs --version 5
+; RUN: opt -passes="ipsccp<func-spec>" -funcspec-min-function-size=1 \
+; RUN: -funcspec-for-literal-constant=true \
+; RUN: -funcspec-min-codesize-savings=50 \
+; RUN: -funcspec-min-latency-savings=50 \
+; RUN: -funcspec-max-codesize-growth=1 \
+; RUN: -S < %s | FileCheck %s
+
+; Verify that we are able to specialize a function successfully after analysis
+; of other specializations that are found to not be profitable.
+define void @test_specialize_after_failed_analysis(i32 %n) {
+entry:
+ %notspec0 = call i32 @add0(i32 0, i32 %n)
+ %notspec1 = call i32 @add0(i32 1, i32 %n)
+ %spec = call i32 @add0(i32 1, i32 1)
+ ret void
+}
+
+define i32 @add0(i32 %x, i32 %y) {
+entry:
+ %res = add i32 %x, %y
+ ret i32 %res
+}
+
+; Verify that we do not specialize once maximum codesize growth has been
+; exceeded.
+define void @test_max_codesize_growth_exceeded(i32 %n) {
+entry:
+ %spec0 = call i32 @add1(i32 0, i32 0)
+ %spec1 = call i32 @add1(i32 1, i32 1)
+ %spec2 = call i32 @add1(i32 2, i32 2)
+ %notspec = call i32 @add1(i32 3, i32 3)
+ ret void
+}
+
+define i32 @add1(i32 %x, i32 %y) {
+entry:
+ %res = add i32 %x, %y
+ ret i32 %res
+}
+
+; CHECK-LABEL: define void @test_specialize_after_failed_analysis(
+; CHECK-SAME: i32 [[N:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[NOTSPEC0:%.*]] = call i32 @add0(i32 0, i32 [[N]])
+; CHECK-NEXT: [[NOTSPEC1:%.*]] = call i32 @add0(i32 1, i32 [[N]])
+; CHECK-NEXT: [[SPEC:%.*]] = call i32 @add0.specialized.1(i32 1, i32 1)
+; CHECK-NEXT: ret void
+;
+;
+; CHECK-LABEL: define i32 @add0(
+; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[RES:%.*]] = add i32 [[X]], [[Y]]
+; CHECK-NEXT: ret i32 [[RES]]
+;
+;
+; CHECK-LABEL: define void @test_max_codesize_growth_exceeded(
+; CHECK-SAME: i32 [[N:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[SPEC0:%.*]] = call i32 @add1.specialized.2(i32 0, i32 0)
+; CHECK-NEXT: [[SPEC1:%.*]] = call i32 @add1.specialized.3(i32 1, i32 1)
+; CHECK-NEXT: [[SPEC2:%.*]] = call i32 @add1.specialized.4(i32 2, i32 2)
+; CHECK-NEXT: [[NOTSPEC:%.*]] = call i32 @add1(i32 3, i32 3)
+; CHECK-NEXT: ret void
+;
+;
+; CHECK-LABEL: define i32 @add1(
+; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[RES:%.*]] = add i32 [[X]], [[Y]]
+; CHECK-NEXT: ret i32 [[RES]]
+;
+;
+; CHECK-LABEL: define internal i32 @add0.specialized.1(
+; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: ret i32 poison
+;
+;
+; CHECK-LABEL: define internal i32 @add1.specialized.2(
+; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: ret i32 poison
+;
+;
+; CHECK-LABEL: define internal i32 @add1.specialized.3(
+; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: ret i32 poison
+;
+;
+; CHECK-LABEL: define internal i32 @add1.specialized.4(
+; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: ret i32 poison
+;
``````````
</details>
https://github.com/llvm/llvm-project/pull/113448
More information about the llvm-commits mailing list