[llvm] [FnSpecialization] Enable function specialization of call chains (PR #163891)
Ryan Buchner via llvm-commits
llvm-commits at lists.llvm.org
Fri Oct 17 09:31:04 PDT 2025
https://github.com/bababuck updated https://github.com/llvm/llvm-project/pull/163891
>From 88fbb97e6ef7d401fe7245dbf4988eff8de73282 Mon Sep 17 00:00:00 2001
From: bababuck <buchner.ryan at gmail.com>
Date: Tue, 9 Sep 2025 13:44:37 -0700
Subject: [PATCH 01/23] [FnSpecialization] Only accept codesize savings if
strictly greater than the minimum amount
If the knob for the minimum codesize savings is turned down low enough, then for
small functions `MinCodeSizeSavings * FuncSize / 100` will evaluate to `0`, and
with a strict less-than comparison we will accept a specialization that doesn't
lead to any benefit.
---
.../Transforms/IPO/FunctionSpecialization.cpp | 2 +-
.../FunctionSpecialization/dead-gv-load.ll | 4 +-
.../FunctionSpecialization/maxgrowth.ll | 8 +---
.../recursive-penalty.ll | 38 ++++++++++++++++++-
4 files changed, 40 insertions(+), 12 deletions(-)
diff --git a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
index 150a2dc5d48e2..6d4b2fb7e0065 100644
--- a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
@@ -995,7 +995,7 @@ bool FunctionSpecializer::findSpecializations(Function *F, unsigned FuncSize,
<< (CodeSizeSavings * 100 / FuncSize) << "%)}\n");
// Minimum codesize savings.
- if (CodeSizeSavings < MinCodeSizeSavings * FuncSize / 100)
+ if (CodeSizeSavings <= MinCodeSizeSavings * FuncSize / 100)
return false;
// Lazily compute the Latency, to avoid unnecessarily computing BFI.
diff --git a/llvm/test/Transforms/FunctionSpecialization/dead-gv-load.ll b/llvm/test/Transforms/FunctionSpecialization/dead-gv-load.ll
index 134a79d349035..337780d0de2e4 100644
--- a/llvm/test/Transforms/FunctionSpecialization/dead-gv-load.ll
+++ b/llvm/test/Transforms/FunctionSpecialization/dead-gv-load.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
-; RUN: opt -passes=ipsccp --funcspec-min-function-size=1 -S < %s | FileCheck %s
-
+; RUN: opt -passes=ipsccp --funcspec-min-function-size=1 \
+; RUN: -funcspec-min-codesize-savings=1 -S < %s | FileCheck %s
@gv = internal global ptr null
define i8 @caller() {
diff --git a/llvm/test/Transforms/FunctionSpecialization/maxgrowth.ll b/llvm/test/Transforms/FunctionSpecialization/maxgrowth.ll
index 82d1f7ae4a6e1..7dc7e8ec69f50 100644
--- a/llvm/test/Transforms/FunctionSpecialization/maxgrowth.ll
+++ b/llvm/test/Transforms/FunctionSpecialization/maxgrowth.ll
@@ -26,7 +26,7 @@ entry:
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[NOTSPEC0:%.*]] = call i32 @add(i32 0, i32 [[N]])
; CHECK-NEXT: [[NOTSPEC1:%.*]] = call i32 @add(i32 1, i32 [[N]])
-; CHECK-NEXT: [[SPEC:%.*]] = call i32 @add.specialized.1(i32 1, i32 1)
+; CHECK-NEXT: [[SPEC:%.*]] = call i32 @add(i32 1, i32 1)
; CHECK-NEXT: ret void
;
;
@@ -36,9 +36,3 @@ entry:
; CHECK-NEXT: [[RES:%.*]] = add i32 [[X]], [[Y]]
; CHECK-NEXT: ret i32 [[RES]]
;
-;
-; CHECK-LABEL: define internal i32 @add.specialized.1(
-; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
-; CHECK-NEXT: [[ENTRY:.*:]]
-; CHECK-NEXT: ret i32 poison
-;
diff --git a/llvm/test/Transforms/FunctionSpecialization/recursive-penalty.ll b/llvm/test/Transforms/FunctionSpecialization/recursive-penalty.ll
index fc17387dec94d..ff90634ddd424 100644
--- a/llvm/test/Transforms/FunctionSpecialization/recursive-penalty.ll
+++ b/llvm/test/Transforms/FunctionSpecialization/recursive-penalty.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; REQUIRES: asserts
; RUN: opt -passes="ipsccp<func-spec>,inline,instcombine,simplifycfg" -S \
; RUN: -funcspec-min-function-size=23 -funcspec-max-iters=100 \
@@ -6,11 +7,40 @@
; Make sure the number of specializations created are not
; linear to the number of iterations (funcspec-max-iters).
-; CHECK: FnSpecialization: Created 4 specializations in module
-
@Global = internal constant i32 1, align 4
define internal void @recursiveFunc(ptr readonly %arg) {
+; CHECK-LABEL: define internal void @recursiveFunc(
+; CHECK-SAME: ptr readonly [[ARG:%.*]]) {
+; CHECK-NEXT: [[TEMP:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[ARG_LOAD:%.*]] = load i32, ptr [[ARG]], align 4
+; CHECK-NEXT: [[ARG_CMP:%.*]] = icmp slt i32 [[ARG_LOAD]], 10000
+; CHECK-NEXT: br i1 [[ARG_CMP]], label %[[LOOP1:.*]], label %[[RET_BLOCK:.*]]
+; CHECK: [[LOOP1]]:
+; CHECK-NEXT: br label %[[LOOP2:.*]]
+; CHECK: [[LOOP2]]:
+; CHECK-NEXT: br label %[[LOOP3:.*]]
+; CHECK: [[LOOP3]]:
+; CHECK-NEXT: br label %[[LOOP4:.*]]
+; CHECK: [[LOOP4]]:
+; CHECK-NEXT: call void @print_val(i32 [[ARG_LOAD]])
+; CHECK-NEXT: [[ARG_ADD:%.*]] = add nsw i32 [[ARG_LOAD]], 1
+; CHECK-NEXT: store i32 [[ARG_ADD]], ptr [[TEMP]], align 4
+; CHECK-NEXT: call void @recursiveFunc(ptr nonnull [[TEMP]])
+; CHECK-NEXT: [[EXIT_COND1:%.*]] = call i1 @exit_cond()
+; CHECK-NEXT: br i1 [[EXIT_COND1]], label %[[LOOP4]], label %[[LOOP3_END:.*]]
+; CHECK: [[LOOP3_END]]:
+; CHECK-NEXT: [[EXIT_COND2:%.*]] = call i1 @exit_cond()
+; CHECK-NEXT: br i1 [[EXIT_COND2]], label %[[LOOP3]], label %[[LOOP2_END:.*]]
+; CHECK: [[LOOP2_END]]:
+; CHECK-NEXT: [[EXIT_COND3:%.*]] = call i1 @exit_cond()
+; CHECK-NEXT: br i1 [[EXIT_COND3]], label %[[LOOP2]], label %[[LOOP1_END:.*]]
+; CHECK: [[LOOP1_END]]:
+; CHECK-NEXT: [[EXIT_COND4:%.*]] = call i1 @exit_cond()
+; CHECK-NEXT: br i1 [[EXIT_COND4]], label %[[LOOP1]], label %[[RET_BLOCK]]
+; CHECK: [[RET_BLOCK]]:
+; CHECK-NEXT: ret void
+;
%temp = alloca i32, align 4
%arg.load = load i32, ptr %arg, align 4
%arg.cmp = icmp slt i32 %arg.load, 10000
@@ -56,6 +86,10 @@ ret.block:
}
define i32 @main() {
+; CHECK-LABEL: define i32 @main() {
+; CHECK-NEXT: call void @recursiveFunc(ptr nonnull @Global)
+; CHECK-NEXT: ret i32 0
+;
call void @recursiveFunc(ptr @Global)
ret i32 0
}
>From 92297fcad87861f9838e1e13b0c0c99646c5e62a Mon Sep 17 00:00:00 2001
From: bababuck <buchner.ryan at gmail.com>
Date: Tue, 30 Sep 2025 09:10:38 -0700
Subject: [PATCH 02/23] [FnSpecialization] Add new test for chained
specialization
---
.../specialize-chain.ll | 132 ++++++++++++++++++
1 file changed, 132 insertions(+)
create mode 100644 llvm/test/Transforms/FunctionSpecialization/specialize-chain.ll
diff --git a/llvm/test/Transforms/FunctionSpecialization/specialize-chain.ll b/llvm/test/Transforms/FunctionSpecialization/specialize-chain.ll
new file mode 100644
index 0000000000000..8b3a028ca1aa7
--- /dev/null
+++ b/llvm/test/Transforms/FunctionSpecialization/specialize-chain.ll
@@ -0,0 +1,132 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs --version 5
+; REQUIRES: asserts
+; RUN: opt -passes=ipsccp -S -funcspec-min-function-size=1 -funcspec-min-codesize-savings=20 -debug-only=function-specialization < %s 2>&1 | FileCheck %s
+
+define i32 @incr(i32 %a) {
+ %b = add nsw i32 %a, 1
+ %c = add nsw i32 %b, 1
+ %d = add nsw i32 %c, 1
+ %e = add nsw i32 %d, 1
+ %f = add nsw i32 %e, 1
+ %g = add nsw i32 %f, 1
+ ret i32 %g
+}
+
+define i32 @forward_outer(i32 %a) {
+entry:
+ %call = call i32 @forward_inner(i32 %a)
+ ret i32 %call
+}
+
+define i32 @multi_call(i32 %a) {
+entry:
+ %call = call i32 @incr(i32 %a)
+ %mul = mul nsw i32 %a, 2
+ %mul_call = call i32 @incr(i32 %mul)
+ ret i32 %call
+}
+
+define i32 @forward_inner(i32 %a) {
+entry:
+ %call = call i32 @incr(i32 %a)
+ ret i32 %call
+}
+
+define i32 @forward_unfold(i32 %a) {
+entry:
+ %b = mul nsw i32 %a, 10
+ %call = call i32 @incr(i32 %b)
+ %c = mul nsw i32 %call, 20
+ ret i32 %c
+}
+
+define dso_local signext i32 @intrinsic(i64 %a) {
+ %local_dest = alloca [1024 x i32], align 4
+ %local_src = alloca [1024 x i32], align 4
+ call void @llvm.memcpy.p0.p0.i64(ptr %local_dest, ptr %local_src, i64 %a, i1 false)
+ ret i32 0
+}
+
+define i32 @main() {
+entry:
+ %add = call i32 @incr(i32 10)
+ %int = call i32 @intrinsic(i32 3)
+ %fwd_unfold = call i32 @forward_unfold(i32 3)
+ %fwd_inner = call i32 @forward_inner(i32 3)
+ %fwd_outer = call i32 @forward_outer(i32 3)
+ %fwd_outer1 = call i32 @forward_outer(i32 3)
+ %multi_call = call i32 @multi_call(i32 5)
+ ret i32 %multi_call
+}
+
+
+
+
+
+; CHECK-LABEL: define range(i32 -2147483642, -2147483648) i32 @incr(
+; CHECK-SAME: i32 [[A:%.*]]) {
+; CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[A]], 1
+; CHECK-NEXT: [[C:%.*]] = add nsw i32 [[INC]], 1
+; CHECK-NEXT: [[D:%.*]] = add nsw i32 [[C]], 1
+; CHECK-NEXT: [[E:%.*]] = add nsw i32 [[D]], 1
+; CHECK-NEXT: [[F:%.*]] = add nsw i32 [[E]], 1
+; CHECK-NEXT: [[G:%.*]] = add nsw i32 [[F]], 1
+; CHECK-NEXT: ret i32 [[G]]
+;
+;
+; CHECK-LABEL: define range(i32 -2147483642, -2147483648) i32 @forward_outer(
+; CHECK-SAME: i32 [[A:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[CALL:%.*]] = call i32 @forward_inner(i32 [[A]])
+; CHECK-NEXT: ret i32 [[CALL]]
+;
+;
+; CHECK-LABEL: define range(i32 -2147483642, -2147483648) i32 @multi_call(
+; CHECK-SAME: i32 [[A:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[CALL:%.*]] = call i32 @incr(i32 [[A]])
+; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[A]], 2
+; CHECK-NEXT: [[MUL_CALL:%.*]] = call i32 @incr(i32 [[MUL]])
+; CHECK-NEXT: ret i32 [[CALL]]
+;
+;
+; CHECK-LABEL: define range(i32 -2147483642, -2147483648) i32 @forward_inner(
+; CHECK-SAME: i32 [[A:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[CALL:%.*]] = call i32 @incr(i32 [[A]])
+; CHECK-NEXT: ret i32 [[CALL]]
+;
+;
+; CHECK-LABEL: define i32 @forward_unfold(
+; CHECK-SAME: i32 [[A:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[B:%.*]] = mul nsw i32 [[A]], 10
+; CHECK-NEXT: [[CALL:%.*]] = call i32 @incr(i32 [[B]])
+; CHECK-NEXT: [[C:%.*]] = mul nsw i32 [[CALL]], 20
+; CHECK-NEXT: ret i32 [[C]]
+;
+;
+; CHECK-LABEL: define dso_local signext i32 @intrinsic(
+; CHECK-SAME: i64 [[A:%.*]]) {
+; CHECK-NEXT: [[LOCAL_DEST:%.*]] = alloca [1024 x i32], align 4
+; CHECK-NEXT: [[LOCAL_SRC:%.*]] = alloca [1024 x i32], align 4
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[LOCAL_DEST]], ptr [[LOCAL_SRC]], i64 [[A]], i1 false)
+; CHECK-NEXT: ret i32 0
+;
+;
+; CHECK-LABEL: define range(i32 -2147483642, -2147483648) i32 @main() {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[ADD:%.*]] = call i32 @incr.specialized.1(i32 10)
+; CHECK-NEXT: [[INT:%.*]] = call i32 @intrinsic(i32 3)
+; CHECK-NEXT: [[FWD_UNFOLD:%.*]] = call i32 @forward_unfold(i32 3)
+; CHECK-NEXT: [[FWD_INNER:%.*]] = call i32 @forward_inner(i32 3)
+; CHECK-NEXT: [[FWD_OUTER:%.*]] = call i32 @forward_outer(i32 3)
+; CHECK-NEXT: [[FWD_OUTER1:%.*]] = call i32 @forward_outer(i32 3)
+; CHECK-NEXT: [[MULTI_CALL:%.*]] = call i32 @multi_call(i32 5)
+; CHECK-NEXT: ret i32 [[MULTI_CALL]]
+;
+;
+; CHECK-LABEL: define internal i32 @incr.specialized.1(
+; CHECK-SAME: i32 [[A:%.*]]) {
+; CHECK-NEXT: ret i32 poison
+;
>From 5ea886987a3abfef0ba13320374fb4ebed6e60ef Mon Sep 17 00:00:00 2001
From: bababuck <buchner.ryan at gmail.com>
Date: Mon, 29 Sep 2025 15:15:25 -0700
Subject: [PATCH 03/23] [FnSpecialization] Refactor SpecCall::CallSites to
contain a data structure
The data structure will eventually contain extra data for chained and indirect
specialization.
---
.../llvm/Transforms/IPO/FunctionSpecialization.h | 10 +++++++++-
llvm/lib/Transforms/IPO/FunctionSpecialization.cpp | 9 +++++----
2 files changed, 14 insertions(+), 5 deletions(-)
diff --git a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
index 5a682e8c7b5eb..120ed2117992c 100644
--- a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
+++ b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
@@ -93,6 +93,8 @@
#include "llvm/Transforms/Utils/SizeOpts.h"
namespace llvm {
+struct Spec;
+
// Map of potential specializations for each function. The FunctionSpecializer
// keeps the discovered specialisation opportunities for the module in a single
// vector, where the specialisations of each function form a contiguous range.
@@ -124,6 +126,10 @@ struct SpecSig {
}
};
+struct SpecCall {
+ CallBase *CallSite;
+};
+
// Specialization instance.
struct Spec {
// Original function.
@@ -142,7 +148,9 @@ struct Spec {
unsigned CodeSize;
// List of call sites, matching this specialization.
- SmallVector<CallBase *> CallSites;
+ SmallVector<SpecCall> CallSites;
+
+ void addCall(SpecCall SC) { CallSites.push_back(SC); }
Spec(Function *F, const SpecSig &S, unsigned Score, unsigned CodeSize)
: F(F), Sig(S), Score(Score), CodeSize(CodeSize) {}
diff --git a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
index 6d4b2fb7e0065..6743fffd585df 100644
--- a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
@@ -790,11 +790,12 @@ bool FunctionSpecializer::run() {
S.Clone = createSpecialization(S.F, S.Sig);
// Update the known call sites to call the clone.
- for (CallBase *Call : S.CallSites) {
+ for (auto &CS : S.CallSites) {
Function *Clone = S.Clone;
+ CallBase *Call = CS.CallSite;
LLVM_DEBUG(dbgs() << "FnSpecialization: Redirecting " << *Call
<< " to call " << Clone->getName() << "\n");
- Call->setCalledFunction(S.Clone);
+ Call->setCalledFunction(Clone);
auto &BFI = GetBFI(*Call->getFunction());
std::optional<uint64_t> Count =
BFI.getBlockProfileCount(Call->getParent());
@@ -961,7 +962,7 @@ bool FunctionSpecializer::findSpecializations(Function *F, unsigned FuncSize,
if (CS.getFunction() == F)
continue;
const unsigned Index = It->second;
- AllSpecs[Index].CallSites.push_back(&CS);
+ AllSpecs[Index].addCall({&CS});
} else {
// Calculate the specialisation gain.
Cost CodeSize;
@@ -1025,7 +1026,7 @@ bool FunctionSpecializer::findSpecializations(Function *F, unsigned FuncSize,
// Create a new specialisation entry.
auto &Spec = AllSpecs.emplace_back(F, S, Score, SpecSize);
if (CS.getFunction() != F)
- Spec.CallSites.push_back(&CS);
+ Spec.addCall({&CS});
const unsigned Index = AllSpecs.size() - 1;
UniqueSpecs[S] = Index;
if (auto [It, Inserted] = SM.try_emplace(F, Index, Index + 1); !Inserted)
>From ca8182b63471435e66cce4a13372f6a32bcfe994 Mon Sep 17 00:00:00 2001
From: bababuck <buchner.ryan at gmail.com>
Date: Mon, 29 Sep 2025 15:30:02 -0700
Subject: [PATCH 04/23] [FnSpecialization] Refactor main loop in run() to pull
out the loop logic to its own function
We will want to call it recursively for chains.
---
.../Transforms/IPO/FunctionSpecialization.h | 7 ++
.../Transforms/IPO/FunctionSpecialization.cpp | 109 +++++++++---------
2 files changed, 64 insertions(+), 52 deletions(-)
diff --git a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
index 120ed2117992c..b594a01aeac21 100644
--- a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
+++ b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
@@ -307,6 +307,13 @@ class FunctionSpecializer {
bool findSpecializations(Function *F, unsigned FuncSize,
SmallVectorImpl<Spec> &AllSpecs, SpecMap &SM);
+ /// @brief Find specialization opportunities for a given function.
+ /// @param F Function to specialize
+ /// @param SM A map for a function's specialisation range
+ /// @param AllSpecs A vector to add potential specializations to.
+ /// @return True, if any potential specializations were found
+ bool runOneSpec(Function &F, SpecMap &SM, SmallVectorImpl<Spec> &AllSpecs);
+
/// Compute the inlining bonus for replacing argument \p A with constant \p C.
unsigned getInliningBonus(Argument *A, Constant *C);
diff --git a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
index 6743fffd585df..af57c0ce3394a 100644
--- a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
@@ -668,6 +668,61 @@ static unsigned getCostValue(const Cost &C) {
return static_cast<unsigned>(Value);
}
+bool FunctionSpecializer::runOneSpec(Function &F, SpecMap &SM,
+ SmallVectorImpl<Spec> &AllSpecs) {
+ if (!isCandidateFunction(&F))
+ return false;
+
+ auto [It, Inserted] = FunctionMetrics.try_emplace(&F);
+ CodeMetrics &Metrics = It->second;
+ // Analyze the function.
+ if (Inserted) {
+ SmallPtrSet<const Value *, 32> EphValues;
+ CodeMetrics::collectEphemeralValues(&F, &GetAC(F), EphValues);
+ for (BasicBlock &BB : F)
+ Metrics.analyzeBasicBlock(&BB, GetTTI(F), EphValues);
+ }
+
+ // When specializing literal constants is enabled, always require functions
+ // to be larger than MinFunctionSize, to prevent excessive specialization.
+ const bool RequireMinSize =
+ !ForceSpecialization &&
+ (SpecializeLiteralConstant || !F.hasFnAttribute(Attribute::NoInline));
+
+ // If the code metrics reveal that we shouldn't duplicate the function,
+ // or if the code size implies that this function is easy to get inlined,
+ // then we shouldn't specialize it.
+ if (Metrics.notDuplicatable || !Metrics.NumInsts.isValid() ||
+ (RequireMinSize && Metrics.NumInsts < MinFunctionSize))
+ return false;
+
+ // When specialization on literal constants is disabled, only consider
+ // recursive functions when running multiple times to save wasted analysis,
+ // as we will not be able to specialize on any newly found literal constant
+ // return values.
+ if (!SpecializeLiteralConstant && !Inserted && !Metrics.isRecursive)
+ return false;
+
+ int64_t Sz = Metrics.NumInsts.getValue();
+ assert(Sz > 0 && "CodeSize should be positive");
+ // It is safe to down cast from int64_t, NumInsts is always positive.
+ unsigned FuncSize = static_cast<unsigned>(Sz);
+
+ LLVM_DEBUG(dbgs() << "FnSpecialization: Specialization cost for "
+ << F.getName() << " is " << FuncSize << "\n");
+
+ if (Inserted && Metrics.isRecursive)
+ promoteConstantStackValues(&F);
+
+ if (!findSpecializations(&F, FuncSize, AllSpecs, SM)) {
+ LLVM_DEBUG(
+ dbgs() << "FnSpecialization: No possible specializations found for "
+ << F.getName() << "\n");
+ return false;
+ }
+ return true;
+}
+
/// Attempt to specialize functions in the module to enable constant
/// propagation across function boundaries.
///
@@ -678,58 +733,8 @@ bool FunctionSpecializer::run() {
SmallVector<Spec, 32> AllSpecs;
unsigned NumCandidates = 0;
for (Function &F : M) {
- if (!isCandidateFunction(&F))
- continue;
-
- auto [It, Inserted] = FunctionMetrics.try_emplace(&F);
- CodeMetrics &Metrics = It->second;
- //Analyze the function.
- if (Inserted) {
- SmallPtrSet<const Value *, 32> EphValues;
- CodeMetrics::collectEphemeralValues(&F, &GetAC(F), EphValues);
- for (BasicBlock &BB : F)
- Metrics.analyzeBasicBlock(&BB, GetTTI(F), EphValues);
- }
-
- // When specializing literal constants is enabled, always require functions
- // to be larger than MinFunctionSize, to prevent excessive specialization.
- const bool RequireMinSize =
- !ForceSpecialization &&
- (SpecializeLiteralConstant || !F.hasFnAttribute(Attribute::NoInline));
-
- // If the code metrics reveal that we shouldn't duplicate the function,
- // or if the code size implies that this function is easy to get inlined,
- // then we shouldn't specialize it.
- if (Metrics.notDuplicatable || !Metrics.NumInsts.isValid() ||
- (RequireMinSize && Metrics.NumInsts < MinFunctionSize))
- continue;
-
- // When specialization on literal constants is disabled, only consider
- // recursive functions when running multiple times to save wasted analysis,
- // as we will not be able to specialize on any newly found literal constant
- // return values.
- if (!SpecializeLiteralConstant && !Inserted && !Metrics.isRecursive)
- continue;
-
- int64_t Sz = Metrics.NumInsts.getValue();
- assert(Sz > 0 && "CodeSize should be positive");
- // It is safe to down cast from int64_t, NumInsts is always positive.
- unsigned FuncSize = static_cast<unsigned>(Sz);
-
- LLVM_DEBUG(dbgs() << "FnSpecialization: Specialization cost for "
- << F.getName() << " is " << FuncSize << "\n");
-
- if (Inserted && Metrics.isRecursive)
- promoteConstantStackValues(&F);
-
- if (!findSpecializations(&F, FuncSize, AllSpecs, SM)) {
- LLVM_DEBUG(
- dbgs() << "FnSpecialization: No possible specializations found for "
- << F.getName() << "\n");
- continue;
- }
-
- ++NumCandidates;
+ if (runOneSpec(F, SM, AllSpecs))
+ ++NumCandidates;
}
if (!NumCandidates) {
>From 24cc5b09ec3d2f644a85e1faab5951b53a27438e Mon Sep 17 00:00:00 2001
From: bababuck <buchner.ryan at gmail.com>
Date: Mon, 29 Sep 2025 15:48:45 -0700
Subject: [PATCH 05/23] [FnSpecialization] Refactor logic for actually
performing the specialization into a lambda
Will need to call recursively.
No functional change.
---
.../Transforms/IPO/FunctionSpecialization.cpp | 72 ++++++++++---------
1 file changed, 37 insertions(+), 35 deletions(-)
diff --git a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
index af57c0ce3394a..8d2009192e50f 100644
--- a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
@@ -788,45 +788,47 @@ bool FunctionSpecializer::run() {
for (unsigned I = 0; I < NSpecs; ++I) {
Spec &S = AllSpecs[BestSpecs[I]];
- // Accumulate the codesize growth for the function, now we are creating the
- // specialization.
- FunctionGrowth[S.F] += S.CodeSize;
-
- S.Clone = createSpecialization(S.F, S.Sig);
-
- // Update the known call sites to call the clone.
- for (auto &CS : S.CallSites) {
- Function *Clone = S.Clone;
- CallBase *Call = CS.CallSite;
- LLVM_DEBUG(dbgs() << "FnSpecialization: Redirecting " << *Call
- << " to call " << Clone->getName() << "\n");
- Call->setCalledFunction(Clone);
- auto &BFI = GetBFI(*Call->getFunction());
- std::optional<uint64_t> Count =
- BFI.getBlockProfileCount(Call->getParent());
- if (Count && !ProfcheckDisableMetadataFixes) {
- std::optional<llvm::Function::ProfileCount> MaybeCloneCount =
- Clone->getEntryCount();
- if (MaybeCloneCount) {
- uint64_t CallCount = *Count + MaybeCloneCount->getCount();
- Clone->setEntryCount(CallCount);
- if (std::optional<llvm::Function::ProfileCount> MaybeOriginalCount =
- S.F->getEntryCount()) {
- uint64_t OriginalCount = MaybeOriginalCount->getCount();
- if (OriginalCount >= *Count) {
- S.F->setEntryCount(OriginalCount - *Count);
- } else {
- // This should generally not happen as that would mean there are
- // more computed calls to the function than what was recorded.
- LLVM_DEBUG(S.F->setEntryCount(0));
+ auto actuallySpecialize = [&](Spec &S) -> void {
+ // Accumulate the codesize growth for the function, now we are creating
+ // the specialization.
+ FunctionGrowth[S.F] += S.CodeSize;
+
+ S.Clone = createSpecialization(S.F, S.Sig);
+
+ // Update the known call sites to call the clone.
+ for (auto &CS : S.CallSites) {
+ Function *Clone = S.Clone;
+ CallBase *Call = CS.CallSite;
+ LLVM_DEBUG(dbgs() << "FnSpecialization: Redirecting " << *Call
+ << " to call " << Clone->getName() << "\n");
+ Call->setCalledFunction(Clone);
+ auto &BFI = GetBFI(*Call->getFunction());
+ std::optional<uint64_t> Count =
+ BFI.getBlockProfileCount(Call->getParent());
+ if (Count && !ProfcheckDisableMetadataFixes) {
+ std::optional<llvm::Function::ProfileCount> MaybeCloneCount =
+ Clone->getEntryCount();
+ if (MaybeCloneCount) {
+ uint64_t CallCount = *Count + MaybeCloneCount->getCount();
+ Clone->setEntryCount(CallCount);
+ if (std::optional<llvm::Function::ProfileCount> MaybeOriginalCount =
+ S.F->getEntryCount()) {
+ uint64_t OriginalCount = MaybeOriginalCount->getCount();
+ if (OriginalCount >= *Count) {
+ S.F->setEntryCount(OriginalCount - *Count);
+ } else {
+ // This should generally not happen as that would mean there are
+ // more computed calls to the function than what was recorded.
+ LLVM_DEBUG(S.F->setEntryCount(0));
+ }
}
}
}
}
- }
-
- Clones.push_back(S.Clone);
- OriginalFuncs.insert(S.F);
+ Clones.push_back(S.Clone);
+ OriginalFuncs.insert(S.F);
+ };
+ actuallySpecialize(S);
}
Solver.solveWhileResolvedUndefsIn(Clones);
>From 32ef1d7df52ade01d9a235ddc3d652c397f8abf3 Mon Sep 17 00:00:00 2001
From: bababuck <buchner.ryan at gmail.com>
Date: Mon, 29 Sep 2025 17:58:50 -0700
Subject: [PATCH 06/23] [FnSpecialization] Pass a Spec to runOneSpec() rather
than a Function
A Spec already contains its Function, and with chaining we will need to pass
extra information along with it.
---
.../llvm/Transforms/IPO/FunctionSpecialization.h | 10 ++++++----
llvm/lib/Transforms/IPO/FunctionSpecialization.cpp | 13 ++++++++-----
2 files changed, 14 insertions(+), 9 deletions(-)
diff --git a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
index b594a01aeac21..6959ccfb43b5a 100644
--- a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
+++ b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
@@ -156,6 +156,8 @@ struct Spec {
: F(F), Sig(S), Score(Score), CodeSize(CodeSize) {}
Spec(Function *F, const SpecSig &&S, unsigned Score, unsigned CodeSize)
: F(F), Sig(S), Score(Score), CodeSize(CodeSize) {}
+ Spec(Function *F)
+ : F(F), Clone(nullptr), Sig(), Score(0), CodeSize(), CallSites(0) {}
};
class InstCostVisitor : public InstVisitor<InstCostVisitor, Constant *> {
@@ -304,15 +306,15 @@ class FunctionSpecializer {
/// @param AllSpecs A vector to add potential specializations to.
/// @param SM A map for a function's specialisation range
/// @return True, if any potential specializations were found
- bool findSpecializations(Function *F, unsigned FuncSize,
- SmallVectorImpl<Spec> &AllSpecs, SpecMap &SM);
+ bool findSpecializations(unsigned FuncSize, SmallVectorImpl<Spec> &AllSpecs,
+ SpecMap &SM, Spec &InS);
/// @brief Find specialization opportunities for a given function.
- /// @param F Function to specialize
+ /// @param S Specialization to complete, possibly with a Callsite attached.
/// @param SM A map for a function's specialisation range
/// @param AllSpecs A vector to add potential specializations to.
/// @return True, if any potential specializations were found
- bool runOneSpec(Function &F, SpecMap &SM, SmallVectorImpl<Spec> &AllSpecs);
+ bool runOneSpec(Spec &S, SpecMap &SM, SmallVectorImpl<Spec> &AllSpecs);
/// Compute the inlining bonus for replacing argument \p A with constant \p C.
unsigned getInliningBonus(Argument *A, Constant *C);
diff --git a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
index 8d2009192e50f..008de3b9e8337 100644
--- a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
@@ -668,8 +668,9 @@ static unsigned getCostValue(const Cost &C) {
return static_cast<unsigned>(Value);
}
-bool FunctionSpecializer::runOneSpec(Function &F, SpecMap &SM,
+bool FunctionSpecializer::runOneSpec(Spec &S, SpecMap &SM,
SmallVectorImpl<Spec> &AllSpecs) {
+ Function &F = *(S.F);
if (!isCandidateFunction(&F))
return false;
@@ -714,7 +715,7 @@ bool FunctionSpecializer::runOneSpec(Function &F, SpecMap &SM,
if (Inserted && Metrics.isRecursive)
promoteConstantStackValues(&F);
- if (!findSpecializations(&F, FuncSize, AllSpecs, SM)) {
+ if (!findSpecializations(FuncSize, AllSpecs, SM, S)) {
LLVM_DEBUG(
dbgs() << "FnSpecialization: No possible specializations found for "
<< F.getName() << "\n");
@@ -733,7 +734,8 @@ bool FunctionSpecializer::run() {
SmallVector<Spec, 32> AllSpecs;
unsigned NumCandidates = 0;
for (Function &F : M) {
- if (runOneSpec(F, SM, AllSpecs))
+ Spec S(&F);
+ if (runOneSpec(S, SM, AllSpecs))
++NumCandidates;
}
@@ -906,9 +908,10 @@ static Function *cloneCandidateFunction(Function *F, unsigned NSpecs) {
return Clone;
}
-bool FunctionSpecializer::findSpecializations(Function *F, unsigned FuncSize,
+bool FunctionSpecializer::findSpecializations(unsigned FuncSize,
SmallVectorImpl<Spec> &AllSpecs,
- SpecMap &SM) {
+ SpecMap &SM, Spec &InS) {
+ Function *F = InS.F;
// A mapping from a specialisation signature to the index of the respective
// entry in the all specialisation array. Used to ensure uniqueness of
// specialisations.
>From d6a2c96a173aa53fb96b8f949af1666620e523a4 Mon Sep 17 00:00:00 2001
From: bababuck <buchner.ryan at gmail.com>
Date: Mon, 29 Sep 2025 21:19:56 -0700
Subject: [PATCH 07/23] [FnSpecialization] Use the same UniqueSpecs across
entire run()
UniqueSpecs used to be a local object within findSpecializations(), since
each Function only entered findSpecializations() once. With chains, functions
will now be visited in arbitrary order, so the map has to be shared across calls.
---
.../Transforms/IPO/FunctionSpecialization.h | 7 +++++--
.../Transforms/IPO/FunctionSpecialization.cpp | 21 ++++++++++---------
2 files changed, 16 insertions(+), 12 deletions(-)
diff --git a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
index 6959ccfb43b5a..f621554e83636 100644
--- a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
+++ b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
@@ -307,14 +307,17 @@ class FunctionSpecializer {
/// @param SM A map for a function's specialisation range
/// @return True, if any potential specializations were found
bool findSpecializations(unsigned FuncSize, SmallVectorImpl<Spec> &AllSpecs,
- SpecMap &SM, Spec &InS);
+ SpecMap &SM, Spec &InS,
+ DenseMap<SpecSig, unsigned> &UniqueSpecs);
/// @brief Find specialization opportunities for a given function.
/// @param S Specialization to complete, possibly with a Callsite attached.
/// @param SM A map for a function's specialisation range
/// @param AllSpecs A vector to add potential specializations to.
+ /// @param UniqueSpecs Map of existing specializations.
/// @return True, if any potential specializations were found
- bool runOneSpec(Spec &S, SpecMap &SM, SmallVectorImpl<Spec> &AllSpecs);
+ bool runOneSpec(Spec &S, SpecMap &SM, SmallVectorImpl<Spec> &AllSpecs,
+ DenseMap<SpecSig, unsigned> &UniqueSpecs);
/// Compute the inlining bonus for replacing argument \p A with constant \p C.
unsigned getInliningBonus(Argument *A, Constant *C);
diff --git a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
index 008de3b9e8337..15e6ecddc615e 100644
--- a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
@@ -669,7 +669,8 @@ static unsigned getCostValue(const Cost &C) {
}
bool FunctionSpecializer::runOneSpec(Spec &S, SpecMap &SM,
- SmallVectorImpl<Spec> &AllSpecs) {
+ SmallVectorImpl<Spec> &AllSpecs,
+ DenseMap<SpecSig, unsigned> &UniqueSpecs) {
Function &F = *(S.F);
if (!isCandidateFunction(&F))
return false;
@@ -715,7 +716,7 @@ bool FunctionSpecializer::runOneSpec(Spec &S, SpecMap &SM,
if (Inserted && Metrics.isRecursive)
promoteConstantStackValues(&F);
- if (!findSpecializations(FuncSize, AllSpecs, SM, S)) {
+ if (!findSpecializations(FuncSize, AllSpecs, SM, S, UniqueSpecs)) {
LLVM_DEBUG(
dbgs() << "FnSpecialization: No possible specializations found for "
<< F.getName() << "\n");
@@ -732,10 +733,14 @@ bool FunctionSpecializer::run() {
// Find possible specializations for each function.
SpecMap SM;
SmallVector<Spec, 32> AllSpecs;
+ // A mapping from a specialisation signature to the index of the respective
+ // entry in the all specialisation array. Used to ensure uniqueness of
+ // specialisations.
+ DenseMap<SpecSig, unsigned> UniqueSpecs;
unsigned NumCandidates = 0;
for (Function &F : M) {
Spec S(&F);
- if (runOneSpec(S, SM, AllSpecs))
+ if (runOneSpec(S, SM, AllSpecs, UniqueSpecs))
++NumCandidates;
}
@@ -908,14 +913,10 @@ static Function *cloneCandidateFunction(Function *F, unsigned NSpecs) {
return Clone;
}
-bool FunctionSpecializer::findSpecializations(unsigned FuncSize,
- SmallVectorImpl<Spec> &AllSpecs,
- SpecMap &SM, Spec &InS) {
+bool FunctionSpecializer::findSpecializations(
+ unsigned FuncSize, SmallVectorImpl<Spec> &AllSpecs, SpecMap &SM, Spec &InS,
+ DenseMap<SpecSig, unsigned> &UniqueSpecs) {
Function *F = InS.F;
- // A mapping from a specialisation signature to the index of the respective
- // entry in the all specialisation array. Used to ensure uniqueness of
- // specialisations.
- DenseMap<SpecSig, unsigned> UniqueSpecs;
// Get a list of interesting arguments.
SmallVector<Argument *> Args;
>From 37b9a5daaefce9b89f67ab4f2d99fb34b1b287cf Mon Sep 17 00:00:00 2001
From: bababuck <buchner.ryan at gmail.com>
Date: Tue, 30 Sep 2025 15:15:10 -0700
Subject: [PATCH 08/23] [FnSpecialization] Don't rely on UniqueSpec to
determine if specialization occurred
---
llvm/lib/Transforms/IPO/FunctionSpecialization.cpp | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
index 15e6ecddc615e..46cad15936abe 100644
--- a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
@@ -917,6 +917,7 @@ bool FunctionSpecializer::findSpecializations(
unsigned FuncSize, SmallVectorImpl<Spec> &AllSpecs, SpecMap &SM, Spec &InS,
DenseMap<SpecSig, unsigned> &UniqueSpecs) {
Function *F = InS.F;
+ bool FoundSpecialization = false;
// Get a list of interesting arguments.
SmallVector<Argument *> Args;
@@ -1040,12 +1041,15 @@ bool FunctionSpecializer::findSpecializations(
Spec.addCall({&CS});
const unsigned Index = AllSpecs.size() - 1;
UniqueSpecs[S] = Index;
+
+ FoundSpecialization = true;
+
if (auto [It, Inserted] = SM.try_emplace(F, Index, Index + 1); !Inserted)
It->second.second = Index + 1;
}
}
- return !UniqueSpecs.empty();
+ return FoundSpecialization;
}
bool FunctionSpecializer::isCandidateFunction(Function *F) {
>From 2bc068ba0bf201340223f4b50beef84a2cde7b97 Mon Sep 17 00:00:00 2001
From: bababuck <buchner.ryan at gmail.com>
Date: Mon, 29 Sep 2025 12:30:28 -0700
Subject: [PATCH 09/23] [FnSpecialization] Modify SpecMap to hold a pointer to
every specialization
Cannot rely on AllSpecs being in order after chaining.
---
.../Transforms/IPO/FunctionSpecialization.h | 13 +++++--------
.../Transforms/IPO/FunctionSpecialization.cpp | 17 ++++++++---------
2 files changed, 13 insertions(+), 17 deletions(-)
diff --git a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
index f621554e83636..5507c7281222f 100644
--- a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
+++ b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
@@ -95,11 +95,8 @@
namespace llvm {
struct Spec;
-// Map of potential specializations for each function. The FunctionSpecializer
-// keeps the discovered specialisation opportunities for the module in a single
-// vector, where the specialisations of each function form a contiguous range.
-// This map's value is the beginning and the end of that range.
-using SpecMap = DenseMap<Function *, std::pair<unsigned, unsigned>>;
+// Map of potential specializations for each function.
+using SpecMap = DenseMap<Function *, SmallVector<unsigned>>;
// Just a shorter abbreviation to improve indentation.
using Cost = InstructionCost;
@@ -340,9 +337,9 @@ class FunctionSpecializer {
/// @brief Find and update calls to \p F, which match a specialization
/// @param F Orginal function
- /// @param Begin Start of a range of possibly matching specialisations
- /// @param End End of a range (exclusive) of possibly matching specialisations
- void updateCallSites(Function *F, const Spec *Begin, const Spec *End);
+ /// @param Specs Vector of possibly matching specialisations
+ void updateCallSites(Function *F, const SmallVector<unsigned> &Specs,
+ SmallVector<Spec, 32> AllSpecs);
};
} // namespace llvm
diff --git a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
index 46cad15936abe..607468f396296 100644
--- a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
@@ -843,10 +843,8 @@ bool FunctionSpecializer::run() {
// Update the rest of the call sites - these are the recursive calls, calls
// to discarded specialisations and calls that may match a specialisation
// after the solver runs.
- for (Function *F : OriginalFuncs) {
- auto [Begin, End] = SM[F];
- updateCallSites(F, AllSpecs.begin() + Begin, AllSpecs.begin() + End);
- }
+ for (Function *F : OriginalFuncs)
+ updateCallSites(F, SM[F], AllSpecs);
for (Function *F : Clones) {
if (F->getReturnType()->isVoidTy())
@@ -1044,8 +1042,7 @@ bool FunctionSpecializer::findSpecializations(
FoundSpecialization = true;
- if (auto [It, Inserted] = SM.try_emplace(F, Index, Index + 1); !Inserted)
- It->second.second = Index + 1;
+ SM[F].push_back(Index);
}
}
@@ -1223,8 +1220,9 @@ Constant *FunctionSpecializer::getCandidateConstant(Value *V) {
return C;
}
-void FunctionSpecializer::updateCallSites(Function *F, const Spec *Begin,
- const Spec *End) {
+void FunctionSpecializer::updateCallSites(Function *F,
+ const SmallVector<unsigned> &Specs,
+ SmallVector<Spec, 32> AllSpecs) {
// Collect the call sites that need updating.
SmallVector<CallBase *> ToUpdate;
for (User *U : F->users())
@@ -1239,7 +1237,8 @@ void FunctionSpecializer::updateCallSites(Function *F, const Spec *Begin,
// Find the best matching specialisation.
const Spec *BestSpec = nullptr;
- for (const Spec &S : make_range(Begin, End)) {
+ for (const unsigned SI : Specs) {
+ const auto &S = AllSpecs[SI];
if (!S.Clone || (BestSpec && S.Score <= BestSpec->Score))
continue;
>From 6a8cfdce5f0f384ba65f7789e3208ac5703b657f Mon Sep 17 00:00:00 2001
From: bababuck <buchner.ryan at gmail.com>
Date: Mon, 29 Sep 2025 18:18:41 -0700
Subject: [PATCH 10/23] [FnSpecialization] (1/6) Enable function specialization
chaining
If a function is called with constants and passes those constants on to another function,
try to specialize both of those functions.
---
.../Transforms/IPO/FunctionSpecialization.h | 51 ++-
.../Transforms/IPO/FunctionSpecialization.cpp | 331 ++++++++++++++----
2 files changed, 301 insertions(+), 81 deletions(-)
diff --git a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
index 5507c7281222f..96dbd5d9a31e5 100644
--- a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
+++ b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
@@ -95,9 +95,13 @@
namespace llvm {
struct Spec;
+struct SpecSig;
+
// Map of potential specializations for each function.
using SpecMap = DenseMap<Function *, SmallVector<unsigned>>;
+using CallUserT = SmallMapVector<CallBase *, std::pair<SpecSig, Function *>, 4>;
+
// Just a shorter abbreviation to improve indentation.
using Cost = InstructionCost;
@@ -123,8 +127,14 @@ struct SpecSig {
}
};
+enum CallSiteStatusT {
+ AWAITING_PARENT, HAS_PARENT, NO_PARENT
+};
+
struct SpecCall {
CallBase *CallSite;
+ CallSiteStatusT Status;
+ unsigned Parent;
};
// Specialization instance.
@@ -149,10 +159,20 @@ struct Spec {
void addCall(SpecCall SC) { CallSites.push_back(SC); }
- Spec(Function *F, const SpecSig &S, unsigned Score, unsigned CodeSize)
- : F(F), Sig(S), Score(Score), CodeSize(CodeSize) {}
- Spec(Function *F, const SpecSig &&S, unsigned Score, unsigned CodeSize)
- : F(F), Sig(S), Score(Score), CodeSize(CodeSize) {}
+ // List Sub-Specializations
+ SmallVector<unsigned> SubSpecs;
+
+ // Index within AllSpecs
+ unsigned Loc = 0;
+
+ Spec(Function *F, CallBase *CallSite, const SpecSig &S, CallSiteStatusT Status)
+ : F(F), Clone(nullptr), Sig(S), Score(), CodeSize(), CallSites() {
+ addCall({CallSite, Status, /*Parent*/ 0});
+ }
+ Spec(Function *F, CallBase *CallSite, CallSiteStatusT Status)
+ : F(F), Clone(nullptr), Sig(), Score(), CodeSize(), CallSites() {
+ addCall({CallSite, Status, /*Parent*/ 0});
+ }
Spec(Function *F)
: F(F), Clone(nullptr), Sig(), Score(0), CodeSize(), CallSites(0) {}
};
@@ -187,7 +207,8 @@ class InstCostVisitor : public InstVisitor<InstCostVisitor, Constant *> {
return Solver.isBlockExecutable(BB) && !DeadBlocks.contains(BB);
}
- LLVM_ABI Cost getCodeSizeSavingsForArg(Argument *A, Constant *C);
+ LLVM_ABI Cost getCodeSizeSavingsForArg(Argument *A, Constant *C,
+ CallUserT *CallUsers = nullptr);
LLVM_ABI Cost getCodeSizeSavingsFromPendingPHIs();
@@ -201,7 +222,9 @@ class InstCostVisitor : public InstVisitor<InstCostVisitor, Constant *> {
bool canEliminateSuccessor(BasicBlock *BB, BasicBlock *Succ) const;
Cost getCodeSizeSavingsForUser(Instruction *User, Value *Use = nullptr,
- Constant *C = nullptr);
+ Constant *C = nullptr,
+ CallUserT *CallUsers = nullptr,
+ llvm::Use *UseEdge = nullptr);
Cost estimateBasicBlocks(SmallVectorImpl<BasicBlock *> &WorkList);
Cost estimateSwitchInst(SwitchInst &I);
@@ -302,19 +325,26 @@ class FunctionSpecializer {
/// @param FuncSize Cost of specializing a function.
/// @param AllSpecs A vector to add potential specializations to.
/// @param SM A map for a function's specialisation range
+ /// @param CurrentChain Current chain of function calls.
/// @return True, if any potential specializations were found
bool findSpecializations(unsigned FuncSize, SmallVectorImpl<Spec> &AllSpecs,
SpecMap &SM, Spec &InS,
- DenseMap<SpecSig, unsigned> &UniqueSpecs);
+ DenseMap<SpecSig, unsigned> &UniqueSpecs,
+ SmallPtrSet<Function *, 4> &CurrentChain);
/// @brief Find specialization opportunities for a given function.
/// @param S Specialization to complete, possibly with a Callsite attached.
+ /// @param Chained Is this call part of a chain build?
/// @param SM A map for a function's specialisation range
/// @param AllSpecs A vector to add potential specializations to.
/// @param UniqueSpecs Map of existing specializations.
+ /// @param CurrentChain Current chain of function calls.
+ /// site.
/// @return True, if any potential specializations were found
- bool runOneSpec(Spec &S, SpecMap &SM, SmallVectorImpl<Spec> &AllSpecs,
- DenseMap<SpecSig, unsigned> &UniqueSpecs);
+ bool runOneSpec(Spec &S, bool Chained, SpecMap &SM,
+ SmallVectorImpl<Spec> &AllSpecs,
+ DenseMap<SpecSig, unsigned> &UniqueSpecs,
+ SmallPtrSet<Function *, 4> CurrentChain);
/// Compute the inlining bonus for replacing argument \p A with constant \p C.
unsigned getInliningBonus(Argument *A, Constant *C);
@@ -325,7 +355,8 @@ class FunctionSpecializer {
/// @param F Function to specialize
/// @param S Which specialization to create
/// @return The new, cloned function
- Function *createSpecialization(Function *F, const SpecSig &S);
+ Function *createSpecialization(Function *F, const SpecSig &S,
+ ValueToValueMapTy &Mappings);
/// Determine if it is possible to specialise the function for constant values
/// of the formal parameter \p A.
diff --git a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
index 607468f396296..02836a7fc65d8 100644
--- a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
@@ -166,15 +166,18 @@ Cost InstCostVisitor::getCodeSizeSavingsFromPendingPHIs() {
}
/// Compute the codesize savings for replacing argument \p A with constant \p C.
-Cost InstCostVisitor::getCodeSizeSavingsForArg(Argument *A, Constant *C) {
+Cost InstCostVisitor::getCodeSizeSavingsForArg(Argument *A, Constant *C,
+ CallUserT *CallUsers) {
LLVM_DEBUG(dbgs() << "FnSpecialization: Analysing bonus for constant: "
<< C->getNameOrAsOperand() << "\n");
Cost CodeSize;
- for (auto *U : A->users())
- if (auto *UI = dyn_cast<Instruction>(U))
+ for (Use &UseEdge : A->uses()) {
+ User *U = UseEdge.getUser();
+ if (auto *UI = dyn_cast<Instruction>(U)) {
if (isBlockExecutable(UI->getParent()))
- CodeSize += getCodeSizeSavingsForUser(UI, A, C);
-
+ CodeSize += getCodeSizeSavingsForUser(UI, A, C, CallUsers, &UseEdge);
+ }
+ }
LLVM_DEBUG(dbgs() << "FnSpecialization: Accumulated bonus {CodeSize = "
<< CodeSize << "} for argument " << *A << "\n");
return CodeSize;
@@ -217,7 +220,9 @@ Cost InstCostVisitor::getLatencySavingsForKnownConstants() {
}
Cost InstCostVisitor::getCodeSizeSavingsForUser(Instruction *User, Value *Use,
- Constant *C) {
+ Constant *C,
+ CallUserT *CallUsers,
+ llvm::Use *UseEdge) {
// We have already propagated a constant for this user.
if (KnownConstants.contains(User))
return 0;
@@ -227,10 +232,35 @@ Cost InstCostVisitor::getCodeSizeSavingsForUser(Instruction *User, Value *Use,
: KnownConstants.end();
Cost CodeSize = 0;
+ auto isChainableCall = [&](Instruction *I) -> bool {
+ if (CallInst *CI = dyn_cast<CallInst>(I);
+ CI && CI->getIntrinsicID() == llvm::Intrinsic::not_intrinsic) {
+ LLVM_DEBUG(
+ dbgs() << "FnSpecialization: Found constant forwarded via a call "
+ << *C << "\n");
+ Function *F = CI->getCalledFunction();
+ if (F && CallUsers && UseEdge) { // Avoid function pointers
+ unsigned Idx = CI->getArgOperandNo(UseEdge);
+ LLVM_DEBUG(dbgs() << "FnSpecialization: Function called: "
+ << F->getName() << " argument number: " << Idx
+ << "\n");
+ (*CallUsers)[CI].first.Args.push_back({F->getArg(Idx), C});
+ (*CallUsers)[CI].second = F;
+ return true;
+ } else {
+ LLVM_DEBUG(
+ dbgs() << "FnSpecialization: Could not find call function.\n");
+ }
+ }
+ return false;
+ };
if (auto *I = dyn_cast<SwitchInst>(User)) {
CodeSize = estimateSwitchInst(*I);
} else if (auto *I = dyn_cast<BranchInst>(User)) {
CodeSize = estimateBranchInst(*I);
+ } else if (isChainableCall(User)) {
+ // Will get benefit from recursive call to findSpecializations()
+ return 0;
} else {
C = visit(*User);
if (!C)
@@ -668,13 +698,17 @@ static unsigned getCostValue(const Cost &C) {
return static_cast<unsigned>(Value);
}
-bool FunctionSpecializer::runOneSpec(Spec &S, SpecMap &SM,
+bool FunctionSpecializer::runOneSpec(Spec &S, bool Chained, SpecMap &SM,
SmallVectorImpl<Spec> &AllSpecs,
- DenseMap<SpecSig, unsigned> &UniqueSpecs) {
+ DenseMap<SpecSig, unsigned> &UniqueSpecs,
+ SmallPtrSet<Function *, 4> CurrentChain) {
Function &F = *(S.F);
if (!isCandidateFunction(&F))
return false;
+ LLVM_DEBUG(dbgs() << "FnSpecialization: Trying function " << F.getName()
+ << ", Chain=" << Chained << "\n");
+
auto [It, Inserted] = FunctionMetrics.try_emplace(&F);
CodeMetrics &Metrics = It->second;
// Analyze the function.
@@ -716,7 +750,8 @@ bool FunctionSpecializer::runOneSpec(Spec &S, SpecMap &SM,
if (Inserted && Metrics.isRecursive)
promoteConstantStackValues(&F);
- if (!findSpecializations(FuncSize, AllSpecs, SM, S, UniqueSpecs)) {
+ if (!findSpecializations(FuncSize, AllSpecs, SM, S, UniqueSpecs,
+ CurrentChain)) {
LLVM_DEBUG(
dbgs() << "FnSpecialization: No possible specializations found for "
<< F.getName() << "\n");
@@ -740,7 +775,9 @@ bool FunctionSpecializer::run() {
unsigned NumCandidates = 0;
for (Function &F : M) {
Spec S(&F);
- if (runOneSpec(S, SM, AllSpecs, UniqueSpecs))
+ SmallPtrSet<Function *, 4> CurrentChain;
+ if (runOneSpec(S, /*Chained*/ false, SM, AllSpecs, UniqueSpecs,
+ CurrentChain))
++NumCandidates;
}
@@ -792,50 +829,90 @@ bool FunctionSpecializer::run() {
// Create the chosen specializations.
SmallPtrSet<Function *, 8> OriginalFuncs;
SmallVector<Function *> Clones;
+ // Does this also need to include the base function in the hash, or is the
+ // SpecSig sufficient
+ DenseMap<SpecSig, Function *> UniqueClones;
for (unsigned I = 0; I < NSpecs; ++I) {
Spec &S = AllSpecs[BestSpecs[I]];
- auto actuallySpecialize = [&](Spec &S) -> void {
- // Accumulate the codesize growth for the function, now we are creating
- // the specialization.
- FunctionGrowth[S.F] += S.CodeSize;
-
- S.Clone = createSpecialization(S.F, S.Sig);
+ // Update the known call sites to call the clone.
+ ValueToValueMapTy Mappings;
+
+ auto actuallySpecialize = [&](auto &&actuallySpecialize, Spec &S,
+ CallSiteStatusT Status, unsigned Parent,
+ ValueToValueMapTy &Mappings) -> void {
+ if (Status == CallSiteStatusT::HAS_PARENT) {
+ for (auto &CS : S.CallSites) {
+ if (CS.Status == Status && CS.Parent == Parent) {
+ CallBase *&Call = CS.CallSite;
+ Value *V = Mappings[Call];
+ Call = dyn_cast<CallBase>(V);
+ }
+ }
+ }
- // Update the known call sites to call the clone.
+ bool NewClone;
+ ValueToValueMapTy CurrMappings;
+ if (auto It = UniqueClones.find(S.Sig); It != UniqueClones.end()) {
+ NewClone = false;
+ S.Clone = It->second;
+ } else {
+ NewClone = true;
+ S.Clone = createSpecialization(S.F, S.Sig, CurrMappings);
+
+ // Accumulate the codesize growth for the function, now we are creating
+ // the specialization.
+ FunctionGrowth[S.F] += S.CodeSize;
+
+ UniqueClones[S.Sig] = S.Clone;
+ Clones.push_back(S.Clone);
+ OriginalFuncs.insert(S.F);
+ }
for (auto &CS : S.CallSites) {
- Function *Clone = S.Clone;
- CallBase *Call = CS.CallSite;
- LLVM_DEBUG(dbgs() << "FnSpecialization: Redirecting " << *Call
- << " to call " << Clone->getName() << "\n");
- Call->setCalledFunction(Clone);
- auto &BFI = GetBFI(*Call->getFunction());
- std::optional<uint64_t> Count =
- BFI.getBlockProfileCount(Call->getParent());
- if (Count && !ProfcheckDisableMetadataFixes) {
- std::optional<llvm::Function::ProfileCount> MaybeCloneCount =
- Clone->getEntryCount();
- if (MaybeCloneCount) {
- uint64_t CallCount = *Count + MaybeCloneCount->getCount();
- Clone->setEntryCount(CallCount);
- if (std::optional<llvm::Function::ProfileCount> MaybeOriginalCount =
- S.F->getEntryCount()) {
- uint64_t OriginalCount = MaybeOriginalCount->getCount();
- if (OriginalCount >= *Count) {
- S.F->setEntryCount(OriginalCount - *Count);
- } else {
- // This should generally not happen as that would mean there are
- // more computed calls to the function than what was recorded.
- LLVM_DEBUG(S.F->setEntryCount(0));
+ if (CS.Status == Status && CS.Parent == Parent) {
+ Function *Clone = S.Clone;
+ CallBase *&Call = CS.CallSite;
+ LLVM_DEBUG(dbgs() << "FnSpecialization: Redirecting " << *Call
+ << " to call " << Clone->getName() << "\n");
+ Call->setCalledFunction(Clone);
+ auto &BFI = GetBFI(*Call->getFunction());
+ std::optional<uint64_t> Count =
+ BFI.getBlockProfileCount(Call->getParent());
+ if (Count && !ProfcheckDisableMetadataFixes) {
+ std::optional<llvm::Function::ProfileCount> MaybeCloneCount =
+ Clone->getEntryCount();
+ if (MaybeCloneCount) {
+ uint64_t CallCount = *Count + MaybeCloneCount->getCount();
+ Clone->setEntryCount(CallCount);
+ if (std::optional<llvm::Function::ProfileCount>
+ MaybeOriginalCount = S.F->getEntryCount()) {
+ uint64_t OriginalCount = MaybeOriginalCount->getCount();
+ if (OriginalCount >= *Count) {
+ S.F->setEntryCount(OriginalCount - *Count);
+ } else {
+ // This should generally not happen as that would mean there
+ // are more computed calls to the function than what was
+ // recorded.
+ LLVM_DEBUG(S.F->setEntryCount(0));
+ }
}
}
}
}
}
- Clones.push_back(S.Clone);
- OriginalFuncs.insert(S.F);
+ if (!NewClone)
+ return;
+ for (auto &SSI : S.SubSpecs) {
+ Spec &SS = AllSpecs[SSI];
+ actuallySpecialize(actuallySpecialize, SS,
+ /*Status*/ CallSiteStatusT::HAS_PARENT,
+ /*Parent*/ S.Loc, CurrMappings);
+ }
};
- actuallySpecialize(S);
+
+ actuallySpecialize(actuallySpecialize, S,
+ /*hasParent*/ CallSiteStatusT::NO_PARENT, /*Parent*/ 0,
+ Mappings);
}
Solver.solveWhileResolvedUndefsIn(Clones);
@@ -903,8 +980,8 @@ void FunctionSpecializer::removeDeadFunctions() {
/// Clone the function \p F and remove the ssa_copy intrinsics added by
/// the SCCPSolver in the cloned version.
-static Function *cloneCandidateFunction(Function *F, unsigned NSpecs) {
- ValueToValueMapTy Mappings;
+static Function *cloneCandidateFunction(Function *F, unsigned NSpecs,
+ ValueToValueMapTy &Mappings) {
Function *Clone = CloneFunction(F, Mappings);
Clone->setName(F->getName() + ".specialized." + Twine(NSpecs));
removeSSACopy(*Clone);
@@ -913,7 +990,8 @@ static Function *cloneCandidateFunction(Function *F, unsigned NSpecs) {
bool FunctionSpecializer::findSpecializations(
unsigned FuncSize, SmallVectorImpl<Spec> &AllSpecs, SpecMap &SM, Spec &InS,
- DenseMap<SpecSig, unsigned> &UniqueSpecs) {
+ DenseMap<SpecSig, unsigned> &UniqueSpecs,
+ SmallPtrSet<Function *, 4> &CurrentChain) {
Function *F = InS.F;
bool FoundSpecialization = false;
@@ -926,15 +1004,32 @@ bool FunctionSpecializer::findSpecializations(
if (Args.empty())
return false;
- for (User *U : F->users()) {
- if (!isa<CallInst>(U) && !isa<InvokeInst>(U))
- continue;
- auto &CS = *cast<CallBase>(U);
+ SmallVector<CallBase *, 8> CallSites;
+ CallSiteStatusT Status;
+ if (InS.CallSites.size()) {
+ assert(InS.CallSites.size() == 1 &&
+ "Should only be passing single call spec as part of a chain");
+ CallSites.push_back(InS.CallSites[0].CallSite);
+ Status = CallSiteStatusT::AWAITING_PARENT;
+ } else {
+ Status = CallSiteStatusT::NO_PARENT;
+ for (User *U : F->users()) {
+ // If multiple funcs, check that user is proceeding func
+ if (!isa<CallInst>(U) && !isa<InvokeInst>(U))
+ continue;
+ auto *CS = cast<CallBase>(U);
- // The user instruction does not call our function.
- if (CS.getCalledFunction() != F)
- continue;
+ // The user instruction does not call our function.
+ if (CS->getCalledFunction() != F)
+ continue;
+
+ CallSites.push_back(CS);
+ }
+ }
+ for (auto *CSP : CallSites) {
+ auto &CS = *CSP;
+ Spec Chain(F, /*CallSite*/ CSP, Status);
// If the call site has attribute minsize set, that callsite won't be
// specialized.
if (CS.hasFnAttr(Attribute::MinSize))
@@ -949,18 +1044,41 @@ bool FunctionSpecializer::findSpecializations(
// constant operands of this call site.
SpecSig S;
for (Argument *A : Args) {
- Constant *C = getCandidateConstant(CS.getArgOperand(A->getArgNo()));
- if (!C)
- continue;
- LLVM_DEBUG(dbgs() << "FnSpecialization: Found interesting argument "
- << A->getName() << " : " << C->getNameOrAsOperand()
- << "\n");
- S.Args.push_back({A, C});
+ // Check if this argument is constant from the call chain propagation
+ unsigned Idx;
+ auto &As = InS.Sig.Args;
+ for (Idx = 0; Idx < As.size(); ++Idx) {
+ if (As[Idx].Formal == A)
+ break;
+ }
+ if (As.size() == Idx) {
+ Value *PossC = CS.getArgOperand(A->getArgNo());
+ Constant *C = getCandidateConstant(PossC);
+ if (!C)
+ continue;
+ LLVM_DEBUG(dbgs() << "FnSpecialization: Found interesting argument "
+ << A->getName() << " : " << C->getNameOrAsOperand()
+ << "\n");
+ S.Args.push_back({A, C});
+ if (InS.CallSites.size()) {
+ assert(InS.CallSites.size() == 1 &&
+ "Should only be passing single call spec as part of a chain");
+ InS.Sig.Args.push_back({A, C});
+ }
+ } else {
+ Constant *C = InS.Sig.Args[Idx].Actual;
+ S.Args.push_back({A, C});
+ LLVM_DEBUG(dbgs() << "FnSpecialization: Found passed argument "
+ << A->getName() << " : " << C->getNameOrAsOperand()
+ << "\n");
+ }
}
if (S.Args.empty())
continue;
+ CallUserT CallUsers;
+
// Check if we have encountered the same specialisation already.
if (auto It = UniqueSpecs.find(S); It != UniqueSpecs.end()) {
// Existing specialisation. Add the call to the list to rewrite, unless
@@ -972,17 +1090,53 @@ bool FunctionSpecializer::findSpecializations(
if (CS.getFunction() == F)
continue;
const unsigned Index = It->second;
- AllSpecs[Index].addCall({&CS});
+ AllSpecs[Index].addCall({&CS, Status, /*Parent*/ 0});
} else {
// Calculate the specialisation gain.
Cost CodeSize;
unsigned Score = 0;
InstCostVisitor Visitor = getInstCostVisitorFor(F);
for (ArgInfo &A : S.Args) {
- CodeSize += Visitor.getCodeSizeSavingsForArg(A.Formal, A.Actual);
+ CodeSize +=
+ Visitor.getCodeSizeSavingsForArg(A.Formal, A.Actual, &CallUsers);
Score += getInliningBonus(A.Formal, A.Actual);
}
+
CodeSize += Visitor.getCodeSizeSavingsFromPendingPHIs();
+ CurrentChain.insert(F);
+
+ for (auto &CU : CallUsers) {
+ Function *NewF = CU.second.second;
+
+ // Recurse only if constants found for the function
+ if (!NewF)
+ continue;
+
+ // Don't allow any recursion in chains
+ bool isRecursion = CurrentChain.contains(NewF);
+ if (isRecursion)
+ continue;
+
+ LLVM_DEBUG(
+ dbgs() << "FnSpecialization: Recursively calling runOneSpec() on "
+ << NewF->getName() << "\n");
+
+ // Since the function might not yet be known when processing the
+ // constants due to a function pointer, wait to extract the argument
+ // pointer at a given index.
+ SpecSig NewS = CU.second.first;
+
+ Spec CallSpec(NewF, /*CallSite*/ CU.first, NewS,
+ /*Status*/ CallSiteStatusT::AWAITING_PARENT);
+ runOneSpec(CallSpec, /*Chained*/ true, SM, AllSpecs, UniqueSpecs,
+ CurrentChain);
+
+ // Use CallSpec.Sig since may have been added to within findSpec()
+ if (auto It = UniqueSpecs.find(CallSpec.Sig); It != UniqueSpecs.end()) {
+ const unsigned Index = It->second;
+ Chain.SubSpecs.push_back(Index);
+ }
+ }
unsigned CodeSizeSavings = getCostValue(CodeSize);
unsigned SpecSize = FuncSize - CodeSizeSavings;
@@ -1025,19 +1179,52 @@ bool FunctionSpecializer::findSpecializations(
if ((FunctionGrowth[F] + SpecSize) / FuncSize > MaxCodeSizeGrowth)
return false;
- Score += std::max(CodeSizeSavings, LatencySavings);
+ Chain.Score += std::max(CodeSizeSavings, LatencySavings);
return true;
};
- // Discard unprofitable specialisations.
- if (!IsProfitable())
+ auto RemoveFromSubSpecs = [&](Spec &S) -> void {
+ for (unsigned &SSI : S.SubSpecs) {
+ Spec &SS = AllSpecs[SSI];
+ auto NewEnd = std::remove_if(
+ SS.CallSites.begin(), SS.CallSites.end(),
+ [&](SpecCall &SC) -> bool {
+ return SC.Status == CallSiteStatusT::AWAITING_PARENT;
+ });
+ SS.CallSites.erase(NewEnd, SS.CallSites.end());
+ }
+ };
+
+ // Discard unprofitable specialisations
+ if (!IsProfitable()) {
+ RemoveFromSubSpecs(Chain); // Remove Parent from SubSpecs
continue;
+ }
+
+ auto AddParentToSubSpecs = [&](Spec &S) -> void {
+ for (unsigned &SSI : S.SubSpecs) {
+ Spec &SS = AllSpecs[SSI];
+ for (SpecCall &SC : SS.CallSites) {
+ if (SC.Status == CallSiteStatusT::AWAITING_PARENT) {
+ SC.Status = CallSiteStatusT::HAS_PARENT;
+ SC.Parent = S.Loc;
+ }
+ }
+ }
+ };
// Create a new specialisation entry.
- auto &Spec = AllSpecs.emplace_back(F, S, Score, SpecSize);
- if (CS.getFunction() != F)
- Spec.addCall({&CS});
+ auto &Spec = AllSpecs.emplace_back(Chain);
const unsigned Index = AllSpecs.size() - 1;
+ Spec.Loc = Index;
+ AddParentToSubSpecs(Spec);
+ // Update the chain's Sig for any new constants at this level
+ Spec.Sig = S;
+ Spec.CodeSize = SpecSize;
+
+ if (CS.getFunction() == F && !Spec.CallSites[0].Parent) {
+ Spec.CallSites.clear();
+ }
UniqueSpecs[S] = Index;
FoundSpecialization = true;
@@ -1078,9 +1265,11 @@ bool FunctionSpecializer::isCandidateFunction(Function *F) {
return true;
}
-Function *FunctionSpecializer::createSpecialization(Function *F,
- const SpecSig &S) {
- Function *Clone = cloneCandidateFunction(F, Specializations.size() + 1);
+Function *
+FunctionSpecializer::createSpecialization(Function *F, const SpecSig &S,
+ ValueToValueMapTy &Mappings) {
+ Function *Clone =
+ cloneCandidateFunction(F, Specializations.size() + 1, Mappings);
// The original function does not neccessarily have internal linkage, but the
// clone must.
>From 6e0c258b2605206f086afe355850c5151f862fee Mon Sep 17 00:00:00 2001
From: bababuck <buchner.ryan at gmail.com>
Date: Mon, 29 Sep 2025 18:44:09 -0700
Subject: [PATCH 11/23] [FnSpecialization] (2/6) Avoid creating standalone
specializations when only ever part of a chain
Such a function will get specialized as part of the chain if the chain scores well enough.
---
llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h | 7 ++++++-
llvm/lib/Transforms/IPO/FunctionSpecialization.cpp | 5 +++++
2 files changed, 11 insertions(+), 1 deletion(-)
diff --git a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
index 96dbd5d9a31e5..c18efd5bf6132 100644
--- a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
+++ b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
@@ -157,7 +157,12 @@ struct Spec {
// List of call sites, matching this specialization.
SmallVector<SpecCall> CallSites;
- void addCall(SpecCall SC) { CallSites.push_back(SC); }
+ bool AllChains = true;
+
+ void addCall(SpecCall SC) {
+ CallSites.push_back(SC);
+ AllChains = AllChains && SC.Status != CallSiteStatusT::NO_PARENT;
+ }
// List Sub-Specializations
SmallVector<unsigned> SubSpecs;
diff --git a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
index 02836a7fc65d8..7ce86cf0f221c 100644
--- a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
@@ -792,6 +792,10 @@ bool FunctionSpecializer::run() {
// specialization budget, which is derived from maximum number of
// specializations per specialization candidate function.
auto CompareScore = [&AllSpecs](unsigned I, unsigned J) {
+ if (AllSpecs[J].AllChains)
+ return true;
+ if (AllSpecs[I].AllChains)
+ return false;
if (AllSpecs[I].Score != AllSpecs[J].Score)
return AllSpecs[I].Score > AllSpecs[J].Score;
return I > J;
@@ -1224,6 +1228,7 @@ bool FunctionSpecializer::findSpecializations(
if (CS.getFunction() == F && !Spec.CallSites[0].Parent) {
Spec.CallSites.clear();
+ Spec.AllChains = true;
}
UniqueSpecs[S] = Index;
>From 5070b852256bc9c52e626107ec5463af6104bf7a Mon Sep 17 00:00:00 2001
From: bababuck <buchner.ryan at gmail.com>
Date: Mon, 29 Sep 2025 18:38:41 -0700
Subject: [PATCH 12/23] [FnSpecialization] (3/6) Don't consider specializations
that are only chains in NSpecs
They will get specialized as part of a chain, so they aren't viable as standalone specializations.
---
llvm/lib/Transforms/IPO/FunctionSpecialization.cpp | 12 ++++++++----
1 file changed, 8 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
index 7ce86cf0f221c..800771adb55bf 100644
--- a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
@@ -781,7 +781,11 @@ bool FunctionSpecializer::run() {
++NumCandidates;
}
- if (!NumCandidates) {
+ unsigned IndepSpecs = 0;
+ for (auto &S : AllSpecs)
+ if (!S.AllChains)
+ ++IndepSpecs;
+ if (!NumCandidates || !IndepSpecs) {
LLVM_DEBUG(
dbgs()
<< "FnSpecialization: No possible specializations found in module\n");
@@ -800,8 +804,8 @@ bool FunctionSpecializer::run() {
return AllSpecs[I].Score > AllSpecs[J].Score;
return I > J;
};
- const unsigned NSpecs =
- std::min(NumCandidates * MaxClones, unsigned(AllSpecs.size()));
+ const unsigned NSpecs = std::min(
+ {NumCandidates * MaxClones, unsigned(AllSpecs.size()), IndepSpecs});
SmallVector<unsigned> BestSpecs(NSpecs + 1);
std::iota(BestSpecs.begin(), BestSpecs.begin() + NSpecs, 0);
if (AllSpecs.size() > NSpecs) {
@@ -1228,7 +1232,7 @@ bool FunctionSpecializer::findSpecializations(
if (CS.getFunction() == F && !Spec.CallSites[0].Parent) {
Spec.CallSites.clear();
- Spec.AllChains = true;
+ // Don't reset AllChains since this can be standalone specialized
}
UniqueSpecs[S] = Index;
>From 2cff3147b575f5c811150a0c39728cd42170de5b Mon Sep 17 00:00:00 2001
From: bababuck <buchner.ryan at gmail.com>
Date: Mon, 29 Sep 2025 17:02:18 -0700
Subject: [PATCH 13/23] [FnSpecialization] (4/6) Cache scoring metrics as part
of Spec
When calculating possible Chains, use the metrics saved as part
of the sub-specializations.
---
.../Transforms/IPO/FunctionSpecialization.h | 21 ++++++--
.../Transforms/IPO/FunctionSpecialization.cpp | 50 ++++++++++++++++---
2 files changed, 59 insertions(+), 12 deletions(-)
diff --git a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
index c18efd5bf6132..bcc9f84308c33 100644
--- a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
+++ b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
@@ -154,6 +154,15 @@ struct Spec {
// Number of instructions in the specialization.
unsigned CodeSize;
+ // Cumulative function size of the chain
+ unsigned FuncSize;
+
+ // Latency savings
+ unsigned Latency;
+
+ // Benefit from inlining
+ unsigned InlineScore;
+
// List of call sites, matching this specialization.
SmallVector<SpecCall> CallSites;
@@ -170,16 +179,20 @@ struct Spec {
// Index within AllSpecs
unsigned Loc = 0;
- Spec(Function *F, CallBase *CallSite, const SpecSig &S, CallSiteStatusT Status)
- : F(F), Clone(nullptr), Sig(S), Score(), CodeSize(), CallSites() {
+ Spec(Function *F, CallBase *CallSite, const SpecSig &S,
+ CallSiteStatusT Status)
+ : F(F), Clone(nullptr), Sig(S), Score(), CodeSize(), FuncSize(),
+ InlineScore(), CallSites() {
addCall({CallSite, Status, /*Parent*/ 0});
}
Spec(Function *F, CallBase *CallSite, CallSiteStatusT Status)
- : F(F), Clone(nullptr), Sig(), Score(), CodeSize(), CallSites() {
+ : F(F), Clone(nullptr), Sig(), Score(), CodeSize(), FuncSize(),
+ InlineScore(), CallSites() {
addCall({CallSite, Status, /*Parent*/ 0});
}
Spec(Function *F)
- : F(F), Clone(nullptr), Sig(), Score(0), CodeSize(), CallSites(0) {}
+ : F(F), Clone(nullptr), Sig(), Score(), CodeSize(), FuncSize(),
+ InlineScore(), CallSites() {}
};
class InstCostVisitor : public InstVisitor<InstCostVisitor, Constant *> {
diff --git a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
index 800771adb55bf..7a78bd5d5aa1c 100644
--- a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
@@ -1149,6 +1149,27 @@ bool FunctionSpecializer::findSpecializations(
unsigned CodeSizeSavings = getCostValue(CodeSize);
unsigned SpecSize = FuncSize - CodeSizeSavings;
+ // Cache savings information in the chain to use for profitibility
+ // analysis of the entire chain
+ Chain.CodeSize = SpecSize;
+ Chain.InlineScore = Score;
+ Chain.FuncSize = FuncSize;
+ unsigned CumulCodeSize = 0;
+ unsigned CumulFuncSize = 0;
+ unsigned CumulInlineScore = 0;
+ unsigned CumulLatency = 0;
+ auto getCumulScores = [&](auto &&getCumulScores, Spec &CurrSpec) -> void {
+ CumulCodeSize += CurrSpec.CodeSize;
+ CumulFuncSize += CurrSpec.FuncSize;
+ CumulInlineScore += CurrSpec.InlineScore;
+ CumulLatency += CurrSpec.Latency;
+ for (auto SSI : CurrSpec.SubSpecs) {
+ getCumulScores(getCumulScores, AllSpecs[SSI]);
+ }
+ };
+ getCumulScores(getCumulScores, Chain);
+ unsigned CumulCodeSizeSavings = CumulFuncSize - CumulCodeSize;
+
auto IsProfitable = [&]() -> bool {
// No check required.
if (ForceSpecialization)
@@ -1157,37 +1178,50 @@ bool FunctionSpecializer::findSpecializations(
LLVM_DEBUG(
dbgs() << "FnSpecialization: Specialization bonus {Inlining = "
<< Score << " (" << (Score * 100 / FuncSize) << "%)}\n");
+ LLVM_DEBUG(
+ dbgs()
+ << "FnSpecialization: Chain specialization bonus {Inlining = "
+ << CumulInlineScore << " ("
+ << (CumulInlineScore * 100 / CumulFuncSize) << "%)}\n");
// Minimum inlining bonus.
- if (Score > MinInliningBonus * FuncSize / 100)
+ if ((Score > MinInliningBonus * FuncSize / 100) &&
+ (CumulInlineScore > MinInliningBonus * CumulFuncSize / 100))
return true;
LLVM_DEBUG(
dbgs() << "FnSpecialization: Specialization bonus {CodeSize = "
<< CodeSizeSavings << " ("
<< (CodeSizeSavings * 100 / FuncSize) << "%)}\n");
+ LLVM_DEBUG(dbgs() << "FnSpecialization: Cumulative specialization "
+ "bonus {CodeSize = "
+ << CumulCodeSizeSavings << " ("
+ << (CumulCodeSizeSavings * 100 / CumulFuncSize)
+ << "%)}\n");
// Minimum codesize savings.
- if (CodeSizeSavings <= MinCodeSizeSavings * FuncSize / 100)
+ if ((CodeSizeSavings <= MinCodeSizeSavings * FuncSize / 100) &&
+ (CumulCodeSizeSavings <= MinCodeSizeSavings * CumulFuncSize / 100))
return false;
// Lazily compute the Latency, to avoid unnecessarily computing BFI.
- unsigned LatencySavings =
+ Chain.Latency =
getCostValue(Visitor.getLatencySavingsForKnownConstants());
+ CumulLatency += Chain.Latency;
LLVM_DEBUG(
dbgs() << "FnSpecialization: Specialization bonus {Latency = "
- << LatencySavings << " ("
- << (LatencySavings * 100 / FuncSize) << "%)}\n");
+ << CumulLatency << " ("
+ << (CumulLatency * 100 / CumulFuncSize) << "%)}\n");
// Minimum latency savings.
- if (LatencySavings < MinLatencySavings * FuncSize / 100)
+ if (CumulLatency < MinLatencySavings * CumulFuncSize / 100)
return false;
// Maximum codesize growth.
if ((FunctionGrowth[F] + SpecSize) / FuncSize > MaxCodeSizeGrowth)
return false;
- Chain.Score += std::max(CodeSizeSavings, LatencySavings);
+ Score = CumulInlineScore + std::max(CumulCodeSizeSavings, CumulLatency);
return true;
};
@@ -1228,7 +1262,7 @@ bool FunctionSpecializer::findSpecializations(
AddParentToSubSpecs(Spec);
// Update the chain's Sig for any new constants at this level
Spec.Sig = S;
- Spec.CodeSize = SpecSize;
+ Spec.Score = Score;
if (CS.getFunction() == F && !Spec.CallSites[0].Parent) {
Spec.CallSites.clear();
>From 49725f4e0f211989f49e1e9a82ac54e0d692bbfd Mon Sep 17 00:00:00 2001
From: bababuck <buchner.ryan at gmail.com>
Date: Tue, 30 Sep 2025 16:39:23 -0700
Subject: [PATCH 14/23] [FnSpecialization] (5/6) Use an explicit structure for
tracking visited functions
Otherwise a visit made while building a chain can be mistaken for having
checked all of the call sites of the function.
---
llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h | 1 +
llvm/lib/Transforms/IPO/FunctionSpecialization.cpp | 7 ++++++-
2 files changed, 7 insertions(+), 1 deletion(-)
diff --git a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
index bcc9f84308c33..0e42d012fefbb 100644
--- a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
+++ b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
@@ -295,6 +295,7 @@ class FunctionSpecializer {
SmallPtrSet<Function *, 32> Specializations;
SmallPtrSet<Function *, 32> DeadFunctions;
+ SmallPtrSet<Function *, 32> VisitedFunctions;
DenseMap<Function *, CodeMetrics> FunctionMetrics;
DenseMap<Function *, unsigned> FunctionGrowth;
unsigned NGlobals = 0;
diff --git a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
index 7a78bd5d5aa1c..bf6d675957aa9 100644
--- a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
@@ -736,9 +736,14 @@ bool FunctionSpecializer::runOneSpec(Spec &S, bool Chained, SpecMap &SM,
// recursive functions when running multiple times to save wasted analysis,
// as we will not be able to specialize on any newly found literal constant
// return values.
- if (!SpecializeLiteralConstant && !Inserted && !Metrics.isRecursive)
+ if (!Chained && !SpecializeLiteralConstant && VisitedFunctions.contains(&F) &&
+ !Metrics.isRecursive)
return false;
+ // Don't want to mistake this chain for checking all of the CallSites for F
+ if (!Chained)
+ VisitedFunctions.insert(&F);
+
int64_t Sz = Metrics.NumInsts.getValue();
assert(Sz > 0 && "CodeSize should be positive");
// It is safe to down cast from int64_t, NumInsts is always positive.
>From 1b5a006ecbbbc6c6e68fd588b5d19472db97a9d5 Mon Sep 17 00:00:00 2001
From: bababuck <buchner.ryan at gmail.com>
Date: Tue, 30 Sep 2025 12:09:52 -0700
Subject: [PATCH 15/23] [FnSpecialization] (6/6) Update tests for prior set of
changes
---
.../specialize-chain.ll | 44 +++++++--
.../FunctionSpecialization/track-return.ll | 94 ++++++++++---------
2 files changed, 86 insertions(+), 52 deletions(-)
diff --git a/llvm/test/Transforms/FunctionSpecialization/specialize-chain.ll b/llvm/test/Transforms/FunctionSpecialization/specialize-chain.ll
index 8b3a028ca1aa7..ac786d0e27cee 100644
--- a/llvm/test/Transforms/FunctionSpecialization/specialize-chain.ll
+++ b/llvm/test/Transforms/FunctionSpecialization/specialize-chain.ll
@@ -114,19 +114,51 @@ entry:
; CHECK-NEXT: ret i32 0
;
;
-; CHECK-LABEL: define range(i32 -2147483642, -2147483648) i32 @main() {
+; CHECK-LABEL: define i32 @main() {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[ADD:%.*]] = call i32 @incr.specialized.1(i32 10)
; CHECK-NEXT: [[INT:%.*]] = call i32 @intrinsic(i32 3)
; CHECK-NEXT: [[FWD_UNFOLD:%.*]] = call i32 @forward_unfold(i32 3)
-; CHECK-NEXT: [[FWD_INNER:%.*]] = call i32 @forward_inner(i32 3)
-; CHECK-NEXT: [[FWD_OUTER:%.*]] = call i32 @forward_outer(i32 3)
-; CHECK-NEXT: [[FWD_OUTER1:%.*]] = call i32 @forward_outer(i32 3)
-; CHECK-NEXT: [[MULTI_CALL:%.*]] = call i32 @multi_call(i32 5)
-; CHECK-NEXT: ret i32 [[MULTI_CALL]]
+; CHECK-NEXT: [[FWD_INNER:%.*]] = call i32 @forward_inner.specialized.4(i32 3)
+; CHECK-NEXT: [[FWD_OUTER:%.*]] = call i32 @forward_outer.specialized.6(i32 3)
+; CHECK-NEXT: [[FWD_OUTER1:%.*]] = call i32 @forward_outer.specialized.6(i32 3)
+; CHECK-NEXT: [[MULTI_CALL:%.*]] = call i32 @multi_call.specialized.2(i32 5)
+; CHECK-NEXT: ret i32 11
;
;
; CHECK-LABEL: define internal i32 @incr.specialized.1(
; CHECK-SAME: i32 [[A:%.*]]) {
; CHECK-NEXT: ret i32 poison
;
+;
+; CHECK-LABEL: define internal i32 @multi_call.specialized.2(
+; CHECK-SAME: i32 [[A:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[CALL:%.*]] = call i32 @incr.specialized.3(i32 5)
+; CHECK-NEXT: [[MUL_CALL:%.*]] = call i32 @incr.specialized.1(i32 10)
+; CHECK-NEXT: ret i32 poison
+;
+;
+; CHECK-LABEL: define internal i32 @incr.specialized.3(
+; CHECK-SAME: i32 [[A:%.*]]) {
+; CHECK-NEXT: ret i32 poison
+;
+;
+; CHECK-LABEL: define internal i32 @forward_inner.specialized.4(
+; CHECK-SAME: i32 [[A:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[CALL:%.*]] = call i32 @incr.specialized.5(i32 3)
+; CHECK-NEXT: ret i32 poison
+;
+;
+; CHECK-LABEL: define internal i32 @incr.specialized.5(
+; CHECK-SAME: i32 [[A:%.*]]) {
+; CHECK-NEXT: ret i32 poison
+;
+;
+; CHECK-LABEL: define internal i32 @forward_outer.specialized.6(
+; CHECK-SAME: i32 [[A:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[CALL:%.*]] = call i32 @forward_inner.specialized.4(i32 3)
+; CHECK-NEXT: ret i32 poison
+;
diff --git a/llvm/test/Transforms/FunctionSpecialization/track-return.ll b/llvm/test/Transforms/FunctionSpecialization/track-return.ll
index aaff6c138bbaa..d03d7c872ed79 100644
--- a/llvm/test/Transforms/FunctionSpecialization/track-return.ll
+++ b/llvm/test/Transforms/FunctionSpecialization/track-return.ll
@@ -4,7 +4,7 @@ define i64 @main() {
; CHECK: define i64 @main
; CHECK-NEXT: entry:
; CHECK-NEXT: [[C1:%.*]] = call i64 @foo.specialized.1(i1 true, i64 3, i64 1)
-; CHECK-NEXT: [[C2:%.*]] = call i64 @foo.specialized.2(i1 false, i64 4, i64 -1)
+; CHECK-NEXT: [[C2:%.*]] = call i64 @foo.specialized.3(i1 false, i64 4, i64 -1)
; CHECK-NEXT: ret i64 8
;
entry:
@@ -15,27 +15,6 @@ entry:
}
define internal i64 @foo(i1 %flag, i64 %m, i64 %n) {
-;
-; CHECK: define internal i64 @foo.specialized.1
-; CHECK-NEXT: entry:
-; CHECK-NEXT: br label %plus
-; CHECK: plus:
-; CHECK-NEXT: [[N0:%.*]] = call i64 @binop.specialized.4(i64 3, i64 1)
-; CHECK-NEXT: [[RES0:%.*]] = call i64 @bar.specialized.6(i64 4)
-; CHECK-NEXT: br label %merge
-; CHECK: merge:
-; CHECK-NEXT: ret i64 poison
-;
-; CHECK: define internal i64 @foo.specialized.2
-; CHECK-NEXT: entry:
-; CHECK-NEXT: br label %minus
-; CHECK: minus:
-; CHECK-NEXT: [[N1:%.*]] = call i64 @binop.specialized.3(i64 4, i64 -1)
-; CHECK-NEXT: [[RES1:%.*]] = call i64 @bar.specialized.5(i64 3)
-; CHECK-NEXT: br label %merge
-; CHECK: merge:
-; CHECK-NEXT: ret i64 poison
-;
entry:
br i1 %flag, label %plus, label %minus
@@ -55,21 +34,61 @@ merge:
}
define internal i64 @binop(i64 %x, i64 %y) {
+entry:
+ %z = add i64 %x, %y
+ ret i64 %z
+}
+
+define internal i64 @bar(i64 %n) {
+entry:
+ %cmp = icmp sgt i64 %n, 3
+ br i1 %cmp, label %if.then, label %if.else
+
+if.then:
+ %res0 = sdiv i64 %n, 2
+ br label %if.end
+
+if.else:
+ %res1 = mul i64 %n, 2
+ br label %if.end
+
+if.end:
+ %res = phi i64 [ %res0, %if.then ], [ %res1, %if.else]
+ ret i64 %res
+}
+
+;
+; CHECK: define internal i64 @foo.specialized.1
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label %plus
+; CHECK: plus:
+; CHECK-NEXT: [[N0:%.*]] = call i64 @binop.specialized.2(i64 3, i64 1)
+; CHECK-NEXT: [[RES0:%.*]] = call i64 @bar.specialized.6(i64 4)
+; CHECK-NEXT: br label %merge
+; CHECK: merge:
+; CHECK-NEXT: ret i64 poison
+;
;
-; CHECK: define internal i64 @binop.specialized.3
+; CHECK: define internal i64 @binop.specialized.2
; CHECK-NEXT: entry:
; CHECK-NEXT: ret i64 poison
;
+;
+; CHECK: define internal i64 @foo.specialized.3
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label %minus
+; CHECK: minus:
+; CHECK-NEXT: [[N1:%.*]] = call i64 @binop.specialized.4(i64 4, i64 -1)
+; CHECK-NEXT: [[RES1:%.*]] = call i64 @bar.specialized.5(i64 3)
+; CHECK-NEXT: br label %merge
+; CHECK: merge:
+; CHECK-NEXT: ret i64 poison
+;
+;
; CHECK: define internal i64 @binop.specialized.4
; CHECK-NEXT: entry:
; CHECK-NEXT: ret i64 poison
;
-entry:
- %z = add i64 %x, %y
- ret i64 %z
-}
-
-define internal i64 @bar(i64 %n) {
;
; CHECK: define internal i64 @bar.specialized.5
; CHECK-NEXT: entry:
@@ -87,20 +106,3 @@ define internal i64 @bar(i64 %n) {
; CHECK: if.end:
; CHECK-NEXT: ret i64 poison
;
-entry:
- %cmp = icmp sgt i64 %n, 3
- br i1 %cmp, label %if.then, label %if.else
-
-if.then:
- %res0 = sdiv i64 %n, 2
- br label %if.end
-
-if.else:
- %res1 = mul i64 %n, 2
- br label %if.end
-
-if.end:
- %res = phi i64 [ %res0, %if.then ], [ %res1, %if.else]
- ret i64 %res
-}
-
>From 841a08e9d1e63a937671db4cecf787b91988462f Mon Sep 17 00:00:00 2001
From: bababuck <buchner.ryan at gmail.com>
Date: Mon, 29 Sep 2025 17:18:48 -0700
Subject: [PATCH 16/23] [FnSpecialization] Allow chains to form via recursive
folding
---
.../Transforms/IPO/FunctionSpecialization.cpp | 7 +++---
.../specialize-chain.ll | 22 ++++++++++++++-----
2 files changed, 21 insertions(+), 8 deletions(-)
diff --git a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
index bf6d675957aa9..60e1134e5af14 100644
--- a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
@@ -276,11 +276,12 @@ Cost InstCostVisitor::getCodeSizeSavingsForUser(Instruction *User, Value *Use,
LLVM_DEBUG(dbgs() << "FnSpecialization: {CodeSize = " << CodeSize
<< "} for user " << *User << "\n");
-
- for (auto *U : User->users())
+ for (llvm::Use &UE : User->uses()) {
+ llvm::User *U = UE.getUser();
if (auto *UI = dyn_cast<Instruction>(U))
if (UI != User && isBlockExecutable(UI->getParent()))
- CodeSize += getCodeSizeSavingsForUser(UI, User, C);
+ CodeSize += getCodeSizeSavingsForUser(UI, User, C, CallUsers, &UE);
+ }
return CodeSize;
}
diff --git a/llvm/test/Transforms/FunctionSpecialization/specialize-chain.ll b/llvm/test/Transforms/FunctionSpecialization/specialize-chain.ll
index ac786d0e27cee..e09f33e7fadaa 100644
--- a/llvm/test/Transforms/FunctionSpecialization/specialize-chain.ll
+++ b/llvm/test/Transforms/FunctionSpecialization/specialize-chain.ll
@@ -118,11 +118,11 @@ entry:
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[ADD:%.*]] = call i32 @incr.specialized.1(i32 10)
; CHECK-NEXT: [[INT:%.*]] = call i32 @intrinsic(i32 3)
-; CHECK-NEXT: [[FWD_UNFOLD:%.*]] = call i32 @forward_unfold(i32 3)
+; CHECK-NEXT: [[FWD_UNFOLD:%.*]] = call i32 @forward_unfold.specialized.2(i32 3)
; CHECK-NEXT: [[FWD_INNER:%.*]] = call i32 @forward_inner.specialized.4(i32 3)
; CHECK-NEXT: [[FWD_OUTER:%.*]] = call i32 @forward_outer.specialized.6(i32 3)
; CHECK-NEXT: [[FWD_OUTER1:%.*]] = call i32 @forward_outer.specialized.6(i32 3)
-; CHECK-NEXT: [[MULTI_CALL:%.*]] = call i32 @multi_call.specialized.2(i32 5)
+; CHECK-NEXT: [[MULTI_CALL:%.*]] = call i32 @multi_call.specialized.7(i32 5)
; CHECK-NEXT: ret i32 11
;
;
@@ -131,11 +131,10 @@ entry:
; CHECK-NEXT: ret i32 poison
;
;
-; CHECK-LABEL: define internal i32 @multi_call.specialized.2(
+; CHECK-LABEL: define internal i32 @forward_unfold.specialized.2(
; CHECK-SAME: i32 [[A:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
-; CHECK-NEXT: [[CALL:%.*]] = call i32 @incr.specialized.3(i32 5)
-; CHECK-NEXT: [[MUL_CALL:%.*]] = call i32 @incr.specialized.1(i32 10)
+; CHECK-NEXT: [[CALL:%.*]] = call i32 @incr.specialized.3(i32 30)
; CHECK-NEXT: ret i32 poison
;
;
@@ -162,3 +161,16 @@ entry:
; CHECK-NEXT: [[CALL:%.*]] = call i32 @forward_inner.specialized.4(i32 3)
; CHECK-NEXT: ret i32 poison
;
+;
+; CHECK-LABEL: define internal i32 @multi_call.specialized.7(
+; CHECK-SAME: i32 [[A:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[CALL:%.*]] = call i32 @incr.specialized.8(i32 5)
+; CHECK-NEXT: [[MUL_CALL:%.*]] = call i32 @incr.specialized.1(i32 10)
+; CHECK-NEXT: ret i32 poison
+;
+;
+; CHECK-LABEL: define internal i32 @incr.specialized.8(
+; CHECK-SAME: i32 [[A:%.*]]) {
+; CHECK-NEXT: ret i32 poison
+;
>From fc70c018aab69d737b690d4adb37ab3284f2172d Mon Sep 17 00:00:00 2001
From: bababuck <buchner.ryan at gmail.com>
Date: Mon, 29 Sep 2025 16:26:06 -0700
Subject: [PATCH 17/23] [FnSpecialization] Allow chains to form when collapsing
PHI nodes
---
llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h | 3 ++-
llvm/lib/Transforms/IPO/FunctionSpecialization.cpp | 7 ++++---
2 files changed, 6 insertions(+), 4 deletions(-)
diff --git a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
index 0e42d012fefbb..1910f3ecdba4f 100644
--- a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
+++ b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
@@ -228,7 +228,8 @@ class InstCostVisitor : public InstVisitor<InstCostVisitor, Constant *> {
LLVM_ABI Cost getCodeSizeSavingsForArg(Argument *A, Constant *C,
CallUserT *CallUsers = nullptr);
- LLVM_ABI Cost getCodeSizeSavingsFromPendingPHIs();
+ LLVM_ABI Cost
+ getCodeSizeSavingsFromPendingPHIs(CallUserT *CallUsers = nullptr);
LLVM_ABI Cost getLatencySavingsForKnownConstants();
diff --git a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
index 60e1134e5af14..8c3586fab4ea5 100644
--- a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
@@ -154,13 +154,14 @@ Constant *InstCostVisitor::findConstantFor(Value *V) const {
return KnownConstants.lookup(V);
}
-Cost InstCostVisitor::getCodeSizeSavingsFromPendingPHIs() {
+Cost InstCostVisitor::getCodeSizeSavingsFromPendingPHIs(CallUserT *CallUsers) {
Cost CodeSize;
while (!PendingPHIs.empty()) {
Instruction *Phi = PendingPHIs.pop_back_val();
// The pending PHIs could have been proven dead by now.
if (isBlockExecutable(Phi->getParent()))
- CodeSize += getCodeSizeSavingsForUser(Phi);
+ CodeSize +=
+ getCodeSizeSavingsForUser(Phi, nullptr, nullptr, CallUsers, nullptr);
}
return CodeSize;
}
@@ -1116,7 +1117,7 @@ bool FunctionSpecializer::findSpecializations(
Score += getInliningBonus(A.Formal, A.Actual);
}
- CodeSize += Visitor.getCodeSizeSavingsFromPendingPHIs();
+ CodeSize += Visitor.getCodeSizeSavingsFromPendingPHIs(&CallUsers);
CurrentChain.insert(F);
for (auto &CU : CallUsers) {
>From fb21dc60867473e85f76dc9f45c8c445f279922f Mon Sep 17 00:00:00 2001
From: bababuck <buchner.ryan at gmail.com>
Date: Tue, 30 Sep 2025 22:09:58 -0700
Subject: [PATCH 18/23] [FnSpecialization] Refactor CallUsersT to contain
Idx/Constant pairs
In the future we won't know the Function at the time of insertion, so we
need to store an index so we can look up the Argument later.
---
llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h | 6 +++---
llvm/lib/Transforms/IPO/FunctionSpecialization.cpp | 6 ++++--
2 files changed, 7 insertions(+), 5 deletions(-)
diff --git a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
index 1910f3ecdba4f..a70439146a144 100644
--- a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
+++ b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
@@ -95,12 +95,12 @@
namespace llvm {
struct Spec;
-struct SpecSig;
-
// Map of potential specializations for each function.
using SpecMap = DenseMap<Function *, SmallVector<unsigned>>;
-using CallUserT = SmallMapVector<CallBase *, std::pair<SpecSig, Function *>, 4>;
+using CallUserT = SmallMapVector<
+ CallBase *,
+ std::pair<SmallVector<std::pair<unsigned, Constant *>, 4>, Function *>, 4>;
// Just a shorter abbreviation to improve indentation.
using Cost = InstructionCost;
diff --git a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
index 8c3586fab4ea5..f0d323929d474 100644
--- a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
@@ -245,7 +245,7 @@ Cost InstCostVisitor::getCodeSizeSavingsForUser(Instruction *User, Value *Use,
LLVM_DEBUG(dbgs() << "FnSpecialization: Function called: "
<< F->getName() << " argument number: " << Idx
<< "\n");
- (*CallUsers)[CI].first.Args.push_back({F->getArg(Idx), C});
+ (*CallUsers)[CI].first.push_back({Idx, C});
(*CallUsers)[CI].second = F;
return true;
} else {
@@ -1139,7 +1139,9 @@ bool FunctionSpecializer::findSpecializations(
// Since the function might not yet be known when processing the
// constants due to a function pointer, wait to extract the argument
// pointer at a given index.
- SpecSig NewS = CU.second.first;
+ SpecSig NewS;
+ for (auto &P : CU.second.first)
+ NewS.Args.push_back({NewF->getArg(P.first), P.second});
Spec CallSpec(NewF, /*CallSite*/ CU.first, NewS,
/*Status*/ CallSiteStatusT::AWAITING_PARENT);
>From da77758123d701196a386beafdfb52a21a3df537 Mon Sep 17 00:00:00 2001
From: bababuck <buchner.ryan at gmail.com>
Date: Wed, 1 Oct 2025 07:34:48 -0700
Subject: [PATCH 19/23] [FnSpecialization] If the Argument number is greater
than the number of arguments, skip chaining
See test/Transforms/FunctionSpecialization/compiler-crash-60191.ll
---
llvm/lib/Transforms/IPO/FunctionSpecialization.cpp | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
index f0d323929d474..dd126a91da7e6 100644
--- a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
@@ -1067,7 +1067,10 @@ bool FunctionSpecializer::findSpecializations(
break;
}
if (As.size() == Idx) {
- Value *PossC = CS.getArgOperand(A->getArgNo());
+ unsigned ArgNo = A->getArgNo();
+ if (ArgNo >= CS.arg_size())
+ continue;
+ Value *PossC = CS.getArgOperand(ArgNo);
Constant *C = getCandidateConstant(PossC);
if (!C)
continue;
>From 9773c8b61e11b57032be33800319d6fefe8bf1f0 Mon Sep 17 00:00:00 2001
From: bababuck <buchner.ryan at gmail.com>
Date: Wed, 1 Oct 2025 07:37:57 -0700
Subject: [PATCH 20/23] [FnSpecialization] Allow specialization of indirect
function calls exposed by specialization
---
.../Transforms/IPO/FunctionSpecialization.cpp | 12 ++++++++-
.../compiler-crash-60191.ll | 4 +--
.../track-ptr-return.ll | 27 +++++++++++++------
3 files changed, 32 insertions(+), 11 deletions(-)
diff --git a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
index dd126a91da7e6..497b0ffb44b33 100644
--- a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
@@ -234,13 +234,15 @@ Cost InstCostVisitor::getCodeSizeSavingsForUser(Instruction *User, Value *Use,
Cost CodeSize = 0;
auto isChainableCall = [&](Instruction *I) -> bool {
+ if (!CallUsers || !UseEdge)
+ return false;
if (CallInst *CI = dyn_cast<CallInst>(I);
CI && CI->getIntrinsicID() == llvm::Intrinsic::not_intrinsic) {
LLVM_DEBUG(
dbgs() << "FnSpecialization: Found constant forwarded via a call "
<< *C << "\n");
Function *F = CI->getCalledFunction();
- if (F && CallUsers && UseEdge) { // Avoid function pointers
+ if (F) { // Avoid function pointers
unsigned Idx = CI->getArgOperandNo(UseEdge);
LLVM_DEBUG(dbgs() << "FnSpecialization: Function called: "
<< F->getName() << " argument number: " << Idx
@@ -248,9 +250,17 @@ Cost InstCostVisitor::getCodeSizeSavingsForUser(Instruction *User, Value *Use,
(*CallUsers)[CI].first.push_back({Idx, C});
(*CallUsers)[CI].second = F;
return true;
+ } else if (Use == CI->getCalledOperand()) {
+ LLVM_DEBUG(dbgs() << "FnSpecialization: Found call to constant "
+ "function pointer.\n");
+ Function *CF = dyn_cast<Function>(C);
+ assert(CF && "Indirect call to a non-Function type");
+ (*CallUsers)[CI].second = CF;
} else {
LLVM_DEBUG(
dbgs() << "FnSpecialization: Could not find call function.\n");
+ unsigned Idx = CI->getArgOperandNo(UseEdge);
+ (*CallUsers)[CI].first.push_back({Idx, C});
}
}
return false;
diff --git a/llvm/test/Transforms/FunctionSpecialization/compiler-crash-60191.ll b/llvm/test/Transforms/FunctionSpecialization/compiler-crash-60191.ll
index 668929824cc6f..456480b2cc674 100644
--- a/llvm/test/Transforms/FunctionSpecialization/compiler-crash-60191.ll
+++ b/llvm/test/Transforms/FunctionSpecialization/compiler-crash-60191.ll
@@ -60,7 +60,7 @@ define i32 @f2(i32 %offset) {
}
; Tests that `func` has been specialized and it didn't cause compiler crash.
+; CHECK-DAG: func.specialized.4
+; CHECK-DAG: func.specialized.5
; CHECK-DAG: func.specialized.1
-; CHECK-DAG: func.specialized.2
-; CHECK-DAG: func.specialized.3
diff --git a/llvm/test/Transforms/FunctionSpecialization/track-ptr-return.ll b/llvm/test/Transforms/FunctionSpecialization/track-ptr-return.ll
index f4ba0e72a1b43..ef40bf12ae59d 100644
--- a/llvm/test/Transforms/FunctionSpecialization/track-ptr-return.ll
+++ b/llvm/test/Transforms/FunctionSpecialization/track-ptr-return.ll
@@ -48,9 +48,8 @@ entry:
; CHECK-NEXT: [[OP1:%.*]] = call ptr @select_op.specialized.1(ptr @global_true)
; CHECK-NEXT: [[OP2:%.*]] = call ptr @select_op.specialized.2(ptr @global_false)
; CHECK-NEXT: [[C1:%.*]] = call i64 @compute.specialized.3(ptr @plus)
-; CHECK-NEXT: [[C2:%.*]] = call i64 @compute.specialized.4(ptr @minus)
-; CHECK-NEXT: [[ADD:%.*]] = add i64 [[C1]], [[C2]]
-; CHECK-NEXT: ret i64 [[ADD]]
+; CHECK-NEXT: [[C2:%.*]] = call i64 @compute.specialized.5(ptr @minus)
+; CHECK-NEXT: ret i64 2
;
;
; CHECK-LABEL: define ptr @select_op(
@@ -87,15 +86,27 @@ entry:
; CHECK-LABEL: define internal i64 @compute.specialized.3(
; CHECK-SAME: ptr [[OP:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
-; CHECK-NEXT: [[RES:%.*]] = call i64 @plus(i64 1)
-; CHECK-NEXT: ret i64 [[RES]]
+; CHECK-NEXT: [[RES:%.*]] = call i64 @plus.specialized.4(i64 1)
+; CHECK-NEXT: ret i64 poison
;
;
-; CHECK-LABEL: define internal i64 @compute.specialized.4(
+; CHECK-LABEL: define internal i64 @plus.specialized.4(
+; CHECK-SAME: i64 [[X:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: ret i64 poison
+;
+;
+; CHECK-LABEL: define internal i64 @compute.specialized.5(
; CHECK-SAME: ptr [[OP:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
-; CHECK-NEXT: [[RES:%.*]] = call i64 @minus(i64 1)
-; CHECK-NEXT: ret i64 [[RES]]
+; CHECK-NEXT: [[RES:%.*]] = call i64 @minus.specialized.6(i64 1)
+; CHECK-NEXT: ret i64 poison
+;
+;
+; CHECK-LABEL: define internal i64 @minus.specialized.6(
+; CHECK-SAME: i64 [[X:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: ret i64 poison
;
;
; NOLIT-LABEL: define i64 @main() {
>From 706fb4a3f596978c8cca4e2680c4fd9c01e99cae Mon Sep 17 00:00:00 2001
From: bababuck <buchner.ryan at gmail.com>
Date: Mon, 29 Sep 2025 17:24:29 -0700
Subject: [PATCH 21/23] [FnSpecialization] Allow functions that are too small
to specialize as part of a chain
This way we can still more accurately see the effect of the specialization.
---
.../Transforms/IPO/FunctionSpecialization.h | 2 ++
.../Transforms/IPO/FunctionSpecialization.cpp | 19 ++++++++++++++-----
2 files changed, 16 insertions(+), 5 deletions(-)
diff --git a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
index a70439146a144..a8f6447c9eff8 100644
--- a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
+++ b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
@@ -179,6 +179,8 @@ struct Spec {
// Index within AllSpecs
unsigned Loc = 0;
+ bool SpecializeOnOwn = true;
+
Spec(Function *F, CallBase *CallSite, const SpecSig &S,
CallSiteStatusT Status)
: F(F), Clone(nullptr), Sig(S), Score(), CodeSize(), FuncSize(),
diff --git a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
index 497b0ffb44b33..24472e56fe6ee 100644
--- a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
@@ -740,10 +740,19 @@ bool FunctionSpecializer::runOneSpec(Spec &S, bool Chained, SpecMap &SM,
// If the code metrics reveal that we shouldn't duplicate the function,
// or if the code size implies that this function is easy to get inlined,
// then we shouldn't specialize it.
- if (Metrics.notDuplicatable || !Metrics.NumInsts.isValid() ||
- (RequireMinSize && Metrics.NumInsts < MinFunctionSize))
+ if (Metrics.notDuplicatable || !Metrics.NumInsts.isValid())
return false;
+ if (RequireMinSize && Metrics.NumInsts < MinFunctionSize) {
+ if (Chained) {
+ // Want to specialize as part of chain still so we can more accurately
+ // assess the chain specialization
+ S.SpecializeOnOwn = false;
+ } else {
+ return false;
+ }
+ }
+
// When specialization on literal constants is disabled, only consider
// recursive functions when running multiple times to save wasted analysis,
// as we will not be able to specialize on any newly found literal constant
@@ -800,7 +809,7 @@ bool FunctionSpecializer::run() {
unsigned IndepSpecs = 0;
for (auto &S : AllSpecs)
- if (!S.AllChains)
+ if (S.SpecializeOnOwn && !S.AllChains)
++IndepSpecs;
if (!NumCandidates || !IndepSpecs) {
LLVM_DEBUG(
@@ -813,9 +822,9 @@ bool FunctionSpecializer::run() {
// specialization budget, which is derived from maximum number of
// specializations per specialization candidate function.
auto CompareScore = [&AllSpecs](unsigned I, unsigned J) {
- if (AllSpecs[J].AllChains)
+ if (!AllSpecs[J].SpecializeOnOwn || AllSpecs[J].AllChains)
return true;
- if (AllSpecs[I].AllChains)
+ if (!AllSpecs[I].SpecializeOnOwn || AllSpecs[I].AllChains)
return false;
if (AllSpecs[I].Score != AllSpecs[J].Score)
return AllSpecs[I].Score > AllSpecs[J].Score;
>From 8f9443849459c3acaf1416f3af675916958746e9 Mon Sep 17 00:00:00 2001
From: bababuck <buchner.ryan at gmail.com>
Date: Wed, 8 Oct 2025 15:02:09 -0700
Subject: [PATCH 22/23] [FnSpecialization] Don't specialize chained functions
that take variable arguments
---
llvm/lib/Transforms/IPO/FunctionSpecialization.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
index 24472e56fe6ee..746193eb2e547 100644
--- a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
@@ -1146,7 +1146,7 @@ bool FunctionSpecializer::findSpecializations(
Function *NewF = CU.second.second;
// Recurse only if constants found for the function
- if (!NewF)
+ if (!NewF || NewF->isVarArg())
continue;
// Don't allow any recursion in chains
>From 54f89a308485ddcc4c138c6d3ccac8eb29f3169a Mon Sep 17 00:00:00 2001
From: bababuck <buchner.ryan at gmail.com>
Date: Fri, 17 Oct 2025 09:22:13 -0700
Subject: [PATCH 23/23] Lint fix
---
llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h | 4 +---
1 file changed, 1 insertion(+), 3 deletions(-)
diff --git a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
index a8f6447c9eff8..84bdd5c2379f1 100644
--- a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
+++ b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
@@ -127,9 +127,7 @@ struct SpecSig {
}
};
-enum CallSiteStatusT {
- AWAITING_PARENT, HAS_PARENT, NO_PARENT
-};
+enum CallSiteStatusT { AWAITING_PARENT, HAS_PARENT, NO_PARENT };
struct SpecCall {
CallBase *CallSite;
More information about the llvm-commits
mailing list