[llvm] [FnSpecialization] Enable function specialization of call chains (PR #163891)

Ryan Buchner via llvm-commits llvm-commits at lists.llvm.org
Fri Oct 17 09:31:04 PDT 2025


https://github.com/bababuck updated https://github.com/llvm/llvm-project/pull/163891

>From 88fbb97e6ef7d401fe7245dbf4988eff8de73282 Mon Sep 17 00:00:00 2001
From: bababuck <buchner.ryan at gmail.com>
Date: Tue, 9 Sep 2025 13:44:37 -0700
Subject: [PATCH 01/23] [FnSpecialization] Only accept codesize savings if
 strictly greater than the minimum amount

If the knob for minimum codesize savings is turned down low enough, then for
small functions `MinCodeSizeSavings * FuncSize / 100` evaluates to `0`. With a
strict less-than comparison we would then accept a specialization that yields
no benefit.
---
 .../Transforms/IPO/FunctionSpecialization.cpp |  2 +-
 .../FunctionSpecialization/dead-gv-load.ll    |  4 +-
 .../FunctionSpecialization/maxgrowth.ll       |  8 +---
 .../recursive-penalty.ll                      | 38 ++++++++++++++++++-
 4 files changed, 40 insertions(+), 12 deletions(-)

diff --git a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
index 150a2dc5d48e2..6d4b2fb7e0065 100644
--- a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
@@ -995,7 +995,7 @@ bool FunctionSpecializer::findSpecializations(Function *F, unsigned FuncSize,
                    << (CodeSizeSavings * 100 / FuncSize) << "%)}\n");
 
         // Minimum codesize savings.
-        if (CodeSizeSavings < MinCodeSizeSavings * FuncSize / 100)
+        if (CodeSizeSavings <= MinCodeSizeSavings * FuncSize / 100)
           return false;
 
         // Lazily compute the Latency, to avoid unnecessarily computing BFI.
diff --git a/llvm/test/Transforms/FunctionSpecialization/dead-gv-load.ll b/llvm/test/Transforms/FunctionSpecialization/dead-gv-load.ll
index 134a79d349035..337780d0de2e4 100644
--- a/llvm/test/Transforms/FunctionSpecialization/dead-gv-load.ll
+++ b/llvm/test/Transforms/FunctionSpecialization/dead-gv-load.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
-; RUN: opt -passes=ipsccp  --funcspec-min-function-size=1 -S < %s | FileCheck %s
-
+; RUN: opt -passes=ipsccp  --funcspec-min-function-size=1 \
+; RUN: -funcspec-min-codesize-savings=1 -S < %s | FileCheck %s
 @gv = internal global ptr null
 
 define i8 @caller() {
diff --git a/llvm/test/Transforms/FunctionSpecialization/maxgrowth.ll b/llvm/test/Transforms/FunctionSpecialization/maxgrowth.ll
index 82d1f7ae4a6e1..7dc7e8ec69f50 100644
--- a/llvm/test/Transforms/FunctionSpecialization/maxgrowth.ll
+++ b/llvm/test/Transforms/FunctionSpecialization/maxgrowth.ll
@@ -26,7 +26,7 @@ entry:
 ; CHECK-NEXT:  [[ENTRY:.*:]]
 ; CHECK-NEXT:    [[NOTSPEC0:%.*]] = call i32 @add(i32 0, i32 [[N]])
 ; CHECK-NEXT:    [[NOTSPEC1:%.*]] = call i32 @add(i32 1, i32 [[N]])
-; CHECK-NEXT:    [[SPEC:%.*]] = call i32 @add.specialized.1(i32 1, i32 1)
+; CHECK-NEXT:    [[SPEC:%.*]] = call i32 @add(i32 1, i32 1)
 ; CHECK-NEXT:    ret void
 ;
 ;
@@ -36,9 +36,3 @@ entry:
 ; CHECK-NEXT:    [[RES:%.*]] = add i32 [[X]], [[Y]]
 ; CHECK-NEXT:    ret i32 [[RES]]
 ;
-;
-; CHECK-LABEL: define internal i32 @add.specialized.1(
-; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
-; CHECK-NEXT:  [[ENTRY:.*:]]
-; CHECK-NEXT:    ret i32 poison
-;
diff --git a/llvm/test/Transforms/FunctionSpecialization/recursive-penalty.ll b/llvm/test/Transforms/FunctionSpecialization/recursive-penalty.ll
index fc17387dec94d..ff90634ddd424 100644
--- a/llvm/test/Transforms/FunctionSpecialization/recursive-penalty.ll
+++ b/llvm/test/Transforms/FunctionSpecialization/recursive-penalty.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
 ; REQUIRES: asserts
 ; RUN: opt -passes="ipsccp<func-spec>,inline,instcombine,simplifycfg" -S \
 ; RUN:     -funcspec-min-function-size=23 -funcspec-max-iters=100 \
@@ -6,11 +7,40 @@
 ; Make sure the number of specializations created are not
 ; linear to the number of iterations (funcspec-max-iters).
 
-; CHECK: FnSpecialization: Created 4 specializations in module
-
 @Global = internal constant i32 1, align 4
 
 define internal void @recursiveFunc(ptr readonly %arg) {
+; CHECK-LABEL: define internal void @recursiveFunc(
+; CHECK-SAME: ptr readonly [[ARG:%.*]]) {
+; CHECK-NEXT:    [[TEMP:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[ARG_LOAD:%.*]] = load i32, ptr [[ARG]], align 4
+; CHECK-NEXT:    [[ARG_CMP:%.*]] = icmp slt i32 [[ARG_LOAD]], 10000
+; CHECK-NEXT:    br i1 [[ARG_CMP]], label %[[LOOP1:.*]], label %[[RET_BLOCK:.*]]
+; CHECK:       [[LOOP1]]:
+; CHECK-NEXT:    br label %[[LOOP2:.*]]
+; CHECK:       [[LOOP2]]:
+; CHECK-NEXT:    br label %[[LOOP3:.*]]
+; CHECK:       [[LOOP3]]:
+; CHECK-NEXT:    br label %[[LOOP4:.*]]
+; CHECK:       [[LOOP4]]:
+; CHECK-NEXT:    call void @print_val(i32 [[ARG_LOAD]])
+; CHECK-NEXT:    [[ARG_ADD:%.*]] = add nsw i32 [[ARG_LOAD]], 1
+; CHECK-NEXT:    store i32 [[ARG_ADD]], ptr [[TEMP]], align 4
+; CHECK-NEXT:    call void @recursiveFunc(ptr nonnull [[TEMP]])
+; CHECK-NEXT:    [[EXIT_COND1:%.*]] = call i1 @exit_cond()
+; CHECK-NEXT:    br i1 [[EXIT_COND1]], label %[[LOOP4]], label %[[LOOP3_END:.*]]
+; CHECK:       [[LOOP3_END]]:
+; CHECK-NEXT:    [[EXIT_COND2:%.*]] = call i1 @exit_cond()
+; CHECK-NEXT:    br i1 [[EXIT_COND2]], label %[[LOOP3]], label %[[LOOP2_END:.*]]
+; CHECK:       [[LOOP2_END]]:
+; CHECK-NEXT:    [[EXIT_COND3:%.*]] = call i1 @exit_cond()
+; CHECK-NEXT:    br i1 [[EXIT_COND3]], label %[[LOOP2]], label %[[LOOP1_END:.*]]
+; CHECK:       [[LOOP1_END]]:
+; CHECK-NEXT:    [[EXIT_COND4:%.*]] = call i1 @exit_cond()
+; CHECK-NEXT:    br i1 [[EXIT_COND4]], label %[[LOOP1]], label %[[RET_BLOCK]]
+; CHECK:       [[RET_BLOCK]]:
+; CHECK-NEXT:    ret void
+;
   %temp = alloca i32, align 4
   %arg.load = load i32, ptr %arg, align 4
   %arg.cmp = icmp slt i32 %arg.load, 10000
@@ -56,6 +86,10 @@ ret.block:
 }
 
 define i32 @main() {
+; CHECK-LABEL: define i32 @main() {
+; CHECK-NEXT:    call void @recursiveFunc(ptr nonnull @Global)
+; CHECK-NEXT:    ret i32 0
+;
   call void @recursiveFunc(ptr @Global)
   ret i32 0
 }

>From 92297fcad87861f9838e1e13b0c0c99646c5e62a Mon Sep 17 00:00:00 2001
From: bababuck <buchner.ryan at gmail.com>
Date: Tue, 30 Sep 2025 09:10:38 -0700
Subject: [PATCH 02/23] [FnSpecialization] Add new test for chained
 specialization

---
 .../specialize-chain.ll                       | 132 ++++++++++++++++++
 1 file changed, 132 insertions(+)
 create mode 100644 llvm/test/Transforms/FunctionSpecialization/specialize-chain.ll

diff --git a/llvm/test/Transforms/FunctionSpecialization/specialize-chain.ll b/llvm/test/Transforms/FunctionSpecialization/specialize-chain.ll
new file mode 100644
index 0000000000000..8b3a028ca1aa7
--- /dev/null
+++ b/llvm/test/Transforms/FunctionSpecialization/specialize-chain.ll
@@ -0,0 +1,132 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs --version 5
+; REQUIRES: asserts
+; RUN: opt -passes=ipsccp -S -funcspec-min-function-size=1 -funcspec-min-codesize-savings=20 -debug-only=function-specialization < %s 2>&1 | FileCheck %s
+
+define i32 @incr(i32 %a) {
+  %b = add nsw i32 %a, 1
+  %c = add nsw i32 %b, 1
+  %d = add nsw i32 %c, 1
+  %e = add nsw i32 %d, 1
+  %f = add nsw i32 %e, 1
+  %g = add nsw i32 %f, 1
+  ret i32 %g
+}
+
+define i32 @forward_outer(i32 %a) {
+entry:
+  %call = call i32 @forward_inner(i32 %a)
+  ret i32 %call
+}
+
+define i32 @multi_call(i32 %a) {
+entry:
+  %call = call i32 @incr(i32 %a)
+  %mul = mul nsw i32 %a, 2
+  %mul_call = call i32 @incr(i32 %mul)
+  ret i32 %call
+}
+
+define i32 @forward_inner(i32 %a) {
+entry:
+  %call = call i32 @incr(i32 %a)
+  ret i32 %call
+}
+
+define i32 @forward_unfold(i32 %a) {
+entry:
+  %b = mul nsw i32 %a, 10
+  %call = call i32 @incr(i32 %b)
+  %c = mul nsw i32 %call, 20
+  ret i32 %c
+}
+
+define dso_local signext i32 @intrinsic(i64 %a) {
+  %local_dest = alloca [1024 x i32], align 4
+  %local_src = alloca [1024 x i32], align 4
+  call void @llvm.memcpy.p0.p0.i64(ptr %local_dest, ptr %local_src, i64 %a, i1 false)
+  ret i32 0
+}
+
+define i32 @main() {
+entry:
+  %add = call i32 @incr(i32 10)
+  %int = call i32 @intrinsic(i32 3)
+  %fwd_unfold = call i32 @forward_unfold(i32 3)
+  %fwd_inner = call i32 @forward_inner(i32 3)
+  %fwd_outer = call i32 @forward_outer(i32 3)
+  %fwd_outer1 = call i32 @forward_outer(i32 3)
+  %multi_call = call i32 @multi_call(i32 5)
+  ret i32 %multi_call
+}
+
+
+
+
+
+; CHECK-LABEL: define range(i32 -2147483642, -2147483648) i32 @incr(
+; CHECK-SAME: i32 [[A:%.*]]) {
+; CHECK-NEXT:    [[INC:%.*]] = add nsw i32 [[A]], 1
+; CHECK-NEXT:    [[C:%.*]] = add nsw i32 [[INC]], 1
+; CHECK-NEXT:    [[D:%.*]] = add nsw i32 [[C]], 1
+; CHECK-NEXT:    [[E:%.*]] = add nsw i32 [[D]], 1
+; CHECK-NEXT:    [[F:%.*]] = add nsw i32 [[E]], 1
+; CHECK-NEXT:    [[G:%.*]] = add nsw i32 [[F]], 1
+; CHECK-NEXT:    ret i32 [[G]]
+;
+;
+; CHECK-LABEL: define range(i32 -2147483642, -2147483648) i32 @forward_outer(
+; CHECK-SAME: i32 [[A:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[CALL:%.*]] = call i32 @forward_inner(i32 [[A]])
+; CHECK-NEXT:    ret i32 [[CALL]]
+;
+;
+; CHECK-LABEL: define range(i32 -2147483642, -2147483648) i32 @multi_call(
+; CHECK-SAME: i32 [[A:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[CALL:%.*]] = call i32 @incr(i32 [[A]])
+; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i32 [[A]], 2
+; CHECK-NEXT:    [[MUL_CALL:%.*]] = call i32 @incr(i32 [[MUL]])
+; CHECK-NEXT:    ret i32 [[CALL]]
+;
+;
+; CHECK-LABEL: define range(i32 -2147483642, -2147483648) i32 @forward_inner(
+; CHECK-SAME: i32 [[A:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[CALL:%.*]] = call i32 @incr(i32 [[A]])
+; CHECK-NEXT:    ret i32 [[CALL]]
+;
+;
+; CHECK-LABEL: define i32 @forward_unfold(
+; CHECK-SAME: i32 [[A:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[B:%.*]] = mul nsw i32 [[A]], 10
+; CHECK-NEXT:    [[CALL:%.*]] = call i32 @incr(i32 [[B]])
+; CHECK-NEXT:    [[C:%.*]] = mul nsw i32 [[CALL]], 20
+; CHECK-NEXT:    ret i32 [[C]]
+;
+;
+; CHECK-LABEL: define dso_local signext i32 @intrinsic(
+; CHECK-SAME: i64 [[A:%.*]]) {
+; CHECK-NEXT:    [[LOCAL_DEST:%.*]] = alloca [1024 x i32], align 4
+; CHECK-NEXT:    [[LOCAL_SRC:%.*]] = alloca [1024 x i32], align 4
+; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr [[LOCAL_DEST]], ptr [[LOCAL_SRC]], i64 [[A]], i1 false)
+; CHECK-NEXT:    ret i32 0
+;
+;
+; CHECK-LABEL: define range(i32 -2147483642, -2147483648) i32 @main() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[ADD:%.*]] = call i32 @incr.specialized.1(i32 10)
+; CHECK-NEXT:    [[INT:%.*]] = call i32 @intrinsic(i32 3)
+; CHECK-NEXT:    [[FWD_UNFOLD:%.*]] = call i32 @forward_unfold(i32 3)
+; CHECK-NEXT:    [[FWD_INNER:%.*]] = call i32 @forward_inner(i32 3)
+; CHECK-NEXT:    [[FWD_OUTER:%.*]] = call i32 @forward_outer(i32 3)
+; CHECK-NEXT:    [[FWD_OUTER1:%.*]] = call i32 @forward_outer(i32 3)
+; CHECK-NEXT:    [[MULTI_CALL:%.*]] = call i32 @multi_call(i32 5)
+; CHECK-NEXT:    ret i32 [[MULTI_CALL]]
+;
+;
+; CHECK-LABEL: define internal i32 @incr.specialized.1(
+; CHECK-SAME: i32 [[A:%.*]]) {
+; CHECK-NEXT:    ret i32 poison
+;

>From 5ea886987a3abfef0ba13320374fb4ebed6e60ef Mon Sep 17 00:00:00 2001
From: bababuck <buchner.ryan at gmail.com>
Date: Mon, 29 Sep 2025 15:15:25 -0700
Subject: [PATCH 03/23] [FnSpecialization] Refactor SpecCall::CallSites to
 contain a data structure

The data structure will eventually contain extra data for chained and indirect
specialization.
---
 .../llvm/Transforms/IPO/FunctionSpecialization.h       | 10 +++++++++-
 llvm/lib/Transforms/IPO/FunctionSpecialization.cpp     |  9 +++++----
 2 files changed, 14 insertions(+), 5 deletions(-)

diff --git a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
index 5a682e8c7b5eb..120ed2117992c 100644
--- a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
+++ b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
@@ -93,6 +93,8 @@
 #include "llvm/Transforms/Utils/SizeOpts.h"
 
 namespace llvm {
+struct Spec;
+
 // Map of potential specializations for each function. The FunctionSpecializer
 // keeps the discovered specialisation opportunities for the module in a single
 // vector, where the specialisations of each function form a contiguous range.
@@ -124,6 +126,10 @@ struct SpecSig {
   }
 };
 
+struct SpecCall {
+  CallBase *CallSite;
+};
+
 // Specialization instance.
 struct Spec {
   // Original function.
@@ -142,7 +148,9 @@ struct Spec {
   unsigned CodeSize;
 
   // List of call sites, matching this specialization.
-  SmallVector<CallBase *> CallSites;
+  SmallVector<SpecCall> CallSites;
+
+  void addCall(SpecCall SC) { CallSites.push_back(SC); }
 
   Spec(Function *F, const SpecSig &S, unsigned Score, unsigned CodeSize)
       : F(F), Sig(S), Score(Score), CodeSize(CodeSize) {}
diff --git a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
index 6d4b2fb7e0065..6743fffd585df 100644
--- a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
@@ -790,11 +790,12 @@ bool FunctionSpecializer::run() {
     S.Clone = createSpecialization(S.F, S.Sig);
 
     // Update the known call sites to call the clone.
-    for (CallBase *Call : S.CallSites) {
+    for (auto &CS : S.CallSites) {
       Function *Clone = S.Clone;
+      CallBase *Call = CS.CallSite;
       LLVM_DEBUG(dbgs() << "FnSpecialization: Redirecting " << *Call
                         << " to call " << Clone->getName() << "\n");
-      Call->setCalledFunction(S.Clone);
+      Call->setCalledFunction(Clone);
       auto &BFI = GetBFI(*Call->getFunction());
       std::optional<uint64_t> Count =
           BFI.getBlockProfileCount(Call->getParent());
@@ -961,7 +962,7 @@ bool FunctionSpecializer::findSpecializations(Function *F, unsigned FuncSize,
       if (CS.getFunction() == F)
         continue;
       const unsigned Index = It->second;
-      AllSpecs[Index].CallSites.push_back(&CS);
+      AllSpecs[Index].addCall({&CS});
     } else {
       // Calculate the specialisation gain.
       Cost CodeSize;
@@ -1025,7 +1026,7 @@ bool FunctionSpecializer::findSpecializations(Function *F, unsigned FuncSize,
       // Create a new specialisation entry.
       auto &Spec = AllSpecs.emplace_back(F, S, Score, SpecSize);
       if (CS.getFunction() != F)
-        Spec.CallSites.push_back(&CS);
+        Spec.addCall({&CS});
       const unsigned Index = AllSpecs.size() - 1;
       UniqueSpecs[S] = Index;
       if (auto [It, Inserted] = SM.try_emplace(F, Index, Index + 1); !Inserted)

>From ca8182b63471435e66cce4a13372f6a32bcfe994 Mon Sep 17 00:00:00 2001
From: bababuck <buchner.ryan at gmail.com>
Date: Mon, 29 Sep 2025 15:30:02 -0700
Subject: [PATCH 04/23] [FnSpecialization] Refactor main loop in run() to pull
 out the loop logic to its own function

Will want to call recursively for chains.
---
 .../Transforms/IPO/FunctionSpecialization.h   |   7 ++
 .../Transforms/IPO/FunctionSpecialization.cpp | 109 +++++++++---------
 2 files changed, 64 insertions(+), 52 deletions(-)

diff --git a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
index 120ed2117992c..b594a01aeac21 100644
--- a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
+++ b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
@@ -307,6 +307,13 @@ class FunctionSpecializer {
   bool findSpecializations(Function *F, unsigned FuncSize,
                            SmallVectorImpl<Spec> &AllSpecs, SpecMap &SM);
 
+  /// @brief Find specialization opportunities for a given function.
+  /// @param F Function to specialize
+  /// @param SM  A map for a function's specialisation range
+  /// @param AllSpecs A vector to add potential specializations to.
+  /// @return True, if any potential specializations were found
+  bool runOneSpec(Function &F, SpecMap &SM, SmallVectorImpl<Spec> &AllSpecs);
+
   /// Compute the inlining bonus for replacing argument \p A with constant \p C.
   unsigned getInliningBonus(Argument *A, Constant *C);
 
diff --git a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
index 6743fffd585df..af57c0ce3394a 100644
--- a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
@@ -668,6 +668,61 @@ static unsigned getCostValue(const Cost &C) {
   return static_cast<unsigned>(Value);
 }
 
+bool FunctionSpecializer::runOneSpec(Function &F, SpecMap &SM,
+                                     SmallVectorImpl<Spec> &AllSpecs) {
+  if (!isCandidateFunction(&F))
+    return false;
+
+  auto [It, Inserted] = FunctionMetrics.try_emplace(&F);
+  CodeMetrics &Metrics = It->second;
+  // Analyze the function.
+  if (Inserted) {
+    SmallPtrSet<const Value *, 32> EphValues;
+    CodeMetrics::collectEphemeralValues(&F, &GetAC(F), EphValues);
+    for (BasicBlock &BB : F)
+      Metrics.analyzeBasicBlock(&BB, GetTTI(F), EphValues);
+  }
+
+  // When specializing literal constants is enabled, always require functions
+  // to be larger than MinFunctionSize, to prevent excessive specialization.
+  const bool RequireMinSize =
+      !ForceSpecialization &&
+      (SpecializeLiteralConstant || !F.hasFnAttribute(Attribute::NoInline));
+
+  // If the code metrics reveal that we shouldn't duplicate the function,
+  // or if the code size implies that this function is easy to get inlined,
+  // then we shouldn't specialize it.
+  if (Metrics.notDuplicatable || !Metrics.NumInsts.isValid() ||
+      (RequireMinSize && Metrics.NumInsts < MinFunctionSize))
+    return false;
+
+  // When specialization on literal constants is disabled, only consider
+  // recursive functions when running multiple times to save wasted analysis,
+  // as we will not be able to specialize on any newly found literal constant
+  // return values.
+  if (!SpecializeLiteralConstant && !Inserted && !Metrics.isRecursive)
+    return false;
+
+  int64_t Sz = Metrics.NumInsts.getValue();
+  assert(Sz > 0 && "CodeSize should be positive");
+  // It is safe to down cast from int64_t, NumInsts is always positive.
+  unsigned FuncSize = static_cast<unsigned>(Sz);
+
+  LLVM_DEBUG(dbgs() << "FnSpecialization: Specialization cost for "
+                    << F.getName() << " is " << FuncSize << "\n");
+
+  if (Inserted && Metrics.isRecursive)
+    promoteConstantStackValues(&F);
+
+  if (!findSpecializations(&F, FuncSize, AllSpecs, SM)) {
+    LLVM_DEBUG(
+        dbgs() << "FnSpecialization: No possible specializations found for "
+               << F.getName() << "\n");
+    return false;
+  }
+  return true;
+}
+
 /// Attempt to specialize functions in the module to enable constant
 /// propagation across function boundaries.
 ///
@@ -678,58 +733,8 @@ bool FunctionSpecializer::run() {
   SmallVector<Spec, 32> AllSpecs;
   unsigned NumCandidates = 0;
   for (Function &F : M) {
-    if (!isCandidateFunction(&F))
-      continue;
-
-    auto [It, Inserted] = FunctionMetrics.try_emplace(&F);
-    CodeMetrics &Metrics = It->second;
-    //Analyze the function.
-    if (Inserted) {
-      SmallPtrSet<const Value *, 32> EphValues;
-      CodeMetrics::collectEphemeralValues(&F, &GetAC(F), EphValues);
-      for (BasicBlock &BB : F)
-        Metrics.analyzeBasicBlock(&BB, GetTTI(F), EphValues);
-    }
-
-    // When specializing literal constants is enabled, always require functions
-    // to be larger than MinFunctionSize, to prevent excessive specialization.
-    const bool RequireMinSize =
-        !ForceSpecialization &&
-        (SpecializeLiteralConstant || !F.hasFnAttribute(Attribute::NoInline));
-
-    // If the code metrics reveal that we shouldn't duplicate the function,
-    // or if the code size implies that this function is easy to get inlined,
-    // then we shouldn't specialize it.
-    if (Metrics.notDuplicatable || !Metrics.NumInsts.isValid() ||
-        (RequireMinSize && Metrics.NumInsts < MinFunctionSize))
-      continue;
-
-    // When specialization on literal constants is disabled, only consider
-    // recursive functions when running multiple times to save wasted analysis,
-    // as we will not be able to specialize on any newly found literal constant
-    // return values.
-    if (!SpecializeLiteralConstant && !Inserted && !Metrics.isRecursive)
-      continue;
-
-    int64_t Sz = Metrics.NumInsts.getValue();
-    assert(Sz > 0 && "CodeSize should be positive");
-    // It is safe to down cast from int64_t, NumInsts is always positive.
-    unsigned FuncSize = static_cast<unsigned>(Sz);
-
-    LLVM_DEBUG(dbgs() << "FnSpecialization: Specialization cost for "
-                      << F.getName() << " is " << FuncSize << "\n");
-
-    if (Inserted && Metrics.isRecursive)
-      promoteConstantStackValues(&F);
-
-    if (!findSpecializations(&F, FuncSize, AllSpecs, SM)) {
-      LLVM_DEBUG(
-          dbgs() << "FnSpecialization: No possible specializations found for "
-                 << F.getName() << "\n");
-      continue;
-    }
-
-    ++NumCandidates;
+    if (runOneSpec(F, SM, AllSpecs))
+      ++NumCandidates;
   }
 
   if (!NumCandidates) {

>From 24cc5b09ec3d2f644a85e1faab5951b53a27438e Mon Sep 17 00:00:00 2001
From: bababuck <buchner.ryan at gmail.com>
Date: Mon, 29 Sep 2025 15:48:45 -0700
Subject: [PATCH 05/23] [FnSpecialization] Refactor logic for actually
 performing the specialization into a lambda

Will need to call recursively.

No functional change.
---
 .../Transforms/IPO/FunctionSpecialization.cpp | 72 ++++++++++---------
 1 file changed, 37 insertions(+), 35 deletions(-)

diff --git a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
index af57c0ce3394a..8d2009192e50f 100644
--- a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
@@ -788,45 +788,47 @@ bool FunctionSpecializer::run() {
   for (unsigned I = 0; I < NSpecs; ++I) {
     Spec &S = AllSpecs[BestSpecs[I]];
 
-    // Accumulate the codesize growth for the function, now we are creating the
-    // specialization.
-    FunctionGrowth[S.F] += S.CodeSize;
-
-    S.Clone = createSpecialization(S.F, S.Sig);
-
-    // Update the known call sites to call the clone.
-    for (auto &CS : S.CallSites) {
-      Function *Clone = S.Clone;
-      CallBase *Call = CS.CallSite;
-      LLVM_DEBUG(dbgs() << "FnSpecialization: Redirecting " << *Call
-                        << " to call " << Clone->getName() << "\n");
-      Call->setCalledFunction(Clone);
-      auto &BFI = GetBFI(*Call->getFunction());
-      std::optional<uint64_t> Count =
-          BFI.getBlockProfileCount(Call->getParent());
-      if (Count && !ProfcheckDisableMetadataFixes) {
-        std::optional<llvm::Function::ProfileCount> MaybeCloneCount =
-            Clone->getEntryCount();
-        if (MaybeCloneCount) {
-          uint64_t CallCount = *Count + MaybeCloneCount->getCount();
-          Clone->setEntryCount(CallCount);
-          if (std::optional<llvm::Function::ProfileCount> MaybeOriginalCount =
-                  S.F->getEntryCount()) {
-            uint64_t OriginalCount = MaybeOriginalCount->getCount();
-            if (OriginalCount >= *Count) {
-              S.F->setEntryCount(OriginalCount - *Count);
-            } else {
-              // This should generally not happen as that would mean there are
-              // more computed calls to the function than what was recorded.
-              LLVM_DEBUG(S.F->setEntryCount(0));
+    auto actuallySpecialize = [&](Spec &S) -> void {
+      // Accumulate the codesize growth for the function, now we are creating
+      // the specialization.
+      FunctionGrowth[S.F] += S.CodeSize;
+
+      S.Clone = createSpecialization(S.F, S.Sig);
+
+      // Update the known call sites to call the clone.
+      for (auto &CS : S.CallSites) {
+        Function *Clone = S.Clone;
+        CallBase *Call = CS.CallSite;
+        LLVM_DEBUG(dbgs() << "FnSpecialization: Redirecting " << *Call
+                          << " to call " << Clone->getName() << "\n");
+        Call->setCalledFunction(Clone);
+        auto &BFI = GetBFI(*Call->getFunction());
+        std::optional<uint64_t> Count =
+            BFI.getBlockProfileCount(Call->getParent());
+        if (Count && !ProfcheckDisableMetadataFixes) {
+          std::optional<llvm::Function::ProfileCount> MaybeCloneCount =
+              Clone->getEntryCount();
+          if (MaybeCloneCount) {
+            uint64_t CallCount = *Count + MaybeCloneCount->getCount();
+            Clone->setEntryCount(CallCount);
+            if (std::optional<llvm::Function::ProfileCount> MaybeOriginalCount =
+                    S.F->getEntryCount()) {
+              uint64_t OriginalCount = MaybeOriginalCount->getCount();
+              if (OriginalCount >= *Count) {
+                S.F->setEntryCount(OriginalCount - *Count);
+              } else {
+                // This should generally not happen as that would mean there are
+                // more computed calls to the function than what was recorded.
+                LLVM_DEBUG(S.F->setEntryCount(0));
+              }
             }
           }
         }
       }
-    }
-
-    Clones.push_back(S.Clone);
-    OriginalFuncs.insert(S.F);
+      Clones.push_back(S.Clone);
+      OriginalFuncs.insert(S.F);
+    };
+    actuallySpecialize(S);
   }
 
   Solver.solveWhileResolvedUndefsIn(Clones);

>From 32ef1d7df52ade01d9a235ddc3d652c397f8abf3 Mon Sep 17 00:00:00 2001
From: bababuck <buchner.ryan at gmail.com>
Date: Mon, 29 Sep 2025 17:58:50 -0700
Subject: [PATCH 06/23] [FnSpecialization] Pass a Spec to runOneSpec() rather
 than a Function

Spec already contains the Function, and chaining will need to pass extra
information along with it.
---
 .../llvm/Transforms/IPO/FunctionSpecialization.h    | 10 ++++++----
 llvm/lib/Transforms/IPO/FunctionSpecialization.cpp  | 13 ++++++++-----
 2 files changed, 14 insertions(+), 9 deletions(-)

diff --git a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
index b594a01aeac21..6959ccfb43b5a 100644
--- a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
+++ b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
@@ -156,6 +156,8 @@ struct Spec {
       : F(F), Sig(S), Score(Score), CodeSize(CodeSize) {}
   Spec(Function *F, const SpecSig &&S, unsigned Score, unsigned CodeSize)
       : F(F), Sig(S), Score(Score), CodeSize(CodeSize) {}
+  Spec(Function *F)
+      : F(F), Clone(nullptr), Sig(), Score(0), CodeSize(), CallSites(0) {}
 };
 
 class InstCostVisitor : public InstVisitor<InstCostVisitor, Constant *> {
@@ -304,15 +306,15 @@ class FunctionSpecializer {
   /// @param AllSpecs A vector to add potential specializations to.
   /// @param SM  A map for a function's specialisation range
   /// @return True, if any potential specializations were found
-  bool findSpecializations(Function *F, unsigned FuncSize,
-                           SmallVectorImpl<Spec> &AllSpecs, SpecMap &SM);
+  bool findSpecializations(unsigned FuncSize, SmallVectorImpl<Spec> &AllSpecs,
+                           SpecMap &SM, Spec &InS);
 
   /// @brief Find specialization opportunities for a given function.
-  /// @param F Function to specialize
+  /// @param S Specialization to complete, possibly with a Callsite attached.
   /// @param SM  A map for a function's specialisation range
   /// @param AllSpecs A vector to add potential specializations to.
   /// @return True, if any potential specializations were found
-  bool runOneSpec(Function &F, SpecMap &SM, SmallVectorImpl<Spec> &AllSpecs);
+  bool runOneSpec(Spec &S, SpecMap &SM, SmallVectorImpl<Spec> &AllSpecs);
 
   /// Compute the inlining bonus for replacing argument \p A with constant \p C.
   unsigned getInliningBonus(Argument *A, Constant *C);
diff --git a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
index 8d2009192e50f..008de3b9e8337 100644
--- a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
@@ -668,8 +668,9 @@ static unsigned getCostValue(const Cost &C) {
   return static_cast<unsigned>(Value);
 }
 
-bool FunctionSpecializer::runOneSpec(Function &F, SpecMap &SM,
+bool FunctionSpecializer::runOneSpec(Spec &S, SpecMap &SM,
                                      SmallVectorImpl<Spec> &AllSpecs) {
+  Function &F = *(S.F);
   if (!isCandidateFunction(&F))
     return false;
 
@@ -714,7 +715,7 @@ bool FunctionSpecializer::runOneSpec(Function &F, SpecMap &SM,
   if (Inserted && Metrics.isRecursive)
     promoteConstantStackValues(&F);
 
-  if (!findSpecializations(&F, FuncSize, AllSpecs, SM)) {
+  if (!findSpecializations(FuncSize, AllSpecs, SM, S)) {
     LLVM_DEBUG(
         dbgs() << "FnSpecialization: No possible specializations found for "
                << F.getName() << "\n");
@@ -733,7 +734,8 @@ bool FunctionSpecializer::run() {
   SmallVector<Spec, 32> AllSpecs;
   unsigned NumCandidates = 0;
   for (Function &F : M) {
-    if (runOneSpec(F, SM, AllSpecs))
+    Spec S(&F);
+    if (runOneSpec(S, SM, AllSpecs))
       ++NumCandidates;
   }
 
@@ -906,9 +908,10 @@ static Function *cloneCandidateFunction(Function *F, unsigned NSpecs) {
   return Clone;
 }
 
-bool FunctionSpecializer::findSpecializations(Function *F, unsigned FuncSize,
+bool FunctionSpecializer::findSpecializations(unsigned FuncSize,
                                               SmallVectorImpl<Spec> &AllSpecs,
-                                              SpecMap &SM) {
+                                              SpecMap &SM, Spec &InS) {
+  Function *F = InS.F;
   // A mapping from a specialisation signature to the index of the respective
   // entry in the all specialisation array. Used to ensure uniqueness of
   // specialisations.

>From d6a2c96a173aa53fb96b8f949af1666620e523a4 Mon Sep 17 00:00:00 2001
From: bababuck <buchner.ryan at gmail.com>
Date: Mon, 29 Sep 2025 21:19:56 -0700
Subject: [PATCH 07/23] [FnSpecialization] Use the same UniqueSpecs across
 entire run()

UniqueSpecs used to be a local object within findSpecializations(), since
each Function only entered findSpecializations() once. With chains, functions
will now be processed in arbitrary order, so one map is shared across run().
---
 .../Transforms/IPO/FunctionSpecialization.h   |  7 +++++--
 .../Transforms/IPO/FunctionSpecialization.cpp | 21 ++++++++++---------
 2 files changed, 16 insertions(+), 12 deletions(-)

diff --git a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
index 6959ccfb43b5a..f621554e83636 100644
--- a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
+++ b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
@@ -307,14 +307,17 @@ class FunctionSpecializer {
   /// @param SM  A map for a function's specialisation range
   /// @return True, if any potential specializations were found
   bool findSpecializations(unsigned FuncSize, SmallVectorImpl<Spec> &AllSpecs,
-                           SpecMap &SM, Spec &InS);
+                           SpecMap &SM, Spec &InS,
+                           DenseMap<SpecSig, unsigned> &UniqueSpecs);
 
   /// @brief Find specialization opportunities for a given function.
   /// @param S Specialization to complete, possibly with a Callsite attached.
   /// @param SM  A map for a function's specialisation range
   /// @param AllSpecs A vector to add potential specializations to.
+  /// @param UniqueSpecs Map of existing specializations.
   /// @return True, if any potential specializations were found
-  bool runOneSpec(Spec &S, SpecMap &SM, SmallVectorImpl<Spec> &AllSpecs);
+  bool runOneSpec(Spec &S, SpecMap &SM, SmallVectorImpl<Spec> &AllSpecs,
+                  DenseMap<SpecSig, unsigned> &UniqueSpecs);
 
   /// Compute the inlining bonus for replacing argument \p A with constant \p C.
   unsigned getInliningBonus(Argument *A, Constant *C);
diff --git a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
index 008de3b9e8337..15e6ecddc615e 100644
--- a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
@@ -669,7 +669,8 @@ static unsigned getCostValue(const Cost &C) {
 }
 
 bool FunctionSpecializer::runOneSpec(Spec &S, SpecMap &SM,
-                                     SmallVectorImpl<Spec> &AllSpecs) {
+                                     SmallVectorImpl<Spec> &AllSpecs,
+                                     DenseMap<SpecSig, unsigned> &UniqueSpecs) {
   Function &F = *(S.F);
   if (!isCandidateFunction(&F))
     return false;
@@ -715,7 +716,7 @@ bool FunctionSpecializer::runOneSpec(Spec &S, SpecMap &SM,
   if (Inserted && Metrics.isRecursive)
     promoteConstantStackValues(&F);
 
-  if (!findSpecializations(FuncSize, AllSpecs, SM, S)) {
+  if (!findSpecializations(FuncSize, AllSpecs, SM, S, UniqueSpecs)) {
     LLVM_DEBUG(
         dbgs() << "FnSpecialization: No possible specializations found for "
                << F.getName() << "\n");
@@ -732,10 +733,14 @@ bool FunctionSpecializer::run() {
   // Find possible specializations for each function.
   SpecMap SM;
   SmallVector<Spec, 32> AllSpecs;
+  // A mapping from a specialisation signature to the index of the respective
+  // entry in the all specialisation array. Used to ensure uniqueness of
+  // specialisations.
+  DenseMap<SpecSig, unsigned> UniqueSpecs;
   unsigned NumCandidates = 0;
   for (Function &F : M) {
     Spec S(&F);
-    if (runOneSpec(S, SM, AllSpecs))
+    if (runOneSpec(S, SM, AllSpecs, UniqueSpecs))
       ++NumCandidates;
   }
 
@@ -908,14 +913,10 @@ static Function *cloneCandidateFunction(Function *F, unsigned NSpecs) {
   return Clone;
 }
 
-bool FunctionSpecializer::findSpecializations(unsigned FuncSize,
-                                              SmallVectorImpl<Spec> &AllSpecs,
-                                              SpecMap &SM, Spec &InS) {
+bool FunctionSpecializer::findSpecializations(
+    unsigned FuncSize, SmallVectorImpl<Spec> &AllSpecs, SpecMap &SM, Spec &InS,
+    DenseMap<SpecSig, unsigned> &UniqueSpecs) {
   Function *F = InS.F;
-  // A mapping from a specialisation signature to the index of the respective
-  // entry in the all specialisation array. Used to ensure uniqueness of
-  // specialisations.
-  DenseMap<SpecSig, unsigned> UniqueSpecs;
 
   // Get a list of interesting arguments.
   SmallVector<Argument *> Args;

>From 37b9a5daaefce9b89f67ab4f2d99fb34b1b287cf Mon Sep 17 00:00:00 2001
From: bababuck <buchner.ryan at gmail.com>
Date: Tue, 30 Sep 2025 15:15:10 -0700
Subject: [PATCH 08/23] [FnSpecialization] Don't rely on UniqueSpec to
 determine if specialization occurred

---
 llvm/lib/Transforms/IPO/FunctionSpecialization.cpp | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
index 15e6ecddc615e..46cad15936abe 100644
--- a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
@@ -917,6 +917,7 @@ bool FunctionSpecializer::findSpecializations(
     unsigned FuncSize, SmallVectorImpl<Spec> &AllSpecs, SpecMap &SM, Spec &InS,
     DenseMap<SpecSig, unsigned> &UniqueSpecs) {
   Function *F = InS.F;
+  bool FoundSpecialization = false;
 
   // Get a list of interesting arguments.
   SmallVector<Argument *> Args;
@@ -1040,12 +1041,15 @@ bool FunctionSpecializer::findSpecializations(
         Spec.addCall({&CS});
       const unsigned Index = AllSpecs.size() - 1;
       UniqueSpecs[S] = Index;
+
+      FoundSpecialization = true;
+
       if (auto [It, Inserted] = SM.try_emplace(F, Index, Index + 1); !Inserted)
         It->second.second = Index + 1;
     }
   }
 
-  return !UniqueSpecs.empty();
+  return FoundSpecialization;
 }
 
 bool FunctionSpecializer::isCandidateFunction(Function *F) {

>From 2bc068ba0bf201340223f4b50beef84a2cde7b97 Mon Sep 17 00:00:00 2001
From: bababuck <buchner.ryan at gmail.com>
Date: Mon, 29 Sep 2025 12:30:28 -0700
Subject: [PATCH 09/23] [FnSpecialization] Modify SpecMap to hold a pointer to
 every specialization

Cannot rely on AllSpecs being in order after chaining.
---
 .../Transforms/IPO/FunctionSpecialization.h     | 13 +++++--------
 .../Transforms/IPO/FunctionSpecialization.cpp   | 17 ++++++++---------
 2 files changed, 13 insertions(+), 17 deletions(-)

diff --git a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
index f621554e83636..5507c7281222f 100644
--- a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
+++ b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
@@ -95,11 +95,8 @@
 namespace llvm {
 struct Spec;
 
-// Map of potential specializations for each function. The FunctionSpecializer
-// keeps the discovered specialisation opportunities for the module in a single
-// vector, where the specialisations of each function form a contiguous range.
-// This map's value is the beginning and the end of that range.
-using SpecMap = DenseMap<Function *, std::pair<unsigned, unsigned>>;
+// Map of potential specializations for each function.
+using SpecMap = DenseMap<Function *, SmallVector<unsigned>>;
 
 // Just a shorter abbreviation to improve indentation.
 using Cost = InstructionCost;
@@ -340,9 +337,9 @@ class FunctionSpecializer {
 
   /// @brief Find and update calls to \p F, which match a specialization
   /// @param F Orginal function
-  /// @param Begin Start of a range of possibly matching specialisations
-  /// @param End End of a range (exclusive) of possibly matching specialisations
-  void updateCallSites(Function *F, const Spec *Begin, const Spec *End);
+  /// @param Specs Vector of possibly matching specialisations
+  void updateCallSites(Function *F, const SmallVector<unsigned> &Specs,
+                       SmallVector<Spec, 32> AllSpecs);
 };
 } // namespace llvm
 
diff --git a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
index 46cad15936abe..607468f396296 100644
--- a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
@@ -843,10 +843,8 @@ bool FunctionSpecializer::run() {
   // Update the rest of the call sites - these are the recursive calls, calls
   // to discarded specialisations and calls that may match a specialisation
   // after the solver runs.
-  for (Function *F : OriginalFuncs) {
-    auto [Begin, End] = SM[F];
-    updateCallSites(F, AllSpecs.begin() + Begin, AllSpecs.begin() + End);
-  }
+  for (Function *F : OriginalFuncs)
+    updateCallSites(F, SM[F], AllSpecs);
 
   for (Function *F : Clones) {
     if (F->getReturnType()->isVoidTy())
@@ -1044,8 +1042,7 @@ bool FunctionSpecializer::findSpecializations(
 
       FoundSpecialization = true;
 
-      if (auto [It, Inserted] = SM.try_emplace(F, Index, Index + 1); !Inserted)
-        It->second.second = Index + 1;
+      SM[F].push_back(Index);
     }
   }
 
@@ -1223,8 +1220,9 @@ Constant *FunctionSpecializer::getCandidateConstant(Value *V) {
   return C;
 }
 
-void FunctionSpecializer::updateCallSites(Function *F, const Spec *Begin,
-                                          const Spec *End) {
+void FunctionSpecializer::updateCallSites(Function *F,
+                                          const SmallVector<unsigned> &Specs,
+                                          SmallVector<Spec, 32> AllSpecs) {
   // Collect the call sites that need updating.
   SmallVector<CallBase *> ToUpdate;
   for (User *U : F->users())
@@ -1239,7 +1237,8 @@ void FunctionSpecializer::updateCallSites(Function *F, const Spec *Begin,
 
     // Find the best matching specialisation.
     const Spec *BestSpec = nullptr;
-    for (const Spec &S : make_range(Begin, End)) {
+    for (const unsigned SI : Specs) {
+      const auto &S = AllSpecs[SI];
       if (!S.Clone || (BestSpec && S.Score <= BestSpec->Score))
         continue;
 

>From 6a8cfdce5f0f384ba65f7789e3208ac5703b657f Mon Sep 17 00:00:00 2001
From: bababuck <buchner.ryan at gmail.com>
Date: Mon, 29 Sep 2025 18:18:41 -0700
Subject: [PATCH 10/23] [FnSpecialization] (1/6) Enable function specialization
 chaining

If a function that is called with constants passes those constants on to another
function, try to specialize both of those functions.
---
 .../Transforms/IPO/FunctionSpecialization.h   |  51 ++-
 .../Transforms/IPO/FunctionSpecialization.cpp | 331 ++++++++++++++----
 2 files changed, 301 insertions(+), 81 deletions(-)

diff --git a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
index 5507c7281222f..96dbd5d9a31e5 100644
--- a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
+++ b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
@@ -95,9 +95,13 @@
 namespace llvm {
 struct Spec;
 
+struct SpecSig;
+
 // Map of potential specializations for each function.
 using SpecMap = DenseMap<Function *, SmallVector<unsigned>>;
 
+using CallUserT = SmallMapVector<CallBase *, std::pair<SpecSig, Function *>, 4>;
+
 // Just a shorter abbreviation to improve indentation.
 using Cost = InstructionCost;
 
@@ -123,8 +127,14 @@ struct SpecSig {
   }
 };
 
+enum CallSiteStatusT {
+  AWAITING_PARENT, HAS_PARENT, NO_PARENT
+};
+
 struct SpecCall {
   CallBase *CallSite;
+  CallSiteStatusT Status;
+  unsigned Parent;
 };
 
 // Specialization instance.
@@ -149,10 +159,20 @@ struct Spec {
 
   void addCall(SpecCall SC) { CallSites.push_back(SC); }
 
-  Spec(Function *F, const SpecSig &S, unsigned Score, unsigned CodeSize)
-      : F(F), Sig(S), Score(Score), CodeSize(CodeSize) {}
-  Spec(Function *F, const SpecSig &&S, unsigned Score, unsigned CodeSize)
-      : F(F), Sig(S), Score(Score), CodeSize(CodeSize) {}
+  // List Sub-Specializations
+  SmallVector<unsigned> SubSpecs;
+
+  // Index within AllSpecs
+  unsigned Loc = 0;
+
+  Spec(Function *F, CallBase *CallSite, const SpecSig &S, CallSiteStatusT Status)
+      : F(F), Clone(nullptr), Sig(S), Score(), CodeSize(), CallSites() {
+    addCall({CallSite, Status, /*Parent*/ 0});
+  }
+  Spec(Function *F, CallBase *CallSite, CallSiteStatusT Status)
+      : F(F), Clone(nullptr), Sig(), Score(), CodeSize(), CallSites() {
+    addCall({CallSite, Status, /*Parent*/ 0});
+  }
   Spec(Function *F)
       : F(F), Clone(nullptr), Sig(), Score(0), CodeSize(), CallSites(0) {}
 };
@@ -187,7 +207,8 @@ class InstCostVisitor : public InstVisitor<InstCostVisitor, Constant *> {
     return Solver.isBlockExecutable(BB) && !DeadBlocks.contains(BB);
   }
 
-  LLVM_ABI Cost getCodeSizeSavingsForArg(Argument *A, Constant *C);
+  LLVM_ABI Cost getCodeSizeSavingsForArg(Argument *A, Constant *C,
+                                         CallUserT *CallUsers = nullptr);
 
   LLVM_ABI Cost getCodeSizeSavingsFromPendingPHIs();
 
@@ -201,7 +222,9 @@ class InstCostVisitor : public InstVisitor<InstCostVisitor, Constant *> {
   bool canEliminateSuccessor(BasicBlock *BB, BasicBlock *Succ) const;
 
   Cost getCodeSizeSavingsForUser(Instruction *User, Value *Use = nullptr,
-                                 Constant *C = nullptr);
+                                 Constant *C = nullptr,
+                                 CallUserT *CallUsers = nullptr,
+                                 llvm::Use *UseEdge = nullptr);
 
   Cost estimateBasicBlocks(SmallVectorImpl<BasicBlock *> &WorkList);
   Cost estimateSwitchInst(SwitchInst &I);
@@ -302,19 +325,26 @@ class FunctionSpecializer {
   /// @param FuncSize Cost of specializing a function.
   /// @param AllSpecs A vector to add potential specializations to.
   /// @param SM  A map for a function's specialisation range
+  /// @param CurrentChain Current chain of function calls.
   /// @return True, if any potential specializations were found
   bool findSpecializations(unsigned FuncSize, SmallVectorImpl<Spec> &AllSpecs,
                            SpecMap &SM, Spec &InS,
-                           DenseMap<SpecSig, unsigned> &UniqueSpecs);
+                           DenseMap<SpecSig, unsigned> &UniqueSpecs,
+                           SmallPtrSet<Function *, 4> &CurrentChain);
 
   /// @brief Find specialization opportunities for a given function.
   /// @param S Specialization to complete, possibly with a Callsite attached.
+  /// @param Chained Is this call part of a chain build?
   /// @param SM  A map for a function's specialisation range
   /// @param AllSpecs A vector to add potential specializations to.
   /// @param UniqueSpecs Map of existing specializations.
+  /// @param CurrentChain Current chain of function calls.
+  /// site.
   /// @return True, if any potential specializations were found
-  bool runOneSpec(Spec &S, SpecMap &SM, SmallVectorImpl<Spec> &AllSpecs,
-                  DenseMap<SpecSig, unsigned> &UniqueSpecs);
+  bool runOneSpec(Spec &S, bool Chained, SpecMap &SM,
+                  SmallVectorImpl<Spec> &AllSpecs,
+                  DenseMap<SpecSig, unsigned> &UniqueSpecs,
+                  SmallPtrSet<Function *, 4> CurrentChain);
 
   /// Compute the inlining bonus for replacing argument \p A with constant \p C.
   unsigned getInliningBonus(Argument *A, Constant *C);
@@ -325,7 +355,8 @@ class FunctionSpecializer {
   /// @param F Function to specialize
   /// @param S Which specialization to create
   /// @return The new, cloned function
-  Function *createSpecialization(Function *F, const SpecSig &S);
+  Function *createSpecialization(Function *F, const SpecSig &S,
+                                 ValueToValueMapTy &Mappings);
 
   /// Determine if it is possible to specialise the function for constant values
   /// of the formal parameter \p A.
diff --git a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
index 607468f396296..02836a7fc65d8 100644
--- a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
@@ -166,15 +166,18 @@ Cost InstCostVisitor::getCodeSizeSavingsFromPendingPHIs() {
 }
 
 /// Compute the codesize savings for replacing argument \p A with constant \p C.
-Cost InstCostVisitor::getCodeSizeSavingsForArg(Argument *A, Constant *C) {
+Cost InstCostVisitor::getCodeSizeSavingsForArg(Argument *A, Constant *C,
+                                               CallUserT *CallUsers) {
   LLVM_DEBUG(dbgs() << "FnSpecialization: Analysing bonus for constant: "
                     << C->getNameOrAsOperand() << "\n");
   Cost CodeSize;
-  for (auto *U : A->users())
-    if (auto *UI = dyn_cast<Instruction>(U))
+  for (Use &UseEdge : A->uses()) {
+    User *U = UseEdge.getUser();
+    if (auto *UI = dyn_cast<Instruction>(U)) {
       if (isBlockExecutable(UI->getParent()))
-        CodeSize += getCodeSizeSavingsForUser(UI, A, C);
-
+        CodeSize += getCodeSizeSavingsForUser(UI, A, C, CallUsers, &UseEdge);
+    }
+  }
   LLVM_DEBUG(dbgs() << "FnSpecialization:   Accumulated bonus {CodeSize = "
                     << CodeSize << "} for argument " << *A << "\n");
   return CodeSize;
@@ -217,7 +220,9 @@ Cost InstCostVisitor::getLatencySavingsForKnownConstants() {
 }
 
 Cost InstCostVisitor::getCodeSizeSavingsForUser(Instruction *User, Value *Use,
-                                                Constant *C) {
+                                                Constant *C,
+                                                CallUserT *CallUsers,
+                                                llvm::Use *UseEdge) {
   // We have already propagated a constant for this user.
   if (KnownConstants.contains(User))
     return 0;
@@ -227,10 +232,35 @@ Cost InstCostVisitor::getCodeSizeSavingsForUser(Instruction *User, Value *Use,
                     : KnownConstants.end();
 
   Cost CodeSize = 0;
+  auto isChainableCall = [&](Instruction *I) -> bool {
+    if (CallInst *CI = dyn_cast<CallInst>(I);
+        CI && CI->getIntrinsicID() == llvm::Intrinsic::not_intrinsic) {
+      LLVM_DEBUG(
+          dbgs() << "FnSpecialization:   Found constant forwarded via a call "
+                 << *C << "\n");
+      Function *F = CI->getCalledFunction();
+      if (F && CallUsers && UseEdge) { // Avoid function pointers
+        unsigned Idx = CI->getArgOperandNo(UseEdge);
+        LLVM_DEBUG(dbgs() << "FnSpecialization:   Function called: "
+                          << F->getName() << " argument number: " << Idx
+                          << "\n");
+        (*CallUsers)[CI].first.Args.push_back({F->getArg(Idx), C});
+        (*CallUsers)[CI].second = F;
+        return true;
+      } else {
+        LLVM_DEBUG(
+            dbgs() << "FnSpecialization:   Could not find call function.\n");
+      }
+    }
+    return false;
+  };
   if (auto *I = dyn_cast<SwitchInst>(User)) {
     CodeSize = estimateSwitchInst(*I);
   } else if (auto *I = dyn_cast<BranchInst>(User)) {
     CodeSize = estimateBranchInst(*I);
+  } else if (isChainableCall(User)) {
+    // Will get benefit from recusive call to findSpecializations()
+    return 0;
   } else {
     C = visit(*User);
     if (!C)
@@ -668,13 +698,17 @@ static unsigned getCostValue(const Cost &C) {
   return static_cast<unsigned>(Value);
 }
 
-bool FunctionSpecializer::runOneSpec(Spec &S, SpecMap &SM,
+bool FunctionSpecializer::runOneSpec(Spec &S, bool Chained, SpecMap &SM,
                                      SmallVectorImpl<Spec> &AllSpecs,
-                                     DenseMap<SpecSig, unsigned> &UniqueSpecs) {
+                                     DenseMap<SpecSig, unsigned> &UniqueSpecs,
+                                     SmallPtrSet<Function *, 4> CurrentChain) {
   Function &F = *(S.F);
   if (!isCandidateFunction(&F))
     return false;
 
+  LLVM_DEBUG(dbgs() << "FnSpecialization: Trying function " << F.getName()
+                    << ", Chain=" << Chained << "\n");
+
   auto [It, Inserted] = FunctionMetrics.try_emplace(&F);
   CodeMetrics &Metrics = It->second;
   // Analyze the function.
@@ -716,7 +750,8 @@ bool FunctionSpecializer::runOneSpec(Spec &S, SpecMap &SM,
   if (Inserted && Metrics.isRecursive)
     promoteConstantStackValues(&F);
 
-  if (!findSpecializations(FuncSize, AllSpecs, SM, S, UniqueSpecs)) {
+  if (!findSpecializations(FuncSize, AllSpecs, SM, S, UniqueSpecs,
+                           CurrentChain)) {
     LLVM_DEBUG(
         dbgs() << "FnSpecialization: No possible specializations found for "
                << F.getName() << "\n");
@@ -740,7 +775,9 @@ bool FunctionSpecializer::run() {
   unsigned NumCandidates = 0;
   for (Function &F : M) {
     Spec S(&F);
-    if (runOneSpec(S, SM, AllSpecs, UniqueSpecs))
+    SmallPtrSet<Function *, 4> CurrentChain;
+    if (runOneSpec(S, /*Chained*/ false, SM, AllSpecs, UniqueSpecs,
+                   CurrentChain))
       ++NumCandidates;
   }
 
@@ -792,50 +829,90 @@ bool FunctionSpecializer::run() {
   // Create the chosen specializations.
   SmallPtrSet<Function *, 8> OriginalFuncs;
   SmallVector<Function *> Clones;
+  // Does this also need to include the base function in the hash, or is the
+  // SpecSig sufficient
+  DenseMap<SpecSig, Function *> UniqueClones;
   for (unsigned I = 0; I < NSpecs; ++I) {
     Spec &S = AllSpecs[BestSpecs[I]];
 
-    auto actuallySpecialize = [&](Spec &S) -> void {
-      // Accumulate the codesize growth for the function, now we are creating
-      // the specialization.
-      FunctionGrowth[S.F] += S.CodeSize;
-
-      S.Clone = createSpecialization(S.F, S.Sig);
+    // Update the known call sites to call the clone.
+    ValueToValueMapTy Mappings;
+
+    auto actuallySpecialize = [&](auto &&actuallySpecialize, Spec &S,
+                                  CallSiteStatusT Status, unsigned Parent,
+                                  ValueToValueMapTy &Mappings) -> void {
+      if (Status == CallSiteStatusT::HAS_PARENT) {
+        for (auto &CS : S.CallSites) {
+          if (CS.Status == Status && CS.Parent == Parent) {
+            CallBase *&Call = CS.CallSite;
+            Value *V = Mappings[Call];
+            Call = dyn_cast<CallBase>(V);
+          }
+        }
+      }
 
-      // Update the known call sites to call the clone.
+      bool NewClone;
+      ValueToValueMapTy CurrMappings;
+      if (auto It = UniqueClones.find(S.Sig); It != UniqueClones.end()) {
+        NewClone = false;
+        S.Clone = It->second;
+      } else {
+        NewClone = true;
+        S.Clone = createSpecialization(S.F, S.Sig, CurrMappings);
+
+        // Accumulate the codesize growth for the function, now we are creating
+        // the specialization.
+        FunctionGrowth[S.F] += S.CodeSize;
+
+        UniqueClones[S.Sig] = S.Clone;
+        Clones.push_back(S.Clone);
+        OriginalFuncs.insert(S.F);
+      }
       for (auto &CS : S.CallSites) {
-        Function *Clone = S.Clone;
-        CallBase *Call = CS.CallSite;
-        LLVM_DEBUG(dbgs() << "FnSpecialization: Redirecting " << *Call
-                          << " to call " << Clone->getName() << "\n");
-        Call->setCalledFunction(Clone);
-        auto &BFI = GetBFI(*Call->getFunction());
-        std::optional<uint64_t> Count =
-            BFI.getBlockProfileCount(Call->getParent());
-        if (Count && !ProfcheckDisableMetadataFixes) {
-          std::optional<llvm::Function::ProfileCount> MaybeCloneCount =
-              Clone->getEntryCount();
-          if (MaybeCloneCount) {
-            uint64_t CallCount = *Count + MaybeCloneCount->getCount();
-            Clone->setEntryCount(CallCount);
-            if (std::optional<llvm::Function::ProfileCount> MaybeOriginalCount =
-                    S.F->getEntryCount()) {
-              uint64_t OriginalCount = MaybeOriginalCount->getCount();
-              if (OriginalCount >= *Count) {
-                S.F->setEntryCount(OriginalCount - *Count);
-              } else {
-                // This should generally not happen as that would mean there are
-                // more computed calls to the function than what was recorded.
-                LLVM_DEBUG(S.F->setEntryCount(0));
+        if (CS.Status == Status && CS.Parent == Parent) {
+          Function *Clone = S.Clone;
+          CallBase *&Call = CS.CallSite;
+          LLVM_DEBUG(dbgs() << "FnSpecialization: Redirecting " << *Call
+                            << " to call " << Clone->getName() << "\n");
+          Call->setCalledFunction(Clone);
+          auto &BFI = GetBFI(*Call->getFunction());
+          std::optional<uint64_t> Count =
+              BFI.getBlockProfileCount(Call->getParent());
+          if (Count && !ProfcheckDisableMetadataFixes) {
+            std::optional<llvm::Function::ProfileCount> MaybeCloneCount =
+                Clone->getEntryCount();
+            if (MaybeCloneCount) {
+              uint64_t CallCount = *Count + MaybeCloneCount->getCount();
+              Clone->setEntryCount(CallCount);
+              if (std::optional<llvm::Function::ProfileCount>
+                      MaybeOriginalCount = S.F->getEntryCount()) {
+                uint64_t OriginalCount = MaybeOriginalCount->getCount();
+                if (OriginalCount >= *Count) {
+                  S.F->setEntryCount(OriginalCount - *Count);
+                } else {
+                  // This should generally not happen as that would mean there
+                  // are more computed calls to the function than what was
+                  // recorded.
+                  LLVM_DEBUG(S.F->setEntryCount(0));
+                }
               }
             }
           }
         }
       }
-      Clones.push_back(S.Clone);
-      OriginalFuncs.insert(S.F);
+      if (!NewClone)
+        return;
+      for (auto &SSI : S.SubSpecs) {
+        Spec &SS = AllSpecs[SSI];
+        actuallySpecialize(actuallySpecialize, SS,
+                           /*Status*/ CallSiteStatusT::HAS_PARENT,
+                           /*Parent*/ S.Loc, CurrMappings);
+      }
     };
-    actuallySpecialize(S);
+
+    actuallySpecialize(actuallySpecialize, S,
+                       /*hasParent*/ CallSiteStatusT::NO_PARENT, /*Parent*/ 0,
+                       Mappings);
   }
 
   Solver.solveWhileResolvedUndefsIn(Clones);
@@ -903,8 +980,8 @@ void FunctionSpecializer::removeDeadFunctions() {
 
 /// Clone the function \p F and remove the ssa_copy intrinsics added by
 /// the SCCPSolver in the cloned version.
-static Function *cloneCandidateFunction(Function *F, unsigned NSpecs) {
-  ValueToValueMapTy Mappings;
+static Function *cloneCandidateFunction(Function *F, unsigned NSpecs,
+                                        ValueToValueMapTy &Mappings) {
   Function *Clone = CloneFunction(F, Mappings);
   Clone->setName(F->getName() + ".specialized." + Twine(NSpecs));
   removeSSACopy(*Clone);
@@ -913,7 +990,8 @@ static Function *cloneCandidateFunction(Function *F, unsigned NSpecs) {
 
 bool FunctionSpecializer::findSpecializations(
     unsigned FuncSize, SmallVectorImpl<Spec> &AllSpecs, SpecMap &SM, Spec &InS,
-    DenseMap<SpecSig, unsigned> &UniqueSpecs) {
+    DenseMap<SpecSig, unsigned> &UniqueSpecs,
+    SmallPtrSet<Function *, 4> &CurrentChain) {
   Function *F = InS.F;
   bool FoundSpecialization = false;
 
@@ -926,15 +1004,32 @@ bool FunctionSpecializer::findSpecializations(
   if (Args.empty())
     return false;
 
-  for (User *U : F->users()) {
-    if (!isa<CallInst>(U) && !isa<InvokeInst>(U))
-      continue;
-    auto &CS = *cast<CallBase>(U);
+  SmallVector<CallBase *, 8> CallSites;
+  CallSiteStatusT Status;
+  if (InS.CallSites.size()) {
+    assert(InS.CallSites.size() == 1 &&
+           "Should only be passing single call spec as part of a chain");
+    CallSites.push_back(InS.CallSites[0].CallSite);
+    Status = CallSiteStatusT::AWAITING_PARENT;
+  } else {
+    Status = CallSiteStatusT::NO_PARENT;
+    for (User *U : F->users()) {
+      // If multiple funcs, check that user is proceeding func
+      if (!isa<CallInst>(U) && !isa<InvokeInst>(U))
+        continue;
+      auto *CS = cast<CallBase>(U);
 
-    // The user instruction does not call our function.
-    if (CS.getCalledFunction() != F)
-      continue;
+      // The user instruction does not call our function.
+      if (CS->getCalledFunction() != F)
+        continue;
+
+      CallSites.push_back(CS);
+    }
+  }
 
+  for (auto *CSP : CallSites) {
+    auto &CS = *CSP;
+    Spec Chain(F, /*CallSite*/ CSP, Status);
     // If the call site has attribute minsize set, that callsite won't be
     // specialized.
     if (CS.hasFnAttr(Attribute::MinSize))
@@ -949,18 +1044,41 @@ bool FunctionSpecializer::findSpecializations(
     // constant operands of this call site.
     SpecSig S;
     for (Argument *A : Args) {
-      Constant *C = getCandidateConstant(CS.getArgOperand(A->getArgNo()));
-      if (!C)
-        continue;
-      LLVM_DEBUG(dbgs() << "FnSpecialization: Found interesting argument "
-                        << A->getName() << " : " << C->getNameOrAsOperand()
-                        << "\n");
-      S.Args.push_back({A, C});
+      // Check if this argument is constant from the call chain propogation
+      unsigned Idx;
+      auto &As = InS.Sig.Args;
+      for (Idx = 0; Idx < As.size(); ++Idx) {
+        if (As[Idx].Formal == A)
+          break;
+      }
+      if (As.size() == Idx) {
+        Value *PossC = CS.getArgOperand(A->getArgNo());
+        Constant *C = getCandidateConstant(PossC);
+        if (!C)
+          continue;
+        LLVM_DEBUG(dbgs() << "FnSpecialization: Found interesting argument "
+                          << A->getName() << " : " << C->getNameOrAsOperand()
+                          << "\n");
+        S.Args.push_back({A, C});
+        if (InS.CallSites.size()) {
+          assert(InS.CallSites.size() == 1 &&
+                 "Should only be passing single call spec as part of a chain");
+          InS.Sig.Args.push_back({A, C});
+        }
+      } else {
+        Constant *C = InS.Sig.Args[Idx].Actual;
+        S.Args.push_back({A, C});
+        LLVM_DEBUG(dbgs() << "FnSpecialization: Found passed argument "
+                          << A->getName() << " : " << C->getNameOrAsOperand()
+                          << "\n");
+      }
     }
 
     if (S.Args.empty())
       continue;
 
+    CallUserT CallUsers;
+
     // Check if we have encountered the same specialisation already.
     if (auto It = UniqueSpecs.find(S); It != UniqueSpecs.end()) {
       // Existing specialisation. Add the call to the list to rewrite, unless
@@ -972,17 +1090,53 @@ bool FunctionSpecializer::findSpecializations(
       if (CS.getFunction() == F)
         continue;
       const unsigned Index = It->second;
-      AllSpecs[Index].addCall({&CS});
+      AllSpecs[Index].addCall({&CS, Status, /*Parent*/ 0});
     } else {
       // Calculate the specialisation gain.
       Cost CodeSize;
       unsigned Score = 0;
       InstCostVisitor Visitor = getInstCostVisitorFor(F);
       for (ArgInfo &A : S.Args) {
-        CodeSize += Visitor.getCodeSizeSavingsForArg(A.Formal, A.Actual);
+        CodeSize +=
+            Visitor.getCodeSizeSavingsForArg(A.Formal, A.Actual, &CallUsers);
         Score += getInliningBonus(A.Formal, A.Actual);
       }
+
       CodeSize += Visitor.getCodeSizeSavingsFromPendingPHIs();
+      CurrentChain.insert(F);
+
+      for (auto &CU : CallUsers) {
+        Function *NewF = CU.second.second;
+
+        // Recurse only if constants found for the function
+        if (!NewF)
+          continue;
+
+        // Don't allow any recursion in chains
+        bool isRecursion = CurrentChain.contains(NewF);
+        if (isRecursion)
+          continue;
+
+        LLVM_DEBUG(
+            dbgs() << "FnSpecialization:   Recursively calling runOneSpec() on "
+                   << NewF->getName() << "\n");
+
+        // Since the function might not yet be known when processing the
+        // constants due to a function pointer, wait to extract the argument
+        // pointer at a given index.
+        SpecSig NewS = CU.second.first;
+
+        Spec CallSpec(NewF, /*CallSite*/ CU.first, NewS,
+                      /*Status*/ CallSiteStatusT::AWAITING_PARENT);
+        runOneSpec(CallSpec, /*Chained*/ true, SM, AllSpecs, UniqueSpecs,
+                   CurrentChain);
+
+        // Use CallSpec.Sig since may have been added to within findSpec()
+        if (auto It = UniqueSpecs.find(CallSpec.Sig); It != UniqueSpecs.end()) {
+          const unsigned Index = It->second;
+          Chain.SubSpecs.push_back(Index);
+        }
+      }
 
       unsigned CodeSizeSavings = getCostValue(CodeSize);
       unsigned SpecSize = FuncSize - CodeSizeSavings;
@@ -1025,19 +1179,52 @@ bool FunctionSpecializer::findSpecializations(
         if ((FunctionGrowth[F] + SpecSize) / FuncSize > MaxCodeSizeGrowth)
           return false;
 
-        Score += std::max(CodeSizeSavings, LatencySavings);
+        Chain.Score += std::max(CodeSizeSavings, LatencySavings);
         return true;
       };
 
-      // Discard unprofitable specialisations.
-      if (!IsProfitable())
+      auto RemoveFromSubSpecs = [&](Spec &S) -> void {
+        for (unsigned &SSI : S.SubSpecs) {
+          Spec &SS = AllSpecs[SSI];
+          auto NewEnd = std::remove_if(
+              SS.CallSites.begin(), SS.CallSites.end(),
+              [&](SpecCall &SC) -> bool {
+                return SC.Status == CallSiteStatusT::AWAITING_PARENT;
+              });
+          SS.CallSites.erase(NewEnd, SS.CallSites.end());
+        }
+      };
+
+      // Discard unprofitable specialisations
+      if (!IsProfitable()) {
+        RemoveFromSubSpecs(Chain); // Remove Parent from SubSpecs
         continue;
+      }
+
+      auto AddParentToSubSpecs = [&](Spec &S) -> void {
+        for (unsigned &SSI : S.SubSpecs) {
+          Spec &SS = AllSpecs[SSI];
+          for (SpecCall &SC : SS.CallSites) {
+            if (SC.Status == CallSiteStatusT::AWAITING_PARENT) {
+              SC.Status = CallSiteStatusT::HAS_PARENT;
+              SC.Parent = S.Loc;
+            }
+          }
+        }
+      };
 
       // Create a new specialisation entry.
-      auto &Spec = AllSpecs.emplace_back(F, S, Score, SpecSize);
-      if (CS.getFunction() != F)
-        Spec.addCall({&CS});
+      auto &Spec = AllSpecs.emplace_back(Chain);
       const unsigned Index = AllSpecs.size() - 1;
+      Spec.Loc = Index;
+      AddParentToSubSpecs(Spec);
+      // Update the chain's Sig for any new constants at this level
+      Spec.Sig = S;
+      Spec.CodeSize = SpecSize;
+
+      if (CS.getFunction() == F && !Spec.CallSites[0].Parent) {
+        Spec.CallSites.clear();
+      }
       UniqueSpecs[S] = Index;
 
       FoundSpecialization = true;
@@ -1078,9 +1265,11 @@ bool FunctionSpecializer::isCandidateFunction(Function *F) {
   return true;
 }
 
-Function *FunctionSpecializer::createSpecialization(Function *F,
-                                                    const SpecSig &S) {
-  Function *Clone = cloneCandidateFunction(F, Specializations.size() + 1);
+Function *
+FunctionSpecializer::createSpecialization(Function *F, const SpecSig &S,
+                                          ValueToValueMapTy &Mappings) {
+  Function *Clone =
+      cloneCandidateFunction(F, Specializations.size() + 1, Mappings);
 
   // The original function does not neccessarily have internal linkage, but the
   // clone must.

>From 6e0c258b2605206f086afe355850c5151f862fee Mon Sep 17 00:00:00 2001
From: bababuck <buchner.ryan at gmail.com>
Date: Mon, 29 Sep 2025 18:44:09 -0700
Subject: [PATCH 11/23] [FnSpecialization] (2/6) Avoid creating standalone
 specializations when only ever part of a chain

These will be specialized as part of the chain if the chain scores well enough.
---
 llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h | 7 ++++++-
 llvm/lib/Transforms/IPO/FunctionSpecialization.cpp        | 5 +++++
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
index 96dbd5d9a31e5..c18efd5bf6132 100644
--- a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
+++ b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
@@ -157,7 +157,12 @@ struct Spec {
   // List of call sites, matching this specialization.
   SmallVector<SpecCall> CallSites;
 
-  void addCall(SpecCall SC) { CallSites.push_back(SC); }
+  bool AllChains = true;
+
+  void addCall(SpecCall SC) {
+    CallSites.push_back(SC);
+    AllChains = AllChains && SC.Status != CallSiteStatusT::NO_PARENT;
+  }
 
   // List Sub-Specializations
   SmallVector<unsigned> SubSpecs;
diff --git a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
index 02836a7fc65d8..7ce86cf0f221c 100644
--- a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
@@ -792,6 +792,10 @@ bool FunctionSpecializer::run() {
   // specialization budget, which is derived from maximum number of
   // specializations per specialization candidate function.
   auto CompareScore = [&AllSpecs](unsigned I, unsigned J) {
+    if (AllSpecs[J].AllChains)
+      return true;
+    if (AllSpecs[I].AllChains)
+      return false;
     if (AllSpecs[I].Score != AllSpecs[J].Score)
       return AllSpecs[I].Score > AllSpecs[J].Score;
     return I > J;
@@ -1224,6 +1228,7 @@ bool FunctionSpecializer::findSpecializations(
 
       if (CS.getFunction() == F && !Spec.CallSites[0].Parent) {
         Spec.CallSites.clear();
+        Spec.AllChains = true;
       }
       UniqueSpecs[S] = Index;
 

>From 5070b852256bc9c52e626107ec5463af6104bf7a Mon Sep 17 00:00:00 2001
From: bababuck <buchner.ryan at gmail.com>
Date: Mon, 29 Sep 2025 18:38:41 -0700
Subject: [PATCH 12/23] [FnSpecialization] (3/6) Don't consider specializations
 that are only chains in NSpecs

These will get specialized as part of a chain, so they aren't viable as
standalone specializations.
---
 llvm/lib/Transforms/IPO/FunctionSpecialization.cpp | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
index 7ce86cf0f221c..800771adb55bf 100644
--- a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
@@ -781,7 +781,11 @@ bool FunctionSpecializer::run() {
       ++NumCandidates;
   }
 
-  if (!NumCandidates) {
+  unsigned IndepSpecs = 0;
+  for (auto &S : AllSpecs)
+    if (!S.AllChains)
+      ++IndepSpecs;
+  if (!NumCandidates || !IndepSpecs) {
     LLVM_DEBUG(
         dbgs()
         << "FnSpecialization: No possible specializations found in module\n");
@@ -800,8 +804,8 @@ bool FunctionSpecializer::run() {
       return AllSpecs[I].Score > AllSpecs[J].Score;
     return I > J;
   };
-  const unsigned NSpecs =
-      std::min(NumCandidates * MaxClones, unsigned(AllSpecs.size()));
+  const unsigned NSpecs = std::min(
+      {NumCandidates * MaxClones, unsigned(AllSpecs.size()), IndepSpecs});
   SmallVector<unsigned> BestSpecs(NSpecs + 1);
   std::iota(BestSpecs.begin(), BestSpecs.begin() + NSpecs, 0);
   if (AllSpecs.size() > NSpecs) {
@@ -1228,7 +1232,7 @@ bool FunctionSpecializer::findSpecializations(
 
       if (CS.getFunction() == F && !Spec.CallSites[0].Parent) {
         Spec.CallSites.clear();
-        Spec.AllChains = true;
+        // Don't reset AllChains since this can be standalone specialized
       }
       UniqueSpecs[S] = Index;
 

>From 2cff3147b575f5c811150a0c39728cd42170de5b Mon Sep 17 00:00:00 2001
From: bababuck <buchner.ryan at gmail.com>
Date: Mon, 29 Sep 2025 17:02:18 -0700
Subject: [PATCH 13/23] [FnSpecialization] (4/6) Cache scoring metrics as part
 of Spec

When calculating possible Chains, use the metrics saved as part
of the sub-specializations.
---
 .../Transforms/IPO/FunctionSpecialization.h   | 21 ++++++--
 .../Transforms/IPO/FunctionSpecialization.cpp | 50 ++++++++++++++++---
 2 files changed, 59 insertions(+), 12 deletions(-)

diff --git a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
index c18efd5bf6132..bcc9f84308c33 100644
--- a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
+++ b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
@@ -154,6 +154,15 @@ struct Spec {
   // Number of instructions in the specialization.
   unsigned CodeSize;
 
+  // Cumulative function size of the chain
+  unsigned FuncSize;
+
+  // Latency savings
+  unsigned Latency;
+
+  // Benefit from inlining
+  unsigned InlineScore;
+
   // List of call sites, matching this specialization.
   SmallVector<SpecCall> CallSites;
 
@@ -170,16 +179,20 @@ struct Spec {
   // Index within AllSpecs
   unsigned Loc = 0;
 
-  Spec(Function *F, CallBase *CallSite, const SpecSig &S, CallSiteStatusT Status)
-      : F(F), Clone(nullptr), Sig(S), Score(), CodeSize(), CallSites() {
+  Spec(Function *F, CallBase *CallSite, const SpecSig &S,
+       CallSiteStatusT Status)
+      : F(F), Clone(nullptr), Sig(S), Score(), CodeSize(), FuncSize(),
+        InlineScore(), CallSites() {
     addCall({CallSite, Status, /*Parent*/ 0});
   }
   Spec(Function *F, CallBase *CallSite, CallSiteStatusT Status)
-      : F(F), Clone(nullptr), Sig(), Score(), CodeSize(), CallSites() {
+      : F(F), Clone(nullptr), Sig(), Score(), CodeSize(), FuncSize(),
+        InlineScore(), CallSites() {
     addCall({CallSite, Status, /*Parent*/ 0});
   }
   Spec(Function *F)
-      : F(F), Clone(nullptr), Sig(), Score(0), CodeSize(), CallSites(0) {}
+      : F(F), Clone(nullptr), Sig(), Score(), CodeSize(), FuncSize(),
+        InlineScore(), CallSites() {}
 };
 
 class InstCostVisitor : public InstVisitor<InstCostVisitor, Constant *> {
diff --git a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
index 800771adb55bf..7a78bd5d5aa1c 100644
--- a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
@@ -1149,6 +1149,27 @@ bool FunctionSpecializer::findSpecializations(
       unsigned CodeSizeSavings = getCostValue(CodeSize);
       unsigned SpecSize = FuncSize - CodeSizeSavings;
 
+      // Cache savings information in the chain to use for profitibility
+      // analysis of the entire chain
+      Chain.CodeSize = SpecSize;
+      Chain.InlineScore = Score;
+      Chain.FuncSize = FuncSize;
+      unsigned CumulCodeSize = 0;
+      unsigned CumulFuncSize = 0;
+      unsigned CumulInlineScore = 0;
+      unsigned CumulLatency = 0;
+      auto getCumulScores = [&](auto &&getCumulScores, Spec &CurrSpec) -> void {
+        CumulCodeSize += CurrSpec.CodeSize;
+        CumulFuncSize += CurrSpec.FuncSize;
+        CumulInlineScore += CurrSpec.InlineScore;
+        CumulLatency += CurrSpec.Latency;
+        for (auto SSI : CurrSpec.SubSpecs) {
+          getCumulScores(getCumulScores, AllSpecs[SSI]);
+        }
+      };
+      getCumulScores(getCumulScores, Chain);
+      unsigned CumulCodeSizeSavings = CumulFuncSize - CumulCodeSize;
+
       auto IsProfitable = [&]() -> bool {
         // No check required.
         if (ForceSpecialization)
@@ -1157,37 +1178,50 @@ bool FunctionSpecializer::findSpecializations(
         LLVM_DEBUG(
             dbgs() << "FnSpecialization: Specialization bonus {Inlining = "
                    << Score << " (" << (Score * 100 / FuncSize) << "%)}\n");
+        LLVM_DEBUG(
+            dbgs()
+            << "FnSpecialization: Chain specialization bonus {Inlining = "
+            << CumulInlineScore << " ("
+            << (CumulInlineScore * 100 / CumulFuncSize) << "%)}\n");
 
         // Minimum inlining bonus.
-        if (Score > MinInliningBonus * FuncSize / 100)
+        if ((Score > MinInliningBonus * FuncSize / 100) &&
+            (CumulInlineScore > MinInliningBonus * CumulFuncSize / 100))
           return true;
 
         LLVM_DEBUG(
             dbgs() << "FnSpecialization: Specialization bonus {CodeSize = "
                    << CodeSizeSavings << " ("
                    << (CodeSizeSavings * 100 / FuncSize) << "%)}\n");
+        LLVM_DEBUG(dbgs() << "FnSpecialization: Cumulative specialization "
+                             "bonus {CodeSize = "
+                          << CumulCodeSizeSavings << " ("
+                          << (CumulCodeSizeSavings * 100 / CumulFuncSize)
+                          << "%)}\n");
 
         // Minimum codesize savings.
-        if (CodeSizeSavings <= MinCodeSizeSavings * FuncSize / 100)
+        if ((CodeSizeSavings <= MinCodeSizeSavings * FuncSize / 100) &&
+            (CumulCodeSizeSavings <= MinCodeSizeSavings * CumulFuncSize / 100))
           return false;
 
         // Lazily compute the Latency, to avoid unnecessarily computing BFI.
-        unsigned LatencySavings =
+        Chain.Latency =
             getCostValue(Visitor.getLatencySavingsForKnownConstants());
+        CumulLatency += Chain.Latency;
 
         LLVM_DEBUG(
             dbgs() << "FnSpecialization: Specialization bonus {Latency = "
-                   << LatencySavings << " ("
-                   << (LatencySavings * 100 / FuncSize) << "%)}\n");
+                   << CumulLatency << " ("
+                   << (CumulLatency * 100 / CumulFuncSize) << "%)}\n");
 
         // Minimum latency savings.
-        if (LatencySavings < MinLatencySavings * FuncSize / 100)
+        if (CumulLatency < MinLatencySavings * CumulFuncSize / 100)
           return false;
         // Maximum codesize growth.
         if ((FunctionGrowth[F] + SpecSize) / FuncSize > MaxCodeSizeGrowth)
           return false;
 
-        Chain.Score += std::max(CodeSizeSavings, LatencySavings);
+        Score = CumulInlineScore + std::max(CumulCodeSizeSavings, CumulLatency);
         return true;
       };
 
@@ -1228,7 +1262,7 @@ bool FunctionSpecializer::findSpecializations(
       AddParentToSubSpecs(Spec);
       // Update the chain's Sig for any new constants at this level
       Spec.Sig = S;
-      Spec.CodeSize = SpecSize;
+      Spec.Score = Score;
 
       if (CS.getFunction() == F && !Spec.CallSites[0].Parent) {
         Spec.CallSites.clear();

>From 49725f4e0f211989f49e1e9a82ac54e0d692bbfd Mon Sep 17 00:00:00 2001
From: bababuck <buchner.ryan at gmail.com>
Date: Tue, 30 Sep 2025 16:39:23 -0700
Subject: [PATCH 14/23] [FnSpecialization] (5/6) Use an explicit structure for
 tracking visited functions

Otherwise, a visit made while chaining could be mistaken for having checked
all of the CallSites of a function.
---
 llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h | 1 +
 llvm/lib/Transforms/IPO/FunctionSpecialization.cpp        | 7 ++++++-
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
index bcc9f84308c33..0e42d012fefbb 100644
--- a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
+++ b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
@@ -295,6 +295,7 @@ class FunctionSpecializer {
 
   SmallPtrSet<Function *, 32> Specializations;
   SmallPtrSet<Function *, 32> DeadFunctions;
+  SmallPtrSet<Function *, 32> VisitedFunctions;
   DenseMap<Function *, CodeMetrics> FunctionMetrics;
   DenseMap<Function *, unsigned> FunctionGrowth;
   unsigned NGlobals = 0;
diff --git a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
index 7a78bd5d5aa1c..bf6d675957aa9 100644
--- a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
@@ -736,9 +736,14 @@ bool FunctionSpecializer::runOneSpec(Spec &S, bool Chained, SpecMap &SM,
   // recursive functions when running multiple times to save wasted analysis,
   // as we will not be able to specialize on any newly found literal constant
   // return values.
-  if (!SpecializeLiteralConstant && !Inserted && !Metrics.isRecursive)
+  if (!Chained && !SpecializeLiteralConstant && VisitedFunctions.contains(&F) &&
+      !Metrics.isRecursive)
     return false;
 
+  // Don't want to mistake this chain for checking all of the CallSites for F
+  if (!Chained)
+    VisitedFunctions.insert(&F);
+
   int64_t Sz = Metrics.NumInsts.getValue();
   assert(Sz > 0 && "CodeSize should be positive");
   // It is safe to down cast from int64_t, NumInsts is always positive.

>From 1b5a006ecbbbc6c6e68fd588b5d19472db97a9d5 Mon Sep 17 00:00:00 2001
From: bababuck <buchner.ryan at gmail.com>
Date: Tue, 30 Sep 2025 12:09:52 -0700
Subject: [PATCH 15/23] [FnSpecialization] (6/6) Update tests for prior set of
 changes

---
 .../specialize-chain.ll                       | 44 +++++++--
 .../FunctionSpecialization/track-return.ll    | 94 ++++++++++---------
 2 files changed, 86 insertions(+), 52 deletions(-)

diff --git a/llvm/test/Transforms/FunctionSpecialization/specialize-chain.ll b/llvm/test/Transforms/FunctionSpecialization/specialize-chain.ll
index 8b3a028ca1aa7..ac786d0e27cee 100644
--- a/llvm/test/Transforms/FunctionSpecialization/specialize-chain.ll
+++ b/llvm/test/Transforms/FunctionSpecialization/specialize-chain.ll
@@ -114,19 +114,51 @@ entry:
 ; CHECK-NEXT:    ret i32 0
 ;
 ;
-; CHECK-LABEL: define range(i32 -2147483642, -2147483648) i32 @main() {
+; CHECK-LABEL: define i32 @main() {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
 ; CHECK-NEXT:    [[ADD:%.*]] = call i32 @incr.specialized.1(i32 10)
 ; CHECK-NEXT:    [[INT:%.*]] = call i32 @intrinsic(i32 3)
 ; CHECK-NEXT:    [[FWD_UNFOLD:%.*]] = call i32 @forward_unfold(i32 3)
-; CHECK-NEXT:    [[FWD_INNER:%.*]] = call i32 @forward_inner(i32 3)
-; CHECK-NEXT:    [[FWD_OUTER:%.*]] = call i32 @forward_outer(i32 3)
-; CHECK-NEXT:    [[FWD_OUTER1:%.*]] = call i32 @forward_outer(i32 3)
-; CHECK-NEXT:    [[MULTI_CALL:%.*]] = call i32 @multi_call(i32 5)
-; CHECK-NEXT:    ret i32 [[MULTI_CALL]]
+; CHECK-NEXT:    [[FWD_INNER:%.*]] = call i32 @forward_inner.specialized.4(i32 3)
+; CHECK-NEXT:    [[FWD_OUTER:%.*]] = call i32 @forward_outer.specialized.6(i32 3)
+; CHECK-NEXT:    [[FWD_OUTER1:%.*]] = call i32 @forward_outer.specialized.6(i32 3)
+; CHECK-NEXT:    [[MULTI_CALL:%.*]] = call i32 @multi_call.specialized.2(i32 5)
+; CHECK-NEXT:    ret i32 11
 ;
 ;
 ; CHECK-LABEL: define internal i32 @incr.specialized.1(
 ; CHECK-SAME: i32 [[A:%.*]]) {
 ; CHECK-NEXT:    ret i32 poison
 ;
+;
+; CHECK-LABEL: define internal i32 @multi_call.specialized.2(
+; CHECK-SAME: i32 [[A:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[CALL:%.*]] = call i32 @incr.specialized.3(i32 5)
+; CHECK-NEXT:    [[MUL_CALL:%.*]] = call i32 @incr.specialized.1(i32 10)
+; CHECK-NEXT:    ret i32 poison
+;
+;
+; CHECK-LABEL: define internal i32 @incr.specialized.3(
+; CHECK-SAME: i32 [[A:%.*]]) {
+; CHECK-NEXT:    ret i32 poison
+;
+;
+; CHECK-LABEL: define internal i32 @forward_inner.specialized.4(
+; CHECK-SAME: i32 [[A:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[CALL:%.*]] = call i32 @incr.specialized.5(i32 3)
+; CHECK-NEXT:    ret i32 poison
+;
+;
+; CHECK-LABEL: define internal i32 @incr.specialized.5(
+; CHECK-SAME: i32 [[A:%.*]]) {
+; CHECK-NEXT:    ret i32 poison
+;
+;
+; CHECK-LABEL: define internal i32 @forward_outer.specialized.6(
+; CHECK-SAME: i32 [[A:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[CALL:%.*]] = call i32 @forward_inner.specialized.4(i32 3)
+; CHECK-NEXT:    ret i32 poison
+;
diff --git a/llvm/test/Transforms/FunctionSpecialization/track-return.ll b/llvm/test/Transforms/FunctionSpecialization/track-return.ll
index aaff6c138bbaa..d03d7c872ed79 100644
--- a/llvm/test/Transforms/FunctionSpecialization/track-return.ll
+++ b/llvm/test/Transforms/FunctionSpecialization/track-return.ll
@@ -4,7 +4,7 @@ define i64 @main() {
 ; CHECK:       define i64 @main
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[C1:%.*]] = call i64 @foo.specialized.1(i1 true, i64 3, i64 1)
-; CHECK-NEXT:    [[C2:%.*]] = call i64 @foo.specialized.2(i1 false, i64 4, i64 -1)
+; CHECK-NEXT:    [[C2:%.*]] = call i64 @foo.specialized.3(i1 false, i64 4, i64 -1)
 ; CHECK-NEXT:    ret i64 8
 ;
 entry:
@@ -15,27 +15,6 @@ entry:
 }
 
 define internal i64 @foo(i1 %flag, i64 %m, i64 %n) {
-;
-; CHECK:       define internal i64 @foo.specialized.1
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    br label %plus
-; CHECK:       plus:
-; CHECK-NEXT:    [[N0:%.*]] = call i64 @binop.specialized.4(i64 3, i64 1)
-; CHECK-NEXT:    [[RES0:%.*]] = call i64 @bar.specialized.6(i64 4)
-; CHECK-NEXT:    br label %merge
-; CHECK:       merge:
-; CHECK-NEXT:    ret i64 poison
-;
-; CHECK:       define internal i64 @foo.specialized.2
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    br label %minus
-; CHECK:       minus:
-; CHECK-NEXT:    [[N1:%.*]] = call i64 @binop.specialized.3(i64 4, i64 -1)
-; CHECK-NEXT:    [[RES1:%.*]] = call i64 @bar.specialized.5(i64 3)
-; CHECK-NEXT:    br label %merge
-; CHECK:       merge:
-; CHECK-NEXT:    ret i64 poison
-;
 entry:
   br i1 %flag, label %plus, label %minus
 
@@ -55,21 +34,61 @@ merge:
 }
 
 define internal i64 @binop(i64 %x, i64 %y) {
+entry:
+  %z = add i64 %x, %y
+  ret i64 %z
+}
+
+define internal i64 @bar(i64 %n) {
+entry:
+  %cmp = icmp sgt i64 %n, 3
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:
+  %res0 = sdiv i64 %n, 2
+  br label %if.end
+
+if.else:
+  %res1 = mul i64 %n, 2
+  br label %if.end
+
+if.end:
+  %res = phi i64 [ %res0, %if.then ], [ %res1, %if.else]
+  ret i64 %res
+}
+
+;
+; CHECK:       define internal i64 @foo.specialized.1
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label %plus
+; CHECK:       plus:
+; CHECK-NEXT:    [[N0:%.*]] = call i64 @binop.specialized.2(i64 3, i64 1)
+; CHECK-NEXT:    [[RES0:%.*]] = call i64 @bar.specialized.6(i64 4)
+; CHECK-NEXT:    br label %merge
+; CHECK:       merge:
+; CHECK-NEXT:    ret i64 poison
+;
 ;
-; CHECK:       define internal i64 @binop.specialized.3
+; CHECK:       define internal i64 @binop.specialized.2
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    ret i64 poison
 ;
+;
+; CHECK:       define internal i64 @foo.specialized.3
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label %minus
+; CHECK:       minus:
+; CHECK-NEXT:    [[N1:%.*]] = call i64 @binop.specialized.4(i64 4, i64 -1)
+; CHECK-NEXT:    [[RES1:%.*]] = call i64 @bar.specialized.5(i64 3)
+; CHECK-NEXT:    br label %merge
+; CHECK:       merge:
+; CHECK-NEXT:    ret i64 poison
+;
+;
 ; CHECK:       define internal i64 @binop.specialized.4
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    ret i64 poison
 ;
-entry:
-  %z = add i64 %x, %y
-  ret i64 %z
-}
-
-define internal i64 @bar(i64 %n) {
 ;
 ; CHECK:       define internal i64 @bar.specialized.5
 ; CHECK-NEXT:  entry:
@@ -87,20 +106,3 @@ define internal i64 @bar(i64 %n) {
 ; CHECK:       if.end:
 ; CHECK-NEXT:    ret i64 poison
 ;
-entry:
-  %cmp = icmp sgt i64 %n, 3
-  br i1 %cmp, label %if.then, label %if.else
-
-if.then:
-  %res0 = sdiv i64 %n, 2
-  br label %if.end
-
-if.else:
-  %res1 = mul i64 %n, 2
-  br label %if.end
-
-if.end:
-  %res = phi i64 [ %res0, %if.then ], [ %res1, %if.else]
-  ret i64 %res
-}
-

>From 841a08e9d1e63a937671db4cecf787b91988462f Mon Sep 17 00:00:00 2001
From: bababuck <buchner.ryan at gmail.com>
Date: Mon, 29 Sep 2025 17:18:48 -0700
Subject: [PATCH 16/23] [FnSpecialization] Allow chains to form via recursive
 folding

---
 .../Transforms/IPO/FunctionSpecialization.cpp |  7 +++---
 .../specialize-chain.ll                       | 22 ++++++++++++++-----
 2 files changed, 21 insertions(+), 8 deletions(-)

diff --git a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
index bf6d675957aa9..60e1134e5af14 100644
--- a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
@@ -276,11 +276,12 @@ Cost InstCostVisitor::getCodeSizeSavingsForUser(Instruction *User, Value *Use,
 
   LLVM_DEBUG(dbgs() << "FnSpecialization:     {CodeSize = " << CodeSize
                     << "} for user " << *User << "\n");
-
-  for (auto *U : User->users())
+  for (llvm::Use &UE : User->uses()) {
+    llvm::User *U = UE.getUser();
     if (auto *UI = dyn_cast<Instruction>(U))
       if (UI != User && isBlockExecutable(UI->getParent()))
-        CodeSize += getCodeSizeSavingsForUser(UI, User, C);
+        CodeSize += getCodeSizeSavingsForUser(UI, User, C, CallUsers, &UE);
+  }
 
   return CodeSize;
 }
diff --git a/llvm/test/Transforms/FunctionSpecialization/specialize-chain.ll b/llvm/test/Transforms/FunctionSpecialization/specialize-chain.ll
index ac786d0e27cee..e09f33e7fadaa 100644
--- a/llvm/test/Transforms/FunctionSpecialization/specialize-chain.ll
+++ b/llvm/test/Transforms/FunctionSpecialization/specialize-chain.ll
@@ -118,11 +118,11 @@ entry:
 ; CHECK-NEXT:  [[ENTRY:.*:]]
 ; CHECK-NEXT:    [[ADD:%.*]] = call i32 @incr.specialized.1(i32 10)
 ; CHECK-NEXT:    [[INT:%.*]] = call i32 @intrinsic(i32 3)
-; CHECK-NEXT:    [[FWD_UNFOLD:%.*]] = call i32 @forward_unfold(i32 3)
+; CHECK-NEXT:    [[FWD_UNFOLD:%.*]] = call i32 @forward_unfold.specialized.2(i32 3)
 ; CHECK-NEXT:    [[FWD_INNER:%.*]] = call i32 @forward_inner.specialized.4(i32 3)
 ; CHECK-NEXT:    [[FWD_OUTER:%.*]] = call i32 @forward_outer.specialized.6(i32 3)
 ; CHECK-NEXT:    [[FWD_OUTER1:%.*]] = call i32 @forward_outer.specialized.6(i32 3)
-; CHECK-NEXT:    [[MULTI_CALL:%.*]] = call i32 @multi_call.specialized.2(i32 5)
+; CHECK-NEXT:    [[MULTI_CALL:%.*]] = call i32 @multi_call.specialized.7(i32 5)
 ; CHECK-NEXT:    ret i32 11
 ;
 ;
@@ -131,11 +131,10 @@ entry:
 ; CHECK-NEXT:    ret i32 poison
 ;
 ;
-; CHECK-LABEL: define internal i32 @multi_call.specialized.2(
+; CHECK-LABEL: define internal i32 @forward_unfold.specialized.2(
 ; CHECK-SAME: i32 [[A:%.*]]) {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
-; CHECK-NEXT:    [[CALL:%.*]] = call i32 @incr.specialized.3(i32 5)
-; CHECK-NEXT:    [[MUL_CALL:%.*]] = call i32 @incr.specialized.1(i32 10)
+; CHECK-NEXT:    [[CALL:%.*]] = call i32 @incr.specialized.3(i32 30)
 ; CHECK-NEXT:    ret i32 poison
 ;
 ;
@@ -162,3 +161,16 @@ entry:
 ; CHECK-NEXT:    [[CALL:%.*]] = call i32 @forward_inner.specialized.4(i32 3)
 ; CHECK-NEXT:    ret i32 poison
 ;
+;
+; CHECK-LABEL: define internal i32 @multi_call.specialized.7(
+; CHECK-SAME: i32 [[A:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[CALL:%.*]] = call i32 @incr.specialized.8(i32 5)
+; CHECK-NEXT:    [[MUL_CALL:%.*]] = call i32 @incr.specialized.1(i32 10)
+; CHECK-NEXT:    ret i32 poison
+;
+;
+; CHECK-LABEL: define internal i32 @incr.specialized.8(
+; CHECK-SAME: i32 [[A:%.*]]) {
+; CHECK-NEXT:    ret i32 poison
+;

>From fc70c018aab69d737b690d4adb37ab3284f2172d Mon Sep 17 00:00:00 2001
From: bababuck <buchner.ryan at gmail.com>
Date: Mon, 29 Sep 2025 16:26:06 -0700
Subject: [PATCH 17/23] [FnSpecialization] Allow chains to form when collapsing
 PHI nodes

---
 llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h | 3 ++-
 llvm/lib/Transforms/IPO/FunctionSpecialization.cpp        | 7 ++++---
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
index 0e42d012fefbb..1910f3ecdba4f 100644
--- a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
+++ b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
@@ -228,7 +228,8 @@ class InstCostVisitor : public InstVisitor<InstCostVisitor, Constant *> {
   LLVM_ABI Cost getCodeSizeSavingsForArg(Argument *A, Constant *C,
                                          CallUserT *CallUsers = nullptr);
 
-  LLVM_ABI Cost getCodeSizeSavingsFromPendingPHIs();
+  LLVM_ABI Cost
+  getCodeSizeSavingsFromPendingPHIs(CallUserT *CallUsers = nullptr);
 
   LLVM_ABI Cost getLatencySavingsForKnownConstants();
 
diff --git a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
index 60e1134e5af14..8c3586fab4ea5 100644
--- a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
@@ -154,13 +154,14 @@ Constant *InstCostVisitor::findConstantFor(Value *V) const {
   return KnownConstants.lookup(V);
 }
 
-Cost InstCostVisitor::getCodeSizeSavingsFromPendingPHIs() {
+Cost InstCostVisitor::getCodeSizeSavingsFromPendingPHIs(CallUserT *CallUsers) {
   Cost CodeSize;
   while (!PendingPHIs.empty()) {
     Instruction *Phi = PendingPHIs.pop_back_val();
     // The pending PHIs could have been proven dead by now.
     if (isBlockExecutable(Phi->getParent()))
-      CodeSize += getCodeSizeSavingsForUser(Phi);
+      CodeSize +=
+          getCodeSizeSavingsForUser(Phi, nullptr, nullptr, CallUsers, nullptr);
   }
   return CodeSize;
 }
@@ -1116,7 +1117,7 @@ bool FunctionSpecializer::findSpecializations(
         Score += getInliningBonus(A.Formal, A.Actual);
       }
 
-      CodeSize += Visitor.getCodeSizeSavingsFromPendingPHIs();
+      CodeSize += Visitor.getCodeSizeSavingsFromPendingPHIs(&CallUsers);
       CurrentChain.insert(F);
 
       for (auto &CU : CallUsers) {

>From fb21dc60867473e85f76dc9f45c8c445f279922f Mon Sep 17 00:00:00 2001
From: bababuck <buchner.ryan at gmail.com>
Date: Tue, 30 Sep 2025 22:09:58 -0700
Subject: [PATCH 18/23] [FnSpecialization] Refactor CallUsersT to contain
 Idx/Constant pairs

In the future we won't know the Function at the time of insertion, so we
need to store an index so we can look up the Argument later.
---
 llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h | 6 +++---
 llvm/lib/Transforms/IPO/FunctionSpecialization.cpp        | 6 ++++--
 2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
index 1910f3ecdba4f..a70439146a144 100644
--- a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
+++ b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
@@ -95,12 +95,12 @@
 namespace llvm {
 struct Spec;
 
-struct SpecSig;
-
 // Map of potential specializations for each function.
 using SpecMap = DenseMap<Function *, SmallVector<unsigned>>;
 
-using CallUserT = SmallMapVector<CallBase *, std::pair<SpecSig, Function *>, 4>;
+using CallUserT = SmallMapVector<
+    CallBase *,
+    std::pair<SmallVector<std::pair<unsigned, Constant *>, 4>, Function *>, 4>;
 
 // Just a shorter abbreviation to improve indentation.
 using Cost = InstructionCost;
diff --git a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
index 8c3586fab4ea5..f0d323929d474 100644
--- a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
@@ -245,7 +245,7 @@ Cost InstCostVisitor::getCodeSizeSavingsForUser(Instruction *User, Value *Use,
         LLVM_DEBUG(dbgs() << "FnSpecialization:   Function called: "
                           << F->getName() << " argument number: " << Idx
                           << "\n");
-        (*CallUsers)[CI].first.Args.push_back({F->getArg(Idx), C});
+        (*CallUsers)[CI].first.push_back({Idx, C});
         (*CallUsers)[CI].second = F;
         return true;
       } else {
@@ -1139,7 +1139,9 @@ bool FunctionSpecializer::findSpecializations(
         // Since the function might not yet be known when processing the
         // constants due to a function pointer, wait to extract the argument
         // pointer at a given index.
-        SpecSig NewS = CU.second.first;
+        SpecSig NewS;
+        for (auto &P : CU.second.first)
+          NewS.Args.push_back({NewF->getArg(P.first), P.second});
 
         Spec CallSpec(NewF, /*CallSite*/ CU.first, NewS,
                       /*Status*/ CallSiteStatusT::AWAITING_PARENT);

>From da77758123d701196a386beafdfb52a21a3df537 Mon Sep 17 00:00:00 2001
From: bababuck <buchner.ryan at gmail.com>
Date: Wed, 1 Oct 2025 07:34:48 -0700
Subject: [PATCH 19/23] [FnSpecialization] If the Argument number is out of
 range for the call site's arguments, skip chaining

See test/Transforms/FunctionSpecialization/compiler-crash-60191.ll
---
 llvm/lib/Transforms/IPO/FunctionSpecialization.cpp | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
index f0d323929d474..dd126a91da7e6 100644
--- a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
@@ -1067,7 +1067,10 @@ bool FunctionSpecializer::findSpecializations(
           break;
       }
       if (As.size() == Idx) {
-        Value *PossC = CS.getArgOperand(A->getArgNo());
+        unsigned ArgNo = A->getArgNo();
+        if (ArgNo >= CS.arg_size())
+          continue;
+        Value *PossC = CS.getArgOperand(ArgNo);
         Constant *C = getCandidateConstant(PossC);
         if (!C)
           continue;

>From 9773c8b61e11b57032be33800319d6fefe8bf1f0 Mon Sep 17 00:00:00 2001
From: bababuck <buchner.ryan at gmail.com>
Date: Wed, 1 Oct 2025 07:37:57 -0700
Subject: [PATCH 20/23] [FnSpecialization] Allow specialization of indirect
 function calls exposed by specialization

---
 .../Transforms/IPO/FunctionSpecialization.cpp | 12 ++++++++-
 .../compiler-crash-60191.ll                   |  4 +--
 .../track-ptr-return.ll                       | 27 +++++++++++++------
 3 files changed, 32 insertions(+), 11 deletions(-)

diff --git a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
index dd126a91da7e6..497b0ffb44b33 100644
--- a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
@@ -234,13 +234,15 @@ Cost InstCostVisitor::getCodeSizeSavingsForUser(Instruction *User, Value *Use,
 
   Cost CodeSize = 0;
   auto isChainableCall = [&](Instruction *I) -> bool {
+    if (!CallUsers || !UseEdge)
+      return false;
     if (CallInst *CI = dyn_cast<CallInst>(I);
         CI && CI->getIntrinsicID() == llvm::Intrinsic::not_intrinsic) {
       LLVM_DEBUG(
           dbgs() << "FnSpecialization:   Found constant forwarded via a call "
                  << *C << "\n");
       Function *F = CI->getCalledFunction();
-      if (F && CallUsers && UseEdge) { // Avoid function pointers
+      if (F) { // Avoid function pointers
         unsigned Idx = CI->getArgOperandNo(UseEdge);
         LLVM_DEBUG(dbgs() << "FnSpecialization:   Function called: "
                           << F->getName() << " argument number: " << Idx
@@ -248,9 +250,17 @@ Cost InstCostVisitor::getCodeSizeSavingsForUser(Instruction *User, Value *Use,
         (*CallUsers)[CI].first.push_back({Idx, C});
         (*CallUsers)[CI].second = F;
         return true;
+      } else if (Use == CI->getCalledOperand()) {
+        LLVM_DEBUG(dbgs() << "FnSpecialization:   Found call to constant "
+                             "function pointer.\n");
+        Function *CF = dyn_cast<Function>(C);
+        assert(CF && "Indirect call to a non-Function type");
+        (*CallUsers)[CI].second = CF;
       } else {
         LLVM_DEBUG(
             dbgs() << "FnSpecialization:   Could not find call function.\n");
+        unsigned Idx = CI->getArgOperandNo(UseEdge);
+        (*CallUsers)[CI].first.push_back({Idx, C});
       }
     }
     return false;
diff --git a/llvm/test/Transforms/FunctionSpecialization/compiler-crash-60191.ll b/llvm/test/Transforms/FunctionSpecialization/compiler-crash-60191.ll
index 668929824cc6f..456480b2cc674 100644
--- a/llvm/test/Transforms/FunctionSpecialization/compiler-crash-60191.ll
+++ b/llvm/test/Transforms/FunctionSpecialization/compiler-crash-60191.ll
@@ -60,7 +60,7 @@ define i32 @f2(i32 %offset) {
 }
 
 ; Tests that `func` has been specialized and it didn't cause compiler crash.
+; CHECK-DAG: func.specialized.4
+; CHECK-DAG: func.specialized.5
 ; CHECK-DAG: func.specialized.1
-; CHECK-DAG: func.specialized.2
-; CHECK-DAG: func.specialized.3
 
diff --git a/llvm/test/Transforms/FunctionSpecialization/track-ptr-return.ll b/llvm/test/Transforms/FunctionSpecialization/track-ptr-return.ll
index f4ba0e72a1b43..ef40bf12ae59d 100644
--- a/llvm/test/Transforms/FunctionSpecialization/track-ptr-return.ll
+++ b/llvm/test/Transforms/FunctionSpecialization/track-ptr-return.ll
@@ -48,9 +48,8 @@ entry:
 ; CHECK-NEXT:    [[OP1:%.*]] = call ptr @select_op.specialized.1(ptr @global_true)
 ; CHECK-NEXT:    [[OP2:%.*]] = call ptr @select_op.specialized.2(ptr @global_false)
 ; CHECK-NEXT:    [[C1:%.*]] = call i64 @compute.specialized.3(ptr @plus)
-; CHECK-NEXT:    [[C2:%.*]] = call i64 @compute.specialized.4(ptr @minus)
-; CHECK-NEXT:    [[ADD:%.*]] = add i64 [[C1]], [[C2]]
-; CHECK-NEXT:    ret i64 [[ADD]]
+; CHECK-NEXT:    [[C2:%.*]] = call i64 @compute.specialized.5(ptr @minus)
+; CHECK-NEXT:    ret i64 2
 ;
 ;
 ; CHECK-LABEL: define ptr @select_op(
@@ -87,15 +86,27 @@ entry:
 ; CHECK-LABEL: define internal i64 @compute.specialized.3(
 ; CHECK-SAME: ptr [[OP:%.*]]) {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
-; CHECK-NEXT:    [[RES:%.*]] = call i64 @plus(i64 1)
-; CHECK-NEXT:    ret i64 [[RES]]
+; CHECK-NEXT:    [[RES:%.*]] = call i64 @plus.specialized.4(i64 1)
+; CHECK-NEXT:    ret i64 poison
 ;
 ;
-; CHECK-LABEL: define internal i64 @compute.specialized.4(
+; CHECK-LABEL: define internal i64 @plus.specialized.4(
+; CHECK-SAME: i64 [[X:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret i64 poison
+;
+;
+; CHECK-LABEL: define internal i64 @compute.specialized.5(
 ; CHECK-SAME: ptr [[OP:%.*]]) {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
-; CHECK-NEXT:    [[RES:%.*]] = call i64 @minus(i64 1)
-; CHECK-NEXT:    ret i64 [[RES]]
+; CHECK-NEXT:    [[RES:%.*]] = call i64 @minus.specialized.6(i64 1)
+; CHECK-NEXT:    ret i64 poison
+;
+;
+; CHECK-LABEL: define internal i64 @minus.specialized.6(
+; CHECK-SAME: i64 [[X:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret i64 poison
 ;
 ;
 ; NOLIT-LABEL: define i64 @main() {

>From 706fb4a3f596978c8cca4e2680c4fd9c01e99cae Mon Sep 17 00:00:00 2001
From: bababuck <buchner.ryan at gmail.com>
Date: Mon, 29 Sep 2025 17:24:29 -0700
Subject: [PATCH 21/23] [FnSpecialization] Allow functions that are too small
 to specialize as part of a chain

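Functions below the minimum size are still specialized when they are part of a chain (but not on their own), so that the effect of the chain specialization can be assessed more accurately.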
---
 .../Transforms/IPO/FunctionSpecialization.h   |  2 ++
 .../Transforms/IPO/FunctionSpecialization.cpp | 19 ++++++++++++++-----
 2 files changed, 16 insertions(+), 5 deletions(-)

diff --git a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
index a70439146a144..a8f6447c9eff8 100644
--- a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
+++ b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
@@ -179,6 +179,8 @@ struct Spec {
   // Index within AllSpecs
   unsigned Loc = 0;
 
+  bool SpecializeOnOwn = true;
+
   Spec(Function *F, CallBase *CallSite, const SpecSig &S,
        CallSiteStatusT Status)
       : F(F), Clone(nullptr), Sig(S), Score(), CodeSize(), FuncSize(),
diff --git a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
index 497b0ffb44b33..24472e56fe6ee 100644
--- a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
@@ -740,10 +740,19 @@ bool FunctionSpecializer::runOneSpec(Spec &S, bool Chained, SpecMap &SM,
   // If the code metrics reveal that we shouldn't duplicate the function,
   // or if the code size implies that this function is easy to get inlined,
   // then we shouldn't specialize it.
-  if (Metrics.notDuplicatable || !Metrics.NumInsts.isValid() ||
-      (RequireMinSize && Metrics.NumInsts < MinFunctionSize))
+  if (Metrics.notDuplicatable || !Metrics.NumInsts.isValid())
     return false;
 
+  if (RequireMinSize && Metrics.NumInsts < MinFunctionSize) {
+    if (Chained) {
+      // Want to specialize as part of chain still so we can more accurately
+      // assess the chain specialization
+      S.SpecializeOnOwn = false;
+    } else {
+      return false;
+    }
+  }
+
   // When specialization on literal constants is disabled, only consider
   // recursive functions when running multiple times to save wasted analysis,
   // as we will not be able to specialize on any newly found literal constant
@@ -800,7 +809,7 @@ bool FunctionSpecializer::run() {
 
   unsigned IndepSpecs = 0;
   for (auto &S : AllSpecs)
-    if (!S.AllChains)
+    if (S.SpecializeOnOwn && !S.AllChains)
       ++IndepSpecs;
   if (!NumCandidates || !IndepSpecs) {
     LLVM_DEBUG(
@@ -813,9 +822,9 @@ bool FunctionSpecializer::run() {
   // specialization budget, which is derived from maximum number of
   // specializations per specialization candidate function.
   auto CompareScore = [&AllSpecs](unsigned I, unsigned J) {
-    if (AllSpecs[J].AllChains)
+    if (!AllSpecs[J].SpecializeOnOwn || AllSpecs[J].AllChains)
       return true;
-    if (AllSpecs[I].AllChains)
+    if (!AllSpecs[I].SpecializeOnOwn || AllSpecs[I].AllChains)
       return false;
     if (AllSpecs[I].Score != AllSpecs[J].Score)
       return AllSpecs[I].Score > AllSpecs[J].Score;

>From 8f9443849459c3acaf1416f3af675916958746e9 Mon Sep 17 00:00:00 2001
From: bababuck <buchner.ryan at gmail.com>
Date: Wed, 8 Oct 2025 15:02:09 -0700
Subject: [PATCH 22/23] [FnSpecialization] Don't specialize chained functions
 that take variable arguments

---
 llvm/lib/Transforms/IPO/FunctionSpecialization.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
index 24472e56fe6ee..746193eb2e547 100644
--- a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
@@ -1146,7 +1146,7 @@ bool FunctionSpecializer::findSpecializations(
         Function *NewF = CU.second.second;
 
         // Recurse only if constants found for the function
-        if (!NewF)
+        if (!NewF || NewF->isVarArg())
           continue;
 
         // Don't allow any recursion in chains

>From 54f89a308485ddcc4c138c6d3ccac8eb29f3169a Mon Sep 17 00:00:00 2001
From: bababuck <buchner.ryan at gmail.com>
Date: Fri, 17 Oct 2025 09:22:13 -0700
Subject: [PATCH 23/23] Lint fix

---
 llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
index a8f6447c9eff8..84bdd5c2379f1 100644
--- a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
+++ b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
@@ -127,9 +127,7 @@ struct SpecSig {
   }
 };
 
-enum CallSiteStatusT {
-  AWAITING_PARENT, HAS_PARENT, NO_PARENT
-};
+enum CallSiteStatusT { AWAITING_PARENT, HAS_PARENT, NO_PARENT };
 
 struct SpecCall {
   CallBase *CallSite;



More information about the llvm-commits mailing list