[llvm] [LLVM] Add flatten function attribute to LLVM IR and implement recursive inlining in AlwaysInliner (PR #174899)
Grigory Pastukhov via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 2 14:53:08 PST 2026
https://github.com/grigorypas updated https://github.com/llvm/llvm-project/pull/174899
>From 14636cf3afb84c32b4ea7b71d81ca21757ea55b5 Mon Sep 17 00:00:00 2001
From: Grigory Pastukhov <gpastukhov at meta.com>
Date: Wed, 7 Jan 2026 15:11:05 -0800
Subject: [PATCH 01/16] Add flatten attribute to LLVM
---
llvm/include/llvm/Bitcode/LLVMBitCodes.h | 1 +
llvm/include/llvm/IR/Attributes.td | 3 +++
llvm/lib/Bitcode/Reader/BitcodeReader.cpp | 2 ++
llvm/lib/Bitcode/Writer/BitcodeWriter.cpp | 2 ++
llvm/lib/Transforms/Utils/CodeExtractor.cpp | 1 +
llvm/test/Bitcode/attributes.ll | 6 ++++++
6 files changed, 15 insertions(+)
diff --git a/llvm/include/llvm/Bitcode/LLVMBitCodes.h b/llvm/include/llvm/Bitcode/LLVMBitCodes.h
index bcf596a0d79b2..3b5e8cce05b72 100644
--- a/llvm/include/llvm/Bitcode/LLVMBitCodes.h
+++ b/llvm/include/llvm/Bitcode/LLVMBitCodes.h
@@ -809,6 +809,7 @@ enum AttributeKindCodes {
ATTR_KIND_NO_CREATE_UNDEF_OR_POISON = 105,
ATTR_KIND_DENORMAL_FPENV = 106,
ATTR_KIND_NOOUTLINE = 107,
+ ATTR_KIND_FLATTEN = 108,
};
enum ComdatSelectionKindCodes {
diff --git a/llvm/include/llvm/IR/Attributes.td b/llvm/include/llvm/IR/Attributes.td
index 941251003f5ba..cca5d270a1f91 100644
--- a/llvm/include/llvm/IR/Attributes.td
+++ b/llvm/include/llvm/IR/Attributes.td
@@ -134,6 +134,9 @@ def DisableSanitizerInstrumentation: EnumAttr<"disable_sanitizer_instrumentation
/// Provide pointer element type to intrinsic.
def ElementType : TypeAttr<"elementtype", IntersectPreserve, [ParamAttr]>;
+/// Flatten function by recursively inlining all calls.
+def Flatten : EnumAttr<"flatten", IntersectPreserve, [FnAttr]>;
+
/// Whether to keep return instructions, or replace with a jump to an external
/// symbol.
def FnRetThunkExtern : EnumAttr<"fn_ret_thunk_extern", IntersectPreserve, [FnAttr]>;
diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
index 2c8612a9d7822..60ebe1c1f5cd2 100644
--- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -2094,6 +2094,8 @@ static Attribute::AttrKind getAttrFromCode(uint64_t Code) {
return Attribute::ElementType;
case bitc::ATTR_KIND_FNRETTHUNK_EXTERN:
return Attribute::FnRetThunkExtern;
+ case bitc::ATTR_KIND_FLATTEN:
+ return Attribute::Flatten;
case bitc::ATTR_KIND_INLINE_HINT:
return Attribute::InlineHint;
case bitc::ATTR_KIND_IN_REG:
diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
index 6b332554bddd4..bb58e21fa15d8 100644
--- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -774,6 +774,8 @@ static uint64_t getAttrKindEncoding(Attribute::AttrKind Kind) {
return bitc::ATTR_KIND_DISABLE_SANITIZER_INSTRUMENTATION;
case Attribute::FnRetThunkExtern:
return bitc::ATTR_KIND_FNRETTHUNK_EXTERN;
+ case Attribute::Flatten:
+ return bitc::ATTR_KIND_FLATTEN;
case Attribute::Hot:
return bitc::ATTR_KIND_HOT;
case Attribute::ElementType:
diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp
index b298a8ae144d8..44c33d4f5a769 100644
--- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp
@@ -938,6 +938,7 @@ Function *CodeExtractor::constructFunctionDeclaration(
case Attribute::AlwaysInline:
case Attribute::Cold:
case Attribute::DisableSanitizerInstrumentation:
+ case Attribute::Flatten:
case Attribute::FnRetThunkExtern:
case Attribute::Hot:
case Attribute::HybridPatchable:
diff --git a/llvm/test/Bitcode/attributes.ll b/llvm/test/Bitcode/attributes.ll
index 4f234d7a40079..21712fae7eecd 100644
--- a/llvm/test/Bitcode/attributes.ll
+++ b/llvm/test/Bitcode/attributes.ll
@@ -526,6 +526,11 @@ define void @f_no_create_undef_or_poison() nocreateundeforpoison {
ret void;
}
+; CHECK: define void @f_flatten() [[FLATTEN:#[0-9]+]]
+define void @f_flatten() flatten {
+ ret void;
+}
+
; CHECK: define void @f87() [[FNRETTHUNKEXTERN:#[0-9]+]]
define void @f87() fn_ret_thunk_extern { ret void }
@@ -644,6 +649,7 @@ define void @dead_on_return_sized(ptr dead_on_return(4) %p) {
; CHECK: attributes #54 = { sanitize_realtime_blocking }
; CHECK: attributes #55 = { sanitize_alloc_token }
; CHECK: attributes #56 = { nocreateundeforpoison }
+; CHECK: attributes [[FLATTEN]] = { flatten }
; CHECK: attributes [[FNRETTHUNKEXTERN]] = { fn_ret_thunk_extern }
; CHECK: attributes [[SKIPPROFILE]] = { skipprofile }
; CHECK: attributes [[OPTDEBUG]] = { optdebug }
>From cd91e818264f818e6260a55660d01b59c5c2c63b Mon Sep 17 00:00:00 2001
From: Grigory Pastukhov <gpastukhov at meta.com>
Date: Wed, 7 Jan 2026 17:35:33 -0800
Subject: [PATCH 02/16] Implement flattening logic in AlwaysInliner pass
---
llvm/lib/Transforms/IPO/AlwaysInliner.cpp | 233 ++++++++++++++++------
llvm/test/Transforms/Inline/flatten.ll | 142 +++++++++++++
2 files changed, 319 insertions(+), 56 deletions(-)
create mode 100644 llvm/test/Transforms/Inline/flatten.ll
diff --git a/llvm/lib/Transforms/IPO/AlwaysInliner.cpp b/llvm/lib/Transforms/IPO/AlwaysInliner.cpp
index 4fba4475767f6..d5656e5e3db74 100644
--- a/llvm/lib/Transforms/IPO/AlwaysInliner.cpp
+++ b/llvm/lib/Transforms/IPO/AlwaysInliner.cpp
@@ -30,21 +30,191 @@ using namespace llvm;
namespace {
+class InlinerHelper {
+ Module &M;
+ FunctionAnalysisManager *FAM;
+ function_ref<AssumptionCache &(Function &)> GetAssumptionCache;
+ function_ref<AAResults &(Function &)> GetAAR;
+ bool InsertLifetime;
+
+ SmallSetVector<Function *, 16> MaybeInlinedFunctions;
+ InlineFunctionInfo IFI;
+
+public:
+ InlinerHelper(Module &M, ProfileSummaryInfo &PSI,
+ FunctionAnalysisManager *FAM,
+ function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
+ function_ref<AAResults &(Function &)> GetAAR,
+ bool InsertLifetime)
+ : M(M), FAM(FAM), GetAssumptionCache(GetAssumptionCache), GetAAR(GetAAR),
+ InsertLifetime(InsertLifetime), IFI(GetAssumptionCache, &PSI) {}
+
+ bool canInline(Function &F) {
+ return !F.isPresplitCoroutine() && !F.isDeclaration() &&
+ isInlineViable(F).isSuccess();
+ }
+
+ bool tryInline(CallBase &CB, StringRef InlignReason) {
+ IFI.reset();
+ Function &Callee = *CB.getCalledFunction();
+ Function *Caller = CB.getCaller();
+ OptimizationRemarkEmitter ORE(Caller);
+ DebugLoc DLoc = CB.getDebugLoc();
+ BasicBlock *Block = CB.getParent();
+
+ InlineResult Res = InlineFunction(CB, IFI, /*MergeAttributes=*/true,
+ &GetAAR(Callee), InsertLifetime);
+ if (!Res.isSuccess()) {
+ ORE.emit([&]() {
+ return OptimizationRemarkMissed(DEBUG_TYPE, "NotInlined", DLoc, Block)
+ << "'" << ore::NV("Callee", &Callee) << "' is not inlined into '"
+ << ore::NV("Caller", Caller)
+ << "': " << ore::NV("Reason", Res.getFailureReason());
+ });
+ return false;
+ }
+
+ emitInlinedIntoBasedOnCost(ORE, DLoc, Block, Callee, *Caller,
+ InlineCost::getAlways(InlignReason.data()),
+ /*ForProfileContext=*/false, DEBUG_TYPE);
+ if (FAM)
+ FAM->invalidate(*Caller, PreservedAnalyses::none());
+ return true;
+ }
+
+ void addNewCallsToWorklist(
+ SmallVectorImpl<std::pair<CallBase *, int>> &Worklist,
+ int InlineHistoryID,
+ SmallVectorImpl<std::pair<Function *, int>> &InlineHistory,
+ Function *InlinedCallee) {
+ if (IFI.InlinedCallSites.empty())
+ return;
+ int NewHistoryID = InlineHistory.size();
+ InlineHistory.push_back({InlinedCallee, InlineHistoryID});
+ for (CallBase *CB : IFI.InlinedCallSites)
+ Worklist.push_back({CB, NewHistoryID});
+ }
+
+ void addToMaybeInlinedFunctions(Function &F) {
+ MaybeInlinedFunctions.insert(&F);
+ }
+
+ bool postInlinerCleanup() {
+ SmallVector<Function *, 16> InlinedComdatFunctions;
+ bool Changed = false;
+ for (Function *F : MaybeInlinedFunctions) {
+ F->removeDeadConstantUsers();
+ if (F->hasFnAttribute(Attribute::AlwaysInline) &&
+ F->isDefTriviallyDead()) {
+ if (F->hasComdat()) {
+ InlinedComdatFunctions.push_back(F);
+ } else {
+ if (FAM)
+ FAM->clear(*F, F->getName());
+ M.getFunctionList().erase(F);
+ Changed = true;
+ }
+ }
+ }
+ if (!InlinedComdatFunctions.empty()) {
+ // Now we just have the comdat functions. Filter out the ones whose
+ // comdats are not actually dead.
+ filterDeadComdatFunctions(InlinedComdatFunctions);
+ // The remaining functions are actually dead.
+ for (Function *F : InlinedComdatFunctions) {
+ if (FAM)
+ FAM->clear(*F, F->getName());
+ M.getFunctionList().erase(F);
+ Changed = true;
+ }
+ }
+ return Changed;
+ }
+};
+
+static bool inlineHistoryIncludes(
+ Function *F, int InlineHistoryID,
+ const SmallVectorImpl<std::pair<Function *, int>> &InlineHistory) {
+ while (InlineHistoryID != -1) {
+ assert(unsigned(InlineHistoryID) < InlineHistory.size() &&
+ "Invalid inline history ID");
+ if (InlineHistory[InlineHistoryID].first == F)
+ return true;
+ InlineHistoryID = InlineHistory[InlineHistoryID].second;
+ }
+ return false;
+}
+
+bool flattenFunction(Function &F, InlinerHelper &IH) {
+ SmallVector<std::pair<CallBase *, int>, 16> Worklist;
+ SmallVector<std::pair<Function *, int>, 16> InlineHistory;
+ OptimizationRemarkEmitter ORE(&F);
+
+ for (BasicBlock &BB : F) {
+ for (Instruction &I : BB) {
+ if (auto *CB = dyn_cast<CallBase>(&I)) {
+ if (CB->getAttributes().hasFnAttr(Attribute::NoInline))
+ continue;
+ Function *Callee = CB->getCalledFunction();
+ if (!Callee)
+ continue;
+ if (!IH.canInline(*Callee)) {
+ continue;
+ }
+ Worklist.push_back({CB, -1});
+ }
+ }
+ }
+ bool Changed = false;
+ while (!Worklist.empty()) {
+ std::pair<CallBase *, int> P = Worklist.pop_back_val();
+ CallBase *CB = P.first;
+ int InlineHistoryID = P.second;
+ Function *Callee = CB->getCalledFunction();
+ if (!Callee)
+ continue;
+
+ if (Callee == &F ||
+ inlineHistoryIncludes(Callee, InlineHistoryID, InlineHistory)) {
+ ORE.emit([&]() {
+ return OptimizationRemarkMissed(DEBUG_TYPE, "NotInlined",
+ CB->getDebugLoc(), CB->getParent())
+ << "'" << ore::NV("Callee", Callee) << "' is not inlined into '"
+ << ore::NV("Caller", CB->getCaller())
+ << "': recursive call during flattening";
+ });
+ continue;
+ }
+
+ if (IH.tryInline(*CB, "flatten attribute")) {
+ Changed = true;
+ IH.addToMaybeInlinedFunctions(*Callee);
+ IH.addNewCallsToWorklist(Worklist, InlineHistoryID, InlineHistory,
+ Callee);
+ }
+ }
+ return Changed;
+}
+
bool AlwaysInlineImpl(
Module &M, bool InsertLifetime, ProfileSummaryInfo &PSI,
FunctionAnalysisManager *FAM,
function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
function_ref<AAResults &(Function &)> GetAAR) {
SmallSetVector<CallBase *, 16> Calls;
+ InlinerHelper IH(M, PSI, FAM, GetAssumptionCache, GetAAR, InsertLifetime);
+ SmallVector<Function *, 4> NeedFlattening;
+
bool Changed = false;
SmallVector<Function *, 16> InlinedComdatFunctions;
for (Function &F : make_early_inc_range(M)) {
- if (F.isPresplitCoroutine())
- continue;
+ if (F.hasFnAttribute(Attribute::Flatten))
+ NeedFlattening.push_back(&F);
- if (F.isDeclaration() || !isInlineViable(F).isSuccess())
+ if (!IH.canInline(F))
continue;
+ IH.addToMaybeInlinedFunctions(F);
Calls.clear();
@@ -56,62 +226,13 @@ bool AlwaysInlineImpl(
Calls.insert(CB);
for (CallBase *CB : Calls) {
- Function *Caller = CB->getCaller();
- OptimizationRemarkEmitter ORE(Caller);
- DebugLoc DLoc = CB->getDebugLoc();
- BasicBlock *Block = CB->getParent();
-
- InlineFunctionInfo IFI(GetAssumptionCache, &PSI, nullptr, nullptr);
- InlineResult Res = InlineFunction(*CB, IFI, /*MergeAttributes=*/true,
- &GetAAR(F), InsertLifetime);
- if (!Res.isSuccess()) {
- ORE.emit([&]() {
- return OptimizationRemarkMissed(DEBUG_TYPE, "NotInlined", DLoc, Block)
- << "'" << ore::NV("Callee", &F) << "' is not inlined into '"
- << ore::NV("Caller", Caller)
- << "': " << ore::NV("Reason", Res.getFailureReason());
- });
- continue;
- }
-
- emitInlinedIntoBasedOnCost(
- ORE, DLoc, Block, F, *Caller,
- InlineCost::getAlways("always inline attribute"),
- /*ForProfileContext=*/false, DEBUG_TYPE);
-
- Changed = true;
- if (FAM)
- FAM->invalidate(*Caller, PreservedAnalyses::none());
- }
-
- F.removeDeadConstantUsers();
- if (F.hasFnAttribute(Attribute::AlwaysInline) && F.isDefTriviallyDead()) {
- // Remember to try and delete this function afterward. This allows to call
- // filterDeadComdatFunctions() only once.
- if (F.hasComdat()) {
- InlinedComdatFunctions.push_back(&F);
- } else {
- if (FAM)
- FAM->clear(F, F.getName());
- M.getFunctionList().erase(F);
- Changed = true;
- }
- }
- }
-
- if (!InlinedComdatFunctions.empty()) {
- // Now we just have the comdat functions. Filter out the ones whose comdats
- // are not actually dead.
- filterDeadComdatFunctions(InlinedComdatFunctions);
- // The remaining functions are actually dead.
- for (Function *F : InlinedComdatFunctions) {
- if (FAM)
- FAM->clear(*F, F->getName());
- M.getFunctionList().erase(F);
- Changed = true;
+ Changed |= IH.tryInline(*CB, "always inline attribute");
}
}
+ for (Function *F : NeedFlattening)
+ Changed |= flattenFunction(*F, IH);
+ Changed |= IH.postInlinerCleanup();
return Changed;
}
diff --git a/llvm/test/Transforms/Inline/flatten.ll b/llvm/test/Transforms/Inline/flatten.ll
new file mode 100644
index 0000000000000..a50bcb6cb591b
--- /dev/null
+++ b/llvm/test/Transforms/Inline/flatten.ll
@@ -0,0 +1,142 @@
+; RUN: opt -passes=always-inline -S < %s | FileCheck %s
+; RUN: opt -passes=always-inline -pass-remarks-missed=inline -S < %s 2>&1 | FileCheck %s --check-prefix=REMARK
+
+; Test that the flatten attribute recursively inlines all calls.
+
+; Multiple levels are inlined.
+define internal i32 @leaf() {
+ ret i32 42
+}
+
+define internal i32 @middle() {
+ %r = call i32 @leaf()
+ ret i32 %r
+}
+
+define i32 @test_multilevel() flatten {
+; CHECK-LABEL: @test_multilevel(
+; CHECK-NOT: call i32 @middle
+; CHECK-NOT: call i32 @leaf
+; CHECK: ret i32 42
+ %r = call i32 @middle()
+ ret i32 %r
+}
+
+; Functions with invoke are inlined.
+declare i32 @__gxx_personality_v0(...)
+declare void @may_throw()
+
+define internal i32 @callee_with_invoke() personality ptr @__gxx_personality_v0 {
+entry:
+ invoke void @may_throw() to label %cont unwind label %lpad
+cont:
+ ret i32 100
+lpad:
+ %lp = landingpad { ptr, i32 } cleanup
+ resume { ptr, i32 } %lp
+}
+
+define i32 @test_invoke() flatten personality ptr @__gxx_personality_v0 {
+; CHECK-LABEL: @test_invoke(
+; CHECK-NOT: call i32 @callee_with_invoke
+; CHECK: invoke void @may_throw()
+; CHECK: ret i32 100
+entry:
+ %r = call i32 @callee_with_invoke()
+ ret i32 %r
+}
+
+; Declaration without definition is not inlined.
+declare i32 @external_func()
+
+define i32 @test_declaration() flatten {
+; CHECK-LABEL: @test_declaration(
+; CHECK: call i32 @external_func()
+; CHECK: ret i32
+ %r = call i32 @external_func()
+ ret i32 %r
+}
+
+; Indirect calls are not inlined.
+define internal i32 @target_func() {
+ ret i32 99
+}
+
+define i32 @test_indirect(ptr %func_ptr) flatten {
+; CHECK-LABEL: @test_indirect(
+; CHECK: call i32 %func_ptr()
+; CHECK: ret i32
+ %r = call i32 %func_ptr()
+ ret i32 %r
+}
+
+; Direct recursion back to flattened function.
+; The callee calls the flattened function - should not cause infinite inlining.
+define internal i32 @calls_flattened_func() {
+ %r = call i32 @test_direct_recursion()
+ ret i32 %r
+}
+
+define i32 @test_direct_recursion() flatten {
+; CHECK-LABEL: @test_direct_recursion(
+; The call to calls_flattened_func should be inlined, but the recursive call back
+; to test_direct_recursion should remain.
+; CHECK-NOT: call i32 @calls_flattened_func()
+; CHECK: call i32 @test_direct_recursion()
+; CHECK: ret i32
+ %r = call i32 @calls_flattened_func()
+ ret i32 %r
+}
+
+; Mutual recursion (A calls B, B calls A).
+; Should inline once but not infinitely.
+define internal i32 @mutual_a() {
+ %r = call i32 @mutual_b()
+ ret i32 %r
+}
+
+define internal i32 @mutual_b() {
+ %r = call i32 @mutual_a()
+ ret i32 %r
+}
+
+define i32 @test_mutual_recursion() flatten {
+; CHECK-LABEL: @test_mutual_recursion(
+; After inlining mutual_a, we get call to mutual_b.
+; After inlining mutual_b, we get call to mutual_a which should remain (skipped due to recursion).
+; CHECK-NOT: call i32 @mutual_b()
+; CHECK: call i32 @mutual_a()
+; CHECK: ret i32
+ %r = call i32 @mutual_a()
+ ret i32 %r
+}
+
+; Recursive callee via indirection.
+; A function that is part of a recursive cycle should be inlined once but not infinitely.
+; Note: Direct self-recursive functions (f calls f) are not inlineable in LLVM.
+; So we test with mutual recursion pattern where each function individually is viable.
+define internal i32 @recursive_a(i32 %n) {
+ %r = call i32 @recursive_b(i32 %n)
+ ret i32 %r
+}
+
+define internal i32 @recursive_b(i32 %n) {
+ %r = call i32 @recursive_a(i32 %n)
+ ret i32 %r
+}
+
+define i32 @test_self_recursion() flatten {
+; CHECK-LABEL: @test_self_recursion(
+; After inlining recursive_a (produces call to recursive_b with the original arg)
+; After inlining recursive_b (produces call to recursive_a - skipped due to history)
+; Both recursive_a and recursive_b should be inlined (CHECK-NOT matches any call to them)
+; The remaining call is to recursive_a with the propagated constant.
+; CHECK-NOT: call i32 @recursive_b
+; CHECK: call i32 @recursive_a(i32 5)
+; CHECK: ret i32
+ %r = call i32 @recursive_a(i32 5)
+ ret i32 %r
+}
+
+; Check that optimization remark is emitted for recursive calls during flattening.
+; REMARK: remark: {{.*}} 'test_direct_recursion' is not inlined into 'test_direct_recursion': recursive call during flattening
>From b8b5bea1a50e63359d50f290d213affbbd43eee5 Mon Sep 17 00:00:00 2001
From: Grigory Pastukhov <gpastukhov at meta.com>
Date: Fri, 9 Jan 2026 12:32:03 -0800
Subject: [PATCH 03/16] Bug fixes: filter non-inlinable new call sites and ABI
violations
---
llvm/lib/Transforms/IPO/AlwaysInliner.cpp | 38 ++++++++++++++++++++---
llvm/test/Transforms/Inline/flatten.ll | 15 +++++++++
2 files changed, 48 insertions(+), 5 deletions(-)
diff --git a/llvm/lib/Transforms/IPO/AlwaysInliner.cpp b/llvm/lib/Transforms/IPO/AlwaysInliner.cpp
index d5656e5e3db74..f729e84fe1db7 100644
--- a/llvm/lib/Transforms/IPO/AlwaysInliner.cpp
+++ b/llvm/lib/Transforms/IPO/AlwaysInliner.cpp
@@ -19,6 +19,7 @@
#include "llvm/Analysis/InlineCost.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Module.h"
#include "llvm/InitializePasses.h"
#include "llvm/Transforms/Utils/Cloning.h"
@@ -145,7 +146,8 @@ static bool inlineHistoryIncludes(
return false;
}
-bool flattenFunction(Function &F, InlinerHelper &IH) {
+bool flattenFunction(Function &F, InlinerHelper &IH,
+ function_ref<TargetTransformInfo &(Function &)> GetTTI) {
SmallVector<std::pair<CallBase *, int>, 16> Worklist;
SmallVector<std::pair<Function *, int>, 16> InlineHistory;
OptimizationRemarkEmitter ORE(&F);
@@ -186,6 +188,19 @@ bool flattenFunction(Function &F, InlinerHelper &IH) {
continue;
}
+ if (!IH.canInline(*Callee))
+ continue;
+
+ // Use TTI to check for target-specific hard inlining restrictions.
+ // This includes checks like:
+ // - Cannot inline streaming callee into non-streaming caller
+ // - Cannot inline functions that create new ZA/ZT0 state
+ // For flatten, we respect the user's intent to inline as much as possible,
+ // but these are fundamental ABI violations that cannot be worked around.
+ TargetTransformInfo &TTI = GetTTI(F);
+ if (!TTI.areInlineCompatible(&F, Callee))
+ continue;
+
if (IH.tryInline(*CB, "flatten attribute")) {
Changed = true;
IH.addToMaybeInlinedFunctions(*Callee);
@@ -200,7 +215,8 @@ bool AlwaysInlineImpl(
Module &M, bool InsertLifetime, ProfileSummaryInfo &PSI,
FunctionAnalysisManager *FAM,
function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
- function_ref<AAResults &(Function &)> GetAAR) {
+ function_ref<AAResults &(Function &)> GetAAR,
+ function_ref<TargetTransformInfo &(Function &)> GetTTI) {
SmallSetVector<CallBase *, 16> Calls;
InlinerHelper IH(M, PSI, FAM, GetAssumptionCache, GetAAR, InsertLifetime);
SmallVector<Function *, 4> NeedFlattening;
@@ -229,8 +245,12 @@ bool AlwaysInlineImpl(
Changed |= IH.tryInline(*CB, "always inline attribute");
}
}
+
+ // Only call flattenFunction (which uses TTI) if there are functions to
+ // flatten. This ensures TTI analysis is not requested at -O0 when there are
+ // no flatten functions, avoiding any overhead.
for (Function *F : NeedFlattening)
- Changed |= flattenFunction(*F, IH);
+ Changed |= flattenFunction(*F, IH, GetTTI);
Changed |= IH.postInlinerCleanup();
return Changed;
@@ -255,9 +275,12 @@ struct AlwaysInlinerLegacyPass : public ModulePass {
auto GetAssumptionCache = [&](Function &F) -> AssumptionCache & {
return getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
};
+ auto GetTTI = [&](Function &F) -> TargetTransformInfo & {
+ return getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+ };
return AlwaysInlineImpl(M, InsertLifetime, PSI, /*FAM=*/nullptr,
- GetAssumptionCache, GetAAR);
+ GetAssumptionCache, GetAAR, GetTTI);
}
static char ID; // Pass identification, replacement for typeid
@@ -266,6 +289,7 @@ struct AlwaysInlinerLegacyPass : public ModulePass {
AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<AAResultsWrapperPass>();
AU.addRequired<ProfileSummaryInfoWrapperPass>();
+ AU.addRequired<TargetTransformInfoWrapperPass>();
}
};
@@ -277,6 +301,7 @@ INITIALIZE_PASS_BEGIN(AlwaysInlinerLegacyPass, "always-inline",
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_END(AlwaysInlinerLegacyPass, "always-inline",
"Inliner for always_inline functions", false, false)
@@ -294,10 +319,13 @@ PreservedAnalyses AlwaysInlinerPass::run(Module &M,
auto GetAAR = [&](Function &F) -> AAResults & {
return FAM.getResult<AAManager>(F);
};
+ auto GetTTI = [&](Function &F) -> TargetTransformInfo & {
+ return FAM.getResult<TargetIRAnalysis>(F);
+ };
auto &PSI = MAM.getResult<ProfileSummaryAnalysis>(M);
bool Changed = AlwaysInlineImpl(M, InsertLifetime, PSI, &FAM,
- GetAssumptionCache, GetAAR);
+ GetAssumptionCache, GetAAR, GetTTI);
if (!Changed)
return PreservedAnalyses::all();
diff --git a/llvm/test/Transforms/Inline/flatten.ll b/llvm/test/Transforms/Inline/flatten.ll
index a50bcb6cb591b..70a08cd951b95 100644
--- a/llvm/test/Transforms/Inline/flatten.ll
+++ b/llvm/test/Transforms/Inline/flatten.ll
@@ -57,6 +57,21 @@ define i32 @test_declaration() flatten {
ret i32 %r
}
+; Inlined callee that calls a declaration - the declaration should remain after flattening.
+define internal i32 @calls_external() {
+ %r = call i32 @external_func()
+ ret i32 %r
+}
+
+define i32 @test_inline_then_declaration() flatten {
+; CHECK-LABEL: @test_inline_then_declaration(
+; CHECK-NOT: call i32 @calls_external()
+; CHECK: call i32 @external_func()
+; CHECK: ret i32
+ %r = call i32 @calls_external()
+ ret i32 %r
+}
+
; Indirect calls are not inlined.
define internal i32 @target_func() {
ret i32 99
>From c4f4638fc87a0c867462c55a25f1c103e3e7f65c Mon Sep 17 00:00:00 2001
From: Grigory Pastukhov <gpastukhov at meta.com>
Date: Fri, 9 Jan 2026 12:39:49 -0800
Subject: [PATCH 04/16] Add test case to test ABI violation
---
.../Transforms/Inline/AArch64/flatten-sme.ll | 53 +++++++++++++++++++
1 file changed, 53 insertions(+)
create mode 100644 llvm/test/Transforms/Inline/AArch64/flatten-sme.ll
diff --git a/llvm/test/Transforms/Inline/AArch64/flatten-sme.ll b/llvm/test/Transforms/Inline/AArch64/flatten-sme.ll
new file mode 100644
index 0000000000000..e85902650487e
--- /dev/null
+++ b/llvm/test/Transforms/Inline/AArch64/flatten-sme.ll
@@ -0,0 +1,53 @@
+; RUN: opt < %s -mtriple=aarch64-unknown-linux-gnu -mattr=+sme -S -passes=always-inline | FileCheck %s
+
+; Test that flatten attribute respects ABI restrictions for SME.
+; Streaming callee cannot be inlined into non-streaming caller.
+; new_za callee cannot be inlined at all.
+
+define internal i32 @streaming_callee() "aarch64_pstate_sm_enabled" {
+ ret i32 42
+}
+
+define internal i32 @new_za_callee() "aarch64_new_za" {
+ ret i32 100
+}
+
+define internal i32 @normal_callee() {
+ ret i32 50
+}
+
+; Streaming callee -> non-streaming caller: should NOT be inlined (ABI violation).
+define i32 @test_streaming_not_inlined() flatten {
+; CHECK-LABEL: @test_streaming_not_inlined(
+; CHECK: call i32 @streaming_callee()
+; CHECK: ret i32
+ %r = call i32 @streaming_callee()
+ ret i32 %r
+}
+
+; new_za callee: should NOT be inlined (ABI violation - callee allocates new ZA).
+define i32 @test_new_za_not_inlined() flatten {
+; CHECK-LABEL: @test_new_za_not_inlined(
+; CHECK: call i32 @new_za_callee()
+; CHECK: ret i32
+ %r = call i32 @new_za_callee()
+ ret i32 %r
+}
+
+; Streaming caller -> streaming callee: should be inlined (compatible).
+define i32 @test_streaming_to_streaming() flatten "aarch64_pstate_sm_enabled" {
+; CHECK-LABEL: @test_streaming_to_streaming(
+; CHECK-NOT: call i32 @streaming_callee
+; CHECK: ret i32 42
+ %r = call i32 @streaming_callee()
+ ret i32 %r
+}
+
+; Non-streaming caller -> non-streaming callee: should be inlined.
+define i32 @test_normal_inlined() flatten {
+; CHECK-LABEL: @test_normal_inlined(
+; CHECK-NOT: call i32 @normal_callee
+; CHECK: ret i32 50
+ %r = call i32 @normal_callee()
+ ret i32 %r
+}
>From b28f4677a7ee43d06971b7f0abbe0cdc72f9cb44 Mon Sep 17 00:00:00 2001
From: Grigory Pastukhov <gpastukhov at meta.com>
Date: Wed, 14 Jan 2026 14:14:31 -0800
Subject: [PATCH 05/16] Run TTI on Callee
---
llvm/lib/Transforms/IPO/AlwaysInliner.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/Transforms/IPO/AlwaysInliner.cpp b/llvm/lib/Transforms/IPO/AlwaysInliner.cpp
index f729e84fe1db7..27f1a60d076d4 100644
--- a/llvm/lib/Transforms/IPO/AlwaysInliner.cpp
+++ b/llvm/lib/Transforms/IPO/AlwaysInliner.cpp
@@ -197,7 +197,7 @@ bool flattenFunction(Function &F, InlinerHelper &IH,
// - Cannot inline functions that create new ZA/ZT0 state
// For flatten, we respect the user's intent to inline as much as possible,
// but these are fundamental ABI violations that cannot be worked around.
- TargetTransformInfo &TTI = GetTTI(F);
+ TargetTransformInfo &TTI = GetTTI(*Callee);
if (!TTI.areInlineCompatible(&F, Callee))
continue;
>From 65c55cd410817381c75b4fb32135c3168e6e79ba Mon Sep 17 00:00:00 2001
From: Grigory Pastukhov <gpastukhov at meta.com>
Date: Thu, 22 Jan 2026 15:52:35 -0800
Subject: [PATCH 06/16] Add flattening logic to Inliner pass
---
llvm/include/llvm/Analysis/InlineAdvisor.h | 7 ++
llvm/lib/Analysis/InlineAdvisor.cpp | 11 +++
llvm/lib/Transforms/IPO/Inliner.cpp | 106 +++++++++++++++++++++
llvm/test/Transforms/Inline/flatten.ll | 2 +
4 files changed, 126 insertions(+)
diff --git a/llvm/include/llvm/Analysis/InlineAdvisor.h b/llvm/include/llvm/Analysis/InlineAdvisor.h
index 50ba3c13da70f..d45b2c6dcd079 100644
--- a/llvm/include/llvm/Analysis/InlineAdvisor.h
+++ b/llvm/include/llvm/Analysis/InlineAdvisor.h
@@ -180,6 +180,13 @@ class LLVM_ABI InlineAdvisor {
std::unique_ptr<InlineAdvice> getAdvice(CallBase &CB,
bool MandatoryOnly = false);
+ /// Get an InlineAdvice for a call site without performing cost analysis.
+ /// This is useful for cases like the flatten attribute where we want to
+ /// inline all viable calls regardless of cost. The viability checks
+ /// (TTI compatibility, noinline attributes, etc.) are still performed.
+ /// Returns advice with isInliningRecommended() = true if the call is viable.
+ std::unique_ptr<InlineAdvice> getAdviceWithoutCost(CallBase &CB);
+
/// This must be called when the Inliner pass is entered, to allow the
/// InlineAdvisor update internal state, as result of function passes run
/// between Inliner pass runs (for the same module).
diff --git a/llvm/lib/Analysis/InlineAdvisor.cpp b/llvm/lib/Analysis/InlineAdvisor.cpp
index 1fb2f7e780031..abee47a484eb7 100644
--- a/llvm/lib/Analysis/InlineAdvisor.cpp
+++ b/llvm/lib/Analysis/InlineAdvisor.cpp
@@ -654,6 +654,17 @@ std::unique_ptr<InlineAdvice> InlineAdvisor::getAdvice(CallBase &CB,
return getMandatoryAdvice(CB, Advice);
}
+std::unique_ptr<InlineAdvice>
+InlineAdvisor::getAdviceWithoutCost(CallBase &CB) {
+ // Check if the call is viable for inlining without performing cost analysis.
+ // This is useful for cases like the flatten attribute where we want to
+ // inline all viable calls regardless of cost.
+ bool IsViable = CB.getCaller() != CB.getCalledFunction() &&
+ MandatoryInliningKind::Never !=
+ getMandatoryKind(CB, FAM, getCallerORE(CB));
+ return getMandatoryAdvice(CB, IsViable);
+}
+
OptimizationRemarkEmitter &InlineAdvisor::getCallerORE(CallBase &CB) {
return FAM.getResult<OptimizationRemarkEmitterAnalysis>(*CB.getCaller());
}
diff --git a/llvm/lib/Transforms/IPO/Inliner.cpp b/llvm/lib/Transforms/IPO/Inliner.cpp
index fb376562f6781..533af266f1f61 100644
--- a/llvm/lib/Transforms/IPO/Inliner.cpp
+++ b/llvm/lib/Transforms/IPO/Inliner.cpp
@@ -195,6 +195,101 @@ InlinerPass::getAdvisor(const ModuleAnalysisManagerCGSCCProxy::Result &MAM,
return *IAA->getAdvisor();
}
+/// Flatten a function by inlining all calls within it recursively.
+/// This implements the flatten attribute behavior for the CGSCC inliner.
+/// Returns true if any inlining was performed.
+static bool flattenFunction(Function &F, FunctionAnalysisManager &FAM,
+ ProfileSummaryInfo *PSI, InlineAdvisor &Advisor) {
+ SmallVector<std::pair<CallBase *, int>, 16> Worklist;
+ SmallVector<std::pair<Function *, int>, 16> InlineHistory;
+
+ auto GetAssumptionCache = [&](Function &Fn) -> AssumptionCache & {
+ return FAM.getResult<AssumptionAnalysis>(Fn);
+ };
+
+ // Collect initial calls.
+ for (BasicBlock &BB : F) {
+ for (Instruction &I : BB) {
+ if (auto *CB = dyn_cast<CallBase>(&I)) {
+ if (CB->getAttributes().hasFnAttr(Attribute::NoInline))
+ continue;
+ Function *Callee = CB->getCalledFunction();
+ if (!Callee || Callee->isDeclaration())
+ continue;
+ Worklist.push_back({CB, -1});
+ }
+ }
+ }
+
+ bool Changed = false;
+ while (!Worklist.empty()) {
+ std::pair<CallBase *, int> P = Worklist.pop_back_val();
+ CallBase *CB = P.first;
+ int InlineHistoryID = P.second;
+ Function *Callee = CB->getCalledFunction();
+ if (!Callee)
+ continue;
+
+ // Detect recursion.
+ if (Callee == &F ||
+ inlineHistoryIncludes(Callee, InlineHistoryID, InlineHistory)) {
+ LLVM_DEBUG(dbgs() << "Skipping recursive call during flattening: "
+ << F.getName() << " -> " << Callee->getName() << "\n");
+ setInlineRemark(*CB, "recursive");
+ auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
+ ORE.emit([&]() {
+ return OptimizationRemarkMissed(DEBUG_TYPE, "NotInlined",
+ CB->getDebugLoc(), CB->getParent())
+ << "'" << ore::NV("Callee", Callee) << "' is not inlined into '"
+ << ore::NV("Caller", CB->getCaller())
+ << "': recursive call during flattening";
+ });
+ continue;
+ }
+
+ // Use the advisor to check viability without performing cost analysis.
+ // For flatten, we want to inline all viable calls regardless of cost.
+ std::unique_ptr<InlineAdvice> Advice = Advisor.getAdviceWithoutCost(*CB);
+ if (!Advice)
+ continue;
+
+ if (!Advice->isInliningRecommended()) {
+ Advice->recordUnattemptedInlining();
+ continue;
+ }
+
+ InlineFunctionInfo IFI(GetAssumptionCache, PSI);
+
+ InlineResult IR =
+ InlineFunction(*CB, IFI, /*MergeAttributes=*/true,
+ &FAM.getResult<AAManager>(F), /*InsertLifetime=*/true);
+ if (!IR.isSuccess()) {
+ Advice->recordUnsuccessfulInlining(IR);
+ continue;
+ }
+
+ Advice->recordInlining();
+ Changed = true;
+
+ // Add new call sites from the inlined function to the worklist.
+ if (!IFI.InlinedCallSites.empty()) {
+ int NewHistoryID = InlineHistory.size();
+ InlineHistory.push_back({Callee, InlineHistoryID});
+ for (CallBase *ICB : IFI.InlinedCallSites) {
+ Function *NewCallee = ICB->getCalledFunction();
+ if (NewCallee && !NewCallee->isDeclaration() &&
+ !ICB->getAttributes().hasFnAttr(Attribute::NoInline))
+ Worklist.push_back({ICB, NewHistoryID});
+ }
+ }
+ }
+
+ if (Changed)
+ FAM.invalidate(F, PreservedAnalyses::none());
+
+ return Changed;
+}
+
void makeFunctionBodyUnreachable(Function &F) {
F.dropAllReferences();
for (BasicBlock &BB : make_early_inc_range(F))
@@ -248,8 +343,15 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
// incrementally making a single function grow in a super linear fashion.
SmallVector<std::pair<CallBase *, int>, 16> Calls;
+ // Track functions with flatten attribute for processing at the end.
+ SmallSetVector<Function *, 4> FlattenFunctions;
+
// Populate the initial list of calls in this SCC.
for (auto &N : InitialC) {
+ Function &Fn = N.getFunction();
+ if (Fn.hasFnAttribute(Attribute::Flatten))
+ FlattenFunctions.insert(&Fn);
+
auto &ORE =
FAM.getResult<OptimizationRemarkEmitterAnalysis>(N.getFunction());
// We want to generally process call sites top-down in order for
@@ -535,6 +637,10 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
FAM.invalidate(F, PreservedAnalyses::none());
}
+ // Now flatten functions with the flatten attribute.
+ for (Function *FlattenF : FlattenFunctions)
+ Changed |= flattenFunction(*FlattenF, FAM, PSI, Advisor);
+
// We must ensure that we only delete functions with comdats if every function
// in the comdat is going to be deleted.
if (!DeadFunctionsInComdats.empty()) {
diff --git a/llvm/test/Transforms/Inline/flatten.ll b/llvm/test/Transforms/Inline/flatten.ll
index 70a08cd951b95..e0e08383a16ef 100644
--- a/llvm/test/Transforms/Inline/flatten.ll
+++ b/llvm/test/Transforms/Inline/flatten.ll
@@ -1,5 +1,7 @@
; RUN: opt -passes=always-inline -S < %s | FileCheck %s
; RUN: opt -passes=always-inline -pass-remarks-missed=inline -S < %s 2>&1 | FileCheck %s --check-prefix=REMARK
+; RUN: opt -passes=inline -S < %s | FileCheck %s
+; RUN: opt -passes='cgscc(inline<only-mandatory>)' -S < %s | FileCheck %s
; Test that the flatten attribute recursively inlines all calls.
>From f0d3076ff440f8eed431c127e6f7d3d66b6b2f1c Mon Sep 17 00:00:00 2001
From: Grigory Pastukhov <gpastukhov at meta.com>
Date: Thu, 22 Jan 2026 18:17:04 -0800
Subject: [PATCH 07/16] Move common code to utility file
---
.../llvm/Transforms/IPO/InliningUtils.h | 119 +++++++++++++++
llvm/lib/Transforms/IPO/AlwaysInliner.cpp | 108 ++++----------
llvm/lib/Transforms/IPO/Inliner.cpp | 140 ++++++------------
3 files changed, 195 insertions(+), 172 deletions(-)
create mode 100644 llvm/include/llvm/Transforms/IPO/InliningUtils.h
diff --git a/llvm/include/llvm/Transforms/IPO/InliningUtils.h b/llvm/include/llvm/Transforms/IPO/InliningUtils.h
new file mode 100644
index 0000000000000..520c9d8343c12
--- /dev/null
+++ b/llvm/include/llvm/Transforms/IPO/InliningUtils.h
@@ -0,0 +1,119 @@
+//===- InliningUtils.h - Shared inlining utilities -------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines shared utilities used by the inliner passes.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_IPO_INLININGUTILS_H
+#define LLVM_TRANSFORMS_IPO_INLININGUTILS_H
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+
+namespace llvm {
+
+/// Check if Function F appears in the inline history chain.
+/// InlineHistory is a vector of (Function, ParentHistoryID) pairs.
+/// Returns true if F was already inlined in the chain leading to
+/// InlineHistoryID.
+inline bool inlineHistoryIncludes(
+ Function *F, int InlineHistoryID,
+ const SmallVectorImpl<std::pair<Function *, int>> &InlineHistory) {
+ while (InlineHistoryID != -1) {
+ assert(unsigned(InlineHistoryID) < InlineHistory.size() &&
+ "Invalid inline history ID");
+ if (InlineHistory[InlineHistoryID].first == F)
+ return true;
+ InlineHistoryID = InlineHistory[InlineHistoryID].second;
+ }
+ return false;
+}
+
+/// Flatten a function by inlining all calls recursively.
+///
+/// PolicyT must provide:
+/// - bool canInlineCall(Function &F, CallBase &CB): Check if call can be
+/// inlined into F
+/// - bool doInline(Function &F, CallBase &CB, Function &Callee): Perform
+/// the inline, return true on success
+/// - ArrayRef<CallBase *> getNewCallSites(): Get call sites from last inline
+///
+/// Returns true if any inlining was performed.
+template <typename PolicyT>
+bool flattenFunction(Function &F, PolicyT &Policy,
+ OptimizationRemarkEmitter &ORE) {
+ SmallVector<std::pair<CallBase *, int>, 16> Worklist;
+ SmallVector<std::pair<Function *, int>, 16> InlineHistory;
+
+ // Collect initial calls.
+ for (BasicBlock &BB : F) {
+ for (Instruction &I : BB) {
+ if (auto *CB = dyn_cast<CallBase>(&I)) {
+ if (CB->getAttributes().hasFnAttr(Attribute::NoInline))
+ continue;
+ Function *Callee = CB->getCalledFunction();
+ if (!Callee || Callee->isDeclaration())
+ continue;
+ Worklist.push_back({CB, -1});
+ }
+ }
+ }
+
+ bool Changed = false;
+ while (!Worklist.empty()) {
+ auto Item = Worklist.pop_back_val();
+ CallBase *CB = Item.first;
+ int InlineHistoryID = Item.second;
+ Function *Callee = CB->getCalledFunction();
+ if (!Callee)
+ continue;
+
+ // Detect recursion.
+ if (Callee == &F ||
+ inlineHistoryIncludes(Callee, InlineHistoryID, InlineHistory)) {
+ ORE.emit([&]() {
+ return OptimizationRemarkMissed("inline", "NotInlined",
+ CB->getDebugLoc(), CB->getParent())
+ << "'" << ore::NV("Callee", Callee) << "' is not inlined into '"
+ << ore::NV("Caller", CB->getCaller())
+ << "': recursive call during flattening";
+ });
+ continue;
+ }
+
+ if (!Policy.canInlineCall(F, *CB))
+ continue;
+
+ if (!Policy.doInline(F, *CB, *Callee))
+ continue;
+
+ Changed = true;
+
+ // Add new call sites from the inlined function to the worklist.
+ ArrayRef<CallBase *> NewCallSites = Policy.getNewCallSites();
+ if (!NewCallSites.empty()) {
+ int NewHistoryID = InlineHistory.size();
+ InlineHistory.push_back({Callee, InlineHistoryID});
+ for (CallBase *NewCB : NewCallSites) {
+ Function *NewCallee = NewCB->getCalledFunction();
+ if (NewCallee && !NewCallee->isDeclaration() &&
+ !NewCB->getAttributes().hasFnAttr(Attribute::NoInline))
+ Worklist.push_back({NewCB, NewHistoryID});
+ }
+ }
+ }
+
+ return Changed;
+}
+
+} // namespace llvm
+
+#endif // LLVM_TRANSFORMS_IPO_INLININGUTILS_H
diff --git a/llvm/lib/Transforms/IPO/AlwaysInliner.cpp b/llvm/lib/Transforms/IPO/AlwaysInliner.cpp
index 27f1a60d076d4..52adf0853db91 100644
--- a/llvm/lib/Transforms/IPO/AlwaysInliner.cpp
+++ b/llvm/lib/Transforms/IPO/AlwaysInliner.cpp
@@ -22,6 +22,7 @@
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Module.h"
#include "llvm/InitializePasses.h"
+#include "llvm/Transforms/IPO/InliningUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
@@ -83,17 +84,8 @@ class InlinerHelper {
return true;
}
- void addNewCallsToWorklist(
- SmallVectorImpl<std::pair<CallBase *, int>> &Worklist,
- int InlineHistoryID,
- SmallVectorImpl<std::pair<Function *, int>> &InlineHistory,
- Function *InlinedCallee) {
- if (IFI.InlinedCallSites.empty())
- return;
- int NewHistoryID = InlineHistory.size();
- InlineHistory.push_back({InlinedCallee, InlineHistoryID});
- for (CallBase *CB : IFI.InlinedCallSites)
- Worklist.push_back({CB, NewHistoryID});
+ ArrayRef<CallBase *> getInlinedCallSites() const {
+ return IFI.InlinedCallSites;
}
void addToMaybeInlinedFunctions(Function &F) {
@@ -133,64 +125,20 @@ class InlinerHelper {
}
};
-static bool inlineHistoryIncludes(
- Function *F, int InlineHistoryID,
- const SmallVectorImpl<std::pair<Function *, int>> &InlineHistory) {
- while (InlineHistoryID != -1) {
- assert(unsigned(InlineHistoryID) < InlineHistory.size() &&
- "Invalid inline history ID");
- if (InlineHistory[InlineHistoryID].first == F)
- return true;
- InlineHistoryID = InlineHistory[InlineHistoryID].second;
- }
- return false;
-}
-
-bool flattenFunction(Function &F, InlinerHelper &IH,
- function_ref<TargetTransformInfo &(Function &)> GetTTI) {
- SmallVector<std::pair<CallBase *, int>, 16> Worklist;
- SmallVector<std::pair<Function *, int>, 16> InlineHistory;
- OptimizationRemarkEmitter ORE(&F);
-
- for (BasicBlock &BB : F) {
- for (Instruction &I : BB) {
- if (auto *CB = dyn_cast<CallBase>(&I)) {
- if (CB->getAttributes().hasFnAttr(Attribute::NoInline))
- continue;
- Function *Callee = CB->getCalledFunction();
- if (!Callee)
- continue;
- if (!IH.canInline(*Callee)) {
- continue;
- }
- Worklist.push_back({CB, -1});
- }
- }
- }
- bool Changed = false;
- while (!Worklist.empty()) {
- std::pair<CallBase *, int> P = Worklist.pop_back_val();
- CallBase *CB = P.first;
- int InlineHistoryID = P.second;
- Function *Callee = CB->getCalledFunction();
- if (!Callee)
- continue;
-
- if (Callee == &F ||
- inlineHistoryIncludes(Callee, InlineHistoryID, InlineHistory)) {
- ORE.emit([&]() {
- return OptimizationRemarkMissed(DEBUG_TYPE, "NotInlined",
- CB->getDebugLoc(), CB->getParent())
- << "'" << ore::NV("Callee", Callee) << "' is not inlined into '"
- << ore::NV("Caller", CB->getCaller())
- << "': recursive call during flattening";
- });
- continue;
- }
+/// Policy for flattenFunction template used by AlwaysInliner.
+class AlwaysInlinerFlattenPolicy {
+ InlinerHelper &IH;
+ function_ref<TargetTransformInfo &(Function &)> GetTTI;
- if (!IH.canInline(*Callee))
- continue;
+public:
+ AlwaysInlinerFlattenPolicy(
+ InlinerHelper &IH, function_ref<TargetTransformInfo &(Function &)> GetTTI)
+ : IH(IH), GetTTI(GetTTI) {}
+ bool canInlineCall(Function &F, CallBase &CB) {
+ Function *Callee = CB.getCalledFunction();
+ if (!Callee || !IH.canInline(*Callee))
+ return false;
// Use TTI to check for target-specific hard inlining restrictions.
// This includes checks like:
// - Cannot inline streaming callee into non-streaming caller
@@ -198,18 +146,19 @@ bool flattenFunction(Function &F, InlinerHelper &IH,
// For flatten, we respect the user's intent to inline as much as possible,
// but these are fundamental ABI violations that cannot be worked around.
TargetTransformInfo &TTI = GetTTI(*Callee);
- if (!TTI.areInlineCompatible(&F, Callee))
- continue;
+ return TTI.areInlineCompatible(&F, Callee);
+ }
- if (IH.tryInline(*CB, "flatten attribute")) {
- Changed = true;
- IH.addToMaybeInlinedFunctions(*Callee);
- IH.addNewCallsToWorklist(Worklist, InlineHistoryID, InlineHistory,
- Callee);
+ bool doInline(Function &F, CallBase &CB, Function &Callee) {
+ if (IH.tryInline(CB, "flatten attribute")) {
+ IH.addToMaybeInlinedFunctions(Callee);
+ return true;
}
+ return false;
}
- return Changed;
-}
+
+ ArrayRef<CallBase *> getNewCallSites() { return IH.getInlinedCallSites(); }
+};
bool AlwaysInlineImpl(
Module &M, bool InsertLifetime, ProfileSummaryInfo &PSI,
@@ -249,8 +198,11 @@ bool AlwaysInlineImpl(
// Only call flattenFunction (which uses TTI) if there are functions to
// flatten. This ensures TTI analysis is not requested at -O0 when there are
// no flatten functions, avoiding any overhead.
- for (Function *F : NeedFlattening)
- Changed |= flattenFunction(*F, IH, GetTTI);
+ for (Function *F : NeedFlattening) {
+ AlwaysInlinerFlattenPolicy Policy(IH, GetTTI);
+ OptimizationRemarkEmitter ORE(F);
+ Changed |= flattenFunction(*F, Policy, ORE);
+ }
Changed |= IH.postInlinerCleanup();
return Changed;
diff --git a/llvm/lib/Transforms/IPO/Inliner.cpp b/llvm/lib/Transforms/IPO/Inliner.cpp
index 533af266f1f61..b0ce7eb457a84 100644
--- a/llvm/lib/Transforms/IPO/Inliner.cpp
+++ b/llvm/lib/Transforms/IPO/Inliner.cpp
@@ -53,6 +53,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/IPO/InliningUtils.h"
#include "llvm/Transforms/Utils/CallPromotionUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/Local.h"
@@ -142,21 +143,6 @@ static cl::opt<CallSiteFormat::Format> CGSCCInlineReplayFormat(
"<Line Number>:<Column Number>.<Discriminator> (default)")),
cl::desc("How cgscc inline replay file is formatted"), cl::Hidden);
-/// Return true if the specified inline history ID
-/// indicates an inline history that includes the specified function.
-static bool inlineHistoryIncludes(
- Function *F, int InlineHistoryID,
- const SmallVectorImpl<std::pair<Function *, int>> &InlineHistory) {
- while (InlineHistoryID != -1) {
- assert(unsigned(InlineHistoryID) < InlineHistory.size() &&
- "Invalid inline history ID");
- if (InlineHistory[InlineHistoryID].first == F)
- return true;
- InlineHistoryID = InlineHistory[InlineHistoryID].second;
- }
- return false;
-}
-
InlineAdvisor &
InlinerPass::getAdvisor(const ModuleAnalysisManagerCGSCCProxy::Result &MAM,
FunctionAnalysisManager &FAM, Module &M) {
@@ -195,100 +181,59 @@ InlinerPass::getAdvisor(const ModuleAnalysisManagerCGSCCProxy::Result &MAM,
return *IAA->getAdvisor();
}
-/// Flatten a function by inlining all calls within it recursively.
-/// This implements the flatten attribute behavior for the CGSCC inliner.
-/// Returns true if any inlining was performed.
-static bool flattenFunction(Function &F, FunctionAnalysisManager &FAM,
- ProfileSummaryInfo *PSI, InlineAdvisor &Advisor) {
- SmallVector<std::pair<CallBase *, int>, 16> Worklist;
- SmallVector<std::pair<Function *, int>, 16> InlineHistory;
-
- auto GetAssumptionCache = [&](Function &Fn) -> AssumptionCache & {
- return FAM.getResult<AssumptionAnalysis>(Fn);
- };
-
- // Collect initial calls.
- for (BasicBlock &BB : F) {
- for (Instruction &I : BB) {
- if (auto *CB = dyn_cast<CallBase>(&I)) {
- if (CB->getAttributes().hasFnAttr(Attribute::NoInline))
- continue;
- Function *Callee = CB->getCalledFunction();
- if (!Callee || Callee->isDeclaration())
- continue;
- Worklist.push_back({CB, -1});
- }
- }
+/// Policy for flattenFunction template used by CGSCC Inliner.
+class CGSCCInlinerFlattenPolicy {
+ FunctionAnalysisManager &FAM;
+ InlineAdvisor &Advisor;
+
+ std::function<AssumptionCache &(Function &)> GetAssumptionCache;
+ InlineFunctionInfo IFI;
+
+public:
+ CGSCCInlinerFlattenPolicy(FunctionAnalysisManager &FAM,
+ ProfileSummaryInfo *PSI, InlineAdvisor &Advisor)
+ : FAM(FAM), Advisor(Advisor),
+ GetAssumptionCache([&FAM](Function &Fn) -> AssumptionCache & {
+ return FAM.getResult<AssumptionAnalysis>(Fn);
+ }),
+ IFI(GetAssumptionCache, PSI) {}
+
+ bool canInlineCall(Function &F, CallBase &CB) {
+ // This is called both during initial collection and during worklist
+ // processing. We only do cheap checks here - the advisor is called
+ // in doInline to avoid creating InlineAdvice objects that might not
+ // be properly recorded.
+ Function *Callee = CB.getCalledFunction();
+ if (!Callee || Callee->isDeclaration())
+ return false;
+ return isInlineViable(*Callee).isSuccess();
}
- bool Changed = false;
- while (!Worklist.empty()) {
- std::pair<CallBase *, int> P = Worklist.pop_back_val();
- CallBase *CB = P.first;
- int InlineHistoryID = P.second;
- Function *Callee = CB->getCalledFunction();
- if (!Callee)
- continue;
-
- // Detect recursion.
- if (Callee == &F ||
- inlineHistoryIncludes(Callee, InlineHistoryID, InlineHistory)) {
- LLVM_DEBUG(dbgs() << "Skipping recursive call during flattening: "
- << F.getName() << " -> " << Callee->getName() << "\n");
- setInlineRemark(*CB, "recursive");
- auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
- ORE.emit([&]() {
- return OptimizationRemarkMissed(DEBUG_TYPE, "NotInlined",
- CB->getDebugLoc(), CB->getParent())
- << "'" << ore::NV("Callee", Callee) << "' is not inlined into '"
- << ore::NV("Caller", CB->getCaller())
- << "': recursive call during flattening";
- });
- continue;
- }
-
+ bool doInline(Function &F, CallBase &CB, Function &Callee) {
// Use the advisor to check viability without performing cost analysis.
// For flatten, we want to inline all viable calls regardless of cost.
- std::unique_ptr<InlineAdvice> Advice = Advisor.getAdviceWithoutCost(*CB);
+ std::unique_ptr<InlineAdvice> Advice = Advisor.getAdviceWithoutCost(CB);
if (!Advice)
- continue;
-
+ return false;
if (!Advice->isInliningRecommended()) {
Advice->recordUnattemptedInlining();
- continue;
+ return false;
}
- InlineFunctionInfo IFI(GetAssumptionCache, PSI);
-
+ IFI.reset();
InlineResult IR =
- InlineFunction(*CB, IFI, /*MergeAttributes=*/true,
+ InlineFunction(CB, IFI, /*MergeAttributes=*/true,
&FAM.getResult<AAManager>(F), /*InsertLifetime=*/true);
if (!IR.isSuccess()) {
Advice->recordUnsuccessfulInlining(IR);
- continue;
+ return false;
}
-
Advice->recordInlining();
- Changed = true;
-
- // Add new call sites from the inlined function to the worklist.
- if (!IFI.InlinedCallSites.empty()) {
- int NewHistoryID = InlineHistory.size();
- InlineHistory.push_back({Callee, InlineHistoryID});
- for (CallBase *ICB : IFI.InlinedCallSites) {
- Function *NewCallee = ICB->getCalledFunction();
- if (NewCallee && !NewCallee->isDeclaration() &&
- !ICB->getAttributes().hasFnAttr(Attribute::NoInline))
- Worklist.push_back({ICB, NewHistoryID});
- }
- }
+ return true;
}
- if (Changed)
- FAM.invalidate(F, PreservedAnalyses::none());
-
- return Changed;
-}
+ ArrayRef<CallBase *> getNewCallSites() { return IFI.InlinedCallSites; }
+};
void makeFunctionBodyUnreachable(Function &F) {
F.dropAllReferences();
@@ -638,8 +583,15 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
}
// Now flatten functions with the flatten attribute.
- for (Function *FlattenF : FlattenFunctions)
- Changed |= flattenFunction(*FlattenF, FAM, PSI, Advisor);
+ for (Function *FlattenF : FlattenFunctions) {
+ CGSCCInlinerFlattenPolicy Policy(FAM, PSI, Advisor);
+ OptimizationRemarkEmitter &ORE =
+ FAM.getResult<OptimizationRemarkEmitterAnalysis>(*FlattenF);
+ bool FlattenChanged = flattenFunction(*FlattenF, Policy, ORE);
+ if (FlattenChanged)
+ FAM.invalidate(*FlattenF, PreservedAnalyses::none());
+ Changed |= FlattenChanged;
+ }
// We must ensure that we only delete functions with comdats if every function
// in the comdat is going to be deleted.
>From ff0cfaf9eb2ad1de78c96f36250bec9df3f783c7 Mon Sep 17 00:00:00 2001
From: Grigory Pastukhov <gpastukhov at meta.com>
Date: Thu, 22 Jan 2026 18:26:04 -0800
Subject: [PATCH 08/16] Change flatten-sme test to check Inliner as well
---
llvm/test/Transforms/Inline/AArch64/flatten-sme.ll | 1 +
1 file changed, 1 insertion(+)
diff --git a/llvm/test/Transforms/Inline/AArch64/flatten-sme.ll b/llvm/test/Transforms/Inline/AArch64/flatten-sme.ll
index e85902650487e..dcb82e995b69f 100644
--- a/llvm/test/Transforms/Inline/AArch64/flatten-sme.ll
+++ b/llvm/test/Transforms/Inline/AArch64/flatten-sme.ll
@@ -1,4 +1,5 @@
; RUN: opt < %s -mtriple=aarch64-unknown-linux-gnu -mattr=+sme -S -passes=always-inline | FileCheck %s
+; RUN: opt < %s -mtriple=aarch64-unknown-linux-gnu -mattr=+sme -S -passes=inline | FileCheck %s
; Test that flatten attribute respects ABI restrictions for SME.
; Streaming callee cannot be inlined into non-streaming caller.
>From 24f5f56b892059843a52d51b9760d3ba191ef367 Mon Sep 17 00:00:00 2001
From: Grigory Pastukhov <gpastukhov at meta.com>
Date: Wed, 18 Feb 2026 15:07:32 -0800
Subject: [PATCH 09/16] Simplify flatten attribute handling by removing
template and policy classes
Remove flattenFunction template and CGSCCInlinerFlattenPolicy /
AlwaysInlinerFlattenPolicy classes. Handle flatten directly in the
CGSCC Inliner's main loop via getAdviceWithoutCost and with a local
worklist in AlwaysInliner.
---
.../llvm/Transforms/IPO/InliningUtils.h | 79 -------------
llvm/lib/Transforms/IPO/AlwaysInliner.cpp | 105 +++++++++++-------
llvm/lib/Transforms/IPO/Inliner.cpp | 78 +------------
3 files changed, 70 insertions(+), 192 deletions(-)
diff --git a/llvm/include/llvm/Transforms/IPO/InliningUtils.h b/llvm/include/llvm/Transforms/IPO/InliningUtils.h
index 520c9d8343c12..021ff00cdd202 100644
--- a/llvm/include/llvm/Transforms/IPO/InliningUtils.h
+++ b/llvm/include/llvm/Transforms/IPO/InliningUtils.h
@@ -14,9 +14,7 @@
#define LLVM_TRANSFORMS_IPO_INLININGUTILS_H
#include "llvm/ADT/SmallVector.h"
-#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/Instructions.h"
namespace llvm {
@@ -37,83 +35,6 @@ inline bool inlineHistoryIncludes(
return false;
}
-/// Flatten a function by inlining all calls recursively.
-///
-/// PolicyT must provide:
-/// - bool canInlineCall(Function &F, CallBase &CB): Check if call can be
-/// inlined into F
-/// - bool doInline(Function &F, CallBase &CB, Function &Callee): Perform
-/// the inline, return true on success
-/// - ArrayRef<CallBase *> getNewCallSites(): Get call sites from last inline
-///
-/// Returns true if any inlining was performed.
-template <typename PolicyT>
-bool flattenFunction(Function &F, PolicyT &Policy,
- OptimizationRemarkEmitter &ORE) {
- SmallVector<std::pair<CallBase *, int>, 16> Worklist;
- SmallVector<std::pair<Function *, int>, 16> InlineHistory;
-
- // Collect initial calls.
- for (BasicBlock &BB : F) {
- for (Instruction &I : BB) {
- if (auto *CB = dyn_cast<CallBase>(&I)) {
- if (CB->getAttributes().hasFnAttr(Attribute::NoInline))
- continue;
- Function *Callee = CB->getCalledFunction();
- if (!Callee || Callee->isDeclaration())
- continue;
- Worklist.push_back({CB, -1});
- }
- }
- }
-
- bool Changed = false;
- while (!Worklist.empty()) {
- auto Item = Worklist.pop_back_val();
- CallBase *CB = Item.first;
- int InlineHistoryID = Item.second;
- Function *Callee = CB->getCalledFunction();
- if (!Callee)
- continue;
-
- // Detect recursion.
- if (Callee == &F ||
- inlineHistoryIncludes(Callee, InlineHistoryID, InlineHistory)) {
- ORE.emit([&]() {
- return OptimizationRemarkMissed("inline", "NotInlined",
- CB->getDebugLoc(), CB->getParent())
- << "'" << ore::NV("Callee", Callee) << "' is not inlined into '"
- << ore::NV("Caller", CB->getCaller())
- << "': recursive call during flattening";
- });
- continue;
- }
-
- if (!Policy.canInlineCall(F, *CB))
- continue;
-
- if (!Policy.doInline(F, *CB, *Callee))
- continue;
-
- Changed = true;
-
- // Add new call sites from the inlined function to the worklist.
- ArrayRef<CallBase *> NewCallSites = Policy.getNewCallSites();
- if (!NewCallSites.empty()) {
- int NewHistoryID = InlineHistory.size();
- InlineHistory.push_back({Callee, InlineHistoryID});
- for (CallBase *NewCB : NewCallSites) {
- Function *NewCallee = NewCB->getCalledFunction();
- if (NewCallee && !NewCallee->isDeclaration() &&
- !NewCB->getAttributes().hasFnAttr(Attribute::NoInline))
- Worklist.push_back({NewCB, NewHistoryID});
- }
- }
- }
-
- return Changed;
-}
-
} // namespace llvm
#endif // LLVM_TRANSFORMS_IPO_INLININGUTILS_H
diff --git a/llvm/lib/Transforms/IPO/AlwaysInliner.cpp b/llvm/lib/Transforms/IPO/AlwaysInliner.cpp
index 52adf0853db91..d27eb9d777028 100644
--- a/llvm/lib/Transforms/IPO/AlwaysInliner.cpp
+++ b/llvm/lib/Transforms/IPO/AlwaysInliner.cpp
@@ -125,41 +125,6 @@ class InlinerHelper {
}
};
-/// Policy for flattenFunction template used by AlwaysInliner.
-class AlwaysInlinerFlattenPolicy {
- InlinerHelper &IH;
- function_ref<TargetTransformInfo &(Function &)> GetTTI;
-
-public:
- AlwaysInlinerFlattenPolicy(
- InlinerHelper &IH, function_ref<TargetTransformInfo &(Function &)> GetTTI)
- : IH(IH), GetTTI(GetTTI) {}
-
- bool canInlineCall(Function &F, CallBase &CB) {
- Function *Callee = CB.getCalledFunction();
- if (!Callee || !IH.canInline(*Callee))
- return false;
- // Use TTI to check for target-specific hard inlining restrictions.
- // This includes checks like:
- // - Cannot inline streaming callee into non-streaming caller
- // - Cannot inline functions that create new ZA/ZT0 state
- // For flatten, we respect the user's intent to inline as much as possible,
- // but these are fundamental ABI violations that cannot be worked around.
- TargetTransformInfo &TTI = GetTTI(*Callee);
- return TTI.areInlineCompatible(&F, Callee);
- }
-
- bool doInline(Function &F, CallBase &CB, Function &Callee) {
- if (IH.tryInline(CB, "flatten attribute")) {
- IH.addToMaybeInlinedFunctions(Callee);
- return true;
- }
- return false;
- }
-
- ArrayRef<CallBase *> getNewCallSites() { return IH.getInlinedCallSites(); }
-};
-
bool AlwaysInlineImpl(
Module &M, bool InsertLifetime, ProfileSummaryInfo &PSI,
FunctionAnalysisManager *FAM,
@@ -195,13 +160,73 @@ bool AlwaysInlineImpl(
}
}
- // Only call flattenFunction (which uses TTI) if there are functions to
- // flatten. This ensures TTI analysis is not requested at -O0 when there are
- // no flatten functions, avoiding any overhead.
+ // Flatten functions with the flatten attribute using a local worklist.
for (Function *F : NeedFlattening) {
- AlwaysInlinerFlattenPolicy Policy(IH, GetTTI);
+ SmallVector<std::pair<CallBase *, int>, 16> Worklist;
+ SmallVector<std::pair<Function *, int>, 16> InlineHistory;
OptimizationRemarkEmitter ORE(F);
- Changed |= flattenFunction(*F, Policy, ORE);
+
+ // Collect initial calls.
+ for (BasicBlock &BB : *F)
+ for (Instruction &I : BB)
+ if (auto *CB = dyn_cast<CallBase>(&I)) {
+ if (CB->getAttributes().hasFnAttr(Attribute::NoInline))
+ continue;
+ Function *Callee = CB->getCalledFunction();
+ if (!Callee || Callee->isDeclaration())
+ continue;
+ Worklist.push_back({CB, -1});
+ }
+
+ while (!Worklist.empty()) {
+ auto Item = Worklist.pop_back_val();
+ CallBase *CB = Item.first;
+ int InlineHistoryID = Item.second;
+ Function *Callee = CB->getCalledFunction();
+ if (!Callee)
+ continue;
+
+ // Detect recursion.
+ if (Callee == F ||
+ inlineHistoryIncludes(Callee, InlineHistoryID, InlineHistory)) {
+ ORE.emit([&]() {
+ return OptimizationRemarkMissed("inline", "NotInlined",
+ CB->getDebugLoc(), CB->getParent())
+ << "'" << ore::NV("Callee", Callee)
+ << "' is not inlined into '"
+ << ore::NV("Caller", CB->getCaller())
+ << "': recursive call during flattening";
+ });
+ continue;
+ }
+
+ if (!IH.canInline(*Callee))
+ continue;
+
+ // Check TTI for target-specific inlining restrictions (e.g., SME ABI).
+ TargetTransformInfo &TTI = GetTTI(*Callee);
+ if (!TTI.areInlineCompatible(F, Callee))
+ continue;
+
+ if (!IH.tryInline(*CB, "flatten attribute"))
+ continue;
+
+ IH.addToMaybeInlinedFunctions(*Callee);
+ Changed = true;
+
+ // Add new call sites from the inlined function to the worklist.
+ ArrayRef<CallBase *> NewCallSites = IH.getInlinedCallSites();
+ if (!NewCallSites.empty()) {
+ int NewHistoryID = InlineHistory.size();
+ InlineHistory.push_back({Callee, InlineHistoryID});
+ for (CallBase *NewCB : NewCallSites) {
+ Function *NewCallee = NewCB->getCalledFunction();
+ if (NewCallee && !NewCallee->isDeclaration() &&
+ !NewCB->getAttributes().hasFnAttr(Attribute::NoInline))
+ Worklist.push_back({NewCB, NewHistoryID});
+ }
+ }
+ }
}
Changed |= IH.postInlinerCleanup();
diff --git a/llvm/lib/Transforms/IPO/Inliner.cpp b/llvm/lib/Transforms/IPO/Inliner.cpp
index b0ce7eb457a84..b7d5ee367d33b 100644
--- a/llvm/lib/Transforms/IPO/Inliner.cpp
+++ b/llvm/lib/Transforms/IPO/Inliner.cpp
@@ -181,60 +181,6 @@ InlinerPass::getAdvisor(const ModuleAnalysisManagerCGSCCProxy::Result &MAM,
return *IAA->getAdvisor();
}
-/// Policy for flattenFunction template used by CGSCC Inliner.
-class CGSCCInlinerFlattenPolicy {
- FunctionAnalysisManager &FAM;
- InlineAdvisor &Advisor;
-
- std::function<AssumptionCache &(Function &)> GetAssumptionCache;
- InlineFunctionInfo IFI;
-
-public:
- CGSCCInlinerFlattenPolicy(FunctionAnalysisManager &FAM,
- ProfileSummaryInfo *PSI, InlineAdvisor &Advisor)
- : FAM(FAM), Advisor(Advisor),
- GetAssumptionCache([&FAM](Function &Fn) -> AssumptionCache & {
- return FAM.getResult<AssumptionAnalysis>(Fn);
- }),
- IFI(GetAssumptionCache, PSI) {}
-
- bool canInlineCall(Function &F, CallBase &CB) {
- // This is called both during initial collection and during worklist
- // processing. We only do cheap checks here - the advisor is called
- // in doInline to avoid creating InlineAdvice objects that might not
- // be properly recorded.
- Function *Callee = CB.getCalledFunction();
- if (!Callee || Callee->isDeclaration())
- return false;
- return isInlineViable(*Callee).isSuccess();
- }
-
- bool doInline(Function &F, CallBase &CB, Function &Callee) {
- // Use the advisor to check viability without performing cost analysis.
- // For flatten, we want to inline all viable calls regardless of cost.
- std::unique_ptr<InlineAdvice> Advice = Advisor.getAdviceWithoutCost(CB);
- if (!Advice)
- return false;
- if (!Advice->isInliningRecommended()) {
- Advice->recordUnattemptedInlining();
- return false;
- }
-
- IFI.reset();
- InlineResult IR =
- InlineFunction(CB, IFI, /*MergeAttributes=*/true,
- &FAM.getResult<AAManager>(F), /*InsertLifetime=*/true);
- if (!IR.isSuccess()) {
- Advice->recordUnsuccessfulInlining(IR);
- return false;
- }
- Advice->recordInlining();
- return true;
- }
-
- ArrayRef<CallBase *> getNewCallSites() { return IFI.InlinedCallSites; }
-};
-
void makeFunctionBodyUnreachable(Function &F) {
F.dropAllReferences();
for (BasicBlock &BB : make_early_inc_range(F))
@@ -288,15 +234,8 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
// incrementally making a single function grow in a super linear fashion.
SmallVector<std::pair<CallBase *, int>, 16> Calls;
- // Track functions with flatten attribute for processing at the end.
- SmallSetVector<Function *, 4> FlattenFunctions;
-
// Populate the initial list of calls in this SCC.
for (auto &N : InitialC) {
- Function &Fn = N.getFunction();
- if (Fn.hasFnAttribute(Attribute::Flatten))
- FlattenFunctions.insert(&Fn);
-
auto &ORE =
FAM.getResult<OptimizationRemarkEmitterAnalysis>(N.getFunction());
// We want to generally process call sites top-down in order for
@@ -405,8 +344,12 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
continue;
}
+ // For flatten callers, inline all viable calls without cost analysis.
+ bool IsFlatten = F.hasFnAttribute(Attribute::Flatten) &&
+ !CB->getAttributes().hasFnAttr(Attribute::NoInline);
std::unique_ptr<InlineAdvice> Advice =
- Advisor.getAdvice(*CB, OnlyMandatory);
+ IsFlatten ? Advisor.getAdviceWithoutCost(*CB)
+ : Advisor.getAdvice(*CB, OnlyMandatory);
// Check whether we want to inline this callsite.
if (!Advice)
@@ -582,17 +525,6 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
FAM.invalidate(F, PreservedAnalyses::none());
}
- // Now flatten functions with the flatten attribute.
- for (Function *FlattenF : FlattenFunctions) {
- CGSCCInlinerFlattenPolicy Policy(FAM, PSI, Advisor);
- OptimizationRemarkEmitter &ORE =
- FAM.getResult<OptimizationRemarkEmitterAnalysis>(*FlattenF);
- bool FlattenChanged = flattenFunction(*FlattenF, Policy, ORE);
- if (FlattenChanged)
- FAM.invalidate(*FlattenF, PreservedAnalyses::none());
- Changed |= FlattenChanged;
- }
-
// We must ensure that we only delete functions with comdats if every function
// in the comdat is going to be deleted.
if (!DeadFunctionsInComdats.empty()) {
>From acbb51739faf6411f9a1e97a34703c73008fa411 Mon Sep 17 00:00:00 2001
From: Grigory Pastukhov <gpastukhov at meta.com>
Date: Tue, 24 Feb 2026 14:58:03 -0800
Subject: [PATCH 10/16] Handle flatten in getAttributeBasedInliningDecision
instead of getAdviceWithoutCost
---
llvm/include/llvm/Analysis/InlineAdvisor.h | 7 -------
llvm/lib/Analysis/InlineAdvisor.cpp | 11 -----------
llvm/lib/Analysis/InlineCost.cpp | 8 ++++++++
llvm/lib/Transforms/IPO/Inliner.cpp | 6 +-----
4 files changed, 9 insertions(+), 23 deletions(-)
diff --git a/llvm/include/llvm/Analysis/InlineAdvisor.h b/llvm/include/llvm/Analysis/InlineAdvisor.h
index d45b2c6dcd079..50ba3c13da70f 100644
--- a/llvm/include/llvm/Analysis/InlineAdvisor.h
+++ b/llvm/include/llvm/Analysis/InlineAdvisor.h
@@ -180,13 +180,6 @@ class LLVM_ABI InlineAdvisor {
std::unique_ptr<InlineAdvice> getAdvice(CallBase &CB,
bool MandatoryOnly = false);
- /// Get an InlineAdvice for a call site without performing cost analysis.
- /// This is useful for cases like the flatten attribute where we want to
- /// inline all viable calls regardless of cost. The viability checks
- /// (TTI compatibility, noinline attributes, etc.) are still performed.
- /// Returns advice with isInliningRecommended() = true if the call is viable.
- std::unique_ptr<InlineAdvice> getAdviceWithoutCost(CallBase &CB);
-
/// This must be called when the Inliner pass is entered, to allow the
/// InlineAdvisor update internal state, as result of function passes run
/// between Inliner pass runs (for the same module).
diff --git a/llvm/lib/Analysis/InlineAdvisor.cpp b/llvm/lib/Analysis/InlineAdvisor.cpp
index abee47a484eb7..1fb2f7e780031 100644
--- a/llvm/lib/Analysis/InlineAdvisor.cpp
+++ b/llvm/lib/Analysis/InlineAdvisor.cpp
@@ -654,17 +654,6 @@ std::unique_ptr<InlineAdvice> InlineAdvisor::getAdvice(CallBase &CB,
return getMandatoryAdvice(CB, Advice);
}
-std::unique_ptr<InlineAdvice>
-InlineAdvisor::getAdviceWithoutCost(CallBase &CB) {
- // Check if the call is viable for inlining without performing cost analysis.
- // This is useful for cases like the flatten attribute where we want to
- // inline all viable calls regardless of cost.
- bool IsViable = CB.getCaller() != CB.getCalledFunction() &&
- MandatoryInliningKind::Never !=
- getMandatoryKind(CB, FAM, getCallerORE(CB));
- return getMandatoryAdvice(CB, IsViable);
-}
-
OptimizationRemarkEmitter &InlineAdvisor::getCallerORE(CallBase &CB) {
return FAM.getResult<OptimizationRemarkEmitterAnalysis>(*CB.getCaller());
}
diff --git a/llvm/lib/Analysis/InlineCost.cpp b/llvm/lib/Analysis/InlineCost.cpp
index e0054e3ed6ee2..f949ac94b99cc 100644
--- a/llvm/lib/Analysis/InlineCost.cpp
+++ b/llvm/lib/Analysis/InlineCost.cpp
@@ -3254,6 +3254,14 @@ std::optional<InlineResult> llvm::getAttributeBasedInliningDecision(
if (Callee->hasFnAttribute("loader-replaceable"))
return InlineResult::failure("loader replaceable function attribute");
+ // Flatten: inline all viable calls from flatten functions regardless of cost.
+ if (Caller->hasFnAttribute(Attribute::Flatten)) {
+ auto IsViable = isInlineViable(*Callee);
+ if (IsViable.isSuccess())
+ return InlineResult::success();
+ return InlineResult::failure(IsViable.getFailureReason());
+ }
+
return std::nullopt;
}
diff --git a/llvm/lib/Transforms/IPO/Inliner.cpp b/llvm/lib/Transforms/IPO/Inliner.cpp
index b7d5ee367d33b..af11c5415f795 100644
--- a/llvm/lib/Transforms/IPO/Inliner.cpp
+++ b/llvm/lib/Transforms/IPO/Inliner.cpp
@@ -344,12 +344,8 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
continue;
}
- // For flatten callers, inline all viable calls without cost analysis.
- bool IsFlatten = F.hasFnAttribute(Attribute::Flatten) &&
- !CB->getAttributes().hasFnAttr(Attribute::NoInline);
std::unique_ptr<InlineAdvice> Advice =
- IsFlatten ? Advisor.getAdviceWithoutCost(*CB)
- : Advisor.getAdvice(*CB, OnlyMandatory);
+ Advisor.getAdvice(*CB, OnlyMandatory);
// Check whether we want to inline this callsite.
if (!Advice)
>From 88ef480d74301d7d7db4db83e197ed7abd78c59e Mon Sep 17 00:00:00 2001
From: Grigory Pastukhov <gpastukhov at meta.com>
Date: Tue, 24 Feb 2026 15:23:47 -0800
Subject: [PATCH 11/16] Move inlineHistoryIncludes from InliningUtils.h to Cloning.h/InlineFunction.cpp
---
.../llvm/Transforms/IPO/InliningUtils.h | 40 -------------------
llvm/include/llvm/Transforms/Utils/Cloning.h | 8 ++++
llvm/lib/Transforms/IPO/AlwaysInliner.cpp | 1 -
llvm/lib/Transforms/IPO/Inliner.cpp | 1 -
llvm/lib/Transforms/Utils/InlineFunction.cpp | 13 ++++++
5 files changed, 21 insertions(+), 42 deletions(-)
delete mode 100644 llvm/include/llvm/Transforms/IPO/InliningUtils.h
diff --git a/llvm/include/llvm/Transforms/IPO/InliningUtils.h b/llvm/include/llvm/Transforms/IPO/InliningUtils.h
deleted file mode 100644
index 021ff00cdd202..0000000000000
--- a/llvm/include/llvm/Transforms/IPO/InliningUtils.h
+++ /dev/null
@@ -1,40 +0,0 @@
-//===- InliningUtils.h - Shared inlining utilities -------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines shared utilities used by the inliner passes.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_TRANSFORMS_IPO_INLININGUTILS_H
-#define LLVM_TRANSFORMS_IPO_INLININGUTILS_H
-
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/IR/Function.h"
-
-namespace llvm {
-
-/// Check if Function F appears in the inline history chain.
-/// InlineHistory is a vector of (Function, ParentHistoryID) pairs.
-/// Returns true if F was already inlined in the chain leading to
-/// InlineHistoryID.
-inline bool inlineHistoryIncludes(
- Function *F, int InlineHistoryID,
- const SmallVectorImpl<std::pair<Function *, int>> &InlineHistory) {
- while (InlineHistoryID != -1) {
- assert(unsigned(InlineHistoryID) < InlineHistory.size() &&
- "Invalid inline history ID");
- if (InlineHistory[InlineHistoryID].first == F)
- return true;
- InlineHistoryID = InlineHistory[InlineHistoryID].second;
- }
- return false;
-}
-
-} // namespace llvm
-
-#endif // LLVM_TRANSFORMS_IPO_INLININGUTILS_H
diff --git a/llvm/include/llvm/Transforms/Utils/Cloning.h b/llvm/include/llvm/Transforms/Utils/Cloning.h
index cfa06a5be79fd..b0df674e9b0d7 100644
--- a/llvm/include/llvm/Transforms/Utils/Cloning.h
+++ b/llvm/include/llvm/Transforms/Utils/Cloning.h
@@ -439,6 +439,14 @@ LLVM_ABI void cloneAndAdaptNoAliasScopes(ArrayRef<MDNode *> NoAliasDeclScopes,
LLVM_ABI void cloneAndAdaptNoAliasScopes(ArrayRef<MDNode *> NoAliasDeclScopes,
Instruction *IStart, Instruction *IEnd,
LLVMContext &Context, StringRef Ext);
+/// Check if Function F appears in the inline history chain.
+/// InlineHistory is a vector of (Function, ParentHistoryID) pairs.
+/// Returns true if F was already inlined in the chain leading to
+/// InlineHistoryID.
+LLVM_ABI bool inlineHistoryIncludes(
+ Function *F, int InlineHistoryID,
+ const SmallVectorImpl<std::pair<Function *, int>> &InlineHistory);
+
} // end namespace llvm
#endif // LLVM_TRANSFORMS_UTILS_CLONING_H
diff --git a/llvm/lib/Transforms/IPO/AlwaysInliner.cpp b/llvm/lib/Transforms/IPO/AlwaysInliner.cpp
index d27eb9d777028..c0c3cfee8af0d 100644
--- a/llvm/lib/Transforms/IPO/AlwaysInliner.cpp
+++ b/llvm/lib/Transforms/IPO/AlwaysInliner.cpp
@@ -22,7 +22,6 @@
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Module.h"
#include "llvm/InitializePasses.h"
-#include "llvm/Transforms/IPO/InliningUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
diff --git a/llvm/lib/Transforms/IPO/Inliner.cpp b/llvm/lib/Transforms/IPO/Inliner.cpp
index af11c5415f795..d795fbbbe4120 100644
--- a/llvm/lib/Transforms/IPO/Inliner.cpp
+++ b/llvm/lib/Transforms/IPO/Inliner.cpp
@@ -53,7 +53,6 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/IPO/InliningUtils.h"
#include "llvm/Transforms/Utils/CallPromotionUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/Local.h"
diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp
index 3230b306f17d1..a437e8d52f805 100644
--- a/llvm/lib/Transforms/Utils/InlineFunction.cpp
+++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp
@@ -3443,3 +3443,16 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
return Result;
}
+
+bool llvm::inlineHistoryIncludes(
+ Function *F, int InlineHistoryID,
+ const SmallVectorImpl<std::pair<Function *, int>> &InlineHistory) {
+ while (InlineHistoryID != -1) {
+ assert(unsigned(InlineHistoryID) < InlineHistory.size() &&
+ "Invalid inline history ID");
+ if (InlineHistory[InlineHistoryID].first == F)
+ return true;
+ InlineHistoryID = InlineHistory[InlineHistoryID].second;
+ }
+ return false;
+}
>From 6e2f08c96e59a1de8c73a718db08b7c41803db83 Mon Sep 17 00:00:00 2001
From: Grigory Pastukhov <gpastukhov at meta.com>
Date: Wed, 25 Feb 2026 09:08:44 -0800
Subject: [PATCH 12/16] Remove InlinerHelper class, use local TryInline lambda instead
---
llvm/lib/Transforms/IPO/AlwaysInliner.cpp | 142 ++++++++--------------
1 file changed, 51 insertions(+), 91 deletions(-)
diff --git a/llvm/lib/Transforms/IPO/AlwaysInliner.cpp b/llvm/lib/Transforms/IPO/AlwaysInliner.cpp
index c0c3cfee8af0d..629befa66cfbc 100644
--- a/llvm/lib/Transforms/IPO/AlwaysInliner.cpp
+++ b/llvm/lib/Transforms/IPO/AlwaysInliner.cpp
@@ -31,38 +31,26 @@ using namespace llvm;
namespace {
-class InlinerHelper {
- Module &M;
- FunctionAnalysisManager *FAM;
- function_ref<AssumptionCache &(Function &)> GetAssumptionCache;
- function_ref<AAResults &(Function &)> GetAAR;
- bool InsertLifetime;
-
- SmallSetVector<Function *, 16> MaybeInlinedFunctions;
- InlineFunctionInfo IFI;
-
-public:
- InlinerHelper(Module &M, ProfileSummaryInfo &PSI,
- FunctionAnalysisManager *FAM,
- function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
- function_ref<AAResults &(Function &)> GetAAR,
- bool InsertLifetime)
- : M(M), FAM(FAM), GetAssumptionCache(GetAssumptionCache), GetAAR(GetAAR),
- InsertLifetime(InsertLifetime), IFI(GetAssumptionCache, &PSI) {}
-
- bool canInline(Function &F) {
- return !F.isPresplitCoroutine() && !F.isDeclaration() &&
- isInlineViable(F).isSuccess();
- }
+bool AlwaysInlineImpl(
+ Module &M, bool InsertLifetime, ProfileSummaryInfo &PSI,
+ FunctionAnalysisManager *FAM,
+ function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
+ function_ref<AAResults &(Function &)> GetAAR,
+ function_ref<TargetTransformInfo &(Function &)> GetTTI) {
+ SmallSetVector<CallBase *, 16> Calls;
+ bool Changed = false;
+ SmallVector<Function *, 16> InlinedComdatFunctions;
+ SmallVector<Function *, 4> NeedFlattening;
- bool tryInline(CallBase &CB, StringRef InlignReason) {
- IFI.reset();
- Function &Callee = *CB.getCalledFunction();
+ auto TryInline = [&](CallBase &CB, Function &Callee,
+ OptimizationRemarkEmitter &ORE, const char *InlineReason,
+ SmallVectorImpl<CallBase *> *NewCallSites =
+ nullptr) -> bool {
Function *Caller = CB.getCaller();
- OptimizationRemarkEmitter ORE(Caller);
DebugLoc DLoc = CB.getDebugLoc();
BasicBlock *Block = CB.getParent();
+ InlineFunctionInfo IFI(GetAssumptionCache, &PSI);
InlineResult Res = InlineFunction(CB, IFI, /*MergeAttributes=*/true,
&GetAAR(Callee), InsertLifetime);
if (!Res.isSuccess()) {
@@ -76,74 +64,24 @@ class InlinerHelper {
}
emitInlinedIntoBasedOnCost(ORE, DLoc, Block, Callee, *Caller,
- InlineCost::getAlways(InlignReason.data()),
+ InlineCost::getAlways(InlineReason),
/*ForProfileContext=*/false, DEBUG_TYPE);
if (FAM)
FAM->invalidate(*Caller, PreservedAnalyses::none());
+ if (NewCallSites)
+ *NewCallSites = std::move(IFI.InlinedCallSites);
return true;
- }
-
- ArrayRef<CallBase *> getInlinedCallSites() const {
- return IFI.InlinedCallSites;
- }
-
- void addToMaybeInlinedFunctions(Function &F) {
- MaybeInlinedFunctions.insert(&F);
- }
-
- bool postInlinerCleanup() {
- SmallVector<Function *, 16> InlinedComdatFunctions;
- bool Changed = false;
- for (Function *F : MaybeInlinedFunctions) {
- F->removeDeadConstantUsers();
- if (F->hasFnAttribute(Attribute::AlwaysInline) &&
- F->isDefTriviallyDead()) {
- if (F->hasComdat()) {
- InlinedComdatFunctions.push_back(F);
- } else {
- if (FAM)
- FAM->clear(*F, F->getName());
- M.getFunctionList().erase(F);
- Changed = true;
- }
- }
- }
- if (!InlinedComdatFunctions.empty()) {
- // Now we just have the comdat functions. Filter out the ones whose
- // comdats are not actually dead.
- filterDeadComdatFunctions(InlinedComdatFunctions);
- // The remaining functions are actually dead.
- for (Function *F : InlinedComdatFunctions) {
- if (FAM)
- FAM->clear(*F, F->getName());
- M.getFunctionList().erase(F);
- Changed = true;
- }
- }
- return Changed;
- }
-};
-
-bool AlwaysInlineImpl(
- Module &M, bool InsertLifetime, ProfileSummaryInfo &PSI,
- FunctionAnalysisManager *FAM,
- function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
- function_ref<AAResults &(Function &)> GetAAR,
- function_ref<TargetTransformInfo &(Function &)> GetTTI) {
- SmallSetVector<CallBase *, 16> Calls;
- InlinerHelper IH(M, PSI, FAM, GetAssumptionCache, GetAAR, InsertLifetime);
- SmallVector<Function *, 4> NeedFlattening;
-
- bool Changed = false;
- SmallVector<Function *, 16> InlinedComdatFunctions;
+ };
for (Function &F : make_early_inc_range(M)) {
if (F.hasFnAttribute(Attribute::Flatten))
NeedFlattening.push_back(&F);
- if (!IH.canInline(F))
+ if (F.isPresplitCoroutine())
+ continue;
+
+ if (F.isDeclaration() || !isInlineViable(F).isSuccess())
continue;
- IH.addToMaybeInlinedFunctions(F);
Calls.clear();
@@ -155,7 +93,20 @@ bool AlwaysInlineImpl(
Calls.insert(CB);
for (CallBase *CB : Calls) {
- Changed |= IH.tryInline(*CB, "always inline attribute");
+ OptimizationRemarkEmitter ORE(CB->getCaller());
+ Changed |= TryInline(*CB, F, ORE, "always inline attribute");
+ }
+
+ F.removeDeadConstantUsers();
+ if (F.hasFnAttribute(Attribute::AlwaysInline) && F.isDefTriviallyDead()) {
+ if (F.hasComdat()) {
+ InlinedComdatFunctions.push_back(&F);
+ } else {
+ if (FAM)
+ FAM->clear(F, F.getName());
+ M.getFunctionList().erase(F);
+ Changed = true;
+ }
}
}
@@ -163,6 +114,7 @@ bool AlwaysInlineImpl(
for (Function *F : NeedFlattening) {
SmallVector<std::pair<CallBase *, int>, 16> Worklist;
SmallVector<std::pair<Function *, int>, 16> InlineHistory;
+ SmallVector<CallBase *> NewCallSites;
OptimizationRemarkEmitter ORE(F);
// Collect initial calls.
@@ -199,7 +151,8 @@ bool AlwaysInlineImpl(
continue;
}
- if (!IH.canInline(*Callee))
+ if (Callee->isPresplitCoroutine() || Callee->isDeclaration() ||
+ !isInlineViable(*Callee).isSuccess())
continue;
// Check TTI for target-specific inlining restrictions (e.g., SME ABI).
@@ -207,14 +160,12 @@ bool AlwaysInlineImpl(
if (!TTI.areInlineCompatible(F, Callee))
continue;
- if (!IH.tryInline(*CB, "flatten attribute"))
+ if (!TryInline(*CB, *Callee, ORE, "flatten attribute", &NewCallSites))
continue;
- IH.addToMaybeInlinedFunctions(*Callee);
Changed = true;
// Add new call sites from the inlined function to the worklist.
- ArrayRef<CallBase *> NewCallSites = IH.getInlinedCallSites();
if (!NewCallSites.empty()) {
int NewHistoryID = InlineHistory.size();
InlineHistory.push_back({Callee, InlineHistoryID});
@@ -228,7 +179,16 @@ bool AlwaysInlineImpl(
}
}
- Changed |= IH.postInlinerCleanup();
+ if (!InlinedComdatFunctions.empty()) {
+ filterDeadComdatFunctions(InlinedComdatFunctions);
+ for (Function *F : InlinedComdatFunctions) {
+ if (FAM)
+ FAM->clear(*F, F->getName());
+ M.getFunctionList().erase(F);
+ Changed = true;
+ }
+ }
+
return Changed;
}
>From 50122c434ab75c6b90e820f10240d6ed7c824ad5 Mon Sep 17 00:00:00 2001
From: Grigory Pastukhov <gpastukhov at meta.com>
Date: Wed, 25 Feb 2026 09:34:44 -0800
Subject: [PATCH 13/16] Remove duplicate static inlineHistoryIncludes from ModuleInliner.cpp
---
llvm/lib/Transforms/IPO/ModuleInliner.cpp | 15 ---------------
1 file changed, 15 deletions(-)
diff --git a/llvm/lib/Transforms/IPO/ModuleInliner.cpp b/llvm/lib/Transforms/IPO/ModuleInliner.cpp
index 3e0bb6d1432b2..31c26c9fb8c06 100644
--- a/llvm/lib/Transforms/IPO/ModuleInliner.cpp
+++ b/llvm/lib/Transforms/IPO/ModuleInliner.cpp
@@ -56,21 +56,6 @@ static cl::opt<bool> CtxProfPromoteAlwaysInline(
"promotion for that target. If multiple targets for an indirect "
"call site fit this description, they are all promoted."));
-/// Return true if the specified inline history ID
-/// indicates an inline history that includes the specified function.
-static bool inlineHistoryIncludes(
- Function *F, int InlineHistoryID,
- const SmallVectorImpl<std::pair<Function *, int>> &InlineHistory) {
- while (InlineHistoryID != -1) {
- assert(unsigned(InlineHistoryID) < InlineHistory.size() &&
- "Invalid inline history ID");
- if (InlineHistory[InlineHistoryID].first == F)
- return true;
- InlineHistoryID = InlineHistory[InlineHistoryID].second;
- }
- return false;
-}
-
InlineAdvisor &ModuleInlinerPass::getAdvisor(const ModuleAnalysisManager &MAM,
FunctionAnalysisManager &FAM,
Module &M) {
>From bf43762c3e62d41ded4546523e47ccb14b781438 Mon Sep 17 00:00:00 2001
From: Grigory Pastukhov <gpastukhov at meta.com>
Date: Mon, 2 Mar 2026 11:20:46 -0800
Subject: [PATCH 14/16] Address review comments: add braces and remove redundant check
Add braces to multi-line for loops per LLVM style and remove
redundant Callee->isDeclaration() check since both worklist entry
points already filter out declarations.
---
llvm/lib/Transforms/IPO/AlwaysInliner.cpp | 9 +++++----
1 file changed, 5 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/Transforms/IPO/AlwaysInliner.cpp b/llvm/lib/Transforms/IPO/AlwaysInliner.cpp
index 629befa66cfbc..5ea282a6160ec 100644
--- a/llvm/lib/Transforms/IPO/AlwaysInliner.cpp
+++ b/llvm/lib/Transforms/IPO/AlwaysInliner.cpp
@@ -118,8 +118,8 @@ bool AlwaysInlineImpl(
OptimizationRemarkEmitter ORE(F);
// Collect initial calls.
- for (BasicBlock &BB : *F)
- for (Instruction &I : BB)
+ for (BasicBlock &BB : *F) {
+ for (Instruction &I : BB) {
if (auto *CB = dyn_cast<CallBase>(&I)) {
if (CB->getAttributes().hasFnAttr(Attribute::NoInline))
continue;
@@ -128,6 +128,8 @@ bool AlwaysInlineImpl(
continue;
Worklist.push_back({CB, -1});
}
+ }
+ }
while (!Worklist.empty()) {
auto Item = Worklist.pop_back_val();
@@ -151,8 +153,7 @@ bool AlwaysInlineImpl(
continue;
}
- if (Callee->isPresplitCoroutine() || Callee->isDeclaration() ||
- !isInlineViable(*Callee).isSuccess())
+ if (Callee->isPresplitCoroutine() || !isInlineViable(*Callee).isSuccess())
continue;
// Check TTI for target-specific inlining restrictions (e.g., SME ABI).
>From f1fedef981dbb8f5da8b3c231deac5ab34ba8325 Mon Sep 17 00:00:00 2001
From: Grigory Pastukhov <gpastukhov at meta.com>
Date: Mon, 2 Mar 2026 14:27:17 -0800
Subject: [PATCH 15/16] Address review: structured bindings, restore comments, use ArrayRef
Use structured bindings for worklist pop, restore accidentally
dropped comdat comments, and change inlineHistoryIncludes to take
ArrayRef instead of const SmallVectorImpl&.
---
llvm/include/llvm/Transforms/Utils/Cloning.h | 7 ++++---
llvm/lib/Transforms/IPO/AlwaysInliner.cpp | 7 ++++---
llvm/lib/Transforms/Utils/InlineFunction.cpp | 2 +-
3 files changed, 9 insertions(+), 7 deletions(-)
diff --git a/llvm/include/llvm/Transforms/Utils/Cloning.h b/llvm/include/llvm/Transforms/Utils/Cloning.h
index b0df674e9b0d7..434569fac8a71 100644
--- a/llvm/include/llvm/Transforms/Utils/Cloning.h
+++ b/llvm/include/llvm/Transforms/Utils/Cloning.h
@@ -17,6 +17,7 @@
#ifndef LLVM_TRANSFORMS_UTILS_CLONING_H
#define LLVM_TRANSFORMS_UTILS_CLONING_H
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/AssumptionCache.h"
@@ -443,9 +444,9 @@ LLVM_ABI void cloneAndAdaptNoAliasScopes(ArrayRef<MDNode *> NoAliasDeclScopes,
/// InlineHistory is a vector of (Function, ParentHistoryID) pairs.
/// Returns true if F was already inlined in the chain leading to
/// InlineHistoryID.
-LLVM_ABI bool inlineHistoryIncludes(
- Function *F, int InlineHistoryID,
- const SmallVectorImpl<std::pair<Function *, int>> &InlineHistory);
+LLVM_ABI bool
+inlineHistoryIncludes(Function *F, int InlineHistoryID,
+ ArrayRef<std::pair<Function *, int>> InlineHistory);
} // end namespace llvm
diff --git a/llvm/lib/Transforms/IPO/AlwaysInliner.cpp b/llvm/lib/Transforms/IPO/AlwaysInliner.cpp
index 5ea282a6160ec..bda1d0b9573c0 100644
--- a/llvm/lib/Transforms/IPO/AlwaysInliner.cpp
+++ b/llvm/lib/Transforms/IPO/AlwaysInliner.cpp
@@ -132,9 +132,7 @@ bool AlwaysInlineImpl(
}
while (!Worklist.empty()) {
- auto Item = Worklist.pop_back_val();
- CallBase *CB = Item.first;
- int InlineHistoryID = Item.second;
+ auto [CB, InlineHistoryID] = Worklist.pop_back_val();
Function *Callee = CB->getCalledFunction();
if (!Callee)
continue;
@@ -181,7 +179,10 @@ bool AlwaysInlineImpl(
}
if (!InlinedComdatFunctions.empty()) {
+ // Now we just have the comdat functions. Filter out the ones whose
+ // comdats are not actually dead.
filterDeadComdatFunctions(InlinedComdatFunctions);
+ // The remaining functions are actually dead.
for (Function *F : InlinedComdatFunctions) {
if (FAM)
FAM->clear(*F, F->getName());
diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp
index a437e8d52f805..f3415cf4d5636 100644
--- a/llvm/lib/Transforms/Utils/InlineFunction.cpp
+++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp
@@ -3446,7 +3446,7 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
bool llvm::inlineHistoryIncludes(
Function *F, int InlineHistoryID,
- const SmallVectorImpl<std::pair<Function *, int>> &InlineHistory) {
+ ArrayRef<std::pair<Function *, int>> InlineHistory) {
while (InlineHistoryID != -1) {
assert(unsigned(InlineHistoryID) < InlineHistory.size() &&
"Invalid inline history ID");
>From d18d1bb67a4137d1defcff9e345474453df6f043 Mon Sep 17 00:00:00 2001
From: Grigory Pastukhov <gpastukhov at meta.com>
Date: Mon, 2 Mar 2026 14:49:58 -0800
Subject: [PATCH 16/16] Regenerate flatten-sme.ll checks with update_test_checks.py
---
.../Transforms/Inline/AArch64/flatten-sme.ll | 65 +++++++++++++++----
1 file changed, 51 insertions(+), 14 deletions(-)
diff --git a/llvm/test/Transforms/Inline/AArch64/flatten-sme.ll b/llvm/test/Transforms/Inline/AArch64/flatten-sme.ll
index dcb82e995b69f..e3d302f5eca79 100644
--- a/llvm/test/Transforms/Inline/AArch64/flatten-sme.ll
+++ b/llvm/test/Transforms/Inline/AArch64/flatten-sme.ll
@@ -1,54 +1,91 @@
-; RUN: opt < %s -mtriple=aarch64-unknown-linux-gnu -mattr=+sme -S -passes=always-inline | FileCheck %s
-; RUN: opt < %s -mtriple=aarch64-unknown-linux-gnu -mattr=+sme -S -passes=inline | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -mtriple=aarch64-unknown-linux-gnu -mattr=+sme -S -passes=always-inline | FileCheck %s --check-prefixes=CHECK,ALWAYS
+; RUN: opt < %s -mtriple=aarch64-unknown-linux-gnu -mattr=+sme -S -passes=inline | FileCheck %s --check-prefixes=CHECK,INLINE
; Test that flatten attribute respects ABI restrictions for SME.
; Streaming callee cannot be inlined into non-streaming caller.
; new_za callee cannot be inlined at all.
define internal i32 @streaming_callee() "aarch64_pstate_sm_enabled" {
+; CHECK-LABEL: define internal i32 @streaming_callee(
+; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: ret i32 42
+;
ret i32 42
}
define internal i32 @new_za_callee() "aarch64_new_za" {
+; CHECK-LABEL: define internal i32 @new_za_callee(
+; CHECK-SAME: ) #[[ATTR1:[0-9]+]] {
+; CHECK-NEXT: ret i32 100
+;
ret i32 100
}
define internal i32 @normal_callee() {
+; ALWAYS-LABEL: define internal i32 @normal_callee(
+; ALWAYS-SAME: ) #[[ATTR2:[0-9]+]] {
+; ALWAYS-NEXT: ret i32 50
+;
ret i32 50
}
; Streaming callee -> non-streaming caller: should NOT be inlined (ABI violation).
define i32 @test_streaming_not_inlined() flatten {
-; CHECK-LABEL: @test_streaming_not_inlined(
-; CHECK: call i32 @streaming_callee()
-; CHECK: ret i32
+; ALWAYS-LABEL: define i32 @test_streaming_not_inlined(
+; ALWAYS-SAME: ) #[[ATTR3:[0-9]+]] {
+; ALWAYS-NEXT: [[R:%.*]] = call i32 @streaming_callee()
+; ALWAYS-NEXT: ret i32 [[R]]
+;
+; INLINE-LABEL: define i32 @test_streaming_not_inlined(
+; INLINE-SAME: ) #[[ATTR2:[0-9]+]] {
+; INLINE-NEXT: [[R:%.*]] = call i32 @streaming_callee()
+; INLINE-NEXT: ret i32 [[R]]
+;
%r = call i32 @streaming_callee()
ret i32 %r
}
; new_za callee: should NOT be inlined (ABI violation - callee allocates new ZA).
define i32 @test_new_za_not_inlined() flatten {
-; CHECK-LABEL: @test_new_za_not_inlined(
-; CHECK: call i32 @new_za_callee()
-; CHECK: ret i32
+; ALWAYS-LABEL: define i32 @test_new_za_not_inlined(
+; ALWAYS-SAME: ) #[[ATTR3]] {
+; ALWAYS-NEXT: [[R:%.*]] = call i32 @new_za_callee()
+; ALWAYS-NEXT: ret i32 [[R]]
+;
+; INLINE-LABEL: define i32 @test_new_za_not_inlined(
+; INLINE-SAME: ) #[[ATTR2]] {
+; INLINE-NEXT: [[R:%.*]] = call i32 @new_za_callee()
+; INLINE-NEXT: ret i32 [[R]]
+;
%r = call i32 @new_za_callee()
ret i32 %r
}
; Streaming caller -> streaming callee: should be inlined (compatible).
define i32 @test_streaming_to_streaming() flatten "aarch64_pstate_sm_enabled" {
-; CHECK-LABEL: @test_streaming_to_streaming(
-; CHECK-NOT: call i32 @streaming_callee
-; CHECK: ret i32 42
+; ALWAYS-LABEL: define i32 @test_streaming_to_streaming(
+; ALWAYS-SAME: ) #[[ATTR4:[0-9]+]] {
+; ALWAYS-NEXT: ret i32 42
+;
+; INLINE-LABEL: define i32 @test_streaming_to_streaming(
+; INLINE-SAME: ) #[[ATTR3:[0-9]+]] {
+; INLINE-NEXT: ret i32 42
+;
%r = call i32 @streaming_callee()
ret i32 %r
}
; Non-streaming caller -> non-streaming callee: should be inlined.
define i32 @test_normal_inlined() flatten {
-; CHECK-LABEL: @test_normal_inlined(
-; CHECK-NOT: call i32 @normal_callee
-; CHECK: ret i32 50
+; ALWAYS-LABEL: define i32 @test_normal_inlined(
+; ALWAYS-SAME: ) #[[ATTR3]] {
+; ALWAYS-NEXT: ret i32 50
+;
+; INLINE-LABEL: define i32 @test_normal_inlined(
+; INLINE-SAME: ) #[[ATTR2]] {
+; INLINE-NEXT: ret i32 50
+;
%r = call i32 @normal_callee()
ret i32 %r
}
More information about the llvm-commits mailing list