[llvm] [LLVM] Add flatten function attribute to LLVM IR and implement recursive inlining in AlwaysInliner (PR #174899)

Grigory Pastukhov via llvm-commits llvm-commits at lists.llvm.org
Mon Mar 2 14:53:08 PST 2026


https://github.com/grigorypas updated https://github.com/llvm/llvm-project/pull/174899

>From 14636cf3afb84c32b4ea7b71d81ca21757ea55b5 Mon Sep 17 00:00:00 2001
From: Grigory Pastukhov <gpastukhov at meta.com>
Date: Wed, 7 Jan 2026 15:11:05 -0800
Subject: [PATCH 01/16] Add flatten attribute to LLVM

---
 llvm/include/llvm/Bitcode/LLVMBitCodes.h    | 1 +
 llvm/include/llvm/IR/Attributes.td          | 3 +++
 llvm/lib/Bitcode/Reader/BitcodeReader.cpp   | 2 ++
 llvm/lib/Bitcode/Writer/BitcodeWriter.cpp   | 2 ++
 llvm/lib/Transforms/Utils/CodeExtractor.cpp | 1 +
 llvm/test/Bitcode/attributes.ll             | 6 ++++++
 6 files changed, 15 insertions(+)

diff --git a/llvm/include/llvm/Bitcode/LLVMBitCodes.h b/llvm/include/llvm/Bitcode/LLVMBitCodes.h
index bcf596a0d79b2..3b5e8cce05b72 100644
--- a/llvm/include/llvm/Bitcode/LLVMBitCodes.h
+++ b/llvm/include/llvm/Bitcode/LLVMBitCodes.h
@@ -809,6 +809,7 @@ enum AttributeKindCodes {
   ATTR_KIND_NO_CREATE_UNDEF_OR_POISON = 105,
   ATTR_KIND_DENORMAL_FPENV = 106,
   ATTR_KIND_NOOUTLINE = 107,
+  ATTR_KIND_FLATTEN = 108,
 };
 
 enum ComdatSelectionKindCodes {
diff --git a/llvm/include/llvm/IR/Attributes.td b/llvm/include/llvm/IR/Attributes.td
index 941251003f5ba..cca5d270a1f91 100644
--- a/llvm/include/llvm/IR/Attributes.td
+++ b/llvm/include/llvm/IR/Attributes.td
@@ -134,6 +134,9 @@ def DisableSanitizerInstrumentation: EnumAttr<"disable_sanitizer_instrumentation
 /// Provide pointer element type to intrinsic.
 def ElementType : TypeAttr<"elementtype", IntersectPreserve, [ParamAttr]>;
 
+/// Flatten function by recursively inlining all calls.
+def Flatten : EnumAttr<"flatten", IntersectPreserve, [FnAttr]>;
+
 /// Whether to keep return instructions, or replace with a jump to an external
 /// symbol.
 def FnRetThunkExtern : EnumAttr<"fn_ret_thunk_extern", IntersectPreserve, [FnAttr]>;
diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
index 2c8612a9d7822..60ebe1c1f5cd2 100644
--- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -2094,6 +2094,8 @@ static Attribute::AttrKind getAttrFromCode(uint64_t Code) {
     return Attribute::ElementType;
   case bitc::ATTR_KIND_FNRETTHUNK_EXTERN:
     return Attribute::FnRetThunkExtern;
+  case bitc::ATTR_KIND_FLATTEN:
+    return Attribute::Flatten;
   case bitc::ATTR_KIND_INLINE_HINT:
     return Attribute::InlineHint;
   case bitc::ATTR_KIND_IN_REG:
diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
index 6b332554bddd4..bb58e21fa15d8 100644
--- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -774,6 +774,8 @@ static uint64_t getAttrKindEncoding(Attribute::AttrKind Kind) {
     return bitc::ATTR_KIND_DISABLE_SANITIZER_INSTRUMENTATION;
   case Attribute::FnRetThunkExtern:
     return bitc::ATTR_KIND_FNRETTHUNK_EXTERN;
+  case Attribute::Flatten:
+    return bitc::ATTR_KIND_FLATTEN;
   case Attribute::Hot:
     return bitc::ATTR_KIND_HOT;
   case Attribute::ElementType:
diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp
index b298a8ae144d8..44c33d4f5a769 100644
--- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp
@@ -938,6 +938,7 @@ Function *CodeExtractor::constructFunctionDeclaration(
       case Attribute::AlwaysInline:
       case Attribute::Cold:
       case Attribute::DisableSanitizerInstrumentation:
+      case Attribute::Flatten:
       case Attribute::FnRetThunkExtern:
       case Attribute::Hot:
       case Attribute::HybridPatchable:
diff --git a/llvm/test/Bitcode/attributes.ll b/llvm/test/Bitcode/attributes.ll
index 4f234d7a40079..21712fae7eecd 100644
--- a/llvm/test/Bitcode/attributes.ll
+++ b/llvm/test/Bitcode/attributes.ll
@@ -526,6 +526,11 @@ define void @f_no_create_undef_or_poison() nocreateundeforpoison {
         ret void;
 }
 
+; CHECK: define void @f_flatten() [[FLATTEN:#[0-9]+]]
+define void @f_flatten() flatten {
+        ret void;
+}
+
 ; CHECK: define void @f87() [[FNRETTHUNKEXTERN:#[0-9]+]]
 define void @f87() fn_ret_thunk_extern { ret void }
 
@@ -644,6 +649,7 @@ define void @dead_on_return_sized(ptr dead_on_return(4) %p) {
 ; CHECK: attributes #54 = { sanitize_realtime_blocking }
 ; CHECK: attributes #55 = { sanitize_alloc_token }
 ; CHECK: attributes #56 = { nocreateundeforpoison }
+; CHECK: attributes [[FLATTEN]] = { flatten }
 ; CHECK: attributes [[FNRETTHUNKEXTERN]] = { fn_ret_thunk_extern }
 ; CHECK: attributes [[SKIPPROFILE]] = { skipprofile }
 ; CHECK: attributes [[OPTDEBUG]] = { optdebug }

>From cd91e818264f818e6260a55660d01b59c5c2c63b Mon Sep 17 00:00:00 2001
From: Grigory Pastukhov <gpastukhov at meta.com>
Date: Wed, 7 Jan 2026 17:35:33 -0800
Subject: [PATCH 02/16] Implement flattening logic in AlwaysInliner pass

---
 llvm/lib/Transforms/IPO/AlwaysInliner.cpp | 233 ++++++++++++++++------
 llvm/test/Transforms/Inline/flatten.ll    | 142 +++++++++++++
 2 files changed, 319 insertions(+), 56 deletions(-)
 create mode 100644 llvm/test/Transforms/Inline/flatten.ll

diff --git a/llvm/lib/Transforms/IPO/AlwaysInliner.cpp b/llvm/lib/Transforms/IPO/AlwaysInliner.cpp
index 4fba4475767f6..d5656e5e3db74 100644
--- a/llvm/lib/Transforms/IPO/AlwaysInliner.cpp
+++ b/llvm/lib/Transforms/IPO/AlwaysInliner.cpp
@@ -30,21 +30,191 @@ using namespace llvm;
 
 namespace {
 
+class InlinerHelper {
+  Module &M;
+  FunctionAnalysisManager *FAM;
+  function_ref<AssumptionCache &(Function &)> GetAssumptionCache;
+  function_ref<AAResults &(Function &)> GetAAR;
+  bool InsertLifetime;
+
+  SmallSetVector<Function *, 16> MaybeInlinedFunctions;
+  InlineFunctionInfo IFI;
+
+public:
+  InlinerHelper(Module &M, ProfileSummaryInfo &PSI,
+                FunctionAnalysisManager *FAM,
+                function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
+                function_ref<AAResults &(Function &)> GetAAR,
+                bool InsertLifetime)
+      : M(M), FAM(FAM), GetAssumptionCache(GetAssumptionCache), GetAAR(GetAAR),
+        InsertLifetime(InsertLifetime), IFI(GetAssumptionCache, &PSI) {}
+
+  bool canInline(Function &F) {
+    return !F.isPresplitCoroutine() && !F.isDeclaration() &&
+           isInlineViable(F).isSuccess();
+  }
+
+  bool tryInline(CallBase &CB, StringRef InlignReason) {
+    IFI.reset();
+    Function &Callee = *CB.getCalledFunction();
+    Function *Caller = CB.getCaller();
+    OptimizationRemarkEmitter ORE(Caller);
+    DebugLoc DLoc = CB.getDebugLoc();
+    BasicBlock *Block = CB.getParent();
+
+    InlineResult Res = InlineFunction(CB, IFI, /*MergeAttributes=*/true,
+                                      &GetAAR(Callee), InsertLifetime);
+    if (!Res.isSuccess()) {
+      ORE.emit([&]() {
+        return OptimizationRemarkMissed(DEBUG_TYPE, "NotInlined", DLoc, Block)
+               << "'" << ore::NV("Callee", &Callee) << "' is not inlined into '"
+               << ore::NV("Caller", Caller)
+               << "': " << ore::NV("Reason", Res.getFailureReason());
+      });
+      return false;
+    }
+
+    emitInlinedIntoBasedOnCost(ORE, DLoc, Block, Callee, *Caller,
+                               InlineCost::getAlways(InlignReason.data()),
+                               /*ForProfileContext=*/false, DEBUG_TYPE);
+    if (FAM)
+      FAM->invalidate(*Caller, PreservedAnalyses::none());
+    return true;
+  }
+
+  void addNewCallsToWorklist(
+      SmallVectorImpl<std::pair<CallBase *, int>> &Worklist,
+      int InlineHistoryID,
+      SmallVectorImpl<std::pair<Function *, int>> &InlineHistory,
+      Function *InlinedCallee) {
+    if (IFI.InlinedCallSites.empty())
+      return;
+    int NewHistoryID = InlineHistory.size();
+    InlineHistory.push_back({InlinedCallee, InlineHistoryID});
+    for (CallBase *CB : IFI.InlinedCallSites)
+      Worklist.push_back({CB, NewHistoryID});
+  }
+
+  void addToMaybeInlinedFunctions(Function &F) {
+    MaybeInlinedFunctions.insert(&F);
+  }
+
+  bool postInlinerCleanup() {
+    SmallVector<Function *, 16> InlinedComdatFunctions;
+    bool Changed = false;
+    for (Function *F : MaybeInlinedFunctions) {
+      F->removeDeadConstantUsers();
+      if (F->hasFnAttribute(Attribute::AlwaysInline) &&
+          F->isDefTriviallyDead()) {
+        if (F->hasComdat()) {
+          InlinedComdatFunctions.push_back(F);
+        } else {
+          if (FAM)
+            FAM->clear(*F, F->getName());
+          M.getFunctionList().erase(F);
+          Changed = true;
+        }
+      }
+    }
+    if (!InlinedComdatFunctions.empty()) {
+      // Now we just have the comdat functions. Filter out the ones whose
+      // comdats are not actually dead.
+      filterDeadComdatFunctions(InlinedComdatFunctions);
+      // The remaining functions are actually dead.
+      for (Function *F : InlinedComdatFunctions) {
+        if (FAM)
+          FAM->clear(*F, F->getName());
+        M.getFunctionList().erase(F);
+        Changed = true;
+      }
+    }
+    return Changed;
+  }
+};
+
+static bool inlineHistoryIncludes(
+    Function *F, int InlineHistoryID,
+    const SmallVectorImpl<std::pair<Function *, int>> &InlineHistory) {
+  while (InlineHistoryID != -1) {
+    assert(unsigned(InlineHistoryID) < InlineHistory.size() &&
+           "Invalid inline history ID");
+    if (InlineHistory[InlineHistoryID].first == F)
+      return true;
+    InlineHistoryID = InlineHistory[InlineHistoryID].second;
+  }
+  return false;
+}
+
+bool flattenFunction(Function &F, InlinerHelper &IH) {
+  SmallVector<std::pair<CallBase *, int>, 16> Worklist;
+  SmallVector<std::pair<Function *, int>, 16> InlineHistory;
+  OptimizationRemarkEmitter ORE(&F);
+
+  for (BasicBlock &BB : F) {
+    for (Instruction &I : BB) {
+      if (auto *CB = dyn_cast<CallBase>(&I)) {
+        if (CB->getAttributes().hasFnAttr(Attribute::NoInline))
+          continue;
+        Function *Callee = CB->getCalledFunction();
+        if (!Callee)
+          continue;
+        if (!IH.canInline(*Callee)) {
+          continue;
+        }
+        Worklist.push_back({CB, -1});
+      }
+    }
+  }
+  bool Changed = false;
+  while (!Worklist.empty()) {
+    std::pair<CallBase *, int> P = Worklist.pop_back_val();
+    CallBase *CB = P.first;
+    int InlineHistoryID = P.second;
+    Function *Callee = CB->getCalledFunction();
+    if (!Callee)
+      continue;
+
+    if (Callee == &F ||
+        inlineHistoryIncludes(Callee, InlineHistoryID, InlineHistory)) {
+      ORE.emit([&]() {
+        return OptimizationRemarkMissed(DEBUG_TYPE, "NotInlined",
+                                        CB->getDebugLoc(), CB->getParent())
+               << "'" << ore::NV("Callee", Callee) << "' is not inlined into '"
+               << ore::NV("Caller", CB->getCaller())
+               << "': recursive call during flattening";
+      });
+      continue;
+    }
+
+    if (IH.tryInline(*CB, "flatten attribute")) {
+      Changed = true;
+      IH.addToMaybeInlinedFunctions(*Callee);
+      IH.addNewCallsToWorklist(Worklist, InlineHistoryID, InlineHistory,
+                               Callee);
+    }
+  }
+  return Changed;
+}
+
 bool AlwaysInlineImpl(
     Module &M, bool InsertLifetime, ProfileSummaryInfo &PSI,
     FunctionAnalysisManager *FAM,
     function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
     function_ref<AAResults &(Function &)> GetAAR) {
   SmallSetVector<CallBase *, 16> Calls;
+  InlinerHelper IH(M, PSI, FAM, GetAssumptionCache, GetAAR, InsertLifetime);
+  SmallVector<Function *, 4> NeedFlattening;
+
   bool Changed = false;
   SmallVector<Function *, 16> InlinedComdatFunctions;
 
   for (Function &F : make_early_inc_range(M)) {
-    if (F.isPresplitCoroutine())
-      continue;
+    if (F.hasFnAttribute(Attribute::Flatten))
+      NeedFlattening.push_back(&F);
 
-    if (F.isDeclaration() || !isInlineViable(F).isSuccess())
+    if (!IH.canInline(F))
       continue;
+    IH.addToMaybeInlinedFunctions(F);
 
     Calls.clear();
 
@@ -56,62 +226,13 @@ bool AlwaysInlineImpl(
           Calls.insert(CB);
 
     for (CallBase *CB : Calls) {
-      Function *Caller = CB->getCaller();
-      OptimizationRemarkEmitter ORE(Caller);
-      DebugLoc DLoc = CB->getDebugLoc();
-      BasicBlock *Block = CB->getParent();
-
-      InlineFunctionInfo IFI(GetAssumptionCache, &PSI, nullptr, nullptr);
-      InlineResult Res = InlineFunction(*CB, IFI, /*MergeAttributes=*/true,
-                                        &GetAAR(F), InsertLifetime);
-      if (!Res.isSuccess()) {
-        ORE.emit([&]() {
-          return OptimizationRemarkMissed(DEBUG_TYPE, "NotInlined", DLoc, Block)
-                 << "'" << ore::NV("Callee", &F) << "' is not inlined into '"
-                 << ore::NV("Caller", Caller)
-                 << "': " << ore::NV("Reason", Res.getFailureReason());
-        });
-        continue;
-      }
-
-      emitInlinedIntoBasedOnCost(
-          ORE, DLoc, Block, F, *Caller,
-          InlineCost::getAlways("always inline attribute"),
-          /*ForProfileContext=*/false, DEBUG_TYPE);
-
-      Changed = true;
-      if (FAM)
-        FAM->invalidate(*Caller, PreservedAnalyses::none());
-    }
-
-    F.removeDeadConstantUsers();
-    if (F.hasFnAttribute(Attribute::AlwaysInline) && F.isDefTriviallyDead()) {
-      // Remember to try and delete this function afterward. This allows to call
-      // filterDeadComdatFunctions() only once.
-      if (F.hasComdat()) {
-        InlinedComdatFunctions.push_back(&F);
-      } else {
-        if (FAM)
-          FAM->clear(F, F.getName());
-        M.getFunctionList().erase(F);
-        Changed = true;
-      }
-    }
-  }
-
-  if (!InlinedComdatFunctions.empty()) {
-    // Now we just have the comdat functions. Filter out the ones whose comdats
-    // are not actually dead.
-    filterDeadComdatFunctions(InlinedComdatFunctions);
-    // The remaining functions are actually dead.
-    for (Function *F : InlinedComdatFunctions) {
-      if (FAM)
-        FAM->clear(*F, F->getName());
-      M.getFunctionList().erase(F);
-      Changed = true;
+      Changed |= IH.tryInline(*CB, "always inline attribute");
     }
   }
+  for (Function *F : NeedFlattening)
+    Changed |= flattenFunction(*F, IH);
 
+  Changed |= IH.postInlinerCleanup();
   return Changed;
 }
 
diff --git a/llvm/test/Transforms/Inline/flatten.ll b/llvm/test/Transforms/Inline/flatten.ll
new file mode 100644
index 0000000000000..a50bcb6cb591b
--- /dev/null
+++ b/llvm/test/Transforms/Inline/flatten.ll
@@ -0,0 +1,142 @@
+; RUN: opt -passes=always-inline -S < %s | FileCheck %s
+; RUN: opt -passes=always-inline -pass-remarks-missed=inline -S < %s 2>&1 | FileCheck %s --check-prefix=REMARK
+
+; Test that the flatten attribute recursively inlines all calls.
+
+; Multiple levels are inlined.
+define internal i32 @leaf() {
+  ret i32 42
+}
+
+define internal i32 @middle() {
+  %r = call i32 @leaf()
+  ret i32 %r
+}
+
+define i32 @test_multilevel() flatten {
+; CHECK-LABEL: @test_multilevel(
+; CHECK-NOT: call i32 @middle
+; CHECK-NOT: call i32 @leaf
+; CHECK: ret i32 42
+  %r = call i32 @middle()
+  ret i32 %r
+}
+
+; Functions with invoke are inlined.
+declare i32 @__gxx_personality_v0(...)
+declare void @may_throw()
+
+define internal i32 @callee_with_invoke() personality ptr @__gxx_personality_v0 {
+entry:
+  invoke void @may_throw() to label %cont unwind label %lpad
+cont:
+  ret i32 100
+lpad:
+  %lp = landingpad { ptr, i32 } cleanup
+  resume { ptr, i32 } %lp
+}
+
+define i32 @test_invoke() flatten personality ptr @__gxx_personality_v0 {
+; CHECK-LABEL: @test_invoke(
+; CHECK-NOT: call i32 @callee_with_invoke
+; CHECK: invoke void @may_throw()
+; CHECK: ret i32 100
+entry:
+  %r = call i32 @callee_with_invoke()
+  ret i32 %r
+}
+
+; Declaration without definition is not inlined.
+declare i32 @external_func()
+
+define i32 @test_declaration() flatten {
+; CHECK-LABEL: @test_declaration(
+; CHECK: call i32 @external_func()
+; CHECK: ret i32
+  %r = call i32 @external_func()
+  ret i32 %r
+}
+
+; Indirect calls are not inlined.
+define internal i32 @target_func() {
+  ret i32 99
+}
+
+define i32 @test_indirect(ptr %func_ptr) flatten {
+; CHECK-LABEL: @test_indirect(
+; CHECK: call i32 %func_ptr()
+; CHECK: ret i32
+  %r = call i32 %func_ptr()
+  ret i32 %r
+}
+
+; Direct recursion back to flattened function.
+; The callee calls the flattened function - should not cause infinite inlining.
+define internal i32 @calls_flattened_func() {
+  %r = call i32 @test_direct_recursion()
+  ret i32 %r
+}
+
+define i32 @test_direct_recursion() flatten {
+; CHECK-LABEL: @test_direct_recursion(
+; The call to calls_flattened_func should be inlined, but the recursive call back
+; to test_direct_recursion should remain.
+; CHECK-NOT: call i32 @calls_flattened_func()
+; CHECK: call i32 @test_direct_recursion()
+; CHECK: ret i32
+  %r = call i32 @calls_flattened_func()
+  ret i32 %r
+}
+
+; Mutual recursion (A calls B, B calls A).
+; Should inline once but not infinitely.
+define internal i32 @mutual_a() {
+  %r = call i32 @mutual_b()
+  ret i32 %r
+}
+
+define internal i32 @mutual_b() {
+  %r = call i32 @mutual_a()
+  ret i32 %r
+}
+
+define i32 @test_mutual_recursion() flatten {
+; CHECK-LABEL: @test_mutual_recursion(
+; After inlining mutual_a, we get call to mutual_b.
+; After inlining mutual_b, we get call to mutual_a which should remain (skipped due to recursion).
+; CHECK-NOT: call i32 @mutual_b()
+; CHECK: call i32 @mutual_a()
+; CHECK: ret i32
+  %r = call i32 @mutual_a()
+  ret i32 %r
+}
+
+; Recursive callee via indirection.
+; A function that is part of a recursive cycle should be inlined once but not infinitely.
+; Note: Direct self-recursive functions (f calls f) are not inlineable in LLVM.
+; So we test with mutual recursion pattern where each function individually is viable.
+define internal i32 @recursive_a(i32 %n) {
+  %r = call i32 @recursive_b(i32 %n)
+  ret i32 %r
+}
+
+define internal i32 @recursive_b(i32 %n) {
+  %r = call i32 @recursive_a(i32 %n)
+  ret i32 %r
+}
+
+define i32 @test_self_recursion() flatten {
+; CHECK-LABEL: @test_self_recursion(
+; After inlining recursive_a (produces call to recursive_b with the original arg)
+; After inlining recursive_b (produces call to recursive_a - skipped due to history)
+; Both recursive_a and recursive_b should be inlined (CHECK-NOT matches any call to them)
+; The remaining call is to recursive_a with the propagated constant.
+; CHECK-NOT: call i32 @recursive_b
+; CHECK: call i32 @recursive_a(i32 5)
+; CHECK: ret i32
+  %r = call i32 @recursive_a(i32 5)
+  ret i32 %r
+}
+
+; Check that optimization remark is emitted for recursive calls during flattening.
+; REMARK: remark: {{.*}} 'test_direct_recursion' is not inlined into 'test_direct_recursion': recursive call during flattening

>From b8b5bea1a50e63359d50f290d213affbbd43eee5 Mon Sep 17 00:00:00 2001
From: Grigory Pastukhov <gpastukhov at meta.com>
Date: Fri, 9 Jan 2026 12:32:03 -0800
Subject: [PATCH 03/16] Bug fixes: filter non-inlinable new call sites and ABI
 violations

---
 llvm/lib/Transforms/IPO/AlwaysInliner.cpp | 38 ++++++++++++++++++++---
 llvm/test/Transforms/Inline/flatten.ll    | 15 +++++++++
 2 files changed, 48 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/Transforms/IPO/AlwaysInliner.cpp b/llvm/lib/Transforms/IPO/AlwaysInliner.cpp
index d5656e5e3db74..f729e84fe1db7 100644
--- a/llvm/lib/Transforms/IPO/AlwaysInliner.cpp
+++ b/llvm/lib/Transforms/IPO/AlwaysInliner.cpp
@@ -19,6 +19,7 @@
 #include "llvm/Analysis/InlineCost.h"
 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
 #include "llvm/Analysis/ProfileSummaryInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/IR/Module.h"
 #include "llvm/InitializePasses.h"
 #include "llvm/Transforms/Utils/Cloning.h"
@@ -145,7 +146,8 @@ static bool inlineHistoryIncludes(
   return false;
 }
 
-bool flattenFunction(Function &F, InlinerHelper &IH) {
+bool flattenFunction(Function &F, InlinerHelper &IH,
+                     function_ref<TargetTransformInfo &(Function &)> GetTTI) {
   SmallVector<std::pair<CallBase *, int>, 16> Worklist;
   SmallVector<std::pair<Function *, int>, 16> InlineHistory;
   OptimizationRemarkEmitter ORE(&F);
@@ -186,6 +188,19 @@ bool flattenFunction(Function &F, InlinerHelper &IH) {
       continue;
     }
 
+    if (!IH.canInline(*Callee))
+      continue;
+
+    // Use TTI to check for target-specific hard inlining restrictions.
+    // This includes checks like:
+    // - Cannot inline streaming callee into non-streaming caller
+    // - Cannot inline functions that create new ZA/ZT0 state
+    // For flatten, we respect the user's intent to inline as much as possible,
+    // but these are fundamental ABI violations that cannot be worked around.
+    TargetTransformInfo &TTI = GetTTI(F);
+    if (!TTI.areInlineCompatible(&F, Callee))
+      continue;
+
     if (IH.tryInline(*CB, "flatten attribute")) {
       Changed = true;
       IH.addToMaybeInlinedFunctions(*Callee);
@@ -200,7 +215,8 @@ bool AlwaysInlineImpl(
     Module &M, bool InsertLifetime, ProfileSummaryInfo &PSI,
     FunctionAnalysisManager *FAM,
     function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
-    function_ref<AAResults &(Function &)> GetAAR) {
+    function_ref<AAResults &(Function &)> GetAAR,
+    function_ref<TargetTransformInfo &(Function &)> GetTTI) {
   SmallSetVector<CallBase *, 16> Calls;
   InlinerHelper IH(M, PSI, FAM, GetAssumptionCache, GetAAR, InsertLifetime);
   SmallVector<Function *, 4> NeedFlattening;
@@ -229,8 +245,12 @@ bool AlwaysInlineImpl(
       Changed |= IH.tryInline(*CB, "always inline attribute");
     }
   }
+
+  // Only call flattenFunction (which uses TTI) if there are functions to
+  // flatten. This ensures TTI analysis is not requested at -O0 when there are
+  // no flatten functions, avoiding any overhead.
   for (Function *F : NeedFlattening)
-    Changed |= flattenFunction(*F, IH);
+    Changed |= flattenFunction(*F, IH, GetTTI);
 
   Changed |= IH.postInlinerCleanup();
   return Changed;
@@ -255,9 +275,12 @@ struct AlwaysInlinerLegacyPass : public ModulePass {
     auto GetAssumptionCache = [&](Function &F) -> AssumptionCache & {
       return getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
     };
+    auto GetTTI = [&](Function &F) -> TargetTransformInfo & {
+      return getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+    };
 
     return AlwaysInlineImpl(M, InsertLifetime, PSI, /*FAM=*/nullptr,
-                            GetAssumptionCache, GetAAR);
+                            GetAssumptionCache, GetAAR, GetTTI);
   }
 
   static char ID; // Pass identification, replacement for typeid
@@ -266,6 +289,7 @@ struct AlwaysInlinerLegacyPass : public ModulePass {
     AU.addRequired<AssumptionCacheTracker>();
     AU.addRequired<AAResultsWrapperPass>();
     AU.addRequired<ProfileSummaryInfoWrapperPass>();
+    AU.addRequired<TargetTransformInfoWrapperPass>();
   }
 };
 
@@ -277,6 +301,7 @@ INITIALIZE_PASS_BEGIN(AlwaysInlinerLegacyPass, "always-inline",
 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
 INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
 INITIALIZE_PASS_END(AlwaysInlinerLegacyPass, "always-inline",
                     "Inliner for always_inline functions", false, false)
 
@@ -294,10 +319,13 @@ PreservedAnalyses AlwaysInlinerPass::run(Module &M,
   auto GetAAR = [&](Function &F) -> AAResults & {
     return FAM.getResult<AAManager>(F);
   };
+  auto GetTTI = [&](Function &F) -> TargetTransformInfo & {
+    return FAM.getResult<TargetIRAnalysis>(F);
+  };
   auto &PSI = MAM.getResult<ProfileSummaryAnalysis>(M);
 
   bool Changed = AlwaysInlineImpl(M, InsertLifetime, PSI, &FAM,
-                                  GetAssumptionCache, GetAAR);
+                                  GetAssumptionCache, GetAAR, GetTTI);
   if (!Changed)
     return PreservedAnalyses::all();
 
diff --git a/llvm/test/Transforms/Inline/flatten.ll b/llvm/test/Transforms/Inline/flatten.ll
index a50bcb6cb591b..70a08cd951b95 100644
--- a/llvm/test/Transforms/Inline/flatten.ll
+++ b/llvm/test/Transforms/Inline/flatten.ll
@@ -57,6 +57,21 @@ define i32 @test_declaration() flatten {
   ret i32 %r
 }
 
+; Inlined callee that calls a declaration - the declaration should remain after flattening.
+define internal i32 @calls_external() {
+  %r = call i32 @external_func()
+  ret i32 %r
+}
+
+define i32 @test_inline_then_declaration() flatten {
+; CHECK-LABEL: @test_inline_then_declaration(
+; CHECK-NOT: call i32 @calls_external()
+; CHECK: call i32 @external_func()
+; CHECK: ret i32
+  %r = call i32 @calls_external()
+  ret i32 %r
+}
+
 ; Indirect calls are not inlined.
 define internal i32 @target_func() {
   ret i32 99

>From c4f4638fc87a0c867462c55a25f1c103e3e7f65c Mon Sep 17 00:00:00 2001
From: Grigory Pastukhov <gpastukhov at meta.com>
Date: Fri, 9 Jan 2026 12:39:49 -0800
Subject: [PATCH 04/16] Add test case to test ABI violation

---
 .../Transforms/Inline/AArch64/flatten-sme.ll  | 53 +++++++++++++++++++
 1 file changed, 53 insertions(+)
 create mode 100644 llvm/test/Transforms/Inline/AArch64/flatten-sme.ll

diff --git a/llvm/test/Transforms/Inline/AArch64/flatten-sme.ll b/llvm/test/Transforms/Inline/AArch64/flatten-sme.ll
new file mode 100644
index 0000000000000..e85902650487e
--- /dev/null
+++ b/llvm/test/Transforms/Inline/AArch64/flatten-sme.ll
@@ -0,0 +1,53 @@
+; RUN: opt < %s -mtriple=aarch64-unknown-linux-gnu -mattr=+sme -S -passes=always-inline | FileCheck %s
+
+; Test that flatten attribute respects ABI restrictions for SME.
+; Streaming callee cannot be inlined into non-streaming caller.
+; new_za callee cannot be inlined at all.
+
+define internal i32 @streaming_callee() "aarch64_pstate_sm_enabled" {
+  ret i32 42
+}
+
+define internal i32 @new_za_callee() "aarch64_new_za" {
+  ret i32 100
+}
+
+define internal i32 @normal_callee() {
+  ret i32 50
+}
+
+; Streaming callee -> non-streaming caller: should NOT be inlined (ABI violation).
+define i32 @test_streaming_not_inlined() flatten {
+; CHECK-LABEL: @test_streaming_not_inlined(
+; CHECK: call i32 @streaming_callee()
+; CHECK: ret i32
+  %r = call i32 @streaming_callee()
+  ret i32 %r
+}
+
+; new_za callee: should NOT be inlined (ABI violation - callee allocates new ZA).
+define i32 @test_new_za_not_inlined() flatten {
+; CHECK-LABEL: @test_new_za_not_inlined(
+; CHECK: call i32 @new_za_callee()
+; CHECK: ret i32
+  %r = call i32 @new_za_callee()
+  ret i32 %r
+}
+
+; Streaming caller -> streaming callee: should be inlined (compatible).
+define i32 @test_streaming_to_streaming() flatten "aarch64_pstate_sm_enabled" {
+; CHECK-LABEL: @test_streaming_to_streaming(
+; CHECK-NOT: call i32 @streaming_callee
+; CHECK: ret i32 42
+  %r = call i32 @streaming_callee()
+  ret i32 %r
+}
+
+; Non-streaming caller -> non-streaming callee: should be inlined.
+define i32 @test_normal_inlined() flatten {
+; CHECK-LABEL: @test_normal_inlined(
+; CHECK-NOT: call i32 @normal_callee
+; CHECK: ret i32 50
+  %r = call i32 @normal_callee()
+  ret i32 %r
+}

>From b28f4677a7ee43d06971b7f0abbe0cdc72f9cb44 Mon Sep 17 00:00:00 2001
From: Grigory Pastukhov <gpastukhov at meta.com>
Date: Wed, 14 Jan 2026 14:14:31 -0800
Subject: [PATCH 05/16] Run TTI on Callee

---
 llvm/lib/Transforms/IPO/AlwaysInliner.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Transforms/IPO/AlwaysInliner.cpp b/llvm/lib/Transforms/IPO/AlwaysInliner.cpp
index f729e84fe1db7..27f1a60d076d4 100644
--- a/llvm/lib/Transforms/IPO/AlwaysInliner.cpp
+++ b/llvm/lib/Transforms/IPO/AlwaysInliner.cpp
@@ -197,7 +197,7 @@ bool flattenFunction(Function &F, InlinerHelper &IH,
     // - Cannot inline functions that create new ZA/ZT0 state
     // For flatten, we respect the user's intent to inline as much as possible,
     // but these are fundamental ABI violations that cannot be worked around.
-    TargetTransformInfo &TTI = GetTTI(F);
+    TargetTransformInfo &TTI = GetTTI(*Callee);
     if (!TTI.areInlineCompatible(&F, Callee))
       continue;
 

>From 65c55cd410817381c75b4fb32135c3168e6e79ba Mon Sep 17 00:00:00 2001
From: Grigory Pastukhov <gpastukhov at meta.com>
Date: Thu, 22 Jan 2026 15:52:35 -0800
Subject: [PATCH 06/16] Add flattening logic to Inliner pass

---
 llvm/include/llvm/Analysis/InlineAdvisor.h |   7 ++
 llvm/lib/Analysis/InlineAdvisor.cpp        |  11 +++
 llvm/lib/Transforms/IPO/Inliner.cpp        | 106 +++++++++++++++++++++
 llvm/test/Transforms/Inline/flatten.ll     |   2 +
 4 files changed, 126 insertions(+)

diff --git a/llvm/include/llvm/Analysis/InlineAdvisor.h b/llvm/include/llvm/Analysis/InlineAdvisor.h
index 50ba3c13da70f..d45b2c6dcd079 100644
--- a/llvm/include/llvm/Analysis/InlineAdvisor.h
+++ b/llvm/include/llvm/Analysis/InlineAdvisor.h
@@ -180,6 +180,13 @@ class LLVM_ABI InlineAdvisor {
   std::unique_ptr<InlineAdvice> getAdvice(CallBase &CB,
                                           bool MandatoryOnly = false);
 
+  /// Get an InlineAdvice for a call site without performing cost analysis.
+  /// This is useful for cases like the flatten attribute where we want to
+  /// inline all viable calls regardless of cost. The viability checks
+  /// (TTI compatibility, noinline attributes, etc.) are still performed.
+  /// Returns advice with isInliningRecommended() = true if the call is viable.
+  std::unique_ptr<InlineAdvice> getAdviceWithoutCost(CallBase &CB);
+
   /// This must be called when the Inliner pass is entered, to allow the
   /// InlineAdvisor update internal state, as result of function passes run
   /// between Inliner pass runs (for the same module).
diff --git a/llvm/lib/Analysis/InlineAdvisor.cpp b/llvm/lib/Analysis/InlineAdvisor.cpp
index 1fb2f7e780031..abee47a484eb7 100644
--- a/llvm/lib/Analysis/InlineAdvisor.cpp
+++ b/llvm/lib/Analysis/InlineAdvisor.cpp
@@ -654,6 +654,17 @@ std::unique_ptr<InlineAdvice> InlineAdvisor::getAdvice(CallBase &CB,
   return getMandatoryAdvice(CB, Advice);
 }
 
+std::unique_ptr<InlineAdvice>
+InlineAdvisor::getAdviceWithoutCost(CallBase &CB) {
+  // Check if the call is viable for inlining without performing cost analysis.
+  // This is useful for cases like the flatten attribute where we want to
+  // inline all viable calls regardless of cost.
+  bool IsViable = CB.getCaller() != CB.getCalledFunction() &&
+                  MandatoryInliningKind::Never !=
+                      getMandatoryKind(CB, FAM, getCallerORE(CB));
+  return getMandatoryAdvice(CB, IsViable);
+}
+
 OptimizationRemarkEmitter &InlineAdvisor::getCallerORE(CallBase &CB) {
   return FAM.getResult<OptimizationRemarkEmitterAnalysis>(*CB.getCaller());
 }
diff --git a/llvm/lib/Transforms/IPO/Inliner.cpp b/llvm/lib/Transforms/IPO/Inliner.cpp
index fb376562f6781..533af266f1f61 100644
--- a/llvm/lib/Transforms/IPO/Inliner.cpp
+++ b/llvm/lib/Transforms/IPO/Inliner.cpp
@@ -195,6 +195,101 @@ InlinerPass::getAdvisor(const ModuleAnalysisManagerCGSCCProxy::Result &MAM,
   return *IAA->getAdvisor();
 }
 
+/// Flatten a function by inlining all calls within it recursively.
+/// This implements the flatten attribute behavior for the CGSCC inliner.
+/// Returns true if any inlining was performed.
+static bool flattenFunction(Function &F, FunctionAnalysisManager &FAM,
+                            ProfileSummaryInfo *PSI, InlineAdvisor &Advisor) {
+  SmallVector<std::pair<CallBase *, int>, 16> Worklist;
+  SmallVector<std::pair<Function *, int>, 16> InlineHistory;
+
+  auto GetAssumptionCache = [&](Function &Fn) -> AssumptionCache & {
+    return FAM.getResult<AssumptionAnalysis>(Fn);
+  };
+
+  // Collect initial calls.
+  for (BasicBlock &BB : F) {
+    for (Instruction &I : BB) {
+      if (auto *CB = dyn_cast<CallBase>(&I)) {
+        if (CB->getAttributes().hasFnAttr(Attribute::NoInline))
+          continue;
+        Function *Callee = CB->getCalledFunction();
+        if (!Callee || Callee->isDeclaration())
+          continue;
+        Worklist.push_back({CB, -1});
+      }
+    }
+  }
+
+  bool Changed = false;
+  while (!Worklist.empty()) {
+    std::pair<CallBase *, int> P = Worklist.pop_back_val();
+    CallBase *CB = P.first;
+    int InlineHistoryID = P.second;
+    Function *Callee = CB->getCalledFunction();
+    if (!Callee)
+      continue;
+
+    // Detect recursion.
+    if (Callee == &F ||
+        inlineHistoryIncludes(Callee, InlineHistoryID, InlineHistory)) {
+      LLVM_DEBUG(dbgs() << "Skipping recursive call during flattening: "
+                        << F.getName() << " -> " << Callee->getName() << "\n");
+      setInlineRemark(*CB, "recursive");
+      auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
+      ORE.emit([&]() {
+        return OptimizationRemarkMissed(DEBUG_TYPE, "NotInlined",
+                                        CB->getDebugLoc(), CB->getParent())
+               << "'" << ore::NV("Callee", Callee) << "' is not inlined into '"
+               << ore::NV("Caller", CB->getCaller())
+               << "': recursive call during flattening";
+      });
+      continue;
+    }
+
+    // Use the advisor to check viability without performing cost analysis.
+    // For flatten, we want to inline all viable calls regardless of cost.
+    std::unique_ptr<InlineAdvice> Advice = Advisor.getAdviceWithoutCost(*CB);
+    if (!Advice)
+      continue;
+
+    if (!Advice->isInliningRecommended()) {
+      Advice->recordUnattemptedInlining();
+      continue;
+    }
+
+    InlineFunctionInfo IFI(GetAssumptionCache, PSI);
+
+    InlineResult IR =
+        InlineFunction(*CB, IFI, /*MergeAttributes=*/true,
+                       &FAM.getResult<AAManager>(F), /*InsertLifetime=*/true);
+    if (!IR.isSuccess()) {
+      Advice->recordUnsuccessfulInlining(IR);
+      continue;
+    }
+
+    Advice->recordInlining();
+    Changed = true;
+
+    // Add new call sites from the inlined function to the worklist.
+    if (!IFI.InlinedCallSites.empty()) {
+      int NewHistoryID = InlineHistory.size();
+      InlineHistory.push_back({Callee, InlineHistoryID});
+      for (CallBase *ICB : IFI.InlinedCallSites) {
+        Function *NewCallee = ICB->getCalledFunction();
+        if (NewCallee && !NewCallee->isDeclaration() &&
+            !ICB->getAttributes().hasFnAttr(Attribute::NoInline))
+          Worklist.push_back({ICB, NewHistoryID});
+      }
+    }
+  }
+
+  if (Changed)
+    FAM.invalidate(F, PreservedAnalyses::none());
+
+  return Changed;
+}
+
 void makeFunctionBodyUnreachable(Function &F) {
   F.dropAllReferences();
   for (BasicBlock &BB : make_early_inc_range(F))
@@ -248,8 +343,15 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
   // incrementally maknig a single function grow in a super linear fashion.
   SmallVector<std::pair<CallBase *, int>, 16> Calls;
 
+  // Track functions with flatten attribute for processing at the end.
+  SmallSetVector<Function *, 4> FlattenFunctions;
+
   // Populate the initial list of calls in this SCC.
   for (auto &N : InitialC) {
+    Function &Fn = N.getFunction();
+    if (Fn.hasFnAttribute(Attribute::Flatten))
+      FlattenFunctions.insert(&Fn);
+
     auto &ORE =
         FAM.getResult<OptimizationRemarkEmitterAnalysis>(N.getFunction());
     // We want to generally process call sites top-down in order for
@@ -535,6 +637,10 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
     FAM.invalidate(F, PreservedAnalyses::none());
   }
 
+  // Now flatten functions with the flatten attribute.
+  for (Function *FlattenF : FlattenFunctions)
+    Changed |= flattenFunction(*FlattenF, FAM, PSI, Advisor);
+
   // We must ensure that we only delete functions with comdats if every function
   // in the comdat is going to be deleted.
   if (!DeadFunctionsInComdats.empty()) {
diff --git a/llvm/test/Transforms/Inline/flatten.ll b/llvm/test/Transforms/Inline/flatten.ll
index 70a08cd951b95..e0e08383a16ef 100644
--- a/llvm/test/Transforms/Inline/flatten.ll
+++ b/llvm/test/Transforms/Inline/flatten.ll
@@ -1,5 +1,7 @@
 ; RUN: opt -passes=always-inline -S < %s | FileCheck %s
 ; RUN: opt -passes=always-inline -pass-remarks-missed=inline -S < %s 2>&1 | FileCheck %s --check-prefix=REMARK
+; RUN: opt -passes=inline -S < %s | FileCheck %s
+; RUN: opt -passes='cgscc(inline<only-mandatory>)' -S < %s | FileCheck %s
 
 ; Test that the flatten attribute recursively inlines all calls.
 

>From f0d3076ff440f8eed431c127e6f7d3d66b6b2f1c Mon Sep 17 00:00:00 2001
From: Grigory Pastukhov <gpastukhov at meta.com>
Date: Thu, 22 Jan 2026 18:17:04 -0800
Subject: [PATCH 07/16] Move common code to utility file

---
 .../llvm/Transforms/IPO/InliningUtils.h       | 119 +++++++++++++++
 llvm/lib/Transforms/IPO/AlwaysInliner.cpp     | 108 ++++----------
 llvm/lib/Transforms/IPO/Inliner.cpp           | 140 ++++++------------
 3 files changed, 195 insertions(+), 172 deletions(-)
 create mode 100644 llvm/include/llvm/Transforms/IPO/InliningUtils.h

diff --git a/llvm/include/llvm/Transforms/IPO/InliningUtils.h b/llvm/include/llvm/Transforms/IPO/InliningUtils.h
new file mode 100644
index 0000000000000..520c9d8343c12
--- /dev/null
+++ b/llvm/include/llvm/Transforms/IPO/InliningUtils.h
@@ -0,0 +1,119 @@
+//===- InliningUtils.h - Shared inlining utilities -------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines shared utilities used by the inliner passes.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_IPO_INLININGUTILS_H
+#define LLVM_TRANSFORMS_IPO_INLININGUTILS_H
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+
+namespace llvm {
+
+/// Check if Function F appears in the inline history chain.
+/// InlineHistory is a vector of (Function, ParentHistoryID) pairs.
+/// Returns true if F was already inlined in the chain leading to
+/// InlineHistoryID.
+inline bool inlineHistoryIncludes(
+    Function *F, int InlineHistoryID,
+    const SmallVectorImpl<std::pair<Function *, int>> &InlineHistory) {
+  while (InlineHistoryID != -1) {
+    assert(unsigned(InlineHistoryID) < InlineHistory.size() &&
+           "Invalid inline history ID");
+    if (InlineHistory[InlineHistoryID].first == F)
+      return true;
+    InlineHistoryID = InlineHistory[InlineHistoryID].second;
+  }
+  return false;
+}
+
+/// Flatten a function by inlining all calls recursively.
+///
+/// PolicyT must provide:
+///   - bool canInlineCall(Function &F, CallBase &CB): Check if call can be
+///       inlined into F
+///   - bool doInline(Function &F, CallBase &CB, Function &Callee): Perform
+///       the inline, return true on success
+///   - ArrayRef<CallBase *> getNewCallSites(): Get call sites from last inline
+///
+/// Returns true if any inlining was performed.
+template <typename PolicyT>
+bool flattenFunction(Function &F, PolicyT &Policy,
+                     OptimizationRemarkEmitter &ORE) {
+  SmallVector<std::pair<CallBase *, int>, 16> Worklist;
+  SmallVector<std::pair<Function *, int>, 16> InlineHistory;
+
+  // Collect initial calls.
+  for (BasicBlock &BB : F) {
+    for (Instruction &I : BB) {
+      if (auto *CB = dyn_cast<CallBase>(&I)) {
+        if (CB->getAttributes().hasFnAttr(Attribute::NoInline))
+          continue;
+        Function *Callee = CB->getCalledFunction();
+        if (!Callee || Callee->isDeclaration())
+          continue;
+        Worklist.push_back({CB, -1});
+      }
+    }
+  }
+
+  bool Changed = false;
+  while (!Worklist.empty()) {
+    auto Item = Worklist.pop_back_val();
+    CallBase *CB = Item.first;
+    int InlineHistoryID = Item.second;
+    Function *Callee = CB->getCalledFunction();
+    if (!Callee)
+      continue;
+
+    // Detect recursion.
+    if (Callee == &F ||
+        inlineHistoryIncludes(Callee, InlineHistoryID, InlineHistory)) {
+      ORE.emit([&]() {
+        return OptimizationRemarkMissed("inline", "NotInlined",
+                                        CB->getDebugLoc(), CB->getParent())
+               << "'" << ore::NV("Callee", Callee) << "' is not inlined into '"
+               << ore::NV("Caller", CB->getCaller())
+               << "': recursive call during flattening";
+      });
+      continue;
+    }
+
+    if (!Policy.canInlineCall(F, *CB))
+      continue;
+
+    if (!Policy.doInline(F, *CB, *Callee))
+      continue;
+
+    Changed = true;
+
+    // Add new call sites from the inlined function to the worklist.
+    ArrayRef<CallBase *> NewCallSites = Policy.getNewCallSites();
+    if (!NewCallSites.empty()) {
+      int NewHistoryID = InlineHistory.size();
+      InlineHistory.push_back({Callee, InlineHistoryID});
+      for (CallBase *NewCB : NewCallSites) {
+        Function *NewCallee = NewCB->getCalledFunction();
+        if (NewCallee && !NewCallee->isDeclaration() &&
+            !NewCB->getAttributes().hasFnAttr(Attribute::NoInline))
+          Worklist.push_back({NewCB, NewHistoryID});
+      }
+    }
+  }
+
+  return Changed;
+}
+
+} // namespace llvm
+
+#endif // LLVM_TRANSFORMS_IPO_INLININGUTILS_H
diff --git a/llvm/lib/Transforms/IPO/AlwaysInliner.cpp b/llvm/lib/Transforms/IPO/AlwaysInliner.cpp
index 27f1a60d076d4..52adf0853db91 100644
--- a/llvm/lib/Transforms/IPO/AlwaysInliner.cpp
+++ b/llvm/lib/Transforms/IPO/AlwaysInliner.cpp
@@ -22,6 +22,7 @@
 #include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/IR/Module.h"
 #include "llvm/InitializePasses.h"
+#include "llvm/Transforms/IPO/InliningUtils.h"
 #include "llvm/Transforms/Utils/Cloning.h"
 #include "llvm/Transforms/Utils/ModuleUtils.h"
 
@@ -83,17 +84,8 @@ class InlinerHelper {
     return true;
   }
 
-  void addNewCallsToWorklist(
-      SmallVectorImpl<std::pair<CallBase *, int>> &Worklist,
-      int InlineHistoryID,
-      SmallVectorImpl<std::pair<Function *, int>> &InlineHistory,
-      Function *InlinedCallee) {
-    if (IFI.InlinedCallSites.empty())
-      return;
-    int NewHistoryID = InlineHistory.size();
-    InlineHistory.push_back({InlinedCallee, InlineHistoryID});
-    for (CallBase *CB : IFI.InlinedCallSites)
-      Worklist.push_back({CB, NewHistoryID});
+  ArrayRef<CallBase *> getInlinedCallSites() const {
+    return IFI.InlinedCallSites;
   }
 
   void addToMaybeInlinedFunctions(Function &F) {
@@ -133,64 +125,20 @@ class InlinerHelper {
   }
 };
 
-static bool inlineHistoryIncludes(
-    Function *F, int InlineHistoryID,
-    const SmallVectorImpl<std::pair<Function *, int>> &InlineHistory) {
-  while (InlineHistoryID != -1) {
-    assert(unsigned(InlineHistoryID) < InlineHistory.size() &&
-           "Invalid inline history ID");
-    if (InlineHistory[InlineHistoryID].first == F)
-      return true;
-    InlineHistoryID = InlineHistory[InlineHistoryID].second;
-  }
-  return false;
-}
-
-bool flattenFunction(Function &F, InlinerHelper &IH,
-                     function_ref<TargetTransformInfo &(Function &)> GetTTI) {
-  SmallVector<std::pair<CallBase *, int>, 16> Worklist;
-  SmallVector<std::pair<Function *, int>, 16> InlineHistory;
-  OptimizationRemarkEmitter ORE(&F);
-
-  for (BasicBlock &BB : F) {
-    for (Instruction &I : BB) {
-      if (auto *CB = dyn_cast<CallBase>(&I)) {
-        if (CB->getAttributes().hasFnAttr(Attribute::NoInline))
-          continue;
-        Function *Callee = CB->getCalledFunction();
-        if (!Callee)
-          continue;
-        if (!IH.canInline(*Callee)) {
-          continue;
-        }
-        Worklist.push_back({CB, -1});
-      }
-    }
-  }
-  bool Changed = false;
-  while (!Worklist.empty()) {
-    std::pair<CallBase *, int> P = Worklist.pop_back_val();
-    CallBase *CB = P.first;
-    int InlineHistoryID = P.second;
-    Function *Callee = CB->getCalledFunction();
-    if (!Callee)
-      continue;
-
-    if (Callee == &F ||
-        inlineHistoryIncludes(Callee, InlineHistoryID, InlineHistory)) {
-      ORE.emit([&]() {
-        return OptimizationRemarkMissed(DEBUG_TYPE, "NotInlined",
-                                        CB->getDebugLoc(), CB->getParent())
-               << "'" << ore::NV("Callee", Callee) << "' is not inlined into '"
-               << ore::NV("Caller", CB->getCaller())
-               << "': recursive call during flattening";
-      });
-      continue;
-    }
+/// Policy for flattenFunction template used by AlwaysInliner.
+class AlwaysInlinerFlattenPolicy {
+  InlinerHelper &IH;
+  function_ref<TargetTransformInfo &(Function &)> GetTTI;
 
-    if (!IH.canInline(*Callee))
-      continue;
+public:
+  AlwaysInlinerFlattenPolicy(
+      InlinerHelper &IH, function_ref<TargetTransformInfo &(Function &)> GetTTI)
+      : IH(IH), GetTTI(GetTTI) {}
 
+  bool canInlineCall(Function &F, CallBase &CB) {
+    Function *Callee = CB.getCalledFunction();
+    if (!Callee || !IH.canInline(*Callee))
+      return false;
     // Use TTI to check for target-specific hard inlining restrictions.
     // This includes checks like:
     // - Cannot inline streaming callee into non-streaming caller
@@ -198,18 +146,19 @@ bool flattenFunction(Function &F, InlinerHelper &IH,
     // For flatten, we respect the user's intent to inline as much as possible,
     // but these are fundamental ABI violations that cannot be worked around.
     TargetTransformInfo &TTI = GetTTI(*Callee);
-    if (!TTI.areInlineCompatible(&F, Callee))
-      continue;
+    return TTI.areInlineCompatible(&F, Callee);
+  }
 
-    if (IH.tryInline(*CB, "flatten attribute")) {
-      Changed = true;
-      IH.addToMaybeInlinedFunctions(*Callee);
-      IH.addNewCallsToWorklist(Worklist, InlineHistoryID, InlineHistory,
-                               Callee);
+  bool doInline(Function &F, CallBase &CB, Function &Callee) {
+    if (IH.tryInline(CB, "flatten attribute")) {
+      IH.addToMaybeInlinedFunctions(Callee);
+      return true;
     }
+    return false;
   }
-  return Changed;
-}
+
+  ArrayRef<CallBase *> getNewCallSites() { return IH.getInlinedCallSites(); }
+};
 
 bool AlwaysInlineImpl(
     Module &M, bool InsertLifetime, ProfileSummaryInfo &PSI,
@@ -249,8 +198,11 @@ bool AlwaysInlineImpl(
   // Only call flattenFunction (which uses TTI) if there are functions to
   // flatten. This ensures TTI analysis is not requested at -O0 when there are
   // no flatten functions, avoiding any overhead.
-  for (Function *F : NeedFlattening)
-    Changed |= flattenFunction(*F, IH, GetTTI);
+  for (Function *F : NeedFlattening) {
+    AlwaysInlinerFlattenPolicy Policy(IH, GetTTI);
+    OptimizationRemarkEmitter ORE(F);
+    Changed |= flattenFunction(*F, Policy, ORE);
+  }
 
   Changed |= IH.postInlinerCleanup();
   return Changed;
diff --git a/llvm/lib/Transforms/IPO/Inliner.cpp b/llvm/lib/Transforms/IPO/Inliner.cpp
index 533af266f1f61..b0ce7eb457a84 100644
--- a/llvm/lib/Transforms/IPO/Inliner.cpp
+++ b/llvm/lib/Transforms/IPO/Inliner.cpp
@@ -53,6 +53,7 @@
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/IPO/InliningUtils.h"
 #include "llvm/Transforms/Utils/CallPromotionUtils.h"
 #include "llvm/Transforms/Utils/Cloning.h"
 #include "llvm/Transforms/Utils/Local.h"
@@ -142,21 +143,6 @@ static cl::opt<CallSiteFormat::Format> CGSCCInlineReplayFormat(
                    "<Line Number>:<Column Number>.<Discriminator> (default)")),
     cl::desc("How cgscc inline replay file is formatted"), cl::Hidden);
 
-/// Return true if the specified inline history ID
-/// indicates an inline history that includes the specified function.
-static bool inlineHistoryIncludes(
-    Function *F, int InlineHistoryID,
-    const SmallVectorImpl<std::pair<Function *, int>> &InlineHistory) {
-  while (InlineHistoryID != -1) {
-    assert(unsigned(InlineHistoryID) < InlineHistory.size() &&
-           "Invalid inline history ID");
-    if (InlineHistory[InlineHistoryID].first == F)
-      return true;
-    InlineHistoryID = InlineHistory[InlineHistoryID].second;
-  }
-  return false;
-}
-
 InlineAdvisor &
 InlinerPass::getAdvisor(const ModuleAnalysisManagerCGSCCProxy::Result &MAM,
                         FunctionAnalysisManager &FAM, Module &M) {
@@ -195,100 +181,59 @@ InlinerPass::getAdvisor(const ModuleAnalysisManagerCGSCCProxy::Result &MAM,
   return *IAA->getAdvisor();
 }
 
-/// Flatten a function by inlining all calls within it recursively.
-/// This implements the flatten attribute behavior for the CGSCC inliner.
-/// Returns true if any inlining was performed.
-static bool flattenFunction(Function &F, FunctionAnalysisManager &FAM,
-                            ProfileSummaryInfo *PSI, InlineAdvisor &Advisor) {
-  SmallVector<std::pair<CallBase *, int>, 16> Worklist;
-  SmallVector<std::pair<Function *, int>, 16> InlineHistory;
-
-  auto GetAssumptionCache = [&](Function &Fn) -> AssumptionCache & {
-    return FAM.getResult<AssumptionAnalysis>(Fn);
-  };
-
-  // Collect initial calls.
-  for (BasicBlock &BB : F) {
-    for (Instruction &I : BB) {
-      if (auto *CB = dyn_cast<CallBase>(&I)) {
-        if (CB->getAttributes().hasFnAttr(Attribute::NoInline))
-          continue;
-        Function *Callee = CB->getCalledFunction();
-        if (!Callee || Callee->isDeclaration())
-          continue;
-        Worklist.push_back({CB, -1});
-      }
-    }
+/// Policy for flattenFunction template used by CGSCC Inliner.
+class CGSCCInlinerFlattenPolicy {
+  FunctionAnalysisManager &FAM;
+  InlineAdvisor &Advisor;
+
+  std::function<AssumptionCache &(Function &)> GetAssumptionCache;
+  InlineFunctionInfo IFI;
+
+public:
+  CGSCCInlinerFlattenPolicy(FunctionAnalysisManager &FAM,
+                            ProfileSummaryInfo *PSI, InlineAdvisor &Advisor)
+      : FAM(FAM), Advisor(Advisor),
+        GetAssumptionCache([&FAM](Function &Fn) -> AssumptionCache & {
+          return FAM.getResult<AssumptionAnalysis>(Fn);
+        }),
+        IFI(GetAssumptionCache, PSI) {}
+
+  bool canInlineCall(Function &F, CallBase &CB) {
+    // This is called both during initial collection and during worklist
+    // processing. We only do cheap checks here - the advisor is called
+    // in doInline to avoid creating InlineAdvice objects that might not
+    // be properly recorded.
+    Function *Callee = CB.getCalledFunction();
+    if (!Callee || Callee->isDeclaration())
+      return false;
+    return isInlineViable(*Callee).isSuccess();
   }
 
-  bool Changed = false;
-  while (!Worklist.empty()) {
-    std::pair<CallBase *, int> P = Worklist.pop_back_val();
-    CallBase *CB = P.first;
-    int InlineHistoryID = P.second;
-    Function *Callee = CB->getCalledFunction();
-    if (!Callee)
-      continue;
-
-    // Detect recursion.
-    if (Callee == &F ||
-        inlineHistoryIncludes(Callee, InlineHistoryID, InlineHistory)) {
-      LLVM_DEBUG(dbgs() << "Skipping recursive call during flattening: "
-                        << F.getName() << " -> " << Callee->getName() << "\n");
-      setInlineRemark(*CB, "recursive");
-      auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
-      ORE.emit([&]() {
-        return OptimizationRemarkMissed(DEBUG_TYPE, "NotInlined",
-                                        CB->getDebugLoc(), CB->getParent())
-               << "'" << ore::NV("Callee", Callee) << "' is not inlined into '"
-               << ore::NV("Caller", CB->getCaller())
-               << "': recursive call during flattening";
-      });
-      continue;
-    }
-
+  bool doInline(Function &F, CallBase &CB, Function &Callee) {
     // Use the advisor to check viability without performing cost analysis.
     // For flatten, we want to inline all viable calls regardless of cost.
-    std::unique_ptr<InlineAdvice> Advice = Advisor.getAdviceWithoutCost(*CB);
+    std::unique_ptr<InlineAdvice> Advice = Advisor.getAdviceWithoutCost(CB);
     if (!Advice)
-      continue;
-
+      return false;
     if (!Advice->isInliningRecommended()) {
       Advice->recordUnattemptedInlining();
-      continue;
+      return false;
     }
 
-    InlineFunctionInfo IFI(GetAssumptionCache, PSI);
-
+    IFI.reset();
     InlineResult IR =
-        InlineFunction(*CB, IFI, /*MergeAttributes=*/true,
+        InlineFunction(CB, IFI, /*MergeAttributes=*/true,
                        &FAM.getResult<AAManager>(F), /*InsertLifetime=*/true);
     if (!IR.isSuccess()) {
       Advice->recordUnsuccessfulInlining(IR);
-      continue;
+      return false;
     }
-
     Advice->recordInlining();
-    Changed = true;
-
-    // Add new call sites from the inlined function to the worklist.
-    if (!IFI.InlinedCallSites.empty()) {
-      int NewHistoryID = InlineHistory.size();
-      InlineHistory.push_back({Callee, InlineHistoryID});
-      for (CallBase *ICB : IFI.InlinedCallSites) {
-        Function *NewCallee = ICB->getCalledFunction();
-        if (NewCallee && !NewCallee->isDeclaration() &&
-            !ICB->getAttributes().hasFnAttr(Attribute::NoInline))
-          Worklist.push_back({ICB, NewHistoryID});
-      }
-    }
+    return true;
   }
 
-  if (Changed)
-    FAM.invalidate(F, PreservedAnalyses::none());
-
-  return Changed;
-}
+  ArrayRef<CallBase *> getNewCallSites() { return IFI.InlinedCallSites; }
+};
 
 void makeFunctionBodyUnreachable(Function &F) {
   F.dropAllReferences();
@@ -638,8 +583,15 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
   }
 
   // Now flatten functions with the flatten attribute.
-  for (Function *FlattenF : FlattenFunctions)
-    Changed |= flattenFunction(*FlattenF, FAM, PSI, Advisor);
+  for (Function *FlattenF : FlattenFunctions) {
+    CGSCCInlinerFlattenPolicy Policy(FAM, PSI, Advisor);
+    OptimizationRemarkEmitter &ORE =
+        FAM.getResult<OptimizationRemarkEmitterAnalysis>(*FlattenF);
+    bool FlattenChanged = flattenFunction(*FlattenF, Policy, ORE);
+    if (FlattenChanged)
+      FAM.invalidate(*FlattenF, PreservedAnalyses::none());
+    Changed |= FlattenChanged;
+  }
 
   // We must ensure that we only delete functions with comdats if every function
   // in the comdat is going to be deleted.

>From ff0cfaf9eb2ad1de78c96f36250bec9df3f783c7 Mon Sep 17 00:00:00 2001
From: Grigory Pastukhov <gpastukhov at meta.com>
Date: Thu, 22 Jan 2026 18:26:04 -0800
Subject: [PATCH 08/16] Change flatten-sme test to check Inliner as well

---
 llvm/test/Transforms/Inline/AArch64/flatten-sme.ll | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llvm/test/Transforms/Inline/AArch64/flatten-sme.ll b/llvm/test/Transforms/Inline/AArch64/flatten-sme.ll
index e85902650487e..dcb82e995b69f 100644
--- a/llvm/test/Transforms/Inline/AArch64/flatten-sme.ll
+++ b/llvm/test/Transforms/Inline/AArch64/flatten-sme.ll
@@ -1,4 +1,5 @@
 ; RUN: opt < %s -mtriple=aarch64-unknown-linux-gnu -mattr=+sme -S -passes=always-inline | FileCheck %s
+; RUN: opt < %s -mtriple=aarch64-unknown-linux-gnu -mattr=+sme -S -passes=inline | FileCheck %s
 
 ; Test that flatten attribute respects ABI restrictions for SME.
 ; Streaming callee cannot be inlined into non-streaming caller.

>From 24f5f56b892059843a52d51b9760d3ba191ef367 Mon Sep 17 00:00:00 2001
From: Grigory Pastukhov <gpastukhov at meta.com>
Date: Wed, 18 Feb 2026 15:07:32 -0800
Subject: [PATCH 09/16] Simplify flatten attribute handling by removing
 template and policy classes

Remove flattenFunction template and CGSCCInlinerFlattenPolicy /
AlwaysInlinerFlattenPolicy classes. Handle flatten directly in the
CGSCC Inliner's main loop via getAdviceWithoutCost and with a local
worklist in AlwaysInliner.
---
 .../llvm/Transforms/IPO/InliningUtils.h       |  79 -------------
 llvm/lib/Transforms/IPO/AlwaysInliner.cpp     | 105 +++++++++++-------
 llvm/lib/Transforms/IPO/Inliner.cpp           |  78 +------------
 3 files changed, 70 insertions(+), 192 deletions(-)

diff --git a/llvm/include/llvm/Transforms/IPO/InliningUtils.h b/llvm/include/llvm/Transforms/IPO/InliningUtils.h
index 520c9d8343c12..021ff00cdd202 100644
--- a/llvm/include/llvm/Transforms/IPO/InliningUtils.h
+++ b/llvm/include/llvm/Transforms/IPO/InliningUtils.h
@@ -14,9 +14,7 @@
 #define LLVM_TRANSFORMS_IPO_INLININGUTILS_H
 
 #include "llvm/ADT/SmallVector.h"
-#include "llvm/Analysis/OptimizationRemarkEmitter.h"
 #include "llvm/IR/Function.h"
-#include "llvm/IR/Instructions.h"
 
 namespace llvm {
 
@@ -37,83 +35,6 @@ inline bool inlineHistoryIncludes(
   return false;
 }
 
-/// Flatten a function by inlining all calls recursively.
-///
-/// PolicyT must provide:
-///   - bool canInlineCall(Function &F, CallBase &CB): Check if call can be
-///       inlined into F
-///   - bool doInline(Function &F, CallBase &CB, Function &Callee): Perform
-///       the inline, return true on success
-///   - ArrayRef<CallBase *> getNewCallSites(): Get call sites from last inline
-///
-/// Returns true if any inlining was performed.
-template <typename PolicyT>
-bool flattenFunction(Function &F, PolicyT &Policy,
-                     OptimizationRemarkEmitter &ORE) {
-  SmallVector<std::pair<CallBase *, int>, 16> Worklist;
-  SmallVector<std::pair<Function *, int>, 16> InlineHistory;
-
-  // Collect initial calls.
-  for (BasicBlock &BB : F) {
-    for (Instruction &I : BB) {
-      if (auto *CB = dyn_cast<CallBase>(&I)) {
-        if (CB->getAttributes().hasFnAttr(Attribute::NoInline))
-          continue;
-        Function *Callee = CB->getCalledFunction();
-        if (!Callee || Callee->isDeclaration())
-          continue;
-        Worklist.push_back({CB, -1});
-      }
-    }
-  }
-
-  bool Changed = false;
-  while (!Worklist.empty()) {
-    auto Item = Worklist.pop_back_val();
-    CallBase *CB = Item.first;
-    int InlineHistoryID = Item.second;
-    Function *Callee = CB->getCalledFunction();
-    if (!Callee)
-      continue;
-
-    // Detect recursion.
-    if (Callee == &F ||
-        inlineHistoryIncludes(Callee, InlineHistoryID, InlineHistory)) {
-      ORE.emit([&]() {
-        return OptimizationRemarkMissed("inline", "NotInlined",
-                                        CB->getDebugLoc(), CB->getParent())
-               << "'" << ore::NV("Callee", Callee) << "' is not inlined into '"
-               << ore::NV("Caller", CB->getCaller())
-               << "': recursive call during flattening";
-      });
-      continue;
-    }
-
-    if (!Policy.canInlineCall(F, *CB))
-      continue;
-
-    if (!Policy.doInline(F, *CB, *Callee))
-      continue;
-
-    Changed = true;
-
-    // Add new call sites from the inlined function to the worklist.
-    ArrayRef<CallBase *> NewCallSites = Policy.getNewCallSites();
-    if (!NewCallSites.empty()) {
-      int NewHistoryID = InlineHistory.size();
-      InlineHistory.push_back({Callee, InlineHistoryID});
-      for (CallBase *NewCB : NewCallSites) {
-        Function *NewCallee = NewCB->getCalledFunction();
-        if (NewCallee && !NewCallee->isDeclaration() &&
-            !NewCB->getAttributes().hasFnAttr(Attribute::NoInline))
-          Worklist.push_back({NewCB, NewHistoryID});
-      }
-    }
-  }
-
-  return Changed;
-}
-
 } // namespace llvm
 
 #endif // LLVM_TRANSFORMS_IPO_INLININGUTILS_H
diff --git a/llvm/lib/Transforms/IPO/AlwaysInliner.cpp b/llvm/lib/Transforms/IPO/AlwaysInliner.cpp
index 52adf0853db91..d27eb9d777028 100644
--- a/llvm/lib/Transforms/IPO/AlwaysInliner.cpp
+++ b/llvm/lib/Transforms/IPO/AlwaysInliner.cpp
@@ -125,41 +125,6 @@ class InlinerHelper {
   }
 };
 
-/// Policy for flattenFunction template used by AlwaysInliner.
-class AlwaysInlinerFlattenPolicy {
-  InlinerHelper &IH;
-  function_ref<TargetTransformInfo &(Function &)> GetTTI;
-
-public:
-  AlwaysInlinerFlattenPolicy(
-      InlinerHelper &IH, function_ref<TargetTransformInfo &(Function &)> GetTTI)
-      : IH(IH), GetTTI(GetTTI) {}
-
-  bool canInlineCall(Function &F, CallBase &CB) {
-    Function *Callee = CB.getCalledFunction();
-    if (!Callee || !IH.canInline(*Callee))
-      return false;
-    // Use TTI to check for target-specific hard inlining restrictions.
-    // This includes checks like:
-    // - Cannot inline streaming callee into non-streaming caller
-    // - Cannot inline functions that create new ZA/ZT0 state
-    // For flatten, we respect the user's intent to inline as much as possible,
-    // but these are fundamental ABI violations that cannot be worked around.
-    TargetTransformInfo &TTI = GetTTI(*Callee);
-    return TTI.areInlineCompatible(&F, Callee);
-  }
-
-  bool doInline(Function &F, CallBase &CB, Function &Callee) {
-    if (IH.tryInline(CB, "flatten attribute")) {
-      IH.addToMaybeInlinedFunctions(Callee);
-      return true;
-    }
-    return false;
-  }
-
-  ArrayRef<CallBase *> getNewCallSites() { return IH.getInlinedCallSites(); }
-};
-
 bool AlwaysInlineImpl(
     Module &M, bool InsertLifetime, ProfileSummaryInfo &PSI,
     FunctionAnalysisManager *FAM,
@@ -195,13 +160,73 @@ bool AlwaysInlineImpl(
     }
   }
 
-  // Only call flattenFunction (which uses TTI) if there are functions to
-  // flatten. This ensures TTI analysis is not requested at -O0 when there are
-  // no flatten functions, avoiding any overhead.
+  // Flatten functions with the flatten attribute using a local worklist.
   for (Function *F : NeedFlattening) {
-    AlwaysInlinerFlattenPolicy Policy(IH, GetTTI);
+    SmallVector<std::pair<CallBase *, int>, 16> Worklist;
+    SmallVector<std::pair<Function *, int>, 16> InlineHistory;
     OptimizationRemarkEmitter ORE(F);
-    Changed |= flattenFunction(*F, Policy, ORE);
+
+    // Collect initial calls.
+    for (BasicBlock &BB : *F)
+      for (Instruction &I : BB)
+        if (auto *CB = dyn_cast<CallBase>(&I)) {
+          if (CB->getAttributes().hasFnAttr(Attribute::NoInline))
+            continue;
+          Function *Callee = CB->getCalledFunction();
+          if (!Callee || Callee->isDeclaration())
+            continue;
+          Worklist.push_back({CB, -1});
+        }
+
+    while (!Worklist.empty()) {
+      auto Item = Worklist.pop_back_val();
+      CallBase *CB = Item.first;
+      int InlineHistoryID = Item.second;
+      Function *Callee = CB->getCalledFunction();
+      if (!Callee)
+        continue;
+
+      // Detect recursion.
+      if (Callee == F ||
+          inlineHistoryIncludes(Callee, InlineHistoryID, InlineHistory)) {
+        ORE.emit([&]() {
+          return OptimizationRemarkMissed("inline", "NotInlined",
+                                          CB->getDebugLoc(), CB->getParent())
+                 << "'" << ore::NV("Callee", Callee)
+                 << "' is not inlined into '"
+                 << ore::NV("Caller", CB->getCaller())
+                 << "': recursive call during flattening";
+        });
+        continue;
+      }
+
+      if (!IH.canInline(*Callee))
+        continue;
+
+      // Check TTI for target-specific inlining restrictions (e.g., SME ABI).
+      TargetTransformInfo &TTI = GetTTI(*Callee);
+      if (!TTI.areInlineCompatible(F, Callee))
+        continue;
+
+      if (!IH.tryInline(*CB, "flatten attribute"))
+        continue;
+
+      IH.addToMaybeInlinedFunctions(*Callee);
+      Changed = true;
+
+      // Add new call sites from the inlined function to the worklist.
+      ArrayRef<CallBase *> NewCallSites = IH.getInlinedCallSites();
+      if (!NewCallSites.empty()) {
+        int NewHistoryID = InlineHistory.size();
+        InlineHistory.push_back({Callee, InlineHistoryID});
+        for (CallBase *NewCB : NewCallSites) {
+          Function *NewCallee = NewCB->getCalledFunction();
+          if (NewCallee && !NewCallee->isDeclaration() &&
+              !NewCB->getAttributes().hasFnAttr(Attribute::NoInline))
+            Worklist.push_back({NewCB, NewHistoryID});
+        }
+      }
+    }
   }
 
   Changed |= IH.postInlinerCleanup();
diff --git a/llvm/lib/Transforms/IPO/Inliner.cpp b/llvm/lib/Transforms/IPO/Inliner.cpp
index b0ce7eb457a84..b7d5ee367d33b 100644
--- a/llvm/lib/Transforms/IPO/Inliner.cpp
+++ b/llvm/lib/Transforms/IPO/Inliner.cpp
@@ -181,60 +181,6 @@ InlinerPass::getAdvisor(const ModuleAnalysisManagerCGSCCProxy::Result &MAM,
   return *IAA->getAdvisor();
 }
 
-/// Policy for flattenFunction template used by CGSCC Inliner.
-class CGSCCInlinerFlattenPolicy {
-  FunctionAnalysisManager &FAM;
-  InlineAdvisor &Advisor;
-
-  std::function<AssumptionCache &(Function &)> GetAssumptionCache;
-  InlineFunctionInfo IFI;
-
-public:
-  CGSCCInlinerFlattenPolicy(FunctionAnalysisManager &FAM,
-                            ProfileSummaryInfo *PSI, InlineAdvisor &Advisor)
-      : FAM(FAM), Advisor(Advisor),
-        GetAssumptionCache([&FAM](Function &Fn) -> AssumptionCache & {
-          return FAM.getResult<AssumptionAnalysis>(Fn);
-        }),
-        IFI(GetAssumptionCache, PSI) {}
-
-  bool canInlineCall(Function &F, CallBase &CB) {
-    // This is called both during initial collection and during worklist
-    // processing. We only do cheap checks here - the advisor is called
-    // in doInline to avoid creating InlineAdvice objects that might not
-    // be properly recorded.
-    Function *Callee = CB.getCalledFunction();
-    if (!Callee || Callee->isDeclaration())
-      return false;
-    return isInlineViable(*Callee).isSuccess();
-  }
-
-  bool doInline(Function &F, CallBase &CB, Function &Callee) {
-    // Use the advisor to check viability without performing cost analysis.
-    // For flatten, we want to inline all viable calls regardless of cost.
-    std::unique_ptr<InlineAdvice> Advice = Advisor.getAdviceWithoutCost(CB);
-    if (!Advice)
-      return false;
-    if (!Advice->isInliningRecommended()) {
-      Advice->recordUnattemptedInlining();
-      return false;
-    }
-
-    IFI.reset();
-    InlineResult IR =
-        InlineFunction(CB, IFI, /*MergeAttributes=*/true,
-                       &FAM.getResult<AAManager>(F), /*InsertLifetime=*/true);
-    if (!IR.isSuccess()) {
-      Advice->recordUnsuccessfulInlining(IR);
-      return false;
-    }
-    Advice->recordInlining();
-    return true;
-  }
-
-  ArrayRef<CallBase *> getNewCallSites() { return IFI.InlinedCallSites; }
-};
-
 void makeFunctionBodyUnreachable(Function &F) {
   F.dropAllReferences();
   for (BasicBlock &BB : make_early_inc_range(F))
@@ -288,15 +234,8 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
   // incrementally maknig a single function grow in a super linear fashion.
   SmallVector<std::pair<CallBase *, int>, 16> Calls;
 
-  // Track functions with flatten attribute for processing at the end.
-  SmallSetVector<Function *, 4> FlattenFunctions;
-
   // Populate the initial list of calls in this SCC.
   for (auto &N : InitialC) {
-    Function &Fn = N.getFunction();
-    if (Fn.hasFnAttribute(Attribute::Flatten))
-      FlattenFunctions.insert(&Fn);
-
     auto &ORE =
         FAM.getResult<OptimizationRemarkEmitterAnalysis>(N.getFunction());
     // We want to generally process call sites top-down in order for
@@ -405,8 +344,12 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
         continue;
       }
 
+      // For flatten callers, inline all viable calls without cost analysis.
+      bool IsFlatten = F.hasFnAttribute(Attribute::Flatten) &&
+                       !CB->getAttributes().hasFnAttr(Attribute::NoInline);
       std::unique_ptr<InlineAdvice> Advice =
-          Advisor.getAdvice(*CB, OnlyMandatory);
+          IsFlatten ? Advisor.getAdviceWithoutCost(*CB)
+                    : Advisor.getAdvice(*CB, OnlyMandatory);
 
       // Check whether we want to inline this callsite.
       if (!Advice)
@@ -582,17 +525,6 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
     FAM.invalidate(F, PreservedAnalyses::none());
   }
 
-  // Now flatten functions with the flatten attribute.
-  for (Function *FlattenF : FlattenFunctions) {
-    CGSCCInlinerFlattenPolicy Policy(FAM, PSI, Advisor);
-    OptimizationRemarkEmitter &ORE =
-        FAM.getResult<OptimizationRemarkEmitterAnalysis>(*FlattenF);
-    bool FlattenChanged = flattenFunction(*FlattenF, Policy, ORE);
-    if (FlattenChanged)
-      FAM.invalidate(*FlattenF, PreservedAnalyses::none());
-    Changed |= FlattenChanged;
-  }
-
   // We must ensure that we only delete functions with comdats if every function
   // in the comdat is going to be deleted.
   if (!DeadFunctionsInComdats.empty()) {

>From acbb51739faf6411f9a1e97a34703c73008fa411 Mon Sep 17 00:00:00 2001
From: Grigory Pastukhov <gpastukhov at meta.com>
Date: Tue, 24 Feb 2026 14:58:03 -0800
Subject: [PATCH 10/16] Handle flatten in getAttributeBasedInliningDecision
 instead of getAdviceWithoutCost

---
 llvm/include/llvm/Analysis/InlineAdvisor.h |  7 -------
 llvm/lib/Analysis/InlineAdvisor.cpp        | 11 -----------
 llvm/lib/Analysis/InlineCost.cpp           |  8 ++++++++
 llvm/lib/Transforms/IPO/Inliner.cpp        |  6 +-----
 4 files changed, 9 insertions(+), 23 deletions(-)

diff --git a/llvm/include/llvm/Analysis/InlineAdvisor.h b/llvm/include/llvm/Analysis/InlineAdvisor.h
index d45b2c6dcd079..50ba3c13da70f 100644
--- a/llvm/include/llvm/Analysis/InlineAdvisor.h
+++ b/llvm/include/llvm/Analysis/InlineAdvisor.h
@@ -180,13 +180,6 @@ class LLVM_ABI InlineAdvisor {
   std::unique_ptr<InlineAdvice> getAdvice(CallBase &CB,
                                           bool MandatoryOnly = false);
 
-  /// Get an InlineAdvice for a call site without performing cost analysis.
-  /// This is useful for cases like the flatten attribute where we want to
-  /// inline all viable calls regardless of cost. The viability checks
-  /// (TTI compatibility, noinline attributes, etc.) are still performed.
-  /// Returns advice with isInliningRecommended() = true if the call is viable.
-  std::unique_ptr<InlineAdvice> getAdviceWithoutCost(CallBase &CB);
-
   /// This must be called when the Inliner pass is entered, to allow the
   /// InlineAdvisor update internal state, as result of function passes run
   /// between Inliner pass runs (for the same module).
diff --git a/llvm/lib/Analysis/InlineAdvisor.cpp b/llvm/lib/Analysis/InlineAdvisor.cpp
index abee47a484eb7..1fb2f7e780031 100644
--- a/llvm/lib/Analysis/InlineAdvisor.cpp
+++ b/llvm/lib/Analysis/InlineAdvisor.cpp
@@ -654,17 +654,6 @@ std::unique_ptr<InlineAdvice> InlineAdvisor::getAdvice(CallBase &CB,
   return getMandatoryAdvice(CB, Advice);
 }
 
-std::unique_ptr<InlineAdvice>
-InlineAdvisor::getAdviceWithoutCost(CallBase &CB) {
-  // Check if the call is viable for inlining without performing cost analysis.
-  // This is useful for cases like the flatten attribute where we want to
-  // inline all viable calls regardless of cost.
-  bool IsViable = CB.getCaller() != CB.getCalledFunction() &&
-                  MandatoryInliningKind::Never !=
-                      getMandatoryKind(CB, FAM, getCallerORE(CB));
-  return getMandatoryAdvice(CB, IsViable);
-}
-
 OptimizationRemarkEmitter &InlineAdvisor::getCallerORE(CallBase &CB) {
   return FAM.getResult<OptimizationRemarkEmitterAnalysis>(*CB.getCaller());
 }
diff --git a/llvm/lib/Analysis/InlineCost.cpp b/llvm/lib/Analysis/InlineCost.cpp
index e0054e3ed6ee2..f949ac94b99cc 100644
--- a/llvm/lib/Analysis/InlineCost.cpp
+++ b/llvm/lib/Analysis/InlineCost.cpp
@@ -3254,6 +3254,14 @@ std::optional<InlineResult> llvm::getAttributeBasedInliningDecision(
   if (Callee->hasFnAttribute("loader-replaceable"))
     return InlineResult::failure("loader replaceable function attribute");
 
+  // Flatten: inline all viable calls from flatten functions regardless of cost.
+  if (Caller->hasFnAttribute(Attribute::Flatten)) {
+    auto IsViable = isInlineViable(*Callee);
+    if (IsViable.isSuccess())
+      return InlineResult::success();
+    return InlineResult::failure(IsViable.getFailureReason());
+  }
+
   return std::nullopt;
 }
 
diff --git a/llvm/lib/Transforms/IPO/Inliner.cpp b/llvm/lib/Transforms/IPO/Inliner.cpp
index b7d5ee367d33b..af11c5415f795 100644
--- a/llvm/lib/Transforms/IPO/Inliner.cpp
+++ b/llvm/lib/Transforms/IPO/Inliner.cpp
@@ -344,12 +344,8 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
         continue;
       }
 
-      // For flatten callers, inline all viable calls without cost analysis.
-      bool IsFlatten = F.hasFnAttribute(Attribute::Flatten) &&
-                       !CB->getAttributes().hasFnAttr(Attribute::NoInline);
       std::unique_ptr<InlineAdvice> Advice =
-          IsFlatten ? Advisor.getAdviceWithoutCost(*CB)
-                    : Advisor.getAdvice(*CB, OnlyMandatory);
+          Advisor.getAdvice(*CB, OnlyMandatory);
 
       // Check whether we want to inline this callsite.
       if (!Advice)

>From 88ef480d74301d7d7db4db83e197ed7abd78c59e Mon Sep 17 00:00:00 2001
From: Grigory Pastukhov <gpastukhov at meta.com>
Date: Tue, 24 Feb 2026 15:23:47 -0800
Subject: [PATCH 11/16] Move inlineHistoryIncludes from InliningUtils.h to
 Cloning.h/InlineFunction.cpp

---
 .../llvm/Transforms/IPO/InliningUtils.h       | 40 -------------------
 llvm/include/llvm/Transforms/Utils/Cloning.h  |  8 ++++
 llvm/lib/Transforms/IPO/AlwaysInliner.cpp     |  1 -
 llvm/lib/Transforms/IPO/Inliner.cpp           |  1 -
 llvm/lib/Transforms/Utils/InlineFunction.cpp  | 13 ++++++
 5 files changed, 21 insertions(+), 42 deletions(-)
 delete mode 100644 llvm/include/llvm/Transforms/IPO/InliningUtils.h

diff --git a/llvm/include/llvm/Transforms/IPO/InliningUtils.h b/llvm/include/llvm/Transforms/IPO/InliningUtils.h
deleted file mode 100644
index 021ff00cdd202..0000000000000
--- a/llvm/include/llvm/Transforms/IPO/InliningUtils.h
+++ /dev/null
@@ -1,40 +0,0 @@
-//===- InliningUtils.h - Shared inlining utilities -------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines shared utilities used by the inliner passes.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_TRANSFORMS_IPO_INLININGUTILS_H
-#define LLVM_TRANSFORMS_IPO_INLININGUTILS_H
-
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/IR/Function.h"
-
-namespace llvm {
-
-/// Check if Function F appears in the inline history chain.
-/// InlineHistory is a vector of (Function, ParentHistoryID) pairs.
-/// Returns true if F was already inlined in the chain leading to
-/// InlineHistoryID.
-inline bool inlineHistoryIncludes(
-    Function *F, int InlineHistoryID,
-    const SmallVectorImpl<std::pair<Function *, int>> &InlineHistory) {
-  while (InlineHistoryID != -1) {
-    assert(unsigned(InlineHistoryID) < InlineHistory.size() &&
-           "Invalid inline history ID");
-    if (InlineHistory[InlineHistoryID].first == F)
-      return true;
-    InlineHistoryID = InlineHistory[InlineHistoryID].second;
-  }
-  return false;
-}
-
-} // namespace llvm
-
-#endif // LLVM_TRANSFORMS_IPO_INLININGUTILS_H
diff --git a/llvm/include/llvm/Transforms/Utils/Cloning.h b/llvm/include/llvm/Transforms/Utils/Cloning.h
index cfa06a5be79fd..b0df674e9b0d7 100644
--- a/llvm/include/llvm/Transforms/Utils/Cloning.h
+++ b/llvm/include/llvm/Transforms/Utils/Cloning.h
@@ -439,6 +439,14 @@ LLVM_ABI void cloneAndAdaptNoAliasScopes(ArrayRef<MDNode *> NoAliasDeclScopes,
 LLVM_ABI void cloneAndAdaptNoAliasScopes(ArrayRef<MDNode *> NoAliasDeclScopes,
                                          Instruction *IStart, Instruction *IEnd,
                                          LLVMContext &Context, StringRef Ext);
+/// Check if Function F appears in the inline history chain.
+/// InlineHistory is a vector of (Function, ParentHistoryID) pairs.
+/// Returns true if F was already inlined in the chain leading to
+/// InlineHistoryID.
+LLVM_ABI bool inlineHistoryIncludes(
+    Function *F, int InlineHistoryID,
+    const SmallVectorImpl<std::pair<Function *, int>> &InlineHistory);
+
 } // end namespace llvm
 
 #endif // LLVM_TRANSFORMS_UTILS_CLONING_H
diff --git a/llvm/lib/Transforms/IPO/AlwaysInliner.cpp b/llvm/lib/Transforms/IPO/AlwaysInliner.cpp
index d27eb9d777028..c0c3cfee8af0d 100644
--- a/llvm/lib/Transforms/IPO/AlwaysInliner.cpp
+++ b/llvm/lib/Transforms/IPO/AlwaysInliner.cpp
@@ -22,7 +22,6 @@
 #include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/IR/Module.h"
 #include "llvm/InitializePasses.h"
-#include "llvm/Transforms/IPO/InliningUtils.h"
 #include "llvm/Transforms/Utils/Cloning.h"
 #include "llvm/Transforms/Utils/ModuleUtils.h"
 
diff --git a/llvm/lib/Transforms/IPO/Inliner.cpp b/llvm/lib/Transforms/IPO/Inliner.cpp
index af11c5415f795..d795fbbbe4120 100644
--- a/llvm/lib/Transforms/IPO/Inliner.cpp
+++ b/llvm/lib/Transforms/IPO/Inliner.cpp
@@ -53,7 +53,6 @@
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/IPO/InliningUtils.h"
 #include "llvm/Transforms/Utils/CallPromotionUtils.h"
 #include "llvm/Transforms/Utils/Cloning.h"
 #include "llvm/Transforms/Utils/Local.h"
diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp
index 3230b306f17d1..a437e8d52f805 100644
--- a/llvm/lib/Transforms/Utils/InlineFunction.cpp
+++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp
@@ -3443,3 +3443,16 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
 
   return Result;
 }
+
+bool llvm::inlineHistoryIncludes(
+    Function *F, int InlineHistoryID,
+    const SmallVectorImpl<std::pair<Function *, int>> &InlineHistory) {
+  while (InlineHistoryID != -1) {
+    assert(unsigned(InlineHistoryID) < InlineHistory.size() &&
+           "Invalid inline history ID");
+    if (InlineHistory[InlineHistoryID].first == F)
+      return true;
+    InlineHistoryID = InlineHistory[InlineHistoryID].second;
+  }
+  return false;
+}

>From 6e2f08c96e59a1de8c73a718db08b7c41803db83 Mon Sep 17 00:00:00 2001
From: Grigory Pastukhov <gpastukhov at meta.com>
Date: Wed, 25 Feb 2026 09:08:44 -0800
Subject: [PATCH 12/16] Remove InlinerHelper class, use local TryInline lambda
 instead

---
 llvm/lib/Transforms/IPO/AlwaysInliner.cpp | 142 ++++++++--------------
 1 file changed, 51 insertions(+), 91 deletions(-)

diff --git a/llvm/lib/Transforms/IPO/AlwaysInliner.cpp b/llvm/lib/Transforms/IPO/AlwaysInliner.cpp
index c0c3cfee8af0d..629befa66cfbc 100644
--- a/llvm/lib/Transforms/IPO/AlwaysInliner.cpp
+++ b/llvm/lib/Transforms/IPO/AlwaysInliner.cpp
@@ -31,38 +31,26 @@ using namespace llvm;
 
 namespace {
 
-class InlinerHelper {
-  Module &M;
-  FunctionAnalysisManager *FAM;
-  function_ref<AssumptionCache &(Function &)> GetAssumptionCache;
-  function_ref<AAResults &(Function &)> GetAAR;
-  bool InsertLifetime;
-
-  SmallSetVector<Function *, 16> MaybeInlinedFunctions;
-  InlineFunctionInfo IFI;
-
-public:
-  InlinerHelper(Module &M, ProfileSummaryInfo &PSI,
-                FunctionAnalysisManager *FAM,
-                function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
-                function_ref<AAResults &(Function &)> GetAAR,
-                bool InsertLifetime)
-      : M(M), FAM(FAM), GetAssumptionCache(GetAssumptionCache), GetAAR(GetAAR),
-        InsertLifetime(InsertLifetime), IFI(GetAssumptionCache, &PSI) {}
-
-  bool canInline(Function &F) {
-    return !F.isPresplitCoroutine() && !F.isDeclaration() &&
-           isInlineViable(F).isSuccess();
-  }
+bool AlwaysInlineImpl(
+    Module &M, bool InsertLifetime, ProfileSummaryInfo &PSI,
+    FunctionAnalysisManager *FAM,
+    function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
+    function_ref<AAResults &(Function &)> GetAAR,
+    function_ref<TargetTransformInfo &(Function &)> GetTTI) {
+  SmallSetVector<CallBase *, 16> Calls;
+  bool Changed = false;
+  SmallVector<Function *, 16> InlinedComdatFunctions;
+  SmallVector<Function *, 4> NeedFlattening;
 
-  bool tryInline(CallBase &CB, StringRef InlignReason) {
-    IFI.reset();
-    Function &Callee = *CB.getCalledFunction();
+  auto TryInline = [&](CallBase &CB, Function &Callee,
+                       OptimizationRemarkEmitter &ORE, const char *InlineReason,
+                       SmallVectorImpl<CallBase *> *NewCallSites =
+                           nullptr) -> bool {
     Function *Caller = CB.getCaller();
-    OptimizationRemarkEmitter ORE(Caller);
     DebugLoc DLoc = CB.getDebugLoc();
     BasicBlock *Block = CB.getParent();
 
+    InlineFunctionInfo IFI(GetAssumptionCache, &PSI);
     InlineResult Res = InlineFunction(CB, IFI, /*MergeAttributes=*/true,
                                       &GetAAR(Callee), InsertLifetime);
     if (!Res.isSuccess()) {
@@ -76,74 +64,24 @@ class InlinerHelper {
     }
 
     emitInlinedIntoBasedOnCost(ORE, DLoc, Block, Callee, *Caller,
-                               InlineCost::getAlways(InlignReason.data()),
+                               InlineCost::getAlways(InlineReason),
                                /*ForProfileContext=*/false, DEBUG_TYPE);
     if (FAM)
       FAM->invalidate(*Caller, PreservedAnalyses::none());
+    if (NewCallSites)
+      *NewCallSites = std::move(IFI.InlinedCallSites);
     return true;
-  }
-
-  ArrayRef<CallBase *> getInlinedCallSites() const {
-    return IFI.InlinedCallSites;
-  }
-
-  void addToMaybeInlinedFunctions(Function &F) {
-    MaybeInlinedFunctions.insert(&F);
-  }
-
-  bool postInlinerCleanup() {
-    SmallVector<Function *, 16> InlinedComdatFunctions;
-    bool Changed = false;
-    for (Function *F : MaybeInlinedFunctions) {
-      F->removeDeadConstantUsers();
-      if (F->hasFnAttribute(Attribute::AlwaysInline) &&
-          F->isDefTriviallyDead()) {
-        if (F->hasComdat()) {
-          InlinedComdatFunctions.push_back(F);
-        } else {
-          if (FAM)
-            FAM->clear(*F, F->getName());
-          M.getFunctionList().erase(F);
-          Changed = true;
-        }
-      }
-    }
-    if (!InlinedComdatFunctions.empty()) {
-      // Now we just have the comdat functions. Filter out the ones whose
-      // comdats are not actually dead.
-      filterDeadComdatFunctions(InlinedComdatFunctions);
-      // The remaining functions are actually dead.
-      for (Function *F : InlinedComdatFunctions) {
-        if (FAM)
-          FAM->clear(*F, F->getName());
-        M.getFunctionList().erase(F);
-        Changed = true;
-      }
-    }
-    return Changed;
-  }
-};
-
-bool AlwaysInlineImpl(
-    Module &M, bool InsertLifetime, ProfileSummaryInfo &PSI,
-    FunctionAnalysisManager *FAM,
-    function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
-    function_ref<AAResults &(Function &)> GetAAR,
-    function_ref<TargetTransformInfo &(Function &)> GetTTI) {
-  SmallSetVector<CallBase *, 16> Calls;
-  InlinerHelper IH(M, PSI, FAM, GetAssumptionCache, GetAAR, InsertLifetime);
-  SmallVector<Function *, 4> NeedFlattening;
-
-  bool Changed = false;
-  SmallVector<Function *, 16> InlinedComdatFunctions;
+  };
 
   for (Function &F : make_early_inc_range(M)) {
     if (F.hasFnAttribute(Attribute::Flatten))
       NeedFlattening.push_back(&F);
 
-    if (!IH.canInline(F))
+    if (F.isPresplitCoroutine())
+      continue;
+
+    if (F.isDeclaration() || !isInlineViable(F).isSuccess())
       continue;
-    IH.addToMaybeInlinedFunctions(F);
 
     Calls.clear();
 
@@ -155,7 +93,20 @@ bool AlwaysInlineImpl(
           Calls.insert(CB);
 
     for (CallBase *CB : Calls) {
-      Changed |= IH.tryInline(*CB, "always inline attribute");
+      OptimizationRemarkEmitter ORE(CB->getCaller());
+      Changed |= TryInline(*CB, F, ORE, "always inline attribute");
+    }
+
+    F.removeDeadConstantUsers();
+    if (F.hasFnAttribute(Attribute::AlwaysInline) && F.isDefTriviallyDead()) {
+      if (F.hasComdat()) {
+        InlinedComdatFunctions.push_back(&F);
+      } else {
+        if (FAM)
+          FAM->clear(F, F.getName());
+        M.getFunctionList().erase(F);
+        Changed = true;
+      }
     }
   }
 
@@ -163,6 +114,7 @@ bool AlwaysInlineImpl(
   for (Function *F : NeedFlattening) {
     SmallVector<std::pair<CallBase *, int>, 16> Worklist;
     SmallVector<std::pair<Function *, int>, 16> InlineHistory;
+    SmallVector<CallBase *> NewCallSites;
     OptimizationRemarkEmitter ORE(F);
 
     // Collect initial calls.
@@ -199,7 +151,8 @@ bool AlwaysInlineImpl(
         continue;
       }
 
-      if (!IH.canInline(*Callee))
+      if (Callee->isPresplitCoroutine() || Callee->isDeclaration() ||
+          !isInlineViable(*Callee).isSuccess())
         continue;
 
       // Check TTI for target-specific inlining restrictions (e.g., SME ABI).
@@ -207,14 +160,12 @@ bool AlwaysInlineImpl(
       if (!TTI.areInlineCompatible(F, Callee))
         continue;
 
-      if (!IH.tryInline(*CB, "flatten attribute"))
+      if (!TryInline(*CB, *Callee, ORE, "flatten attribute", &NewCallSites))
         continue;
 
-      IH.addToMaybeInlinedFunctions(*Callee);
       Changed = true;
 
       // Add new call sites from the inlined function to the worklist.
-      ArrayRef<CallBase *> NewCallSites = IH.getInlinedCallSites();
       if (!NewCallSites.empty()) {
         int NewHistoryID = InlineHistory.size();
         InlineHistory.push_back({Callee, InlineHistoryID});
@@ -228,7 +179,16 @@ bool AlwaysInlineImpl(
     }
   }
 
-  Changed |= IH.postInlinerCleanup();
+  if (!InlinedComdatFunctions.empty()) {
+    filterDeadComdatFunctions(InlinedComdatFunctions);
+    for (Function *F : InlinedComdatFunctions) {
+      if (FAM)
+        FAM->clear(*F, F->getName());
+      M.getFunctionList().erase(F);
+      Changed = true;
+    }
+  }
+
   return Changed;
 }
 

>From 50122c434ab75c6b90e820f10240d6ed7c824ad5 Mon Sep 17 00:00:00 2001
From: Grigory Pastukhov <gpastukhov at meta.com>
Date: Wed, 25 Feb 2026 09:34:44 -0800
Subject: [PATCH 13/16] Remove duplicate static inlineHistoryIncludes from
 ModuleInliner.cpp

---
 llvm/lib/Transforms/IPO/ModuleInliner.cpp | 15 ---------------
 1 file changed, 15 deletions(-)

diff --git a/llvm/lib/Transforms/IPO/ModuleInliner.cpp b/llvm/lib/Transforms/IPO/ModuleInliner.cpp
index 3e0bb6d1432b2..31c26c9fb8c06 100644
--- a/llvm/lib/Transforms/IPO/ModuleInliner.cpp
+++ b/llvm/lib/Transforms/IPO/ModuleInliner.cpp
@@ -56,21 +56,6 @@ static cl::opt<bool> CtxProfPromoteAlwaysInline(
              "promotion for that target. If multiple targets for an indirect "
              "call site fit this description, they are all promoted."));
 
-/// Return true if the specified inline history ID
-/// indicates an inline history that includes the specified function.
-static bool inlineHistoryIncludes(
-    Function *F, int InlineHistoryID,
-    const SmallVectorImpl<std::pair<Function *, int>> &InlineHistory) {
-  while (InlineHistoryID != -1) {
-    assert(unsigned(InlineHistoryID) < InlineHistory.size() &&
-           "Invalid inline history ID");
-    if (InlineHistory[InlineHistoryID].first == F)
-      return true;
-    InlineHistoryID = InlineHistory[InlineHistoryID].second;
-  }
-  return false;
-}
-
 InlineAdvisor &ModuleInlinerPass::getAdvisor(const ModuleAnalysisManager &MAM,
                                              FunctionAnalysisManager &FAM,
                                              Module &M) {

>From bf43762c3e62d41ded4546523e47ccb14b781438 Mon Sep 17 00:00:00 2001
From: Grigory Pastukhov <gpastukhov at meta.com>
Date: Mon, 2 Mar 2026 11:20:46 -0800
Subject: [PATCH 14/16] Address review comments: add braces and remove
 redundant check

Add braces to multi-line for loops per LLVM style and remove
redundant Callee->isDeclaration() check since both worklist entry
points already filter out declarations.
---
 llvm/lib/Transforms/IPO/AlwaysInliner.cpp | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Transforms/IPO/AlwaysInliner.cpp b/llvm/lib/Transforms/IPO/AlwaysInliner.cpp
index 629befa66cfbc..5ea282a6160ec 100644
--- a/llvm/lib/Transforms/IPO/AlwaysInliner.cpp
+++ b/llvm/lib/Transforms/IPO/AlwaysInliner.cpp
@@ -118,8 +118,8 @@ bool AlwaysInlineImpl(
     OptimizationRemarkEmitter ORE(F);
 
     // Collect initial calls.
-    for (BasicBlock &BB : *F)
-      for (Instruction &I : BB)
+    for (BasicBlock &BB : *F) {
+      for (Instruction &I : BB) {
         if (auto *CB = dyn_cast<CallBase>(&I)) {
           if (CB->getAttributes().hasFnAttr(Attribute::NoInline))
             continue;
@@ -128,6 +128,8 @@ bool AlwaysInlineImpl(
             continue;
           Worklist.push_back({CB, -1});
         }
+      }
+    }
 
     while (!Worklist.empty()) {
       auto Item = Worklist.pop_back_val();
@@ -151,8 +153,7 @@ bool AlwaysInlineImpl(
         continue;
       }
 
-      if (Callee->isPresplitCoroutine() || Callee->isDeclaration() ||
-          !isInlineViable(*Callee).isSuccess())
+      if (Callee->isPresplitCoroutine() || !isInlineViable(*Callee).isSuccess())
         continue;
 
       // Check TTI for target-specific inlining restrictions (e.g., SME ABI).

>From f1fedef981dbb8f5da8b3c231deac5ab34ba8325 Mon Sep 17 00:00:00 2001
From: Grigory Pastukhov <gpastukhov at meta.com>
Date: Mon, 2 Mar 2026 14:27:17 -0800
Subject: [PATCH 15/16] Address review: structured bindings, restore comments,
 use ArrayRef

Use structured bindings for worklist pop, restore accidentally
dropped comdat comments, and change inlineHistoryIncludes to take
ArrayRef instead of const SmallVectorImpl&.
---
 llvm/include/llvm/Transforms/Utils/Cloning.h | 7 ++++---
 llvm/lib/Transforms/IPO/AlwaysInliner.cpp    | 7 ++++---
 llvm/lib/Transforms/Utils/InlineFunction.cpp | 2 +-
 3 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/llvm/include/llvm/Transforms/Utils/Cloning.h b/llvm/include/llvm/Transforms/Utils/Cloning.h
index b0df674e9b0d7..434569fac8a71 100644
--- a/llvm/include/llvm/Transforms/Utils/Cloning.h
+++ b/llvm/include/llvm/Transforms/Utils/Cloning.h
@@ -17,6 +17,7 @@
 #ifndef LLVM_TRANSFORMS_UTILS_CLONING_H
 #define LLVM_TRANSFORMS_UTILS_CLONING_H
 
+#include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/Analysis/AssumptionCache.h"
@@ -443,9 +444,9 @@ LLVM_ABI void cloneAndAdaptNoAliasScopes(ArrayRef<MDNode *> NoAliasDeclScopes,
 /// InlineHistory is a vector of (Function, ParentHistoryID) pairs.
 /// Returns true if F was already inlined in the chain leading to
 /// InlineHistoryID.
-LLVM_ABI bool inlineHistoryIncludes(
-    Function *F, int InlineHistoryID,
-    const SmallVectorImpl<std::pair<Function *, int>> &InlineHistory);
+LLVM_ABI bool
+inlineHistoryIncludes(Function *F, int InlineHistoryID,
+                      ArrayRef<std::pair<Function *, int>> InlineHistory);
 
 } // end namespace llvm
 
diff --git a/llvm/lib/Transforms/IPO/AlwaysInliner.cpp b/llvm/lib/Transforms/IPO/AlwaysInliner.cpp
index 5ea282a6160ec..bda1d0b9573c0 100644
--- a/llvm/lib/Transforms/IPO/AlwaysInliner.cpp
+++ b/llvm/lib/Transforms/IPO/AlwaysInliner.cpp
@@ -132,9 +132,7 @@ bool AlwaysInlineImpl(
     }
 
     while (!Worklist.empty()) {
-      auto Item = Worklist.pop_back_val();
-      CallBase *CB = Item.first;
-      int InlineHistoryID = Item.second;
+      auto [CB, InlineHistoryID] = Worklist.pop_back_val();
       Function *Callee = CB->getCalledFunction();
       if (!Callee)
         continue;
@@ -181,7 +179,10 @@ bool AlwaysInlineImpl(
   }
 
   if (!InlinedComdatFunctions.empty()) {
+    // Now we just have the comdat functions. Filter out the ones whose
+    // comdats are not actually dead.
     filterDeadComdatFunctions(InlinedComdatFunctions);
+    // The remaining functions are actually dead.
     for (Function *F : InlinedComdatFunctions) {
       if (FAM)
         FAM->clear(*F, F->getName());
diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp
index a437e8d52f805..f3415cf4d5636 100644
--- a/llvm/lib/Transforms/Utils/InlineFunction.cpp
+++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp
@@ -3446,7 +3446,7 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
 
 bool llvm::inlineHistoryIncludes(
     Function *F, int InlineHistoryID,
-    const SmallVectorImpl<std::pair<Function *, int>> &InlineHistory) {
+    ArrayRef<std::pair<Function *, int>> InlineHistory) {
   while (InlineHistoryID != -1) {
     assert(unsigned(InlineHistoryID) < InlineHistory.size() &&
            "Invalid inline history ID");

>From d18d1bb67a4137d1defcff9e345474453df6f043 Mon Sep 17 00:00:00 2001
From: Grigory Pastukhov <gpastukhov at meta.com>
Date: Mon, 2 Mar 2026 14:49:58 -0800
Subject: [PATCH 16/16] Regenerate flatten-sme.ll checks with
 update_test_checks.py

---
 .../Transforms/Inline/AArch64/flatten-sme.ll  | 65 +++++++++++++++----
 1 file changed, 51 insertions(+), 14 deletions(-)

diff --git a/llvm/test/Transforms/Inline/AArch64/flatten-sme.ll b/llvm/test/Transforms/Inline/AArch64/flatten-sme.ll
index dcb82e995b69f..e3d302f5eca79 100644
--- a/llvm/test/Transforms/Inline/AArch64/flatten-sme.ll
+++ b/llvm/test/Transforms/Inline/AArch64/flatten-sme.ll
@@ -1,54 +1,91 @@
-; RUN: opt < %s -mtriple=aarch64-unknown-linux-gnu -mattr=+sme -S -passes=always-inline | FileCheck %s
-; RUN: opt < %s -mtriple=aarch64-unknown-linux-gnu -mattr=+sme -S -passes=inline | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -mtriple=aarch64-unknown-linux-gnu -mattr=+sme -S -passes=always-inline | FileCheck %s --check-prefixes=CHECK,ALWAYS
+; RUN: opt < %s -mtriple=aarch64-unknown-linux-gnu -mattr=+sme -S -passes=inline | FileCheck %s --check-prefixes=CHECK,INLINE
 
 ; Test that flatten attribute respects ABI restrictions for SME.
 ; Streaming callee cannot be inlined into non-streaming caller.
 ; new_za callee cannot be inlined at all.
 
 define internal i32 @streaming_callee() "aarch64_pstate_sm_enabled" {
+; CHECK-LABEL: define internal i32 @streaming_callee(
+; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:    ret i32 42
+;
   ret i32 42
 }
 
 define internal i32 @new_za_callee() "aarch64_new_za" {
+; CHECK-LABEL: define internal i32 @new_za_callee(
+; CHECK-SAME: ) #[[ATTR1:[0-9]+]] {
+; CHECK-NEXT:    ret i32 100
+;
   ret i32 100
 }
 
 define internal i32 @normal_callee() {
+; ALWAYS-LABEL: define internal i32 @normal_callee(
+; ALWAYS-SAME: ) #[[ATTR2:[0-9]+]] {
+; ALWAYS-NEXT:    ret i32 50
+;
   ret i32 50
 }
 
 ; Streaming callee -> non-streaming caller: should NOT be inlined (ABI violation).
 define i32 @test_streaming_not_inlined() flatten {
-; CHECK-LABEL: @test_streaming_not_inlined(
-; CHECK: call i32 @streaming_callee()
-; CHECK: ret i32
+; ALWAYS-LABEL: define i32 @test_streaming_not_inlined(
+; ALWAYS-SAME: ) #[[ATTR3:[0-9]+]] {
+; ALWAYS-NEXT:    [[R:%.*]] = call i32 @streaming_callee()
+; ALWAYS-NEXT:    ret i32 [[R]]
+;
+; INLINE-LABEL: define i32 @test_streaming_not_inlined(
+; INLINE-SAME: ) #[[ATTR2:[0-9]+]] {
+; INLINE-NEXT:    [[R:%.*]] = call i32 @streaming_callee()
+; INLINE-NEXT:    ret i32 [[R]]
+;
   %r = call i32 @streaming_callee()
   ret i32 %r
 }
 
 ; new_za callee: should NOT be inlined (ABI violation - callee allocates new ZA).
 define i32 @test_new_za_not_inlined() flatten {
-; CHECK-LABEL: @test_new_za_not_inlined(
-; CHECK: call i32 @new_za_callee()
-; CHECK: ret i32
+; ALWAYS-LABEL: define i32 @test_new_za_not_inlined(
+; ALWAYS-SAME: ) #[[ATTR3]] {
+; ALWAYS-NEXT:    [[R:%.*]] = call i32 @new_za_callee()
+; ALWAYS-NEXT:    ret i32 [[R]]
+;
+; INLINE-LABEL: define i32 @test_new_za_not_inlined(
+; INLINE-SAME: ) #[[ATTR2]] {
+; INLINE-NEXT:    [[R:%.*]] = call i32 @new_za_callee()
+; INLINE-NEXT:    ret i32 [[R]]
+;
   %r = call i32 @new_za_callee()
   ret i32 %r
 }
 
 ; Streaming caller -> streaming callee: should be inlined (compatible).
 define i32 @test_streaming_to_streaming() flatten "aarch64_pstate_sm_enabled" {
-; CHECK-LABEL: @test_streaming_to_streaming(
-; CHECK-NOT: call i32 @streaming_callee
-; CHECK: ret i32 42
+; ALWAYS-LABEL: define i32 @test_streaming_to_streaming(
+; ALWAYS-SAME: ) #[[ATTR4:[0-9]+]] {
+; ALWAYS-NEXT:    ret i32 42
+;
+; INLINE-LABEL: define i32 @test_streaming_to_streaming(
+; INLINE-SAME: ) #[[ATTR3:[0-9]+]] {
+; INLINE-NEXT:    ret i32 42
+;
   %r = call i32 @streaming_callee()
   ret i32 %r
 }
 
 ; Non-streaming caller -> non-streaming callee: should be inlined.
 define i32 @test_normal_inlined() flatten {
-; CHECK-LABEL: @test_normal_inlined(
-; CHECK-NOT: call i32 @normal_callee
-; CHECK: ret i32 50
+; ALWAYS-LABEL: define i32 @test_normal_inlined(
+; ALWAYS-SAME: ) #[[ATTR3]] {
+; ALWAYS-NEXT:    ret i32 50
+;
+; INLINE-LABEL: define i32 @test_normal_inlined(
+; INLINE-SAME: ) #[[ATTR2]] {
+; INLINE-NEXT:    ret i32 50
+;
   %r = call i32 @normal_callee()
   ret i32 %r
 }



More information about the llvm-commits mailing list