[llvm] cd9236d - Account for inline assembly instructions in inlining cost. (#146628)

via llvm-commits llvm-commits at lists.llvm.org
Wed Jul 9 10:48:10 PDT 2025


Author: Rahman Lavaee
Date: 2025-07-09T10:48:07-07:00
New Revision: cd9236d78833a3f312d0a38e53e3f12e9926bcf3

URL: https://github.com/llvm/llvm-project/commit/cd9236d78833a3f312d0a38e53e3f12e9926bcf3
DIFF: https://github.com/llvm/llvm-project/commit/cd9236d78833a3f312d0a38e53e3f12e9926bcf3.diff

LOG: Account for inline assembly instructions in inlining cost. (#146628)

Inliner currently treats every "call asm" IR instruction as a single
instruction regardless of how many instructions the inline assembly may
contain. This may underestimate the cost of inlining for a callee
containing long inline assembly. Besides, we may need to assign a higher
cost to instructions in inline assembly since they cannot be analyzed
and optimized by the compiler.

This PR introduces a new option `-inline-asm-instr-cost` (set to zero by
default), which controls the cost of each inline assembly instruction in
the inliner's cost-benefit analysis.

Added: 
    llvm/test/Transforms/Inline/inline-call-with-asm-call.ll

Modified: 
    llvm/lib/Analysis/InlineCost.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Analysis/InlineCost.cpp b/llvm/lib/Analysis/InlineCost.cpp
index 773a60479ae22..22f4d08448a22 100644
--- a/llvm/lib/Analysis/InlineCost.cpp
+++ b/llvm/lib/Analysis/InlineCost.cpp
@@ -37,6 +37,7 @@
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/GetElementPtrTypeIterator.h"
 #include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/InlineAsm.h"
 #include "llvm/IR/InstVisitor.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Operator.h"
@@ -141,6 +142,10 @@ static cl::opt<int>
     InstrCost("inline-instr-cost", cl::Hidden, cl::init(5),
               cl::desc("Cost of a single instruction when inlining"));
 
+static cl::opt<int> InlineAsmInstrCost(
+    "inline-asm-instr-cost", cl::Hidden, cl::init(0),
+    cl::desc("Cost of a single inline asm instruction when inlining"));
+
 static cl::opt<int>
     MemAccessCost("inline-memaccess-cost", cl::Hidden, cl::init(0),
                   cl::desc("Cost of load/store instruction when inlining"));
@@ -351,6 +356,9 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
   /// for.
   virtual void onMissedSimplification() {}
 
+  /// Account for inline assembly instructions.
+  virtual void onInlineAsm(const InlineAsm &Arg) {}
+
   /// Start accounting potential benefits due to SROA for the given alloca.
   virtual void onInitializeSROAArg(AllocaInst *Arg) {}
 
@@ -382,6 +390,7 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
   /// Number of bytes allocated statically by the callee.
   uint64_t AllocatedSize = 0;
   unsigned NumInstructions = 0;
+  unsigned NumInlineAsmInstructions = 0;
   unsigned NumVectorInstructions = 0;
 
   /// While we walk the potentially-inlined instructions, we build up and
@@ -777,6 +786,48 @@ class InlineCostCallAnalyzer final : public CallAnalyzer {
 
     addCost(SwitchCost);
   }
+
+  // Counts the instruction lines in the inline assembly \p Arg and adds
+  // InlineAsmInstrCost to the inlining cost for each of them; does nothing if
+  // that option is zero. The compiler cannot analyze or optimize such code.
+  void onInlineAsm(const InlineAsm &Arg) override {
+    if (!InlineAsmInstrCost)
+      return;
+    SmallVector<StringRef, 4> AsmStrs;
+    Arg.collectAsmStrs(AsmStrs);
+    int SectionLevel = 0;
+    int InlineAsmInstrCount = 0;
+    for (StringRef AsmStr : AsmStrs) {
+      // Strip surrounding whitespace and anything from the first '#' onwards.
+      StringRef Trimmed = AsmStr.trim();
+      size_t hashPos = Trimmed.find('#');
+      if (hashPos != StringRef::npos)
+        Trimmed = Trimmed.substr(0, hashPos);
+      // Skip lines that are empty after trimming (blank or comment-only).
+      if (Trimmed.empty())
+        continue;
+      // Filter out the outlined assembly instructions from the cost by keeping
+      // track of the section level and only accounting for instructions at
+      // section level zero. Note there will be duplication in outlined
+      // sections too, but it is not accounted for in the inlining cost model.
+      if (Trimmed.starts_with(".pushsection")) {
+        ++SectionLevel;
+        continue;
+      }
+      if (Trimmed.starts_with(".popsection")) {
+        --SectionLevel;
+        continue;
+      }
+      // Skip directives (leading '.') and labels (any line containing ':').
+      if (Trimmed.starts_with(".") || Trimmed.contains(":"))
+        continue;
+      if (SectionLevel == 0)
+        ++InlineAsmInstrCount;
+    }
+    NumInlineAsmInstructions += InlineAsmInstrCount;
+    addCost(InlineAsmInstrCount * InlineAsmInstrCost);
+  }
+
   void onMissedSimplification() override { addCost(InstrCost); }
 
   void onInitializeSROAArg(AllocaInst *Arg) override {
@@ -2420,6 +2471,9 @@ bool CallAnalyzer::visitCallBase(CallBase &Call) {
   if (isa<CallInst>(Call) && cast<CallInst>(Call).cannotDuplicate())
     ContainsNoDuplicateCall = true;
 
+  if (InlineAsm *InlineAsmOp = dyn_cast<InlineAsm>(Call.getCalledOperand()))
+    onInlineAsm(*InlineAsmOp);
+
   Function *F = Call.getCalledFunction();
   bool IsIndirectCall = !F;
   if (IsIndirectCall) {
@@ -3005,6 +3059,7 @@ void InlineCostCallAnalyzer::print(raw_ostream &OS) {
   DEBUG_PRINT_STAT(NumConstantPtrDiffs);
   DEBUG_PRINT_STAT(NumInstructionsSimplified);
   DEBUG_PRINT_STAT(NumInstructions);
+  DEBUG_PRINT_STAT(NumInlineAsmInstructions);
   DEBUG_PRINT_STAT(SROACostSavings);
   DEBUG_PRINT_STAT(SROACostSavingsLost);
   DEBUG_PRINT_STAT(LoadEliminationCost);

diff  --git a/llvm/test/Transforms/Inline/inline-call-with-asm-call.ll b/llvm/test/Transforms/Inline/inline-call-with-asm-call.ll
new file mode 100644
index 0000000000000..7d8121d04996e
--- /dev/null
+++ b/llvm/test/Transforms/Inline/inline-call-with-asm-call.ll
@@ -0,0 +1,35 @@
+;; Test to verify that when callee has inline assembly, bumping up `-inline-asm-instr-cost` would block inlining.
+
+; RUN: opt < %s -passes=inline -S | FileCheck %s --check-prefixes=CHECK,INLINE
+; RUN: opt < %s -passes='cgscc(inline)' -S | FileCheck %s --check-prefixes=CHECK,INLINE
+
+;; Verify that a low assembly instruction cost of 150 does not block inlining.
+;; This test also verifies that the outlined section's instructions (in "other"
+;; section) do not contribute to the cost.
+; RUN: opt < %s -passes=inline -inline-asm-instr-cost=150 -S | FileCheck %s --check-prefixes=CHECK,INLINE
+; RUN: opt < %s -passes='cgscc(inline)' -inline-asm-instr-cost=150 -S | FileCheck %s --check-prefixes=CHECK,INLINE
+
+;; Verify that an assembly instruction cost of 300 blocks inlining.
+; RUN: opt < %s -passes=inline -inline-asm-instr-cost=300 -S | FileCheck %s --check-prefixes=CHECK,NOINLINE
+; RUN: opt < %s -passes='cgscc(inline)' -inline-asm-instr-cost=300 -S | FileCheck %s --check-prefixes=CHECK,NOINLINE
+
+define void @caller(i32 %a, i1 %b) #0 {
+  call void @callee(i32 %a, i1 %b)
+  ret void
+}
+
+; CHECK: define void @caller
+; INLINE: call void asm
+; NOINLINE: call void @callee
+
+
+;; callee function with asm call with two real assembly instructions in the
+;; destination section and two assembly instructions in the outlined "other"
+;; section.
+define void @callee(i32 %a, i1 %b) {
+  call void asm sideeffect "s_nop 1\0A\09.pushsection other\0A\09s_nop 2\0A\09s_nop 3\0A\09.popsection\0A\09s_nop 4\0A\09.align 32", ""()
+  ret void
+}
+; CHECK: define void @callee
+
+


        


More information about the llvm-commits mailing list