[llvm] Account for inline assembly instructions in inlining cost. (PR #146628)

Rahman Lavaee via llvm-commits llvm-commits at lists.llvm.org
Wed Jul 9 10:27:52 PDT 2025


https://github.com/rlavaee updated https://github.com/llvm/llvm-project/pull/146628

>From 01ee7150f66370bc13f29d4605acc3ba69c35e94 Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Tue, 1 Jul 2025 17:54:28 +0000
Subject: [PATCH 1/2] Account for inline assembly instructions in inlining
 cost.

The inliner currently treats every "call asm" IR instruction as a single
instruction regardless of how many instructions the inline assembly may
contain. This may underestimate the cost of inlining for a callee
containing long inline assembly. Besides, we may need to assign a higher
cost to instructions in inline assembly since they cannot be analyzed
and optimized by the compiler.

This PR introduces a new option `-inline-asm-instr-cost` -- set to zero by
default -- which controls the cost of inline assembly instructions in the
inliner's cost-benefit analysis.
---
 llvm/lib/Analysis/InlineCost.cpp              | 50 +++++++++++++++++++
 .../Inline/inline-call-with-asm-call.ll       | 23 +++++++++
 2 files changed, 73 insertions(+)
 create mode 100644 llvm/test/Transforms/Inline/inline-call-with-asm-call.ll

diff --git a/llvm/lib/Analysis/InlineCost.cpp b/llvm/lib/Analysis/InlineCost.cpp
index 773a60479ae22..50a77805bfe78 100644
--- a/llvm/lib/Analysis/InlineCost.cpp
+++ b/llvm/lib/Analysis/InlineCost.cpp
@@ -37,6 +37,7 @@
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/GetElementPtrTypeIterator.h"
 #include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/InlineAsm.h"
 #include "llvm/IR/InstVisitor.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Operator.h"
@@ -141,6 +142,10 @@ static cl::opt<int>
     InstrCost("inline-instr-cost", cl::Hidden, cl::init(5),
               cl::desc("Cost of a single instruction when inlining"));
 
+static cl::opt<int> InlineAsmInstrCost(
+    "inline-asm-instr-cost", cl::Hidden, cl::init(0),
+    cl::desc("Cost of a single inline asm instruction when inlining"));
+
 static cl::opt<int>
     MemAccessCost("inline-memaccess-cost", cl::Hidden, cl::init(0),
                   cl::desc("Cost of load/store instruction when inlining"));
@@ -351,6 +356,9 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
   /// for.
   virtual void onMissedSimplification() {}
 
+  /// Account for inline assembly instructions.
+  virtual void onInlineAsm(InlineAsm &Arg) {}
+
   /// Start accounting potential benefits due to SROA for the given alloca.
   virtual void onInitializeSROAArg(AllocaInst *Arg) {}
 
@@ -382,6 +390,7 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
   /// Number of bytes allocated statically by the callee.
   uint64_t AllocatedSize = 0;
   unsigned NumInstructions = 0;
+  unsigned NumInlineAsmInstructions = 0;
   unsigned NumVectorInstructions = 0;
 
   /// While we walk the potentially-inlined instructions, we build up and
@@ -777,6 +786,42 @@ class InlineCostCallAnalyzer final : public CallAnalyzer {
 
     addCost(SwitchCost);
   }
+
+  // Parses the inline assembly argument to account for its cost. Inline
+  // assembly instructions incur higher costs for inlining since they cannot be
+  // analyzed and optimized.
+  void onInlineAsm(InlineAsm &Arg) override {
+    SmallVector<StringRef, 4> Fragments;
+    Arg.getAsmString().split(Fragments, "\n");
+    int SectionLevel = 0;
+    int InlineAsmInstrCount = 0;
+    for (const auto &Fragment : Fragments) {
+      // Trim whitespaces and comments.
+      auto Trimmed = Fragment.trim();
+      size_t hashPos = Trimmed.find('#');
+      if (hashPos != StringRef::npos)
+        Trimmed = Trimmed.substr(0, hashPos);
+      // Ignore comments.
+      if (Trimmed.empty())
+        continue;
+      if (Trimmed.starts_with(".pushsection")) {
+        ++SectionLevel;
+        continue;
+      }
+      if (Trimmed.starts_with(".popsection")) {
+        --SectionLevel;
+        continue;
+      }
+      // Ignore directives and labels.
+      if (Trimmed.starts_with(".") || Trimmed.contains(":"))
+        continue;
+      if (SectionLevel == 0)
+        ++InlineAsmInstrCount;
+    }
+    NumInlineAsmInstructions += InlineAsmInstrCount;
+    addCost(InlineAsmInstrCount * InlineAsmInstrCost);
+  }
+
   void onMissedSimplification() override { addCost(InstrCost); }
 
   void onInitializeSROAArg(AllocaInst *Arg) override {
@@ -2420,6 +2465,10 @@ bool CallAnalyzer::visitCallBase(CallBase &Call) {
   if (isa<CallInst>(Call) && cast<CallInst>(Call).cannotDuplicate())
     ContainsNoDuplicateCall = true;
 
+  if (InlineAsm *InlineAsmOp = dyn_cast<InlineAsm>(Call.getCalledOperand())) {
+    onInlineAsm(*InlineAsmOp);
+  }
+
   Function *F = Call.getCalledFunction();
   bool IsIndirectCall = !F;
   if (IsIndirectCall) {
@@ -3005,6 +3054,7 @@ void InlineCostCallAnalyzer::print(raw_ostream &OS) {
   DEBUG_PRINT_STAT(NumConstantPtrDiffs);
   DEBUG_PRINT_STAT(NumInstructionsSimplified);
   DEBUG_PRINT_STAT(NumInstructions);
+  DEBUG_PRINT_STAT(NumInlineAsmInstructions);
   DEBUG_PRINT_STAT(SROACostSavings);
   DEBUG_PRINT_STAT(SROACostSavingsLost);
   DEBUG_PRINT_STAT(LoadEliminationCost);
diff --git a/llvm/test/Transforms/Inline/inline-call-with-asm-call.ll b/llvm/test/Transforms/Inline/inline-call-with-asm-call.ll
new file mode 100644
index 0000000000000..93bbd0e028fdf
--- /dev/null
+++ b/llvm/test/Transforms/Inline/inline-call-with-asm-call.ll
@@ -0,0 +1,23 @@
+;; Test to verify that when callee has inline assembly, bumping up `-inline-asm-instr-cost` would block inlining.
+
+; RUN: opt < %s -passes=inline -S | FileCheck %s --check-prefixes=INLINE
+; RUN: opt < %s -passes='cgscc(inline)' -S | FileCheck %s --check-prefixes=INLINE
+; RUN: opt < %s -passes=inline -inline-asm-instr-cost=150 -S | FileCheck %s --check-prefixes=INLINE
+; RUN: opt < %s -passes='cgscc(inline)' -inline-asm-instr-cost=150 -S | FileCheck %s --check-prefixes=INLINE
+; RUN: opt < %s -passes=inline -inline-asm-instr-cost=300 -S | FileCheck %s --check-prefixes=NOINLINE
+; RUN: opt < %s -passes='cgscc(inline)' -inline-asm-instr-cost=300 -S | FileCheck %s --check-prefixes=NOINLINE
+
+; CHECK-LABEL: caller
+; CHECK-NOT: callee
+; INLINE: call void asm
+; NOINLINE: call void @callee
+
+define void @caller(i32 %a, i1 %b) #0 {
+  call void @callee(i32 %a, i1 %b)
+  ret void
+}
+
+define void @callee(i32 %a, i1 %b) {
+  call void asm sideeffect "s_nop 1\0A\09.pushsection other\0A\09s_nop 2\0A\09s_nop 3\0A\09.popsection\0A\09s_nop 4\0A\09.align 32", ""()
+  ret void
+}

>From 2a777632c789490f69f14a0135315f2ae433196f Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Mon, 7 Jul 2025 19:43:35 +0000
Subject: [PATCH 2/2] Add comments to explain the logic behind section level
 parsing.

---
 llvm/lib/Analysis/InlineCost.cpp              | 21 +++++++-----
 .../Inline/inline-call-with-asm-call.ll       | 34 +++++++++++++------
 2 files changed, 36 insertions(+), 19 deletions(-)

diff --git a/llvm/lib/Analysis/InlineCost.cpp b/llvm/lib/Analysis/InlineCost.cpp
index 50a77805bfe78..22f4d08448a22 100644
--- a/llvm/lib/Analysis/InlineCost.cpp
+++ b/llvm/lib/Analysis/InlineCost.cpp
@@ -357,7 +357,7 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
   virtual void onMissedSimplification() {}
 
   /// Account for inline assembly instructions.
-  virtual void onInlineAsm(InlineAsm &Arg) {}
+  virtual void onInlineAsm(const InlineAsm &Arg) {}
 
   /// Start accounting potential benefits due to SROA for the given alloca.
   virtual void onInitializeSROAArg(AllocaInst *Arg) {}
@@ -790,20 +790,26 @@ class InlineCostCallAnalyzer final : public CallAnalyzer {
   // Parses the inline assembly argument to account for its cost. Inline
   // assembly instructions incur higher costs for inlining since they cannot be
   // analyzed and optimized.
-  void onInlineAsm(InlineAsm &Arg) override {
-    SmallVector<StringRef, 4> Fragments;
-    Arg.getAsmString().split(Fragments, "\n");
+  void onInlineAsm(const InlineAsm &Arg) override {
+    if (!InlineAsmInstrCost)
+      return;
+    SmallVector<StringRef, 4> AsmStrs;
+    Arg.collectAsmStrs(AsmStrs);
     int SectionLevel = 0;
     int InlineAsmInstrCount = 0;
-    for (const auto &Fragment : Fragments) {
+    for (StringRef AsmStr : AsmStrs) {
       // Trim whitespaces and comments.
-      auto Trimmed = Fragment.trim();
+      StringRef Trimmed = AsmStr.trim();
       size_t hashPos = Trimmed.find('#');
       if (hashPos != StringRef::npos)
         Trimmed = Trimmed.substr(0, hashPos);
       // Ignore comments.
       if (Trimmed.empty())
         continue;
+      // Filter out the outlined assembly instructions from the cost by keeping
+      // track of the section level and only accounting for instructions at
+      // section level of zero. Note there will be duplication in outlined
+      // sections too, but it is not accounted for in the inlining cost model.
       if (Trimmed.starts_with(".pushsection")) {
         ++SectionLevel;
         continue;
@@ -2465,9 +2471,8 @@ bool CallAnalyzer::visitCallBase(CallBase &Call) {
   if (isa<CallInst>(Call) && cast<CallInst>(Call).cannotDuplicate())
     ContainsNoDuplicateCall = true;
 
-  if (InlineAsm *InlineAsmOp = dyn_cast<InlineAsm>(Call.getCalledOperand())) {
+  if (InlineAsm *InlineAsmOp = dyn_cast<InlineAsm>(Call.getCalledOperand()))
     onInlineAsm(*InlineAsmOp);
-  }
 
   Function *F = Call.getCalledFunction();
   bool IsIndirectCall = !F;
diff --git a/llvm/test/Transforms/Inline/inline-call-with-asm-call.ll b/llvm/test/Transforms/Inline/inline-call-with-asm-call.ll
index 93bbd0e028fdf..7d8121d04996e 100644
--- a/llvm/test/Transforms/Inline/inline-call-with-asm-call.ll
+++ b/llvm/test/Transforms/Inline/inline-call-with-asm-call.ll
@@ -1,23 +1,35 @@
 ;; Test to verify that when callee has inline assembly, bumping up `-inline-asm-instr-cost` would block inlining.
 
-; RUN: opt < %s -passes=inline -S | FileCheck %s --check-prefixes=INLINE
-; RUN: opt < %s -passes='cgscc(inline)' -S | FileCheck %s --check-prefixes=INLINE
-; RUN: opt < %s -passes=inline -inline-asm-instr-cost=150 -S | FileCheck %s --check-prefixes=INLINE
-; RUN: opt < %s -passes='cgscc(inline)' -inline-asm-instr-cost=150 -S | FileCheck %s --check-prefixes=INLINE
-; RUN: opt < %s -passes=inline -inline-asm-instr-cost=300 -S | FileCheck %s --check-prefixes=NOINLINE
-; RUN: opt < %s -passes='cgscc(inline)' -inline-asm-instr-cost=300 -S | FileCheck %s --check-prefixes=NOINLINE
-
-; CHECK-LABEL: caller
-; CHECK-NOT: callee
-; INLINE: call void asm
-; NOINLINE: call void @callee
+; RUN: opt < %s -passes=inline -S | FileCheck %s --check-prefixes=CHECK,INLINE
+; RUN: opt < %s -passes='cgscc(inline)' -S | FileCheck %s --check-prefixes=CHECK,INLINE
+
+;; Verify that a low assembly instruction cost of 150 does not block inlining.
+;; This test also verifies that the outlined section's instructions (in "other"
+;; section) do not contribute to the cost.
+; RUN: opt < %s -passes=inline -inline-asm-instr-cost=150 -S | FileCheck %s --check-prefixes=CHECK,INLINE
+; RUN: opt < %s -passes='cgscc(inline)' -inline-asm-instr-cost=150 -S | FileCheck %s --check-prefixes=CHECK,INLINE
+
+;; Verify that an assembly instruction cost of 300 blocks inlining.
+; RUN: opt < %s -passes=inline -inline-asm-instr-cost=300 -S | FileCheck %s --check-prefixes=CHECK,NOINLINE
+; RUN: opt < %s -passes='cgscc(inline)' -inline-asm-instr-cost=300 -S | FileCheck %s --check-prefixes=CHECK,NOINLINE
 
 define void @caller(i32 %a, i1 %b) #0 {
   call void @callee(i32 %a, i1 %b)
   ret void
 }
 
+; CHECK: define void @caller
+; INLINE: call void asm
+; NOINLINE: call void @callee
+
+
+;; callee function with asm call with two real assembly instructions in the
+;; destination section and two assembly instructions in the outlined "other"
+;; section.
 define void @callee(i32 %a, i1 %b) {
   call void asm sideeffect "s_nop 1\0A\09.pushsection other\0A\09s_nop 2\0A\09s_nop 3\0A\09.popsection\0A\09s_nop 4\0A\09.align 32", ""()
   ret void
 }
+; CHECK: define void @callee
+
+



More information about the llvm-commits mailing list