[llvm] Account for inline assembly instructions in inlining cost. (PR #146628)
Rahman Lavaee via llvm-commits
llvm-commits at lists.llvm.org
Wed Jul 9 10:27:52 PDT 2025
https://github.com/rlavaee updated https://github.com/llvm/llvm-project/pull/146628
>From 01ee7150f66370bc13f29d4605acc3ba69c35e94 Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Tue, 1 Jul 2025 17:54:28 +0000
Subject: [PATCH 1/2] Account for inline assembly instructions in inlining
cost.
The inliner currently treats every "call asm" IR instruction as a single
instruction regardless of how many instructions the inline assembly may
contain. This may underestimate the cost of inlining for a callee
containing long inline assembly. Besides, we may need to assign a higher
cost to instructions in inline assembly since they cannot be analyzed
and optimized by the compiler.
This PR introduces a new option `-inline-asm-instr-cost` (zero by
default), which controls the cost of inline assembly instructions in
the inliner's cost-benefit analysis.
---
llvm/lib/Analysis/InlineCost.cpp | 50 +++++++++++++++++++
.../Inline/inline-call-with-asm-call.ll | 23 +++++++++
2 files changed, 73 insertions(+)
create mode 100644 llvm/test/Transforms/Inline/inline-call-with-asm-call.ll
diff --git a/llvm/lib/Analysis/InlineCost.cpp b/llvm/lib/Analysis/InlineCost.cpp
index 773a60479ae22..50a77805bfe78 100644
--- a/llvm/lib/Analysis/InlineCost.cpp
+++ b/llvm/lib/Analysis/InlineCost.cpp
@@ -37,6 +37,7 @@
#include "llvm/IR/Dominators.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Operator.h"
@@ -141,6 +142,10 @@ static cl::opt<int>
InstrCost("inline-instr-cost", cl::Hidden, cl::init(5),
cl::desc("Cost of a single instruction when inlining"));
+static cl::opt<int> InlineAsmInstrCost(
+ "inline-asm-instr-cost", cl::Hidden, cl::init(0),
+ cl::desc("Cost of a single inline asm instruction when inlining"));
+
static cl::opt<int>
MemAccessCost("inline-memaccess-cost", cl::Hidden, cl::init(0),
cl::desc("Cost of load/store instruction when inlining"));
@@ -351,6 +356,9 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
/// for.
virtual void onMissedSimplification() {}
+ /// Account for inline assembly instructions.
+ virtual void onInlineAsm(InlineAsm &Arg) {}
+
/// Start accounting potential benefits due to SROA for the given alloca.
virtual void onInitializeSROAArg(AllocaInst *Arg) {}
@@ -382,6 +390,7 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
/// Number of bytes allocated statically by the callee.
uint64_t AllocatedSize = 0;
unsigned NumInstructions = 0;
+ unsigned NumInlineAsmInstructions = 0;
unsigned NumVectorInstructions = 0;
/// While we walk the potentially-inlined instructions, we build up and
@@ -777,6 +786,42 @@ class InlineCostCallAnalyzer final : public CallAnalyzer {
addCost(SwitchCost);
}
+
+ // Parses the inline assembly argument to account for its cost. Inline
+ // assembly instructions incur higher costs for inlining since they cannot be
+ // analyzed and optimized.
+ void onInlineAsm(InlineAsm &Arg) override {
+ SmallVector<StringRef, 4> Fragments;
+ Arg.getAsmString().split(Fragments, "\n");
+ int SectionLevel = 0;
+ int InlineAsmInstrCount = 0;
+ for (const auto &Fragment : Fragments) {
+ // Trim whitespaces and comments.
+ auto Trimmed = Fragment.trim();
+ size_t hashPos = Trimmed.find('#');
+ if (hashPos != StringRef::npos)
+ Trimmed = Trimmed.substr(0, hashPos);
+ // Ignore comments.
+ if (Trimmed.empty())
+ continue;
+ if (Trimmed.starts_with(".pushsection")) {
+ ++SectionLevel;
+ continue;
+ }
+ if (Trimmed.starts_with(".popsection")) {
+ --SectionLevel;
+ continue;
+ }
+ // Ignore directives and labels.
+ if (Trimmed.starts_with(".") || Trimmed.contains(":"))
+ continue;
+ if (SectionLevel == 0)
+ ++InlineAsmInstrCount;
+ }
+ NumInlineAsmInstructions += InlineAsmInstrCount;
+ addCost(InlineAsmInstrCount * InlineAsmInstrCost);
+ }
+
void onMissedSimplification() override { addCost(InstrCost); }
void onInitializeSROAArg(AllocaInst *Arg) override {
@@ -2420,6 +2465,10 @@ bool CallAnalyzer::visitCallBase(CallBase &Call) {
if (isa<CallInst>(Call) && cast<CallInst>(Call).cannotDuplicate())
ContainsNoDuplicateCall = true;
+ if (InlineAsm *InlineAsmOp = dyn_cast<InlineAsm>(Call.getCalledOperand())) {
+ onInlineAsm(*InlineAsmOp);
+ }
+
Function *F = Call.getCalledFunction();
bool IsIndirectCall = !F;
if (IsIndirectCall) {
@@ -3005,6 +3054,7 @@ void InlineCostCallAnalyzer::print(raw_ostream &OS) {
DEBUG_PRINT_STAT(NumConstantPtrDiffs);
DEBUG_PRINT_STAT(NumInstructionsSimplified);
DEBUG_PRINT_STAT(NumInstructions);
+ DEBUG_PRINT_STAT(NumInlineAsmInstructions);
DEBUG_PRINT_STAT(SROACostSavings);
DEBUG_PRINT_STAT(SROACostSavingsLost);
DEBUG_PRINT_STAT(LoadEliminationCost);
diff --git a/llvm/test/Transforms/Inline/inline-call-with-asm-call.ll b/llvm/test/Transforms/Inline/inline-call-with-asm-call.ll
new file mode 100644
index 0000000000000..93bbd0e028fdf
--- /dev/null
+++ b/llvm/test/Transforms/Inline/inline-call-with-asm-call.ll
@@ -0,0 +1,23 @@
+;; Test to verify that when callee has inline assembly, bumping up `-inline-asm-instr-cost` would block inlining.
+
+; RUN: opt < %s -passes=inline -S | FileCheck %s --check-prefixes=INLINE
+; RUN: opt < %s -passes='cgscc(inline)' -S | FileCheck %s --check-prefixes=INLINE
+; RUN: opt < %s -passes=inline -inline-asm-instr-cost=150 -S | FileCheck %s --check-prefixes=INLINE
+; RUN: opt < %s -passes='cgscc(inline)' -inline-asm-instr-cost=150 -S | FileCheck %s --check-prefixes=INLINE
+; RUN: opt < %s -passes=inline -inline-asm-instr-cost=300 -S | FileCheck %s --check-prefixes=NOINLINE
+; RUN: opt < %s -passes='cgscc(inline)' -inline-asm-instr-cost=300 -S | FileCheck %s --check-prefixes=NOINLINE
+
+; CHECK-LABEL: caller
+; CHECK-NOT: callee
+; INLINE: call void asm
+; NOINLINE: call void @callee
+
+define void @caller(i32 %a, i1 %b) #0 {
+ call void @callee(i32 %a, i1 %b)
+ ret void
+}
+
+define void @callee(i32 %a, i1 %b) {
+ call void asm sideeffect "s_nop 1\0A\09.pushsection other\0A\09s_nop 2\0A\09s_nop 3\0A\09.popsection\0A\09s_nop 4\0A\09.align 32", ""()
+ ret void
+}
>From 2a777632c789490f69f14a0135315f2ae433196f Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Mon, 7 Jul 2025 19:43:35 +0000
Subject: [PATCH 2/2] Add comments to explain the logic behind section level
parsing.
---
llvm/lib/Analysis/InlineCost.cpp | 21 +++++++-----
.../Inline/inline-call-with-asm-call.ll | 34 +++++++++++++------
2 files changed, 36 insertions(+), 19 deletions(-)
diff --git a/llvm/lib/Analysis/InlineCost.cpp b/llvm/lib/Analysis/InlineCost.cpp
index 50a77805bfe78..22f4d08448a22 100644
--- a/llvm/lib/Analysis/InlineCost.cpp
+++ b/llvm/lib/Analysis/InlineCost.cpp
@@ -357,7 +357,7 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
virtual void onMissedSimplification() {}
/// Account for inline assembly instructions.
- virtual void onInlineAsm(InlineAsm &Arg) {}
+ virtual void onInlineAsm(const InlineAsm &Arg) {}
/// Start accounting potential benefits due to SROA for the given alloca.
virtual void onInitializeSROAArg(AllocaInst *Arg) {}
@@ -790,20 +790,26 @@ class InlineCostCallAnalyzer final : public CallAnalyzer {
// Parses the inline assembly argument to account for its cost. Inline
// assembly instructions incur higher costs for inlining since they cannot be
// analyzed and optimized.
- void onInlineAsm(InlineAsm &Arg) override {
- SmallVector<StringRef, 4> Fragments;
- Arg.getAsmString().split(Fragments, "\n");
+ void onInlineAsm(const InlineAsm &Arg) override {
+ if (!InlineAsmInstrCost)
+ return;
+ SmallVector<StringRef, 4> AsmStrs;
+ Arg.collectAsmStrs(AsmStrs);
int SectionLevel = 0;
int InlineAsmInstrCount = 0;
- for (const auto &Fragment : Fragments) {
+ for (StringRef AsmStr : AsmStrs) {
// Trim whitespaces and comments.
- auto Trimmed = Fragment.trim();
+ StringRef Trimmed = AsmStr.trim();
size_t hashPos = Trimmed.find('#');
if (hashPos != StringRef::npos)
Trimmed = Trimmed.substr(0, hashPos);
// Ignore comments.
if (Trimmed.empty())
continue;
+ // Filter out the outlined assembly instructions from the cost by keeping
+ // track of the section level and only accounting for instructions at
+ // section level of zero. Note there will be duplication in outlined
+ // sections too, but it is not accounted for in the inlining cost model.
if (Trimmed.starts_with(".pushsection")) {
++SectionLevel;
continue;
@@ -2465,9 +2471,8 @@ bool CallAnalyzer::visitCallBase(CallBase &Call) {
if (isa<CallInst>(Call) && cast<CallInst>(Call).cannotDuplicate())
ContainsNoDuplicateCall = true;
- if (InlineAsm *InlineAsmOp = dyn_cast<InlineAsm>(Call.getCalledOperand())) {
+ if (InlineAsm *InlineAsmOp = dyn_cast<InlineAsm>(Call.getCalledOperand()))
onInlineAsm(*InlineAsmOp);
- }
Function *F = Call.getCalledFunction();
bool IsIndirectCall = !F;
diff --git a/llvm/test/Transforms/Inline/inline-call-with-asm-call.ll b/llvm/test/Transforms/Inline/inline-call-with-asm-call.ll
index 93bbd0e028fdf..7d8121d04996e 100644
--- a/llvm/test/Transforms/Inline/inline-call-with-asm-call.ll
+++ b/llvm/test/Transforms/Inline/inline-call-with-asm-call.ll
@@ -1,23 +1,35 @@
;; Test to verify that when callee has inline assembly, bumping up `-inline-asm-instr-cost` would block inlining.
-; RUN: opt < %s -passes=inline -S | FileCheck %s --check-prefixes=INLINE
-; RUN: opt < %s -passes='cgscc(inline)' -S | FileCheck %s --check-prefixes=INLINE
-; RUN: opt < %s -passes=inline -inline-asm-instr-cost=150 -S | FileCheck %s --check-prefixes=INLINE
-; RUN: opt < %s -passes='cgscc(inline)' -inline-asm-instr-cost=150 -S | FileCheck %s --check-prefixes=INLINE
-; RUN: opt < %s -passes=inline -inline-asm-instr-cost=300 -S | FileCheck %s --check-prefixes=NOINLINE
-; RUN: opt < %s -passes='cgscc(inline)' -inline-asm-instr-cost=300 -S | FileCheck %s --check-prefixes=NOINLINE
-
-; CHECK-LABEL: caller
-; CHECK-NOT: callee
-; INLINE: call void asm
-; NOINLINE: call void @callee
+; RUN: opt < %s -passes=inline -S | FileCheck %s --check-prefixes=CHECK,INLINE
+; RUN: opt < %s -passes='cgscc(inline)' -S | FileCheck %s --check-prefixes=CHECK,INLINE
+
+;; Verify that a low assembly instruction cost of 150 does not block inlining.
+;; This test also verifies that the outlined section's instructions (in "other"
+;; section) do not contribute to the cost.
+; RUN: opt < %s -passes=inline -inline-asm-instr-cost=150 -S | FileCheck %s --check-prefixes=CHECK,INLINE
+; RUN: opt < %s -passes='cgscc(inline)' -inline-asm-instr-cost=150 -S | FileCheck %s --check-prefixes=CHECK,INLINE
+
+;; Verify that an assembly instruction cost of 300 blocks inlining.
+; RUN: opt < %s -passes=inline -inline-asm-instr-cost=300 -S | FileCheck %s --check-prefixes=CHECK,NOINLINE
+; RUN: opt < %s -passes='cgscc(inline)' -inline-asm-instr-cost=300 -S | FileCheck %s --check-prefixes=CHECK,NOINLINE
define void @caller(i32 %a, i1 %b) #0 {
call void @callee(i32 %a, i1 %b)
ret void
}
+; CHECK: define void @caller
+; INLINE: call void asm
+; NOINLINE: call void @callee
+
+
+;; callee function with asm call with two real assembly instructions in the
+;; destination section and two assembly instructions in the outlined "other"
+;; section.
define void @callee(i32 %a, i1 %b) {
call void asm sideeffect "s_nop 1\0A\09.pushsection other\0A\09s_nop 2\0A\09s_nop 3\0A\09.popsection\0A\09s_nop 4\0A\09.align 32", ""()
ret void
}
+; CHECK: define void @callee
+
+
More information about the llvm-commits
mailing list