[llvm] Treat ';' and '\n' as assembly instruction separators in collectAsmInstrs (PR #149365)
Rahman Lavaee via llvm-commits
llvm-commits at lists.llvm.org
Fri Jul 18 10:30:23 PDT 2025
https://github.com/rlavaee updated https://github.com/llvm/llvm-project/pull/149365
>From 0d6b846c0d7344a9a15e83121068e78523fd4328 Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Thu, 17 Jul 2025 05:32:13 +0000
Subject: [PATCH 1/3] Treat ';' and '\n' as assembly instruction separators in
collectAsmInstrs
This also fixes the previous behavior of splitting only on the two-character
sequence "\n\t", which missed instructions separated by a bare '\n' or by ';'.
---
llvm/include/llvm/IR/InlineAsm.h | 7 +++-
llvm/lib/Analysis/InlineCost.cpp | 21 ++++-------
.../SelectionDAG/SelectionDAGBuilder.cpp | 5 ++-
llvm/lib/IR/InlineAsm.cpp | 35 +++++++++++++------
.../X86/inline-asm-function-call-pic.ll | 2 +-
.../CodeGen/X86/inline-asm-p-constraint.ll | 5 ++-
.../Inline/inline-call-with-asm-call.ll | 2 +-
7 files changed, 43 insertions(+), 34 deletions(-)
diff --git a/llvm/include/llvm/IR/InlineAsm.h b/llvm/include/llvm/IR/InlineAsm.h
index 96887d129a69f..6144d2f07335b 100644
--- a/llvm/include/llvm/IR/InlineAsm.h
+++ b/llvm/include/llvm/IR/InlineAsm.h
@@ -87,7 +87,12 @@ class InlineAsm final : public Value {
StringRef getAsmString() const { return AsmString; }
StringRef getConstraintString() const { return Constraints; }
- LLVM_ABI void collectAsmStrs(SmallVectorImpl<StringRef> &AsmStrs) const;
+
+ /// collectAsmInstrs - Parses the assembly instruction and collects individual
+ /// instructions in a vector. Handles both '\n' and ';' as instruction
+ /// separators. Trims comments (marked by '#' and "//") and whitespaces from
+ /// instructions.
+ LLVM_ABI SmallVector<StringRef> collectAsmInstrs() const;
/// This static method can be used by the parser to check to see if the
/// specified constraint string is legal for the type.
diff --git a/llvm/lib/Analysis/InlineCost.cpp b/llvm/lib/Analysis/InlineCost.cpp
index 22f4d08448a22..35e3a8f731db3 100644
--- a/llvm/lib/Analysis/InlineCost.cpp
+++ b/llvm/lib/Analysis/InlineCost.cpp
@@ -793,33 +793,24 @@ class InlineCostCallAnalyzer final : public CallAnalyzer {
void onInlineAsm(const InlineAsm &Arg) override {
if (!InlineAsmInstrCost)
return;
- SmallVector<StringRef, 4> AsmStrs;
- Arg.collectAsmStrs(AsmStrs);
int SectionLevel = 0;
int InlineAsmInstrCount = 0;
- for (StringRef AsmStr : AsmStrs) {
- // Trim whitespaces and comments.
- StringRef Trimmed = AsmStr.trim();
- size_t hashPos = Trimmed.find('#');
- if (hashPos != StringRef::npos)
- Trimmed = Trimmed.substr(0, hashPos);
- // Ignore comments.
- if (Trimmed.empty())
- continue;
+ for (StringRef AsmInstr : Arg.collectAsmInstrs()) {
// Filter out the outlined assembly instructions from the cost by keeping
// track of the section level and only accounting for instrutions at
// section level of zero. Note there will be duplication in outlined
// sections too, but is not accounted in the inlining cost model.
- if (Trimmed.starts_with(".pushsection")) {
+ if (AsmInstr.starts_with(".pushsection")) {
++SectionLevel;
continue;
}
- if (Trimmed.starts_with(".popsection")) {
+ if (AsmInstr.starts_with(".popsection")) {
--SectionLevel;
continue;
}
- // Ignore directives and labels.
- if (Trimmed.starts_with(".") || Trimmed.contains(":"))
+ // Labels are free. Note we only exclude labels that are not followed by
+ // any other instruction.
+ if (AsmInstr.ends_with(":"))
continue;
if (SectionLevel == 0)
++InlineAsmInstrCount;
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 74c14ede24755..24dfd3a757cdd 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -10006,8 +10006,7 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call,
}
int OpNo = -1;
- SmallVector<StringRef> AsmStrs;
- IA->collectAsmStrs(AsmStrs);
+ SmallVector<StringRef> AsmInstrs = IA->collectAsmInstrs();
// Second pass over the constraints: compute which constraint option to use.
for (SDISelAsmOperandInfo &OpInfo : ConstraintOperands) {
@@ -10051,7 +10050,7 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call,
// label, so here we don't handle jmp function label now, but we need to
// enhance it (especilly in PIC model) if we meet meaningful requirements.
if (OpInfo.isIndirect && isFunction(OpInfo.CallOperand) &&
- TLI.isInlineAsmTargetBranch(AsmStrs, OpNo) &&
+ TLI.isInlineAsmTargetBranch(AsmInstrs, OpNo) &&
TM.getCodeModel() != CodeModel::Large) {
OpInfo.isIndirect = false;
OpInfo.ConstraintType = TargetLowering::C_Address;
diff --git a/llvm/lib/IR/InlineAsm.cpp b/llvm/lib/IR/InlineAsm.cpp
index 922081468a775..a6f83dcdf3462 100644
--- a/llvm/lib/IR/InlineAsm.cpp
+++ b/llvm/lib/IR/InlineAsm.cpp
@@ -60,17 +60,32 @@ FunctionType *InlineAsm::getFunctionType() const {
return FTy;
}
-void InlineAsm::collectAsmStrs(SmallVectorImpl<StringRef> &AsmStrs) const {
+SmallVector<StringRef> InlineAsm::collectAsmInstrs() const {
+ if (AsmString.empty())
+ return {};
StringRef AsmStr(AsmString);
- AsmStrs.clear();
-
- // TODO: 1) Unify delimiter for inline asm, we also meet other delimiters
- // for example "\0A", ";".
- // 2) Enhance StringRef. Some of the special delimiter ("\0") can't be
- // split in StringRef. Also empty StringRef can not call split (will stuck).
- if (AsmStr.empty())
- return;
- AsmStr.split(AsmStrs, "\n\t", -1, false);
+ // First break the assembly string into lines.
+ SmallVector<StringRef> AsmLines;
+ AsmStr.split(AsmLines, '\n');
+
+ SmallVector<StringRef> AsmInstrs;
+ AsmInstrs.reserve(AsmLines.size());
+ for (StringRef &AsmLine : AsmLines) {
+ // First remove the comments. Note it's important to do this before breaking
+ // by ';' since the comment portion may include that character too.
+ AsmLine = AsmLine.split('#').first.split("//").first;
+ if (AsmLine.empty())
+ continue;
+ // Break by ';' to collect separate instructions in a single line.
+ SmallVector<StringRef, 1> CurrentLineAsmInstrs;
+ AsmLine.split(CurrentLineAsmInstrs, ';');
+ for (StringRef S : CurrentLineAsmInstrs) {
+ StringRef Trimmed = S.trim();
+ if (!Trimmed.empty())
+ AsmInstrs.push_back(Trimmed);
+ }
+ }
+ return AsmInstrs;
}
/// Parse - Analyze the specified string (e.g. "==&{eax}") and fill in the
diff --git a/llvm/test/CodeGen/X86/inline-asm-function-call-pic.ll b/llvm/test/CodeGen/X86/inline-asm-function-call-pic.ll
index d3ca872509ad5..cabba1ede609a 100644
--- a/llvm/test/CodeGen/X86/inline-asm-function-call-pic.ll
+++ b/llvm/test/CodeGen/X86/inline-asm-function-call-pic.ll
@@ -59,7 +59,7 @@ define void @func() local_unnamed_addr #0 {
entry:
%call = tail call i32 @static_func()
;; We test call, CALL, and jmp.
- tail call void asm sideeffect inteldialect "call ${0:P}\0A\09CALL ${1:P}\0A\09jmp ${1:P}\0A\09shr eax, $$0\0A\09shr ebx, $$0\0A\09shr ecx, $$0\0A\09shr edx, $$0\0A\09shr edi, $$0\0A\09shr esi, $$0\0A\09shr ebp, $$0\0A\09shr esp, $$0", "*m,*m,~{eax},~{ebp},~{ebx},~{ecx},~{edi},~{edx},~{flags},~{esi},~{esp},~{dirflag},~{fpsr},~{flags}"(ptr nonnull elementtype(i32 (...)) @static_func, ptr nonnull elementtype(i32 (...)) @extern_func) #0
+ tail call void asm sideeffect inteldialect "call ${0:P}\0A\09CALL ${1:P}; jmp ${1:P}\0A\09shr eax, $$0\0Ashr ebx, $$0\0A\09shr ecx, $$0\0A\09shr edx, $$0; shr edi, $$0\0A\09shr esi, $$0\0A\09shr ebp, $$0\0A\09shr esp, $$0", "*m,*m,~{eax},~{ebp},~{ebx},~{ecx},~{edi},~{edx},~{flags},~{esi},~{esp},~{dirflag},~{fpsr},~{flags}"(ptr nonnull elementtype(i32 (...)) @static_func, ptr nonnull elementtype(i32 (...)) @extern_func) #0
ret void
}
diff --git a/llvm/test/CodeGen/X86/inline-asm-p-constraint.ll b/llvm/test/CodeGen/X86/inline-asm-p-constraint.ll
index 50185343662b7..0837514a021fd 100644
--- a/llvm/test/CodeGen/X86/inline-asm-p-constraint.ll
+++ b/llvm/test/CodeGen/X86/inline-asm-p-constraint.ll
@@ -7,10 +7,9 @@ define ptr @foo(ptr %Ptr) {
%Ptr.addr = alloca ptr, align 8
store ptr %Ptr, ptr %Ptr.addr, align 8
; CHECK: movq %rdi, -8(%rsp)
- %1 = tail call ptr asm "mov $1, $0\0A\09lea $2, $0", "=r,p,*m,~{dirflag},~{fpsr},~{flags}"(ptr %Ptr, ptr elementtype(ptr) %Ptr.addr)
+ %1 = tail call ptr asm "mov $1, $0; lea $2, $0", "=r,p,*m,~{dirflag},~{fpsr},~{flags}"(ptr %Ptr, ptr elementtype(ptr) %Ptr.addr)
; CHECK-NEXT: #APP
-; CHECK-NEXT: mov (%rdi), %rax
-; CHECK-NEXT: lea -8(%rsp), %rax
+; CHECK-NEXT: mov (%rdi), %rax; lea -8(%rsp), %rax
; CHECK-NEXT: #NO_APP
ret ptr %1
; CHECK-NEXT: retq
diff --git a/llvm/test/Transforms/Inline/inline-call-with-asm-call.ll b/llvm/test/Transforms/Inline/inline-call-with-asm-call.ll
index 7d8121d04996e..0018afdf4968a 100644
--- a/llvm/test/Transforms/Inline/inline-call-with-asm-call.ll
+++ b/llvm/test/Transforms/Inline/inline-call-with-asm-call.ll
@@ -27,7 +27,7 @@ define void @caller(i32 %a, i1 %b) #0 {
;; destination section and two assembly instructions in the outlined "other"
;; section.
define void @callee(i32 %a, i1 %b) {
- call void asm sideeffect "s_nop 1\0A\09.pushsection other\0A\09s_nop 2\0A\09s_nop 3\0A\09.popsection\0A\09s_nop 4\0A\09.align 32", ""()
+ call void asm sideeffect "s_nop 1 # some comment ; still comment \09.pushsection other; s_nop 2 \0A s_nop 3 \0A.popsection\0A s_nop 4; label:\0A", ""()
ret void
}
; CHECK: define void @callee
>From d716bcf485044bb110df01f0f9f83976732df5c8 Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Fri, 18 Jul 2025 17:29:52 +0000
Subject: [PATCH 2/3] Elaborate comment and remove empty AsmString handling.
---
llvm/include/llvm/IR/InlineAsm.h | 4 ++--
llvm/lib/IR/InlineAsm.cpp | 2 --
2 files changed, 2 insertions(+), 4 deletions(-)
diff --git a/llvm/include/llvm/IR/InlineAsm.h b/llvm/include/llvm/IR/InlineAsm.h
index 6144d2f07335b..eb13a70a5fad6 100644
--- a/llvm/include/llvm/IR/InlineAsm.h
+++ b/llvm/include/llvm/IR/InlineAsm.h
@@ -88,8 +88,8 @@ class InlineAsm final : public Value {
StringRef getAsmString() const { return AsmString; }
StringRef getConstraintString() const { return Constraints; }
- /// collectAsmInstrs - Parses the assembly instruction and collects individual
- /// instructions in a vector. Handles both '\n' and ';' as instruction
+ /// collectAsmInstrs - Parses the assembly instruction and returns individual
+ /// non-empty instructions in a vector. Handles both '\n' and ';' as instruction
/// separators. Trims comments (marked by '#' and "//") and whitespaces from
/// instructions.
LLVM_ABI SmallVector<StringRef> collectAsmInstrs() const;
diff --git a/llvm/lib/IR/InlineAsm.cpp b/llvm/lib/IR/InlineAsm.cpp
index a6f83dcdf3462..12f671e824cdf 100644
--- a/llvm/lib/IR/InlineAsm.cpp
+++ b/llvm/lib/IR/InlineAsm.cpp
@@ -61,8 +61,6 @@ FunctionType *InlineAsm::getFunctionType() const {
}
SmallVector<StringRef> InlineAsm::collectAsmInstrs() const {
- if (AsmString.empty())
- return {};
StringRef AsmStr(AsmString);
// First break the assembly string into lines.
SmallVector<StringRef> AsmLines;
>From 1053d21fb87a910ff4cafa24e8ffaa6fa2ccf3b0 Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Fri, 18 Jul 2025 17:30:05 +0000
Subject: [PATCH 3/3] clang-format.
---
llvm/include/llvm/IR/InlineAsm.h | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/llvm/include/llvm/IR/InlineAsm.h b/llvm/include/llvm/IR/InlineAsm.h
index eb13a70a5fad6..3ba0211d7bd99 100644
--- a/llvm/include/llvm/IR/InlineAsm.h
+++ b/llvm/include/llvm/IR/InlineAsm.h
@@ -89,9 +89,9 @@ class InlineAsm final : public Value {
StringRef getConstraintString() const { return Constraints; }
/// collectAsmInstrs - Parses the assembly instruction and returns individual
- /// non-empty instructions in a vector. Handles both '\n' and ';' as instruction
- /// separators. Trims comments (marked by '#' and "//") and whitespaces from
- /// instructions.
+ /// non-empty instructions in a vector. Handles both '\n' and ';' as
+ /// instruction separators. Trims comments (marked by '#' and "//") and
+ /// whitespaces from instructions.
LLVM_ABI SmallVector<StringRef> collectAsmInstrs() const;
/// This static method can be used by the parser to check to see if the
More information about the llvm-commits mailing list