[llvm] Treat ';' and '\n' as assembly instruction separators in collectAsmInstrs (PR #149365)

Rahman Lavaee via llvm-commits llvm-commits at lists.llvm.org
Fri Jul 18 10:30:23 PDT 2025


https://github.com/rlavaee updated https://github.com/llvm/llvm-project/pull/149365

>From 0d6b846c0d7344a9a15e83121068e78523fd4328 Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Thu, 17 Jul 2025 05:32:13 +0000
Subject: [PATCH 1/3] Treat ';' and '\n' as assembly instruction separators in
 collectAsmInstrs

This also fixes the incorrect treatment of '\n\t' as a separator for asm
instructions.
---
 llvm/include/llvm/IR/InlineAsm.h              |  7 +++-
 llvm/lib/Analysis/InlineCost.cpp              | 21 ++++-------
 .../SelectionDAG/SelectionDAGBuilder.cpp      |  5 ++-
 llvm/lib/IR/InlineAsm.cpp                     | 35 +++++++++++++------
 .../X86/inline-asm-function-call-pic.ll       |  2 +-
 .../CodeGen/X86/inline-asm-p-constraint.ll    |  5 ++-
 .../Inline/inline-call-with-asm-call.ll       |  2 +-
 7 files changed, 43 insertions(+), 34 deletions(-)

diff --git a/llvm/include/llvm/IR/InlineAsm.h b/llvm/include/llvm/IR/InlineAsm.h
index 96887d129a69f..6144d2f07335b 100644
--- a/llvm/include/llvm/IR/InlineAsm.h
+++ b/llvm/include/llvm/IR/InlineAsm.h
@@ -87,7 +87,12 @@ class InlineAsm final : public Value {
 
   StringRef getAsmString() const { return AsmString; }
   StringRef getConstraintString() const { return Constraints; }
-  LLVM_ABI void collectAsmStrs(SmallVectorImpl<StringRef> &AsmStrs) const;
+
+  /// collectAsmInstrs - Parses the assembly instruction and collects individual
+  /// instructions in a vector. Handles both '\n' and ';' as instruction
+  /// separators. Trims comments (marked by '#' and "//") and whitespaces from
+  /// instructions.
+  LLVM_ABI SmallVector<StringRef> collectAsmInstrs() const;
 
   /// This static method can be used by the parser to check to see if the
   /// specified constraint string is legal for the type.
diff --git a/llvm/lib/Analysis/InlineCost.cpp b/llvm/lib/Analysis/InlineCost.cpp
index 22f4d08448a22..35e3a8f731db3 100644
--- a/llvm/lib/Analysis/InlineCost.cpp
+++ b/llvm/lib/Analysis/InlineCost.cpp
@@ -793,33 +793,24 @@ class InlineCostCallAnalyzer final : public CallAnalyzer {
   void onInlineAsm(const InlineAsm &Arg) override {
     if (!InlineAsmInstrCost)
       return;
-    SmallVector<StringRef, 4> AsmStrs;
-    Arg.collectAsmStrs(AsmStrs);
     int SectionLevel = 0;
     int InlineAsmInstrCount = 0;
-    for (StringRef AsmStr : AsmStrs) {
-      // Trim whitespaces and comments.
-      StringRef Trimmed = AsmStr.trim();
-      size_t hashPos = Trimmed.find('#');
-      if (hashPos != StringRef::npos)
-        Trimmed = Trimmed.substr(0, hashPos);
-      // Ignore comments.
-      if (Trimmed.empty())
-        continue;
+    for (StringRef AsmInstr : Arg.collectAsmInstrs()) {
       // Filter out the outlined assembly instructions from the cost by keeping
       // track of the section level and only accounting for instrutions at
       // section level of zero. Note there will be duplication in outlined
       // sections too, but is not accounted in the inlining cost model.
-      if (Trimmed.starts_with(".pushsection")) {
+      if (AsmInstr.starts_with(".pushsection")) {
         ++SectionLevel;
         continue;
       }
-      if (Trimmed.starts_with(".popsection")) {
+      if (AsmInstr.starts_with(".popsection")) {
         --SectionLevel;
         continue;
       }
-      // Ignore directives and labels.
-      if (Trimmed.starts_with(".") || Trimmed.contains(":"))
+      // Labels are free. Note we only exclude labels that are not followed by
+      // any other instruction.
+      if (AsmInstr.ends_with(":"))
         continue;
       if (SectionLevel == 0)
         ++InlineAsmInstrCount;
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 74c14ede24755..24dfd3a757cdd 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -10006,8 +10006,7 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call,
   }
 
   int OpNo = -1;
-  SmallVector<StringRef> AsmStrs;
-  IA->collectAsmStrs(AsmStrs);
+  SmallVector<StringRef> AsmInstrs = IA->collectAsmInstrs();
 
   // Second pass over the constraints: compute which constraint option to use.
   for (SDISelAsmOperandInfo &OpInfo : ConstraintOperands) {
@@ -10051,7 +10050,7 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call,
     // label, so here we don't handle jmp function label now, but we need to
     // enhance it (especilly in PIC model) if we meet meaningful requirements.
     if (OpInfo.isIndirect && isFunction(OpInfo.CallOperand) &&
-        TLI.isInlineAsmTargetBranch(AsmStrs, OpNo) &&
+        TLI.isInlineAsmTargetBranch(AsmInstrs, OpNo) &&
         TM.getCodeModel() != CodeModel::Large) {
       OpInfo.isIndirect = false;
       OpInfo.ConstraintType = TargetLowering::C_Address;
diff --git a/llvm/lib/IR/InlineAsm.cpp b/llvm/lib/IR/InlineAsm.cpp
index 922081468a775..a6f83dcdf3462 100644
--- a/llvm/lib/IR/InlineAsm.cpp
+++ b/llvm/lib/IR/InlineAsm.cpp
@@ -60,17 +60,32 @@ FunctionType *InlineAsm::getFunctionType() const {
   return FTy;
 }
 
-void InlineAsm::collectAsmStrs(SmallVectorImpl<StringRef> &AsmStrs) const {
+SmallVector<StringRef> InlineAsm::collectAsmInstrs() const {
+  if (AsmString.empty())
+    return {};
   StringRef AsmStr(AsmString);
-  AsmStrs.clear();
-
-  // TODO: 1) Unify delimiter for inline asm, we also meet other delimiters
-  // for example "\0A", ";".
-  // 2) Enhance StringRef. Some of the special delimiter ("\0") can't be
-  // split in StringRef. Also empty StringRef can not call split (will stuck).
-  if (AsmStr.empty())
-    return;
-  AsmStr.split(AsmStrs, "\n\t", -1, false);
+  // First break the assembly string into lines.
+  SmallVector<StringRef> AsmLines;
+  AsmStr.split(AsmLines, '\n');
+
+  SmallVector<StringRef> AsmInstrs;
+  AsmInstrs.reserve(AsmLines.size());
+  for (StringRef &AsmLine : AsmLines) {
+    // First remove the comments. Note it's important to do this before breaking
+    // by ';' since the comment portion may include that character too.
+    AsmLine = AsmLine.split('#').first.split("//").first;
+    if (AsmLine.empty())
+      continue;
+    // Break by ';' to collect separate instructions in a single line.
+    SmallVector<StringRef, 1> CurrentLineAsmInstrs;
+    AsmLine.split(CurrentLineAsmInstrs, ';');
+    for (StringRef S : CurrentLineAsmInstrs) {
+      StringRef Trimmed = S.trim();
+      if (!Trimmed.empty())
+        AsmInstrs.push_back(Trimmed);
+    }
+  }
+  return AsmInstrs;
 }
 
 /// Parse - Analyze the specified string (e.g. "==&{eax}") and fill in the
diff --git a/llvm/test/CodeGen/X86/inline-asm-function-call-pic.ll b/llvm/test/CodeGen/X86/inline-asm-function-call-pic.ll
index d3ca872509ad5..cabba1ede609a 100644
--- a/llvm/test/CodeGen/X86/inline-asm-function-call-pic.ll
+++ b/llvm/test/CodeGen/X86/inline-asm-function-call-pic.ll
@@ -59,7 +59,7 @@ define void @func() local_unnamed_addr #0 {
 entry:
   %call = tail call i32 @static_func()
 ;; We test call, CALL, and jmp.
-  tail call void asm sideeffect inteldialect "call ${0:P}\0A\09CALL ${1:P}\0A\09jmp ${1:P}\0A\09shr eax, $$0\0A\09shr ebx, $$0\0A\09shr ecx, $$0\0A\09shr edx, $$0\0A\09shr edi, $$0\0A\09shr esi, $$0\0A\09shr ebp, $$0\0A\09shr esp, $$0", "*m,*m,~{eax},~{ebp},~{ebx},~{ecx},~{edi},~{edx},~{flags},~{esi},~{esp},~{dirflag},~{fpsr},~{flags}"(ptr nonnull elementtype(i32 (...)) @static_func, ptr nonnull elementtype(i32 (...)) @extern_func) #0
+  tail call void asm sideeffect inteldialect "call ${0:P}\0A\09CALL ${1:P}; jmp ${1:P}\0A\09shr eax, $$0\0Ashr ebx, $$0\0A\09shr ecx, $$0\0A\09shr edx, $$0; shr edi, $$0\0A\09shr esi, $$0\0A\09shr ebp, $$0\0A\09shr esp, $$0", "*m,*m,~{eax},~{ebp},~{ebx},~{ecx},~{edi},~{edx},~{flags},~{esi},~{esp},~{dirflag},~{fpsr},~{flags}"(ptr nonnull elementtype(i32 (...)) @static_func, ptr nonnull elementtype(i32 (...)) @extern_func) #0
   ret void
 }
 
diff --git a/llvm/test/CodeGen/X86/inline-asm-p-constraint.ll b/llvm/test/CodeGen/X86/inline-asm-p-constraint.ll
index 50185343662b7..0837514a021fd 100644
--- a/llvm/test/CodeGen/X86/inline-asm-p-constraint.ll
+++ b/llvm/test/CodeGen/X86/inline-asm-p-constraint.ll
@@ -7,10 +7,9 @@ define ptr @foo(ptr %Ptr) {
   %Ptr.addr = alloca ptr, align 8
   store ptr %Ptr, ptr %Ptr.addr, align 8
 ; CHECK: movq    %rdi, -8(%rsp)
-  %1 = tail call ptr asm "mov $1, $0\0A\09lea $2, $0", "=r,p,*m,~{dirflag},~{fpsr},~{flags}"(ptr %Ptr, ptr elementtype(ptr) %Ptr.addr)
+  %1 = tail call ptr asm "mov $1, $0; lea $2, $0", "=r,p,*m,~{dirflag},~{fpsr},~{flags}"(ptr %Ptr, ptr elementtype(ptr) %Ptr.addr)
 ; CHECK-NEXT: #APP
-; CHECK-NEXT: mov (%rdi), %rax
-; CHECK-NEXT: lea -8(%rsp), %rax
+; CHECK-NEXT: mov (%rdi), %rax; lea -8(%rsp), %rax
 ; CHECK-NEXT: #NO_APP
   ret ptr %1
 ; CHECK-NEXT: retq
diff --git a/llvm/test/Transforms/Inline/inline-call-with-asm-call.ll b/llvm/test/Transforms/Inline/inline-call-with-asm-call.ll
index 7d8121d04996e..0018afdf4968a 100644
--- a/llvm/test/Transforms/Inline/inline-call-with-asm-call.ll
+++ b/llvm/test/Transforms/Inline/inline-call-with-asm-call.ll
@@ -27,7 +27,7 @@ define void @caller(i32 %a, i1 %b) #0 {
 ;; destination section and two assembly instructions in the outlined "other"
 ;; section.
 define void @callee(i32 %a, i1 %b) {
-  call void asm sideeffect "s_nop 1\0A\09.pushsection other\0A\09s_nop 2\0A\09s_nop 3\0A\09.popsection\0A\09s_nop 4\0A\09.align 32", ""()
+  call void asm sideeffect "s_nop 1 # some comment ; still comment \09.pushsection other; s_nop 2 \0A s_nop 3 \0A.popsection\0A s_nop 4; label:\0A", ""()
   ret void
 }
 ; CHECK: define void @callee

>From d716bcf485044bb110df01f0f9f83976732df5c8 Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Fri, 18 Jul 2025 17:29:52 +0000
Subject: [PATCH 2/3] Elaborate comment and remove empty AsmString handling.

---
 llvm/include/llvm/IR/InlineAsm.h | 4 ++--
 llvm/lib/IR/InlineAsm.cpp        | 2 --
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/llvm/include/llvm/IR/InlineAsm.h b/llvm/include/llvm/IR/InlineAsm.h
index 6144d2f07335b..eb13a70a5fad6 100644
--- a/llvm/include/llvm/IR/InlineAsm.h
+++ b/llvm/include/llvm/IR/InlineAsm.h
@@ -88,8 +88,8 @@ class InlineAsm final : public Value {
   StringRef getAsmString() const { return AsmString; }
   StringRef getConstraintString() const { return Constraints; }
 
-  /// collectAsmInstrs - Parses the assembly instruction and collects individual
-  /// instructions in a vector. Handles both '\n' and ';' as instruction
+  /// collectAsmInstrs - Parses the assembly instruction and returns individual
+  /// non-empty instructions in a vector. Handles both '\n' and ';' as instruction
   /// separators. Trims comments (marked by '#' and "//") and whitespaces from
   /// instructions.
   LLVM_ABI SmallVector<StringRef> collectAsmInstrs() const;
diff --git a/llvm/lib/IR/InlineAsm.cpp b/llvm/lib/IR/InlineAsm.cpp
index a6f83dcdf3462..12f671e824cdf 100644
--- a/llvm/lib/IR/InlineAsm.cpp
+++ b/llvm/lib/IR/InlineAsm.cpp
@@ -61,8 +61,6 @@ FunctionType *InlineAsm::getFunctionType() const {
 }
 
 SmallVector<StringRef> InlineAsm::collectAsmInstrs() const {
-  if (AsmString.empty())
-    return {};
   StringRef AsmStr(AsmString);
   // First break the assembly string into lines.
   SmallVector<StringRef> AsmLines;

>From 1053d21fb87a910ff4cafa24e8ffaa6fa2ccf3b0 Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Fri, 18 Jul 2025 17:30:05 +0000
Subject: [PATCH 3/3] clang-format.

---
 llvm/include/llvm/IR/InlineAsm.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/llvm/include/llvm/IR/InlineAsm.h b/llvm/include/llvm/IR/InlineAsm.h
index eb13a70a5fad6..3ba0211d7bd99 100644
--- a/llvm/include/llvm/IR/InlineAsm.h
+++ b/llvm/include/llvm/IR/InlineAsm.h
@@ -89,9 +89,9 @@ class InlineAsm final : public Value {
   StringRef getConstraintString() const { return Constraints; }
 
   /// collectAsmInstrs - Parses the assembly instruction and returns individual
-  /// non-empty instructions in a vector. Handles both '\n' and ';' as instruction
-  /// separators. Trims comments (marked by '#' and "//") and whitespaces from
-  /// instructions.
+  /// non-empty instructions in a vector. Handles both '\n' and ';' as
+  /// instruction separators. Trims comments (marked by '#' and "//") and
+  /// whitespaces from instructions.
   LLVM_ABI SmallVector<StringRef> collectAsmInstrs() const;
 
   /// This static method can be used by the parser to check to see if the



More information about the llvm-commits mailing list