[llvm] [BranchFolding] Avoid moving blocks to fall through to an indirect target of inline asm (PR #152916)

via llvm-commits llvm-commits at lists.llvm.org
Sun Aug 10 09:13:49 PDT 2025


https://github.com/XChy updated https://github.com/llvm/llvm-project/pull/152916

>From efeb4ae30a18e0c6b3783f3186a59ae1dcd3dc03 Mon Sep 17 00:00:00 2001
From: XChy <xxs_chy at outlook.com>
Date: Fri, 8 Aug 2025 04:15:08 +0800
Subject: [PATCH 1/4] [SelectionDAGBuilder] Look for appropriate INLINEASM_BR
 instruction to verify

---
 .../SelectionDAG/SelectionDAGBuilder.cpp      | 13 +++--
 llvm/test/CodeGen/X86/callbr-asm-loop.ll      | 50 +++++++++++++++++++
 2 files changed, 59 insertions(+), 4 deletions(-)
 create mode 100644 llvm/test/CodeGen/X86/callbr-asm-loop.ll

diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index d0815e9f51822..f096148f865cb 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -12737,17 +12737,22 @@ static Register FollowCopyChain(MachineRegisterInfo &MRI, Register Reg) {
   assert(MI->getOpcode() == TargetOpcode::COPY &&
          "start of copy chain MUST be COPY");
   Reg = MI->getOperand(1).getReg();
+
+  // The copied register in the first copy must be virtual.
+  assert(Reg.isVirtual() && "expected COPY of virtual register");
   MI = MRI.def_begin(Reg)->getParent();
+
   // There may be an optional second copy.
   if (MI->getOpcode() == TargetOpcode::COPY) {
     assert(Reg.isVirtual() && "expected COPY of virtual register");
     Reg = MI->getOperand(1).getReg();
     assert(Reg.isPhysical() && "expected COPY of physical register");
-    MI = MRI.def_begin(Reg)->getParent();
+  } else {
+    // The end of the chain must be an INLINEASM_BR.
+    assert(MI->getOpcode() == TargetOpcode::INLINEASM_BR &&
+           "end of copy chain MUST be INLINEASM_BR");
   }
-  // The start of the chain must be an INLINEASM_BR.
-  assert(MI->getOpcode() == TargetOpcode::INLINEASM_BR &&
-         "end of copy chain MUST be INLINEASM_BR");
+
   return Reg;
 }
 
diff --git a/llvm/test/CodeGen/X86/callbr-asm-loop.ll b/llvm/test/CodeGen/X86/callbr-asm-loop.ll
new file mode 100644
index 0000000000000..83affd7e86097
--- /dev/null
+++ b/llvm/test/CodeGen/X86/callbr-asm-loop.ll
@@ -0,0 +1,50 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+
+; RUN: llc -O0 -mtriple=i686-- < %s | FileCheck %s
+
+; Test that causes multiple defs of %eax.
+; FIXME: The testcase hangs with -O1/2/3 enabled.
+define i32 @loop1() {
+; CHECK-LABEL: loop1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    pushl %esi
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-NEXT:    .cfi_offset %esi, -8
+; CHECK-NEXT:    jmp .LBB0_1
+; CHECK-NEXT:  .LBB0_1: # %tailrecurse
+; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    movl $1, %edx
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    movl %eax, %ecx
+; CHECK-NEXT:    movl %edx, %esi
+; CHECK-NEXT:    jmp .LBB0_3
+; CHECK-NEXT:  .LBB0_2: # Inline asm indirect target
+; CHECK-NEXT:    # %tailrecurse.tailrecurse.backedge_crit_edge
+; CHECK-NEXT:    # in Loop: Header=BB0_1 Depth=1
+; CHECK-NEXT:    # Label of block must be emitted
+; CHECK-NEXT:  .LBB0_3: # %tailrecurse.backedge
+; CHECK-NEXT:    # in Loop: Header=BB0_1 Depth=1
+; CHECK-NEXT:    jmp .LBB0_1
+; CHECK-NEXT:  .LBB0_4: # Inline asm indirect target
+; CHECK-NEXT:    # %lab2.split
+; CHECK-NEXT:    # Label of block must be emitted
+; CHECK-NEXT:    movl %edx, %eax
+; CHECK-NEXT:    popl %esi
+; CHECK-NEXT:    .cfi_def_cfa_offset 4
+; CHECK-NEXT:    retl
+entry:
+  br label %tailrecurse
+
+tailrecurse:
+  %0 = callbr { i32, i32 } asm "", "={ax},={dx},0,1,!i,!i"(i32 0, i32 1) #1
+          to label %tailrecurse.backedge [label %tailrecurse.backedge, label %lab2.split]
+
+tailrecurse.backedge:
+  br label %tailrecurse
+
+lab2.split:
+  %asmresult5 = extractvalue { i32, i32 } %0, 1
+  ret i32 %asmresult5
+}

>From ab2025be0e8880c9020234e723e2f8362bd2fe1c Mon Sep 17 00:00:00 2001
From: XChy <xxs_chy at outlook.com>
Date: Sun, 10 Aug 2025 21:34:32 +0800
Subject: [PATCH 2/4] [BranchFolding] Avoid moving blocks to fall through to an
 indirect target of inline asm

---
 llvm/lib/CodeGen/BranchFolding.cpp       | 10 ++++++++--
 llvm/test/CodeGen/X86/callbr-asm-loop.ll | 19 +++++--------------
 2 files changed, 13 insertions(+), 16 deletions(-)

diff --git a/llvm/lib/CodeGen/BranchFolding.cpp b/llvm/lib/CodeGen/BranchFolding.cpp
index dcfd9aad70fc5..d49c4b4a5755e 100644
--- a/llvm/lib/CodeGen/BranchFolding.cpp
+++ b/llvm/lib/CodeGen/BranchFolding.cpp
@@ -1776,8 +1776,9 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) {
       // Okay, there is no really great place to put this block.  If, however,
       // the block before this one would be a fall-through if this block were
       // removed, move this block to the end of the function. There is no real
-      // advantage in "falling through" to an EH block, so we don't want to
-      // perform this transformation for that case.
+      // advantage in "falling through" to an EH block or an indirect target of
+      // an INLINEASM_BR, so we don't want to perform this transformation for
+      // that case.
       //
       // Also, Windows EH introduced the possibility of an arbitrary number of
       // successors to a given block.  The analyzeBranch call does not consider
@@ -1787,10 +1788,15 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) {
       // below were performed for EH "FallThrough" blocks.  Therefore, even if
       // that appears not to be happening anymore, we should assume that it is
       // possible and not remove the "!FallThrough()->isEHPad" condition below.
+      //
+      // Inline asm branches likewise introduce the possibility of infinite
+      // rotation, as there are an arbitrary number of successors to a given
+      // block.
       MachineBasicBlock *PrevTBB = nullptr, *PrevFBB = nullptr;
       SmallVector<MachineOperand, 4> PrevCond;
       if (FallThrough != MF.end() &&
           !FallThrough->isEHPad() &&
+          !FallThrough->isInlineAsmBrIndirectTarget() &&
           !TII->analyzeBranch(PrevBB, PrevTBB, PrevFBB, PrevCond, true) &&
           PrevBB.isSuccessor(&*FallThrough)) {
         MBB->moveAfter(&MF.back());
diff --git a/llvm/test/CodeGen/X86/callbr-asm-loop.ll b/llvm/test/CodeGen/X86/callbr-asm-loop.ll
index 83affd7e86097..9e890a861e3e4 100644
--- a/llvm/test/CodeGen/X86/callbr-asm-loop.ll
+++ b/llvm/test/CodeGen/X86/callbr-asm-loop.ll
@@ -1,38 +1,29 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 
-; RUN: llc -O0 -mtriple=i686-- < %s | FileCheck %s
+; RUN: llc -O1 -mtriple=i686-- < %s | FileCheck %s
 
 ; Test that causes multiple defs of %eax.
 ; FIXME: The testcase hangs with -O1/2/3 enabled.
 define i32 @loop1() {
 ; CHECK-LABEL: loop1:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    pushl %esi
-; CHECK-NEXT:    .cfi_def_cfa_offset 8
-; CHECK-NEXT:    .cfi_offset %esi, -8
-; CHECK-NEXT:    jmp .LBB0_1
+; CHECK-NEXT:    .p2align 4
 ; CHECK-NEXT:  .LBB0_1: # %tailrecurse
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    xorl %eax, %eax
 ; CHECK-NEXT:    movl $1, %edx
 ; CHECK-NEXT:    #APP
 ; CHECK-NEXT:    #NO_APP
-; CHECK-NEXT:    movl %eax, %ecx
-; CHECK-NEXT:    movl %edx, %esi
-; CHECK-NEXT:    jmp .LBB0_3
+; CHECK-NEXT:    jmp .LBB0_1
 ; CHECK-NEXT:  .LBB0_2: # Inline asm indirect target
-; CHECK-NEXT:    # %tailrecurse.tailrecurse.backedge_crit_edge
+; CHECK-NEXT:    # %tailrecurse.tailrecurse_crit_edge
 ; CHECK-NEXT:    # in Loop: Header=BB0_1 Depth=1
 ; CHECK-NEXT:    # Label of block must be emitted
-; CHECK-NEXT:  .LBB0_3: # %tailrecurse.backedge
-; CHECK-NEXT:    # in Loop: Header=BB0_1 Depth=1
 ; CHECK-NEXT:    jmp .LBB0_1
-; CHECK-NEXT:  .LBB0_4: # Inline asm indirect target
+; CHECK-NEXT:  .LBB0_3: # Inline asm indirect target
 ; CHECK-NEXT:    # %lab2.split
 ; CHECK-NEXT:    # Label of block must be emitted
 ; CHECK-NEXT:    movl %edx, %eax
-; CHECK-NEXT:    popl %esi
-; CHECK-NEXT:    .cfi_def_cfa_offset 4
 ; CHECK-NEXT:    retl
 entry:
   br label %tailrecurse

>From e3caf12ddb87611727c63c64eeb94579786ff464 Mon Sep 17 00:00:00 2001
From: XChy <xxs_chy at outlook.com>
Date: Sun, 10 Aug 2025 21:51:35 +0800
Subject: [PATCH 3/4] format

---
 llvm/lib/CodeGen/BranchFolding.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/llvm/lib/CodeGen/BranchFolding.cpp b/llvm/lib/CodeGen/BranchFolding.cpp
index d49c4b4a5755e..d1f83810c70cd 100644
--- a/llvm/lib/CodeGen/BranchFolding.cpp
+++ b/llvm/lib/CodeGen/BranchFolding.cpp
@@ -1794,8 +1794,7 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) {
       // block.
       MachineBasicBlock *PrevTBB = nullptr, *PrevFBB = nullptr;
       SmallVector<MachineOperand, 4> PrevCond;
-      if (FallThrough != MF.end() &&
-          !FallThrough->isEHPad() &&
+      if (FallThrough != MF.end() && !FallThrough->isEHPad() &&
           !FallThrough->isInlineAsmBrIndirectTarget() &&
           !TII->analyzeBranch(PrevBB, PrevTBB, PrevFBB, PrevCond, true) &&
           PrevBB.isSuccessor(&*FallThrough)) {

>From 3c3aff49ed4ee03dabb39f38270f0196f949ae28 Mon Sep 17 00:00:00 2001
From: XChy <xxs_chy at outlook.com>
Date: Mon, 11 Aug 2025 00:13:28 +0800
Subject: [PATCH 4/4] fix testcase

---
 llvm/test/CodeGen/X86/callbr-asm-loop.ll | 1 -
 1 file changed, 1 deletion(-)

diff --git a/llvm/test/CodeGen/X86/callbr-asm-loop.ll b/llvm/test/CodeGen/X86/callbr-asm-loop.ll
index 9e890a861e3e4..84c12e7166b41 100644
--- a/llvm/test/CodeGen/X86/callbr-asm-loop.ll
+++ b/llvm/test/CodeGen/X86/callbr-asm-loop.ll
@@ -3,7 +3,6 @@
 ; RUN: llc -O1 -mtriple=i686-- < %s | FileCheck %s
 
 ; Test that causes multiple defs of %eax.
-; FIXME: The testcase hangs with -O1/2/3 enabled.
 define i32 @loop1() {
 ; CHECK-LABEL: loop1:
 ; CHECK:       # %bb.0: # %entry



More information about the llvm-commits mailing list