[llvm] [BranchFolding] Avoid moving blocks to fall through to an indirect target of inline asm (PR #152916)
via llvm-commits
llvm-commits at lists.llvm.org
Sun Aug 10 06:44:20 PDT 2025
llvmbot wrote:
@llvm/pr-subscribers-backend-x86
Author: XChy (XChy)
Changes:
Depends on #152591 to fix https://github.com/llvm/llvm-project/issues/149023.
Similar to an EH pad, there is no real advantage in "falling through" to an indirect target of an INLINEASM_BR, and multiple indirect targets of inline asm at the end of a function may be rotated indefinitely.
Therefore, this patch stops treating an indirect target of inline asm as a fall-through candidate for this optimization.
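For illustration only (this is not the reproducer from issue #149023), a source-level pattern that lowers to an INLINEASM_BR with multiple indirect targets at the end of a function looks roughly like the following `asm goto` sketch:

```cpp
// Illustrative sketch only: the labels of an `asm goto` become indirect
// targets of an INLINEASM_BR at the MIR level. When such targets sit at the
// end of a function and branch straight back into the loop, pre-patch
// BranchFolding could keep moving blocks to make one of them a fall-through
// without the layout ever converging.
int spin() {
  int r = 0;
again:
  asm goto("" : : : : back, done); // two indirect targets
  goto again;                      // default successor: keep looping
back:                              // indirect target #1: also loops
  goto again;
done:                              // indirect target #2: exit
  return r;
}
```

With the patch, BranchFolding simply declines to move a block in order to create a fall-through into such an indirect target, mirroring the existing exception for EH pads.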
---
Full diff: https://github.com/llvm/llvm-project/pull/152916.diff
3 Files Affected:
- (modified) llvm/lib/CodeGen/BranchFolding.cpp (+8-2)
- (modified) llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp (+9-4)
- (added) llvm/test/CodeGen/X86/callbr-asm-loop.ll (+41)
``````````diff
diff --git a/llvm/lib/CodeGen/BranchFolding.cpp b/llvm/lib/CodeGen/BranchFolding.cpp
index dcfd9aad70fc5..d49c4b4a5755e 100644
--- a/llvm/lib/CodeGen/BranchFolding.cpp
+++ b/llvm/lib/CodeGen/BranchFolding.cpp
@@ -1776,8 +1776,9 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) {
// Okay, there is no really great place to put this block. If, however,
// the block before this one would be a fall-through if this block were
// removed, move this block to the end of the function. There is no real
- // advantage in "falling through" to an EH block, so we don't want to
- // perform this transformation for that case.
+ // advantage in "falling through" to an EH block or an indirect target of
+ // an INLINEASM_BR, so we don't want to perform this transformation for
+ // that case.
//
// Also, Windows EH introduced the possibility of an arbitrary number of
// successors to a given block. The analyzeBranch call does not consider
@@ -1787,10 +1788,15 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) {
// below were performed for EH "FallThrough" blocks. Therefore, even if
// that appears not to be happening anymore, we should assume that it is
// possible and not remove the "!FallThrough()->isEHPad" condition below.
+ //
+ // Inline asm branches also introduced the possibility of infinite
+ // rotation, as there can be an arbitrary number of successors to a
+ // given block.
MachineBasicBlock *PrevTBB = nullptr, *PrevFBB = nullptr;
SmallVector<MachineOperand, 4> PrevCond;
if (FallThrough != MF.end() &&
!FallThrough->isEHPad() &&
+ !FallThrough->isInlineAsmBrIndirectTarget() &&
!TII->analyzeBranch(PrevBB, PrevTBB, PrevFBB, PrevCond, true) &&
PrevBB.isSuccessor(&*FallThrough)) {
MBB->moveAfter(&MF.back());
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index d0815e9f51822..f096148f865cb 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -12737,17 +12737,22 @@ static Register FollowCopyChain(MachineRegisterInfo &MRI, Register Reg) {
assert(MI->getOpcode() == TargetOpcode::COPY &&
"start of copy chain MUST be COPY");
Reg = MI->getOperand(1).getReg();
+
+ // The copied register in the first copy must be virtual.
+ assert(Reg.isVirtual() && "expected COPY of virtual register");
MI = MRI.def_begin(Reg)->getParent();
+
// There may be an optional second copy.
if (MI->getOpcode() == TargetOpcode::COPY) {
assert(Reg.isVirtual() && "expected COPY of virtual register");
Reg = MI->getOperand(1).getReg();
assert(Reg.isPhysical() && "expected COPY of physical register");
- MI = MRI.def_begin(Reg)->getParent();
+ } else {
+ // The start of the chain must be an INLINEASM_BR.
+ assert(MI->getOpcode() == TargetOpcode::INLINEASM_BR &&
+ "end of copy chain MUST be INLINEASM_BR");
}
- // The start of the chain must be an INLINEASM_BR.
- assert(MI->getOpcode() == TargetOpcode::INLINEASM_BR &&
- "end of copy chain MUST be INLINEASM_BR");
+
return Reg;
}
diff --git a/llvm/test/CodeGen/X86/callbr-asm-loop.ll b/llvm/test/CodeGen/X86/callbr-asm-loop.ll
new file mode 100644
index 0000000000000..9e890a861e3e4
--- /dev/null
+++ b/llvm/test/CodeGen/X86/callbr-asm-loop.ll
@@ -0,0 +1,41 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+
+; RUN: llc -O1 -mtriple=i686-- < %s | FileCheck %s
+
+; Test that causes multiple defs of %eax.
+; FIXME: The testcase hangs with -O1/2/3 enabled.
+define i32 @loop1() {
+; CHECK-LABEL: loop1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: .p2align 4
+; CHECK-NEXT: .LBB0_1: # %tailrecurse
+; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: movl $1, %edx
+; CHECK-NEXT: #APP
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: jmp .LBB0_1
+; CHECK-NEXT: .LBB0_2: # Inline asm indirect target
+; CHECK-NEXT: # %tailrecurse.tailrecurse_crit_edge
+; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1
+; CHECK-NEXT: # Label of block must be emitted
+; CHECK-NEXT: jmp .LBB0_1
+; CHECK-NEXT: .LBB0_3: # Inline asm indirect target
+; CHECK-NEXT: # %lab2.split
+; CHECK-NEXT: # Label of block must be emitted
+; CHECK-NEXT: movl %edx, %eax
+; CHECK-NEXT: retl
+entry:
+ br label %tailrecurse
+
+tailrecurse:
+ %0 = callbr { i32, i32 } asm "", "={ax},={dx},0,1,!i,!i"(i32 0, i32 1) #1
+ to label %tailrecurse.backedge [label %tailrecurse.backedge, label %lab2.split]
+
+tailrecurse.backedge:
+ br label %tailrecurse
+
+lab2.split:
+ %asmresult5 = extractvalue { i32, i32 } %0, 1
+ ret i32 %asmresult5
+}
``````````
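Aside from the new test, the SelectionDAGBuilder hunk above reorders the copy-chain walk so that, once the optional second COPY is seen, the walk takes its physical source register and stops instead of looking up that register's definition again. As a reader's aid, here is a simplified standalone model of that control flow; the `Inst`, `DefMap`, and `followCopyChain` names are hypothetical stand-ins, not LLVM's MachineRegisterInfo API:

```cpp
#include <cassert>
#include <unordered_map>

// Hypothetical, simplified stand-ins for MIR concepts (not LLVM's API).
enum class Opcode { Copy, InlineAsmBr, Other };

struct Inst {
  Opcode Op;
  int Dst; // register defined by this instruction
  int Src; // source register; only meaningful for Copy
};

// Map from a register to its unique defining instruction (SSA-like view).
using DefMap = std::unordered_map<int, Inst>;

// Model of the restructured walk:
//   Reg  <- COPY vreg     (required first copy)
//   vreg <- COPY physreg  (optional second copy)
//   otherwise vreg is defined by the INLINEASM_BR itself.
int followCopyChain(const DefMap &Defs, int Reg) {
  const Inst &First = Defs.at(Reg);
  assert(First.Op == Opcode::Copy && "start of copy chain MUST be COPY");
  Reg = First.Src; // in the real code this register must be virtual

  const Inst &Next = Defs.at(Reg);
  if (Next.Op == Opcode::Copy) {
    // Optional second copy: take its (physical) source register and stop.
    // A physical register may have several defs, so its def list is not
    // consulted again.
    Reg = Next.Src;
  } else {
    // No second copy: the defining instruction must be the INLINEASM_BR.
    assert(Next.Op == Opcode::InlineAsmBr &&
           "end of copy chain MUST be INLINEASM_BR");
  }
  return Reg;
}
```

The essential difference from the pre-patch walk is in the Copy branch: a physical register may have multiple definitions (the test above deliberately produces multiple defs of %eax), so its def list must not be consulted to reach the INLINEASM_BR.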
https://github.com/llvm/llvm-project/pull/152916