[llvm] [BranchFolding] Avoid moving blocks to fall through to an indirect target of inline asm (PR #152916)
via llvm-commits
llvm-commits at lists.llvm.org
Sun Aug 10 06:44:20 PDT 2025
llvmbot wrote:
@llvm/pr-subscribers-backend-x86
Author: XChy (XChy)
Changes:
Depends on #152591 to fix https://github.com/llvm/llvm-project/issues/149023.
Similar to an EH pad, there is no real advantage in "falling through" to an indirect target of an INLINEASM_BR, and multiple indirect targets of inline asm at the end of a function may be rotated indefinitely.
Therefore, this patch stops treating an indirect target of inline asm as a fall-through candidate for this optimization.
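For illustration only (this is not the reproducer from issue #149023), a source-level pattern that lowers to an INLINEASM_BR with multiple indirect targets at the end of a function looks roughly like the following `asm goto` sketch:

```cpp
// Illustrative sketch only: the labels of an `asm goto` become indirect
// targets of an INLINEASM_BR at the MIR level. When such targets sit at the
// end of a function and branch straight back into the loop, pre-patch
// BranchFolding could keep moving blocks to make one of them a fall-through
// without the layout ever converging.
int spin() {
  int r = 0;
again:
  asm goto("" : : : : back, done); // two indirect targets
  goto again;                      // default successor: keep looping
back:                              // indirect target #1: also loops
  goto again;
done:                              // indirect target #2: exit
  return r;
}
```

With the patch, BranchFolding simply declines to move a block in order to create a fall-through into such an indirect target, mirroring the existing exception for EH pads.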
---
Full diff: https://github.com/llvm/llvm-project/pull/152916.diff
3 Files Affected:
- (modified) llvm/lib/CodeGen/BranchFolding.cpp (+8-2)
- (modified) llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp (+9-4)
- (added) llvm/test/CodeGen/X86/callbr-asm-loop.ll (+41)
``````````diff
diff --git a/llvm/lib/CodeGen/BranchFolding.cpp b/llvm/lib/CodeGen/BranchFolding.cpp
index dcfd9aad70fc5..d49c4b4a5755e 100644
--- a/llvm/lib/CodeGen/BranchFolding.cpp
+++ b/llvm/lib/CodeGen/BranchFolding.cpp
@@ -1776,8 +1776,9 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) {
// Okay, there is no really great place to put this block. If, however,
// the block before this one would be a fall-through if this block were
// removed, move this block to the end of the function. There is no real
- // advantage in "falling through" to an EH block, so we don't want to
- // perform this transformation for that case.
+ // advantage in "falling through" to an EH block or an indirect target of
+ // an INLINEASM_BR, so we don't want to perform this transformation for
+ // that case.
//
// Also, Windows EH introduced the possibility of an arbitrary number of
// successors to a given block. The analyzeBranch call does not consider
@@ -1787,10 +1788,15 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) {
// below were performed for EH "FallThrough" blocks. Therefore, even if
// that appears not to be happening anymore, we should assume that it is
// possible and not remove the "!FallThrough()->isEHPad" condition below.
+ //
+ // Inline asm branches also introduced the possibility of infinite
+ // rotation, as there can be an arbitrary number of successors to a
+ // given block.
MachineBasicBlock *PrevTBB = nullptr, *PrevFBB = nullptr;
SmallVector<MachineOperand, 4> PrevCond;
if (FallThrough != MF.end() &&
!FallThrough->isEHPad() &&
+ !FallThrough->isInlineAsmBrIndirectTarget() &&
!TII->analyzeBranch(PrevBB, PrevTBB, PrevFBB, PrevCond, true) &&
PrevBB.isSuccessor(&*FallThrough)) {
MBB->moveAfter(&MF.back());
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index d0815e9f51822..f096148f865cb 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -12737,17 +12737,22 @@ static Register FollowCopyChain(MachineRegisterInfo &MRI, Register Reg) {
assert(MI->getOpcode() == TargetOpcode::COPY &&
"start of copy chain MUST be COPY");
Reg = MI->getOperand(1).getReg();
+
+ // The copied register in the first copy must be virtual.
+ assert(Reg.isVirtual() && "expected COPY of virtual register");
MI = MRI.def_begin(Reg)->getParent();
+
// There may be an optional second copy.
if (MI->getOpcode() == TargetOpcode::COPY) {
assert(Reg.isVirtual() && "expected COPY of virtual register");
Reg = MI->getOperand(1).getReg();
assert(Reg.isPhysical() && "expected COPY of physical register");
- MI = MRI.def_begin(Reg)->getParent();
+ } else {
+ // The start of the chain must be an INLINEASM_BR.
+ assert(MI->getOpcode() == TargetOpcode::INLINEASM_BR &&
+ "end of copy chain MUST be INLINEASM_BR");
}
- // The start of the chain must be an INLINEASM_BR.
- assert(MI->getOpcode() == TargetOpcode::INLINEASM_BR &&
- "end of copy chain MUST be INLINEASM_BR");
+
return Reg;
}
diff --git a/llvm/test/CodeGen/X86/callbr-asm-loop.ll b/llvm/test/CodeGen/X86/callbr-asm-loop.ll
new file mode 100644
index 0000000000000..9e890a861e3e4
--- /dev/null
+++ b/llvm/test/CodeGen/X86/callbr-asm-loop.ll
@@ -0,0 +1,41 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+
+; RUN: llc -O1 -mtriple=i686-- < %s | FileCheck %s
+
+; Test that causes multiple defs of %eax.
+; FIXME: The testcase hangs with -O1/2/3 enabled.
+define i32 @loop1() {
+; CHECK-LABEL: loop1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: .p2align 4
+; CHECK-NEXT: .LBB0_1: # %tailrecurse
+; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: movl $1, %edx
+; CHECK-NEXT: #APP
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: jmp .LBB0_1
+; CHECK-NEXT: .LBB0_2: # Inline asm indirect target
+; CHECK-NEXT: # %tailrecurse.tailrecurse_crit_edge
+; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1
+; CHECK-NEXT: # Label of block must be emitted
+; CHECK-NEXT: jmp .LBB0_1
+; CHECK-NEXT: .LBB0_3: # Inline asm indirect target
+; CHECK-NEXT: # %lab2.split
+; CHECK-NEXT: # Label of block must be emitted
+; CHECK-NEXT: movl %edx, %eax
+; CHECK-NEXT: retl
+entry:
+ br label %tailrecurse
+
+tailrecurse:
+ %0 = callbr { i32, i32 } asm "", "={ax},={dx},0,1,!i,!i"(i32 0, i32 1) #1
+ to label %tailrecurse.backedge [label %tailrecurse.backedge, label %lab2.split]
+
+tailrecurse.backedge:
+ br label %tailrecurse
+
+lab2.split:
+ %asmresult5 = extractvalue { i32, i32 } %0, 1
+ ret i32 %asmresult5
+}
``````````
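Aside from the new test, the SelectionDAGBuilder hunk above reorders the copy-chain walk so that, once the optional second COPY is seen, the walk takes its physical source register and stops instead of looking up that register's definition again. As a reader's aid, here is a simplified standalone model of that control flow; the `Inst`, `DefMap`, and `followCopyChain` names are hypothetical stand-ins, not LLVM's MachineRegisterInfo API:

```cpp
#include <cassert>
#include <unordered_map>

// Hypothetical, simplified stand-ins for MIR concepts (not LLVM's API).
enum class Opcode { Copy, InlineAsmBr, Other };

struct Inst {
  Opcode Op;
  int Dst; // register defined by this instruction
  int Src; // source register; only meaningful for Copy
};

// Map from a register to its unique defining instruction (SSA-like view).
using DefMap = std::unordered_map<int, Inst>;

// Model of the restructured walk:
//   Reg  <- COPY vreg     (required first copy)
//   vreg <- COPY physreg  (optional second copy)
//   otherwise vreg is defined by the INLINEASM_BR itself.
int followCopyChain(const DefMap &Defs, int Reg) {
  const Inst &First = Defs.at(Reg);
  assert(First.Op == Opcode::Copy && "start of copy chain MUST be COPY");
  Reg = First.Src; // in the real code this register must be virtual

  const Inst &Next = Defs.at(Reg);
  if (Next.Op == Opcode::Copy) {
    // Optional second copy: take its (physical) source register and stop.
    // A physical register may have several defs, so its def list is not
    // consulted again.
    Reg = Next.Src;
  } else {
    // No second copy: the defining instruction must be the INLINEASM_BR.
    assert(Next.Op == Opcode::InlineAsmBr &&
           "end of copy chain MUST be INLINEASM_BR");
  }
  return Reg;
}
```

The essential difference from the pre-patch walk is in the Copy branch: a physical register may have multiple definitions (the test above deliberately produces multiple defs of %eax), so its def list must not be consulted to reach the INLINEASM_BR.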
https://github.com/llvm/llvm-project/pull/152916