[llvm] [CodeGen] Use findPHICopyInsertPoint to put insert point (PR #155421)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Aug 26 08:59:41 PDT 2025
https://github.com/AZero13 updated https://github.com/llvm/llvm-project/pull/155421
>From e370ef553be372af3ae19fdefad16ba12ee24987 Mon Sep 17 00:00:00 2001
From: AZero13 <gfunni234 at gmail.com>
Date: Tue, 26 Aug 2025 11:55:57 -0400
Subject: [PATCH] [CodeGen] Use findPHICopyInsertPoint to put insert point
---
llvm/include/llvm/CodeGen/TailDuplicator.h | 7 +++---
llvm/lib/CodeGen/PHIEliminationUtils.cpp | 7 +++++-
llvm/lib/CodeGen/TailDuplicator.cpp | 29 +++++++++++-----------
llvm/test/CodeGen/X86/tail-dup-asm-goto.ll | 19 +++++++-------
4 files changed, 34 insertions(+), 28 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/TailDuplicator.h b/llvm/include/llvm/CodeGen/TailDuplicator.h
index 8b1f67c416c22..9383ba6ce8671 100644
--- a/llvm/include/llvm/CodeGen/TailDuplicator.h
+++ b/llvm/include/llvm/CodeGen/TailDuplicator.h
@@ -121,9 +121,10 @@ class TailDuplicator {
SmallVectorImpl<MachineBasicBlock *> &TDBBs,
SmallVectorImpl<MachineInstr *> &Copies,
SmallVectorImpl<MachineBasicBlock *> *CandidatePtr);
- void appendCopies(MachineBasicBlock *MBB,
- SmallVectorImpl<std::pair<Register, RegSubRegPair>> &CopyInfos,
- SmallVectorImpl<MachineInstr *> &Copies);
+ void
+ appendCopies(MachineBasicBlock *MBB, MachineBasicBlock *SuccMBB,
+ SmallVectorImpl<std::pair<Register, RegSubRegPair>> &CopyInfos,
+ SmallVectorImpl<MachineInstr *> &Copies);
void removeDeadBlock(
MachineBasicBlock *MBB,
diff --git a/llvm/lib/CodeGen/PHIEliminationUtils.cpp b/llvm/lib/CodeGen/PHIEliminationUtils.cpp
index f4562f437788e..cf3391a1bc158 100644
--- a/llvm/lib/CodeGen/PHIEliminationUtils.cpp
+++ b/llvm/lib/CodeGen/PHIEliminationUtils.cpp
@@ -32,7 +32,12 @@ llvm::findPHICopyInsertPoint(MachineBasicBlock* MBB, MachineBasicBlock* SuccMBB,
// instructions that are Calls with EHPad successors or INLINEASM_BR in a
// block.
bool EHPadSuccessor = SuccMBB->isEHPad();
- if (!EHPadSuccessor && !SuccMBB->isInlineAsmBrIndirectTarget())
+ // Bypass fast path if the block itself contains INLINEASM_BR.
+ bool HasInlineAsmBr = llvm::any_of(*MBB, [](const MachineInstr &MI) {
+ return MI.getOpcode() == TargetOpcode::INLINEASM_BR;
+ });
+
+ if (!EHPadSuccessor && !HasInlineAsmBr)
return MBB->getFirstTerminator();
// Discover any defs in this basic block.
diff --git a/llvm/lib/CodeGen/TailDuplicator.cpp b/llvm/lib/CodeGen/TailDuplicator.cpp
index 5d720fbbf1c61..84011d05ca2f6 100644
--- a/llvm/lib/CodeGen/TailDuplicator.cpp
+++ b/llvm/lib/CodeGen/TailDuplicator.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/TailDuplicator.h"
+#include "PHIEliminationUtils.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/STLExtras.h"
@@ -651,14 +652,6 @@ bool TailDuplicator::shouldTailDuplicate(bool IsSimple,
if (PreRegAlloc && MI.isCall())
return false;
- // TailDuplicator::appendCopies will erroneously place COPYs after
- // INLINEASM_BR instructions after 4b0aa5724fea, which demonstrates the same
- // bug that was fixed in f7a53d82c090.
- // FIXME: Use findPHICopyInsertPoint() to find the correct insertion point
- // for the COPY when replacing PHIs.
- if (MI.getOpcode() == TargetOpcode::INLINEASM_BR)
- return false;
-
if (MI.isBundle())
InstrCount += MI.getBundleSize();
else if (!MI.isPHI() && !MI.isMetaInstruction())
@@ -926,7 +919,7 @@ bool TailDuplicator::tailDuplicate(bool IsSimple, MachineBasicBlock *TailBB,
duplicateInstruction(&MI, TailBB, PredBB, LocalVRMap, UsedByPhi);
}
}
- appendCopies(PredBB, CopyInfos, Copies);
+ appendCopies(PredBB, TailBB, CopyInfos, Copies);
NumTailDupAdded += TailBB->size() - 1; // subtract one for removed branch
@@ -994,7 +987,7 @@ bool TailDuplicator::tailDuplicate(bool IsSimple, MachineBasicBlock *TailBB,
duplicateInstruction(MI, TailBB, PrevBB, LocalVRMap, UsedByPhi);
MI->eraseFromParent();
}
- appendCopies(PrevBB, CopyInfos, Copies);
+ appendCopies(PrevBB, TailBB, CopyInfos, Copies);
} else {
TII->removeBranch(*PrevBB);
// No PHIs to worry about, just splice the instructions over.
@@ -1058,7 +1051,7 @@ bool TailDuplicator::tailDuplicate(bool IsSimple, MachineBasicBlock *TailBB,
// from PredBB.
processPHI(&MI, TailBB, PredBB, LocalVRMap, CopyInfos, UsedByPhi, false);
}
- appendCopies(PredBB, CopyInfos, Copies);
+ appendCopies(PredBB, TailBB, CopyInfos, Copies);
}
return Changed;
@@ -1066,12 +1059,18 @@ bool TailDuplicator::tailDuplicate(bool IsSimple, MachineBasicBlock *TailBB,
/// At the end of the block \p MBB generate COPY instructions between registers
/// described by \p CopyInfos. Append resulting instructions to \p Copies.
-void TailDuplicator::appendCopies(MachineBasicBlock *MBB,
- SmallVectorImpl<std::pair<Register, RegSubRegPair>> &CopyInfos,
- SmallVectorImpl<MachineInstr*> &Copies) {
- MachineBasicBlock::iterator Loc = MBB->getFirstTerminator();
+void TailDuplicator::appendCopies(
+ MachineBasicBlock *MBB, MachineBasicBlock *SuccMBB,
+ SmallVectorImpl<std::pair<Register, RegSubRegPair>> &CopyInfos,
+ SmallVectorImpl<MachineInstr *> &Copies) {
const MCInstrDesc &CopyD = TII->get(TargetOpcode::COPY);
+
for (auto &CI : CopyInfos) {
+ // Use findPHICopyInsertPoint as the starting point, but ensure we don't
+ // place COPYs after INLINEASM_BR instructions
+ MachineBasicBlock::iterator Loc =
+ findPHICopyInsertPoint(MBB, SuccMBB, CI.second.Reg);
+
auto C = BuildMI(*MBB, Loc, DebugLoc(), CopyD, CI.first)
.addReg(CI.second.Reg, 0, CI.second.SubReg);
Copies.push_back(C);
diff --git a/llvm/test/CodeGen/X86/tail-dup-asm-goto.ll b/llvm/test/CodeGen/X86/tail-dup-asm-goto.ll
index 7ce983869ce7d..0087b4fa72bb4 100644
--- a/llvm/test/CodeGen/X86/tail-dup-asm-goto.ll
+++ b/llvm/test/CodeGen/X86/tail-dup-asm-goto.ll
@@ -20,30 +20,31 @@ define ptr @test1(ptr %arg1, ptr %arg2) {
; CHECK-NEXT: JMP_1 %bb.1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1.bb100:
- ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: successors: %bb.5(0x80000000), %bb.4(0x00000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: MOV64mi32 [[COPY1]], 1, $noreg, 0, $noreg, 0 :: (store (s64) into %ir.arg1)
- ; CHECK-NEXT: JMP_1 %bb.3
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gr64 = COPY [[MOV64rm]]
+ ; CHECK-NEXT: INLINEASM_BR &"#$0 $1 $2", 9 /* sideeffect mayload attdialect */, 13 /* imm */, 42, 13 /* imm */, 0, 13 /* imm */, %bb.4, 12 /* clobber */, implicit-def early-clobber $df, 12 /* clobber */, implicit-def early-clobber $fpsw, 12 /* clobber */, implicit-def early-clobber $eflags
+ ; CHECK-NEXT: JMP_1 %bb.5
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2.bb106:
- ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: successors: %bb.5(0x80000000), %bb.4(0x00000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
; CHECK-NEXT: CALL64pcrel32 @foo, csr_64, implicit $rsp, implicit $ssp, implicit-def $rsp, implicit-def $ssp
; CHECK-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.3.bb110:
- ; CHECK-NEXT: successors: %bb.5(0x80000000), %bb.4(0x00000000)
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[PHI:%[0-9]+]]:gr64 = PHI [[COPY]], %bb.2, [[MOV64rm]], %bb.1
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gr64 = COPY [[COPY]]
; CHECK-NEXT: INLINEASM_BR &"#$0 $1 $2", 9 /* sideeffect mayload attdialect */, 13 /* imm */, 42, 13 /* imm */, 0, 13 /* imm */, %bb.4, 12 /* clobber */, implicit-def early-clobber $df, 12 /* clobber */, implicit-def early-clobber $fpsw, 12 /* clobber */, implicit-def early-clobber $eflags
; CHECK-NEXT: JMP_1 %bb.5
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.4.bb17.i.i.i (inlineasm-br-indirect-target):
; CHECK-NEXT: successors: %bb.5(0x80000000)
; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[PHI:%[0-9]+]]:gr64 = PHI [[COPY2]], %bb.1, [[COPY3]], %bb.2
+ ; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.5.kmem_cache_has_cpu_partial.exit:
- ; CHECK-NEXT: $rax = COPY [[PHI]]
+ ; CHECK-NEXT: [[PHI1:%[0-9]+]]:gr64 = PHI [[PHI]], %bb.4, [[COPY2]], %bb.1, [[COPY3]], %bb.2
+ ; CHECK-NEXT: $rax = COPY [[PHI1]]
; CHECK-NEXT: RET 0, $rax
bb:
%i28.i = load ptr, ptr %arg1, align 8
More information about the llvm-commits
mailing list