[llvm] [MachineSink] Add option for aggressive loop sinking (PR #117247)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Nov 21 14:13:16 PST 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Jeffrey Byrnes (jrbyrnes)
<details>
<summary>Changes</summary>
The current MIR cycle sinking capabilities are rather limited. It only support sinking copies into a single successor block while obeying limits.
This opt-in feature adds a more aggressive option, that is not limited to the above concerns. The feature will try to "sink" by duplicating any top-level preheader instruction (that we are sure is safe to sink) into any user block, then does some dead code cleanup. In particular, this is useful for high RP situations when loop bodies have control flow.
---
Patch is 49.46 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/117247.diff
4 Files Affected:
- (modified) llvm/lib/CodeGen/MachineSink.cpp (+184)
- (added) llvm/test/CodeGen/AMDGPU/aggressive-loop-sink-nonstandard.ll (+20)
- (modified) llvm/test/CodeGen/AMDGPU/machine-sink-ignorable-exec-use.mir (+360)
- (modified) llvm/test/CodeGen/AMDGPU/machine-sink-lane-mask.mir (+139-69)
``````````diff
diff --git a/llvm/lib/CodeGen/MachineSink.cpp b/llvm/lib/CodeGen/MachineSink.cpp
index c470bd71dfb29f..d8dd6e8478686d 100644
--- a/llvm/lib/CodeGen/MachineSink.cpp
+++ b/llvm/lib/CodeGen/MachineSink.cpp
@@ -100,6 +100,12 @@ static cl::opt<bool>
"register spills"),
cl::init(false), cl::Hidden);
+static cl::opt<bool> AggressivelySinkInstsIntoCycle(
+ "aggressively-sink-insts-to-avoid-spills",
+ cl::desc("Aggressively sink instructions into cycles to avoid "
+ "register spills"),
+ cl::init(false), cl::Hidden);
+
static cl::opt<unsigned> SinkIntoCycleLimit(
"machine-sink-cycle-limit",
cl::desc(
@@ -256,6 +262,13 @@ class MachineSinking : public MachineFunctionPass {
SmallVectorImpl<MachineInstr *> &Candidates);
bool SinkIntoCycle(MachineCycle *Cycle, MachineInstr &I);
+ bool isDead(const MachineInstr *MI) const;
+ bool AggressivelySinkIntoCycle(
+ MachineCycle *Cycle, MachineInstr &I,
+ DenseMap<MachineInstr *,
+ std::list<std::pair<MachineBasicBlock *, MachineInstr *>>>
+ SunkInstrs);
+
bool isProfitableToSinkTo(Register Reg, MachineInstr &MI,
MachineBasicBlock *MBB,
MachineBasicBlock *SuccToSinkTo,
@@ -679,6 +692,10 @@ void MachineSinking::FindCycleSinkCandidates(
SmallVectorImpl<MachineInstr *> &Candidates) {
for (auto &MI : *BB) {
LLVM_DEBUG(dbgs() << "CycleSink: Analysing candidate: " << MI);
+ if (MI.isDebugInstr()) {
+ LLVM_DEBUG(dbgs() << "CycleSink: Dont sink debug instructions\n");
+ continue;
+ }
if (!TII->shouldSink(MI)) {
LLVM_DEBUG(dbgs() << "CycleSink: Instruction not a candidate for this "
"target\n");
@@ -799,6 +816,30 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) {
}
}
+ if (AggressivelySinkInstsIntoCycle) {
+ SmallVector<MachineCycle *, 8> Cycles(CI->toplevel_cycles());
+ DenseMap<MachineInstr *,
+ std::list<std::pair<MachineBasicBlock *, MachineInstr *>>>
+ SunkInstrs;
+ for (auto *Cycle : Cycles) {
+ MachineBasicBlock *Preheader = Cycle->getCyclePreheader();
+ if (!Preheader) {
+ LLVM_DEBUG(dbgs() << "AggressiveCycleSink: Can't find preheader\n");
+ continue;
+ }
+ SmallVector<MachineInstr *, 8> Candidates;
+ FindCycleSinkCandidates(Cycle, Preheader, Candidates);
+
+ // Walk the candidates in reverse order so that we start with the use
+ // of a def-use chain, if there is any.
+ for (MachineInstr *I : llvm::reverse(Candidates)) {
+ AggressivelySinkIntoCycle(Cycle, *I, SunkInstrs);
+ EverMadeChange = true;
+ ++NumCycleSunk;
+ }
+ }
+ }
+
HasStoreCache.clear();
StoreInstrCache.clear();
@@ -1574,6 +1615,149 @@ bool MachineSinking::hasStoreBetween(MachineBasicBlock *From,
return HasAliasedStore;
}
+/// Copy paste from DeadMachineInstructionElimImpl
+
+bool MachineSinking::isDead(const MachineInstr *MI) const {
+ // Instructions without side-effects are dead iff they only define dead regs.
+ // This function is hot and this loop returns early in the common case,
+ // so only perform additional checks before this if absolutely necessary.
+ for (const MachineOperand &MO : MI->all_defs()) {
+ Register Reg = MO.getReg();
+ if (Reg.isPhysical()) {
+ return false;
+ } else {
+ if (MO.isDead()) {
+#ifndef NDEBUG
+ // Basic check on the register. All of them should be 'undef'.
+ for (auto &U : MRI->use_nodbg_operands(Reg))
+ assert(U.isUndef() && "'Undef' use on a 'dead' register is found!");
+#endif
+ continue;
+ }
+ for (const MachineInstr &Use : MRI->use_nodbg_instructions(Reg)) {
+ if (&Use != MI)
+ // This def has a non-debug use. Don't delete the instruction!
+ return false;
+ }
+ }
+ }
+
+ // Technically speaking inline asm without side effects and no defs can still
+ // be deleted. But there is so much bad inline asm code out there, we should
+ // let them be.
+ if (MI->isInlineAsm())
+ return false;
+
+ // FIXME: See issue #105950 for why LIFETIME markers are considered dead here.
+ if (MI->isLifetimeMarker())
+ return true;
+
+ // If there are no defs with uses, the instruction might be dead.
+ return MI->wouldBeTriviallyDead();
+}
+
+/// Aggressively sink instructions into cycles. This will aggressively try to
+/// sink all instructions in the top-most preheaders in an attempt to reduce RP.
+/// In particular, it will sink into multiple successor blocks without limits
+/// based on the amount of sinking, or the type of ops being sunk (so long as
+/// they are safe to sink).
+bool MachineSinking::AggressivelySinkIntoCycle(
+ MachineCycle *Cycle, MachineInstr &I,
+ DenseMap<MachineInstr *,
+ std::list<std::pair<MachineBasicBlock *, MachineInstr *>>>
+ SunkInstrs) {
+ LLVM_DEBUG(dbgs() << "AggressiveCycleSink: Finding sink block for: " << I);
+ MachineBasicBlock *Preheader = Cycle->getCyclePreheader();
+ assert(Preheader && "Cycle sink needs a preheader block");
+ SmallVector<std::pair<MachineOperand, MachineInstr *>> Uses;
+ // TODO: support instructions with multiple defs
+ if (I.getNumDefs() > 1)
+ return false;
+
+ MachineOperand DefMO = I.getOperand(0);
+ for (MachineInstr &MI : MRI->use_instructions(DefMO.getReg())) {
+ Uses.push_back({DefMO, &MI});
+ }
+
+ for (std::pair<MachineOperand, MachineInstr *> Entry : Uses) {
+ MachineInstr *MI = Entry.second;
+ LLVM_DEBUG(dbgs() << "AggressiveCycleSink: Analysing use: " << MI);
+ if (MI->isPHI()) {
+ LLVM_DEBUG(
+ dbgs() << "AggressiveCycleSink: Not attempting to sink for PHI.\n");
+ continue;
+ }
+ // We cannot sink before the prologue
+ if (TII->isBasicBlockPrologue(*MI) || MI->isPosition()) {
+ LLVM_DEBUG(dbgs() << "AggressiveCycleSink: Use is BasicBlock prologue, "
+ "can't sink.\n");
+ continue;
+ }
+ if (!Cycle->contains(MI->getParent())) {
+ LLVM_DEBUG(
+ dbgs() << "AggressiveCycleSink: Use not in cycle, can't sink.\n");
+ continue;
+ }
+
+ MachineBasicBlock *SinkBlock = MI->getParent();
+ MachineInstr *NewMI = nullptr;
+
+ // Check for the case in which we have already sunk a copy of this
+ // instruction into the user block.
+ if (SunkInstrs.contains(&I)) {
+ auto SunkBlocks = SunkInstrs[&I];
+ auto Match = std::find_if(
+ SunkBlocks.begin(), SunkBlocks.end(),
+ [&SinkBlock](
+ std::pair<MachineBasicBlock *, MachineInstr *> SunkEntry) {
+ return SunkEntry.first == SinkBlock;
+ });
+ if (Match != SunkBlocks.end()) {
+ LLVM_DEBUG(dbgs() << "AggressiveCycleSink: Already sunk to block: "
+ << printMBBReference(*SinkBlock) << "\n");
+ NewMI = Match->second;
+ }
+ }
+
+ // Create a copy of the instruction in the use block.
+ if (!NewMI) {
+ LLVM_DEBUG(dbgs() << "AggressiveCycleSink: Sinking instruction to block: "
+ << printMBBReference(*SinkBlock) << "\n");
+
+ NewMI = I.getMF()->CloneMachineInstr(&I);
+ if (DefMO.getReg().isVirtual()) {
+ const TargetRegisterClass *TRC = MRI->getRegClass(DefMO.getReg());
+ Register DestReg = MRI->createVirtualRegister(TRC);
+ NewMI->substituteRegister(DefMO.getReg(), DestReg, DefMO.getSubReg(),
+ *TRI);
+ }
+ SinkBlock->insert(SinkBlock->SkipPHIsAndLabels(SinkBlock->begin()),
+ NewMI);
+ SunkInstrs[&I].push_back({SinkBlock, NewMI});
+ }
+
+ // Conservatively clear any kill flags on uses of sunk instruction
+ for (MachineOperand &MO : NewMI->operands()) {
+ if (MO.isReg() && MO.readsReg())
+ RegsToClearKillFlags.insert(MO.getReg());
+ }
+
+ // The instruction is moved from its basic block, so do not retain the
+ // debug information.
+ assert(!NewMI->isDebugInstr() && "Should not sink debug inst");
+ NewMI->setDebugLoc(DebugLoc());
+
+ // Replace the use with the newly created virtual register.
+ MachineOperand UseMO = Entry.first;
+ MI->substituteRegister(UseMO.getReg(), NewMI->getOperand(0).getReg(),
+ UseMO.getSubReg(), *TRI);
+ }
+ // If we have replaced all uses, then delete the dead instruction
+ if (isDead(&I))
+ I.eraseFromParent();
+ return true;
+}
+
/// Sink instructions into cycles if profitable. This especially tries to
/// prevent register spills caused by register pressure if there is little to no
/// overhead moving instructions into cycles.
diff --git a/llvm/test/CodeGen/AMDGPU/aggressive-loop-sink-nonstandard.ll b/llvm/test/CodeGen/AMDGPU/aggressive-loop-sink-nonstandard.ll
new file mode 100644
index 00000000000000..72b4495297a1c5
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/aggressive-loop-sink-nonstandard.ll
@@ -0,0 +1,20 @@
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 --aggressively-sink-insts-to-avoid-spills=1 < %s | FileCheck -check-prefix=SUNK %s
+
+; Check that various edge cases do not crash the compiler
+
+; Multiple uses of sunk valu, chain of sink candidates
+
+define half @global_agent_atomic_fmin_ret_f16__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, half %val) {
+; SUNK-LABEL: global_agent_atomic_fmin_ret_f16__amdgpu_no_fine_grained_memory:
+ %result = atomicrmw fmin ptr addrspace(1) %ptr, half %val syncscope("agent") seq_cst
+ ret half %result
+}
+
+; Sink candidates with multiple defs
+
+define void @memmove_p5_p5(ptr addrspace(5) align 1 %dst, ptr addrspace(5) align 1 readonly %src, i64 %sz) {
+; SUNK-LABEL: memmove_p5_p5:
+entry:
+ tail call void @llvm.memmove.p5.p5.i64(ptr addrspace(5) noundef nonnull align 1 %dst, ptr addrspace(5) noundef nonnull align 1 %src, i64 %sz, i1 false)
+ ret void
+}
diff --git a/llvm/test/CodeGen/AMDGPU/machine-sink-ignorable-exec-use.mir b/llvm/test/CodeGen/AMDGPU/machine-sink-ignorable-exec-use.mir
index efa21052e3ae2f..f93d8f3dde21b6 100644
--- a/llvm/test/CodeGen/AMDGPU/machine-sink-ignorable-exec-use.mir
+++ b/llvm/test/CodeGen/AMDGPU/machine-sink-ignorable-exec-use.mir
@@ -1,5 +1,7 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn -mcpu=gfx908 -verify-machineinstrs -run-pass=machine-sink -o - %s | FileCheck -check-prefixes=GFX9 %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx908 -verify-machineinstrs -run-pass=machine-sink --aggressively-sink-insts-to-avoid-spills=1 -o - %s | FileCheck -check-prefixes=GFX9-SUNK %s
+
---
name: test_sink_fmac_to_only_use
@@ -48,6 +50,47 @@ body: |
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: bb.3:
; GFX9-NEXT: S_ENDPGM 0, implicit [[PHI]], implicit [[PHI1]]
+ ;
+ ; GFX9-SUNK-LABEL: name: test_sink_fmac_to_only_use
+ ; GFX9-SUNK: bb.0:
+ ; GFX9-SUNK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+ ; GFX9-SUNK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
+ ; GFX9-SUNK-NEXT: {{ $}}
+ ; GFX9-SUNK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX9-SUNK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX9-SUNK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
+ ; GFX9-SUNK-NEXT: [[S_MOV_B64_1:%[0-9]+]]:sreg_64 = S_MOV_B64 0
+ ; GFX9-SUNK-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY [[S_MOV_B64_]]
+ ; GFX9-SUNK-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[S_MOV_B64_1]]
+ ; GFX9-SUNK-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD killed [[COPY2]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
+ ; GFX9-SUNK-NEXT: [[GLOBAL_LOAD_DWORD1:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD killed [[COPY3]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
+ ; GFX9-SUNK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+ ; GFX9-SUNK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1
+ ; GFX9-SUNK-NEXT: [[V_CMP_LT_I32_e64_:%[0-9]+]]:sreg_64 = V_CMP_LT_I32_e64 [[COPY4]](s32), [[S_MOV_B32_]], implicit $exec
+ ; GFX9-SUNK-NEXT: [[SI_IF:%[0-9]+]]:sreg_64 = SI_IF [[V_CMP_LT_I32_e64_]], %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ ; GFX9-SUNK-NEXT: S_BRANCH %bb.1
+ ; GFX9-SUNK-NEXT: {{ $}}
+ ; GFX9-SUNK-NEXT: bb.1:
+ ; GFX9-SUNK-NEXT: successors: %bb.2(0x80000000)
+ ; GFX9-SUNK-NEXT: {{ $}}
+ ; GFX9-SUNK-NEXT: [[V_FMAC_F32_e64_:%[0-9]+]]:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, [[GLOBAL_LOAD_DWORD]], 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_FMAC_F32_e64_1:%[0-9]+]]:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, [[V_FMAC_F32_e64_]], 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_FMAC_F32_e64_2:%[0-9]+]]:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, [[GLOBAL_LOAD_DWORD1]], 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_FMAC_F32_e64_3:%[0-9]+]]:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, [[V_FMAC_F32_e64_2]], 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_F32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_F32_e32 [[V_FMAC_F32_e64_]], [[V_FMAC_F32_e64_1]], implicit $mode, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_F32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_F32_e32 [[V_FMAC_F32_e64_2]], [[V_FMAC_F32_e64_3]], implicit $mode, implicit $exec
+ ; GFX9-SUNK-NEXT: {{ $}}
+ ; GFX9-SUNK-NEXT: bb.2:
+ ; GFX9-SUNK-NEXT: successors: %bb.3(0x80000000)
+ ; GFX9-SUNK-NEXT: {{ $}}
+ ; GFX9-SUNK-NEXT: [[PHI:%[0-9]+]]:vgpr_32 = PHI [[V_MOV_B32_e32_]], %bb.0, [[V_ADD_F32_e32_]], %bb.1
+ ; GFX9-SUNK-NEXT: [[PHI1:%[0-9]+]]:vgpr_32 = PHI [[V_MOV_B32_e32_1]], %bb.0, [[V_ADD_F32_e32_1]], %bb.1
+ ; GFX9-SUNK-NEXT: SI_END_CF [[SI_IF]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ ; GFX9-SUNK-NEXT: {{ $}}
+ ; GFX9-SUNK-NEXT: bb.3:
+ ; GFX9-SUNK-NEXT: S_ENDPGM 0, implicit [[PHI]], implicit [[PHI1]]
bb.0:
liveins: $vgpr0, $vgpr1, $vgpr2
%1:vgpr_32 = COPY $vgpr0
@@ -131,6 +174,48 @@ body: |
; GFX9-NEXT: bb.3:
; GFX9-NEXT: [[V_ADD_F32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_F32_e32 [[V_FMAC_F32_e64_3]], [[V_FMAC_F32_e64_1]], implicit $mode, implicit $exec
; GFX9-NEXT: S_ENDPGM 0, implicit [[PHI]], implicit [[PHI1]]
+ ;
+ ; GFX9-SUNK-LABEL: name: test_no_sink_into_if_cond_multiple_uses
+ ; GFX9-SUNK: bb.0:
+ ; GFX9-SUNK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+ ; GFX9-SUNK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
+ ; GFX9-SUNK-NEXT: {{ $}}
+ ; GFX9-SUNK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX9-SUNK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX9-SUNK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
+ ; GFX9-SUNK-NEXT: [[S_MOV_B64_1:%[0-9]+]]:sreg_64 = S_MOV_B64 0
+ ; GFX9-SUNK-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY [[S_MOV_B64_]]
+ ; GFX9-SUNK-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[S_MOV_B64_1]]
+ ; GFX9-SUNK-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD killed [[COPY2]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
+ ; GFX9-SUNK-NEXT: [[V_FMAC_F32_e64_:%[0-9]+]]:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, [[GLOBAL_LOAD_DWORD]], 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_FMAC_F32_e64_1:%[0-9]+]]:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, [[V_FMAC_F32_e64_]], 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
+ ; GFX9-SUNK-NEXT: [[GLOBAL_LOAD_DWORD1:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD killed [[COPY3]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
+ ; GFX9-SUNK-NEXT: [[V_FMAC_F32_e64_2:%[0-9]+]]:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, [[GLOBAL_LOAD_DWORD1]], 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_FMAC_F32_e64_3:%[0-9]+]]:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, [[V_FMAC_F32_e64_2]], 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+ ; GFX9-SUNK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+ ; GFX9-SUNK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1
+ ; GFX9-SUNK-NEXT: [[V_CMP_LT_I32_e64_:%[0-9]+]]:sreg_64 = V_CMP_LT_I32_e64 [[COPY4]](s32), [[S_MOV_B32_]], implicit $exec
+ ; GFX9-SUNK-NEXT: [[SI_IF:%[0-9]+]]:sreg_64 = SI_IF [[V_CMP_LT_I32_e64_]], %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ ; GFX9-SUNK-NEXT: S_BRANCH %bb.1
+ ; GFX9-SUNK-NEXT: {{ $}}
+ ; GFX9-SUNK-NEXT: bb.1:
+ ; GFX9-SUNK-NEXT: successors: %bb.2(0x80000000)
+ ; GFX9-SUNK-NEXT: {{ $}}
+ ; GFX9-SUNK-NEXT: [[V_ADD_F32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_F32_e32 [[V_FMAC_F32_e64_]], [[V_FMAC_F32_e64_1]], implicit $mode, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_F32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_F32_e32 [[V_FMAC_F32_e64_2]], [[V_FMAC_F32_e64_3]], implicit $mode, implicit $exec
+ ; GFX9-SUNK-NEXT: {{ $}}
+ ; GFX9-SUNK-NEXT: bb.2:
+ ; GFX9-SUNK-NEXT: successors: %bb.3(0x80000000)
+ ; GFX9-SUNK-NEXT: {{ $}}
+ ; GFX9-SUNK-NEXT: [[PHI:%[0-9]+]]:vgpr_32 = PHI [[V_MOV_B32_e32_]], %bb.0, [[V_ADD_F32_e32_]], %bb.1
+ ; GFX9-SUNK-NEXT: [[PHI1:%[0-9]+]]:vgpr_32 = PHI [[V_MOV_B32_e32_1]], %bb.0, [[V_ADD_F32_e32_1]], %bb.1
+ ; GFX9-SUNK-NEXT: SI_END_CF [[SI_IF]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ ; GFX9-SUNK-NEXT: {{ $}}
+ ; GFX9-SUNK-NEXT: bb.3:
+ ; GFX9-SUNK-NEXT: [[V_ADD_F32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_F32_e32 [[V_FMAC_F32_e64_3]], [[V_FMAC_F32_e64_1]], implicit $mode, implicit $exec
+ ; GFX9-SUNK-NEXT: S_ENDPGM 0, implicit [[PHI]], implicit [[PHI1]]
bb.0:
liveins: $vgpr0, $vgpr1, $vgpr2
%1:vgpr_32 = COPY $vgpr0
@@ -215,6 +300,48 @@ body: |
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: bb.3:
; GFX9-NEXT: S_ENDPGM 0, implicit [[PHI]], implicit [[PHI1]]
+ ;
+ ; GFX9-SUNK-LABEL: name: no_sink_fmac_not_constant_mode
+ ; GFX9-SUNK: bb.0:
+ ; GFX9-SUNK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+ ; GFX9-SUNK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
+ ; GFX9-SUNK-NEXT: {{ $}}
+ ; GFX9-SUNK-NEXT: $mode = IMPLICIT_DEF
+ ; GFX9-SUNK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX9-SUNK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX9-SUNK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
+ ; GFX9-SUNK-NEXT: [[S_MOV_B64_1:%[0-9]+]]:sreg_64 = S_MOV_B64 0
+ ; GFX9-SUNK-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY [[S_MOV_B64_]]
+ ; GFX9-SUNK-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[S_MOV_B64_1]]
+ ; GFX9-SUNK-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD killed [[COPY2]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
+ ; GFX9-SUNK-NEXT: [[V_FMAC_F32_e64_:%[0-9]+]]:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, [[GLOBAL_LOAD_DWORD]], 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_FMAC_F32_e64_1:%[0-9]+]]:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, [[V_FMAC_F32_e64_]], 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
+ ; GFX9-SUNK-NEXT: [[GLOBAL_LOAD_DWORD1:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD killed [[COPY3]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
+ ; GFX9-SUNK-NEXT: [[V_FMAC_F32_e64_2:%[0-9]+]]:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, [[GLOBAL_LOAD_DWORD1]], 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_FMAC_F32_e64_3:%[0-9]+]]:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, [[V_FMAC_F32_e64_2]], 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+ ; GFX9-SUNK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+ ; GFX9-SUNK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/117247
More information about the llvm-commits
mailing list