[llvm] [AMDGPU] Allow rematerialization of instructions with virtual register uses (PR #124327)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Jan 24 11:07:18 PST 2025
llvmbot wrote:
@llvm/pr-subscribers-backend-amdgpu
Author: Jeffrey Byrnes (jrbyrnes)
Remove the restriction that scheduling rematerialization candidates cannot have virtual register uses.
Currently, this only allows virtual register uses that are already live at the rematerialization point, so bring in allUsesAvailableAt to check for this condition. Because of this condition, the uses of the remat candidates are already live into the region, so the remat will not increase live-in pressure.
Add an expensive check that verifies this condition.
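In rough terms, the new helper walks every virtual register the candidate reads and requires that the value live at the original def is still the value live at the remat point. A condensed sketch of that check, simplified from the GCNSchedStrategy.cpp hunk below (the real code also rejects rematerialization into the same instruction as the def and checks subrange liveness for subregister uses):

```cpp
// Simplified sketch of PreRARematStage::allUsesAvailableAt from the patch
// below; not a drop-in copy.
for (const MachineOperand &MO : InstToRemat->operands()) {
  // Only virtual registers that are actually read matter here.
  if (!MO.isReg() || !MO.getReg() || !MO.readsReg() || !MO.getReg().isVirtual())
    continue;
  LiveInterval &LI = LIS->getInterval(MO.getReg());
  // The value number defined at OriginalIdx must still be the live value
  // at RematIdx, i.e. the operand is unchanged at the remat point.
  if (LI.getVNInfoAt(OriginalIdx) != LI.getVNInfoAt(RematIdx))
    return false;
}
return true;
```

Since every use is live at the remat point, it must already be a live-in of the region containing that point, which is what the new EXPENSIVE_CHECKS block in sinkTriviallyRematInsts asserts.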
---
Patch is 89.95 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/124327.diff
4 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp (+82-15)
- (modified) llvm/lib/Target/AMDGPU/GCNSchedStrategy.h (+3-2)
- (modified) llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats.mir (+1008-48)
- (modified) llvm/test/CodeGen/AMDGPU/sched-assert-dead-def-subreg-use-other-subreg.mir (+3-3)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index b00105ae9bd528..891f1dbed5d4b2 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -1615,6 +1615,59 @@ void GCNSchedStage::revertScheduling() {
DAG.Regions[RegionIdx] = std::pair(DAG.RegionBegin, DAG.RegionEnd);
}
+/// allUsesAvailableAt - Return true if all registers used by InstToRemat at
+/// OriginalIdx are also available with the same value at RematIdx.
+bool PreRARematStage::allUsesAvailableAt(const MachineInstr *InstToRemat,
+ SlotIndex OriginalIdx,
+ SlotIndex RematIdx) const {
+
+ LiveIntervals *LIS = DAG.LIS;
+ MachineRegisterInfo &MRI = DAG.MRI;
+ OriginalIdx = OriginalIdx.getRegSlot(true);
+ RematIdx = std::max(RematIdx, RematIdx.getRegSlot(true));
+ for (const MachineOperand &MO : InstToRemat->operands()) {
+ if (!MO.isReg() || !MO.getReg() || !MO.readsReg())
+ continue;
+
+ if (!MO.getReg().isVirtual())
+ continue;
+
+ LiveInterval &LI = LIS->getInterval(MO.getReg());
+ const VNInfo *OVNI = LI.getVNInfoAt(OriginalIdx);
+ assert(OVNI);
+
+ // Don't allow rematerialization immediately after the original def.
+ // It would be incorrect if InstToRemat redefines the register.
+ // See PR14098.
+ if (SlotIndex::isSameInstr(OriginalIdx, RematIdx))
+ return false;
+
+ if (OVNI != LI.getVNInfoAt(RematIdx))
+ return false;
+
+ // Check that subrange is live at RematIdx.
+ if (LI.hasSubRanges()) {
+ const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
+ unsigned SubReg = MO.getSubReg();
+ LaneBitmask LM = SubReg ? TRI->getSubRegIndexLaneMask(SubReg)
+ : MRI.getMaxLaneMaskForVReg(MO.getReg());
+ for (LiveInterval::SubRange &SR : LI.subranges()) {
+ if ((SR.LaneMask & LM).none())
+ continue;
+ if (!SR.liveAt(RematIdx))
+ return false;
+
+ // Early exit if all used lanes are checked. No need to continue.
+ LM &= ~SR.LaneMask;
+ if (LM.none())
+ break;
+ }
+ assert(LM.none());
+ }
+ }
+ return true;
+}
+
void PreRARematStage::collectRematerializableInstructions() {
const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo *>(DAG.TRI);
for (unsigned I = 0, E = DAG.MRI.getNumVirtRegs(); I != E; ++I) {
@@ -1629,7 +1682,7 @@ void PreRARematStage::collectRematerializableInstructions() {
MachineOperand *Op = DAG.MRI.getOneDef(Reg);
MachineInstr *Def = Op->getParent();
- if (Op->getSubReg() != 0 || !isTriviallyReMaterializable(*Def))
+ if (Op->getSubReg() != 0 || !DAG.TII->isTriviallyReMaterializable(*Def))
continue;
MachineInstr *UseI = &*DAG.MRI.use_instr_nodbg_begin(Reg);
@@ -1644,8 +1697,13 @@ void PreRARematStage::collectRematerializableInstructions() {
auto It = DAG.LiveIns[I].find(Reg);
if (It != DAG.LiveIns[I].end() && !It->second.none()) {
if (DAG.RegionsWithMinOcc[I]) {
- RematerializableInsts[I][Def] = UseI;
- AddedToRematList = true;
+ SlotIndex DefIdx = DAG.LIS->getInstructionIndex(*Def);
+ SlotIndex UseIdx =
+ DAG.LIS->getInstructionIndex(*UseI).getRegSlot(true);
+ if (allUsesAvailableAt(Def, DefIdx, UseIdx)) {
+ RematerializableInsts[I][Def] = UseI;
+ AddedToRematList = true;
+ }
}
// Collect regions with rematerializable reg as live-in to avoid
@@ -1719,6 +1777,27 @@ bool PreRARematStage::sinkTriviallyRematInsts(const GCNSubtarget &ST,
Register DefReg = Def->getOperand(0).getReg();
TotalSinkableRegs +=
SIRegisterInfo::getNumCoveredRegs(NewLiveIns[I][DefReg]);
+#ifdef EXPENSIVE_CHECKS
+ // All uses are known to be available / live at the remat point. Thus, the
+ // uses should already be live in to the region.
+ for (MachineOperand &MO : Def->operands()) {
+ if (!MO.isReg() || !MO.getReg() || !MO.readsReg())
+ continue;
+
+ Register UseReg = MO.getReg();
+ if (!UseReg.isVirtual())
+ continue;
+
+ LiveInterval &LI = LIS->getInterval(UseReg);
+ LaneBitmask LM = DAG.MRI.getMaxLaneMaskForVReg(MO.getReg());
+ if (LI.hasSubRanges() && MO.getSubReg())
+ LM = DAG.TRI->getSubRegIndexLaneMask(MO.getSubReg());
+
+ assert(NewLiveIns[I].contains(UseReg));
+ LaneBitmask LiveInMask = NewLiveIns[I][UseReg];
+ assert((LiveInMask & LM) == LM);
+ }
+#endif
}
int VGPRsAfterSink = VGPRUsage - TotalSinkableRegs;
unsigned OptimisticOccupancy = ST.getOccupancyWithNumVGPRs(VGPRsAfterSink);
@@ -1842,18 +1921,6 @@ bool PreRARematStage::sinkTriviallyRematInsts(const GCNSubtarget &ST,
return true;
}
-// Copied from MachineLICM
-bool PreRARematStage::isTriviallyReMaterializable(const MachineInstr &MI) {
- if (!DAG.TII->isTriviallyReMaterializable(MI))
- return false;
-
- for (const MachineOperand &MO : MI.all_uses())
- if (MO.getReg().isVirtual())
- return false;
-
- return true;
-}
-
// When removing, we will have to check both beginning and ending of the region.
// When inserting, we will only have to check if we are inserting NewMI in front
// of a scheduling region and do not need to check the ending since we will only
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
index 44db834a41f828..38bb16d5b9b056 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
@@ -448,14 +448,15 @@ class PreRARematStage : public GCNSchedStage {
// and single use outside the defining block into RematerializableInsts.
void collectRematerializableInstructions();
- bool isTriviallyReMaterializable(const MachineInstr &MI);
-
// TODO: Should also attempt to reduce RP of SGPRs and AGPRs
// Attempt to reduce RP of VGPR by sinking trivially rematerializable
// instructions. Returns true if we were able to sink instruction(s).
bool sinkTriviallyRematInsts(const GCNSubtarget &ST,
const TargetInstrInfo *TII);
+ bool allUsesAvailableAt(const MachineInstr *InstToRemat,
+ SlotIndex OriginalIdx, SlotIndex RematIdx) const;
+
public:
bool initGCNSchedStage() override;
diff --git a/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats.mir b/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats.mir
index 9f264de531950b..7252ccfb836de0 100644
--- a/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats.mir
+++ b/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats.mir
@@ -84,13 +84,11 @@ body: |
%23:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode
bb.1:
- ; predecessors: %bb.0
successors: %bb.2
S_NOP 0
bb.2:
- ; predcessors: %bb.1
S_NOP 0, implicit %0, implicit %1
S_NOP 0, implicit %2, implicit %3
@@ -191,14 +189,12 @@ body: |
%23:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode
bb.1:
- ; predecessors: %bb.0
successors: %bb.2
%24:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
S_NOP 0, implicit %24
bb.2:
- ; predcessors: %bb.1
S_NOP 0, implicit %23
S_NOP 0, implicit %0, implicit %1
@@ -300,7 +296,6 @@ body: |
%23:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode
bb.1:
- ; predecessors: %bb.0
successors: %bb.2
%24:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
@@ -308,7 +303,6 @@ body: |
S_NOP 0, implicit %23
bb.2:
- ; predcessors: %bb.1
S_NOP 0, implicit %0, implicit %1
S_NOP 0, implicit %2, implicit %3
@@ -408,7 +402,6 @@ body: |
%23:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode
bb.1:
- ; predecessors: %bb.0
successors: %bb.2
%24:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
@@ -416,7 +409,6 @@ body: |
S_NOP 0, implicit %22, implicit %23
bb.2:
- ; predcessors: %bb.1
S_NOP 0, implicit %0, implicit %1
S_NOP 0, implicit %2, implicit %3
@@ -529,7 +521,6 @@ body: |
%23:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode
bb.1:
- ; predecessors: %bb.0
successors: %bb.2
%24:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
@@ -537,14 +528,12 @@ body: |
S_NOP 0, implicit %23
bb.2:
- ; predcessors: %bb.1
successors: %bb.3
%25:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 25, implicit $exec, implicit $mode
S_NOP 0
bb.3:
- ; predecessors: %bb.2
successors: %bb.4
%26:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 26, implicit $exec, implicit $mode, implicit-def $m0
@@ -552,7 +541,6 @@ body: |
S_NOP 0, implicit %25
bb.4:
- ; predcessors: %bb.3
S_NOP 0, implicit %0, implicit %1
S_NOP 0, implicit %2, implicit %3
@@ -666,7 +654,6 @@ body: |
%23:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode
bb.1:
- ; predecessors: %bb.0
successors: %bb.2
%24:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
@@ -674,7 +661,6 @@ body: |
S_NOP 0, implicit %23, implicit %22
bb.2:
- ; predcessors: %bb.1
successors: %bb.3
%25:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 25, implicit $exec, implicit $mode
@@ -682,7 +668,6 @@ body: |
S_NOP 0
bb.3:
- ; predecessors: %bb.2
successors: %bb.4
%27:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 27, implicit $exec, implicit $mode, implicit-def $m0
@@ -690,7 +675,6 @@ body: |
S_NOP 0, implicit %25, implicit %26
bb.4:
- ; predcessors: %bb.3
S_NOP 0, implicit %0, implicit %1
S_NOP 0, implicit %2, implicit %3
@@ -949,14 +933,12 @@ body: |
undef %23.sub0:vreg_64 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode
bb.1:
- ; predecessors: %bb.0
successors: %bb.2
%23.sub1:vreg_64 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
S_NOP 0, implicit %23
bb.2:
- ; predcessors: %bb.1
S_NOP 0, implicit %0, implicit %1
S_NOP 0, implicit %2, implicit %3
@@ -1053,7 +1035,6 @@ body: |
undef %21.sub0:vreg_128 = nofpexcept V_CVT_I32_F64_e32 21, implicit $exec, implicit $mode
bb.1:
- ; predecessors: %bb.0
successors: %bb.2
%21.sub1:vreg_128 = nofpexcept V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode, implicit-def $m0
@@ -1062,7 +1043,6 @@ body: |
S_NOP 0, implicit %21
bb.2:
- ; predcessors: %bb.1
S_NOP 0, implicit %0, implicit %1
S_NOP 0, implicit %2, implicit %3
@@ -1581,7 +1561,6 @@ body: |
%23:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode
bb.1:
- ; predecessors: %bb.0
successors: %bb.2
%24:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
@@ -1589,7 +1568,6 @@ body: |
S_NOP 0, implicit %24, implicit %25
bb.2:
- ; predcessors: %bb.1
S_NOP 0, implicit %23
S_NOP 0, implicit %0, implicit %1
@@ -2528,14 +2506,12 @@ body: |
%23:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode
bb.1:
- ; predecessors: %bb.0
successors: %bb.2
%24:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
S_NOP 0, implicit %24
bb.2:
- ; predcessors: %bb.1
successors: %bb.3
S_NOP 0, implicit %23
@@ -2543,7 +2519,6 @@ body: |
S_NOP 0
bb.3:
- ; predecessors: %bb.2
successors: %bb.4
%26:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 26, implicit $exec, implicit $mode, implicit-def $m0
@@ -2551,7 +2526,6 @@ body: |
S_NOP 0, implicit %26, implicit %27
bb.4:
- ; predcessors: %bb.3
S_NOP 0, implicit %25
S_NOP 0, implicit %0, implicit %1
@@ -2650,7 +2624,6 @@ body: |
%21.sub1:vreg_128 = nofpexcept V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode, implicit-def $m0
bb.1:
- ; predecessors: %bb.0
successors: %bb.2
%21.sub2:vreg_128 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode, implicit-def $m0
@@ -2658,7 +2631,6 @@ body: |
S_NOP 0, implicit %21
bb.2:
- ; predcessors: %bb.1
S_NOP 0, implicit %0, implicit %1
S_NOP 0, implicit %2, implicit %3
@@ -2759,7 +2731,6 @@ body: |
%23:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode
bb.1:
- ; predecessors: %bb.0
successors: %bb.2
%24:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
@@ -2767,7 +2738,6 @@ body: |
S_NOP 0, implicit %23
bb.2:
- ; predcessors: %bb.1
S_NOP 0, implicit %23
S_NOP 0, implicit %0, implicit %1
@@ -5030,7 +5000,6 @@ body: |
%21.sub1:vreg_128 = nofpexcept V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode
bb.1:
- ; predecessors: %bb.0
successors: %bb.2
%21.sub2:vreg_128 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode, implicit-def $m0
@@ -5038,7 +5007,6 @@ body: |
S_NOP 0, implicit %21
bb.2:
- ; predcessors: %bb.1
S_NOP 0, implicit %0, implicit %1
S_NOP 0, implicit %2, implicit %3
@@ -5137,14 +5105,12 @@ body: |
%23:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode
bb.1:
- ; predecessors: %bb.0
successors: %bb.2
%24:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
S_NOP 0, implicit %23, implicit %24
bb.2:
- ; predcessors: %bb.1
S_NOP 0, implicit %0, implicit %1
S_NOP 0, implicit %2, implicit %3
@@ -5242,7 +5208,6 @@ body: |
%22:vreg_64 = nofpexcept V_CVT_F64_I32_e32 22, implicit $exec, implicit $mode
bb.1:
- ; predecessors: %bb.0
successors: %bb.2
%23:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode, implicit-def $m0
@@ -5250,7 +5215,6 @@ body: |
S_NOP 0, implicit %22
bb.2:
- ; predcessors: %bb.1
S_NOP 0, implicit %0, implicit %1
S_NOP 0, implicit %2, implicit %3
@@ -5348,7 +5312,6 @@ body: |
%22:vreg_64 = nofpexcept V_CVT_F64_I32_e32 22, implicit $exec, implicit $mode
bb.1:
- ; predecessors: %bb.0
successors: %bb.2
%23:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode, implicit-def $m0
@@ -5357,7 +5320,6 @@ body: |
S_NOP 0, implicit %22
bb.2:
- ; predcessors: %bb.1
S_NOP 0, implicit %0, implicit %1
S_NOP 0, implicit %2, implicit %3
@@ -5456,7 +5418,6 @@ body: |
%22:vreg_64 = nofpexcept V_CVT_F64_I32_e32 22, implicit $exec, implicit $mode
bb.1:
- ; predecessors: %bb.0
successors: %bb.2
%23:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode, implicit-def $m0
@@ -5466,7 +5427,6 @@ body: |
S_NOP 0, implicit %22
bb.2:
- ; predcessors: %bb.1
S_NOP 0, implicit %0, implicit %1
S_NOP 0, implicit %2, implicit %3
@@ -5562,14 +5522,12 @@ body: |
%22:vreg_64 = nofpexcept V_CVT_F64_I32_e32 22, implicit $exec, implicit $mode
bb.1:
- ; predecessors: %bb.0
successors: %bb.2
%23:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode, implicit-def $m0
S_NOP 0, implicit %22, implicit %23
bb.2:
- ; predcessors: %bb.1
S_NOP 0, implicit %0, implicit %1
S_NOP 0, implicit %2, implicit %3
@@ -5669,14 +5627,12 @@ body: |
undef %23.sub1:vreg_64_align2 = V_MOV_B32_e32 23, implicit $exec
bb.1:
- ; predecessors: %bb.0
successors: %bb.2
%24:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
S_NOP 0, implicit %24
bb.2:
- ; predcessors: %bb.1
S_NOP 0, implicit %23.sub1
S_NOP 0, implicit %0, implicit %1
@@ -5779,14 +5735,12 @@ body: |
%23:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode
bb.1:
- ; predecessors: %bb.0
successors: %bb.2
%24:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
S_NOP 0, implicit %24
bb.2:
- ; predcessors: %bb.1
DBG_VALUE %23, 0, 0
S_NOP 0, implicit %23
@@ -5889,14 +5843,12 @@ body: |
%23:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode
bb.1:
- ; predecessors: %bb.0
successors: %bb.2
%24:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
S_NOP 0, implicit %24
bb.2:
- ; predcessors: %bb.1
S_NOP 0, implicit %23
S_NOP 0, implicit %0, implicit %1
@@ -5914,3 +5866,1011 @@ body: |
S_ENDPGM 0
...
+---
+name: remat_virtual_vgpr_occ_6
+tracksRegLiveness: true
+machineFunctionInfo:
+ isEntryFunction: true
+body: |
+ ; GFX908-LABEL: name: remat_virtual_vgpr_occ_6
+ ; GFX908: bb.0:
+ ; GFX908-NEXT: successors: %bb.1(0x80000000)
+ ; GFX908-NEXT: liveins: $vgpr0, $sgpr0_sgpr1
+ ; GFX908-NEXT: {{ $}}
+ ; GFX908-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr0_sgpr1
+ ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+ ; GFX908-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 52, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4)
+ ; GFX908-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF4:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF5:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF7:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF10:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF11:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF13:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF14:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF15:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF17:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF18:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: dead [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF18]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF19:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF20:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF1]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_3:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF2]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_4:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF3]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_5:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF4]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_6:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF5]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_7:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[D...
[truncated]
``````````
https://github.com/llvm/llvm-project/pull/124327