[llvm] [AMDGPU] Allow rematerialization of instructions with virtual register uses (PR #124327)
Jeffrey Byrnes via llvm-commits
llvm-commits at lists.llvm.org
Wed Feb 5 17:30:25 PST 2025
https://github.com/jrbyrnes updated https://github.com/llvm/llvm-project/pull/124327
>From 91c69c557d545bc54c246810b5bd0591e356e3cf Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Mon, 27 Jan 2025 11:18:46 -0800
Subject: [PATCH 1/5] [AMDGPU] Allow rematerialization of instructions with
virtual register uses
Change-Id: I638fae40762a7f6b9095c50090a247554632eb94
---
llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp | 88 +-
llvm/lib/Target/AMDGPU/GCNSchedStrategy.h | 6 +
.../machine-scheduler-sink-trivial-remats.mir | 1013 ++++++++++++++++-
...ssert-dead-def-subreg-use-other-subreg.mir | 6 +-
4 files changed, 1103 insertions(+), 10 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index 6e693066de10b6b..23f7eee863d95c1 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -1615,6 +1615,61 @@ void GCNSchedStage::revertScheduling() {
DAG.Regions[RegionIdx] = std::pair(DAG.RegionBegin, DAG.RegionEnd);
}
+bool PreRARematStage::allUsesAvailableAt(const MachineInstr *InstToRemat,
+ SlotIndex OriginalIdx,
+ SlotIndex RematIdx) const {
+
+ LiveIntervals *LIS = DAG.LIS;
+ MachineRegisterInfo &MRI = DAG.MRI;
+ OriginalIdx = OriginalIdx.getRegSlot(true);
+ RematIdx = std::max(RematIdx, RematIdx.getRegSlot(true));
+ for (const MachineOperand &MO : InstToRemat->operands()) {
+ if (!MO.isReg() || !MO.getReg() || !MO.readsReg())
+ continue;
+
+ // Do not attempt to reason about PhysRegs
+ if (!MO.getReg().isVirtual()) {
+ assert(DAG.MRI.isConstantPhysReg(MO.getReg()) ||
+ DAG.TII->isIgnorableUse(MO));
+ continue;
+ }
+
+ LiveInterval &LI = LIS->getInterval(MO.getReg());
+ const VNInfo *OVNI = LI.getVNInfoAt(OriginalIdx);
+ assert(OVNI);
+
+ // Don't allow rematerialization immediately after the original def.
+ // It would be incorrect if InstToRemat redefines the register.
+ // See PR14098.
+ if (SlotIndex::isSameInstr(OriginalIdx, RematIdx))
+ return false;
+
+ if (OVNI != LI.getVNInfoAt(RematIdx))
+ return false;
+
+ // Check that subrange is live at RematIdx.
+ if (LI.hasSubRanges()) {
+ const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
+ unsigned SubReg = MO.getSubReg();
+ LaneBitmask LM = SubReg ? TRI->getSubRegIndexLaneMask(SubReg)
+ : MRI.getMaxLaneMaskForVReg(MO.getReg());
+ for (LiveInterval::SubRange &SR : LI.subranges()) {
+ if ((SR.LaneMask & LM).none())
+ continue;
+ if (!SR.liveAt(RematIdx))
+ return false;
+
+ // Early exit if all used lanes are checked. No need to continue.
+ LM &= ~SR.LaneMask;
+ if (LM.none())
+ break;
+ }
+ assert(LM.none());
+ }
+ }
+ return true;
+}
+
void PreRARematStage::collectRematerializableInstructions() {
const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo *>(DAG.TRI);
for (unsigned I = 0, E = DAG.MRI.getNumVirtRegs(); I != E; ++I) {
@@ -1644,8 +1699,13 @@ void PreRARematStage::collectRematerializableInstructions() {
auto It = DAG.LiveIns[I].find(Reg);
if (It != DAG.LiveIns[I].end() && !It->second.none()) {
if (DAG.RegionsWithMinOcc[I]) {
- RematerializableInsts[I][Def] = UseI;
- AddedToRematList = true;
+ SlotIndex DefIdx = DAG.LIS->getInstructionIndex(*Def);
+ SlotIndex UseIdx =
+ DAG.LIS->getInstructionIndex(*UseI).getRegSlot(true);
+ if (allUsesAvailableAt(Def, DefIdx, UseIdx)) {
+ RematerializableInsts[I][Def] = UseI;
+ AddedToRematList = true;
+ }
}
// Collect regions with rematerializable reg as live-in to avoid
@@ -1719,6 +1779,27 @@ bool PreRARematStage::sinkTriviallyRematInsts(const GCNSubtarget &ST,
Register DefReg = Def->getOperand(0).getReg();
TotalSinkableRegs +=
SIRegisterInfo::getNumCoveredRegs(NewLiveIns[I][DefReg]);
+#ifdef EXPENSIVE_CHECKS
+ // All uses are known to be available / live at the remat point. Thus, the
+ // uses should already be live in to the region.
+ for (MachineOperand &MO : Def->operands()) {
+ if (!MO.isReg() || !MO.getReg() || !MO.readsReg())
+ continue;
+
+ Register UseReg = MO.getReg();
+ if (!UseReg.isVirtual())
+ continue;
+
+ LiveInterval &LI = LIS->getInterval(UseReg);
+ LaneBitmask LM = DAG.MRI.getMaxLaneMaskForVReg(MO.getReg());
+ if (LI.hasSubRanges() && MO.getSubReg())
+ LM = DAG.TRI->getSubRegIndexLaneMask(MO.getSubReg());
+
+ assert(NewLiveIns[I].contains(UseReg));
+ LaneBitmask LiveInMask = NewLiveIns[I][UseReg];
+ assert((LiveInMask & LM) == LM);
+ }
+#endif
}
int VGPRsAfterSink = VGPRUsage - TotalSinkableRegs;
unsigned OptimisticOccupancy = ST.getOccupancyWithNumVGPRs(VGPRsAfterSink);
@@ -1847,9 +1928,6 @@ bool PreRARematStage::isTriviallyReMaterializable(const MachineInstr &MI) {
return false;
for (const MachineOperand &MO : MI.all_uses()) {
- if (MO.getReg().isVirtual())
- return false;
-
// We can't remat physreg uses, unless it is a constant or an ignorable
// use (e.g. implicit exec use on VALU instructions)
if (MO.getReg().isPhysical()) {
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
index 44db834a41f828e..7d3e63df43da60d 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
@@ -456,6 +456,12 @@ class PreRARematStage : public GCNSchedStage {
bool sinkTriviallyRematInsts(const GCNSubtarget &ST,
const TargetInstrInfo *TII);
+ /// \p Returns true if all the uses in \p InstToRemat defined at \p
+ /// OriginalIdx are live at \p RematIdx. This only checks liveness of virtual
+ /// reg uses.
+ bool allUsesAvailableAt(const MachineInstr *InstToRemat,
+ SlotIndex OriginalIdx, SlotIndex RematIdx) const;
+
public:
bool initGCNSchedStage() override;
diff --git a/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats.mir b/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats.mir
index 7662abc0aaf8584..e8dd5eaccd2dba0 100644
--- a/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats.mir
+++ b/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats.mir
@@ -5946,12 +5946,12 @@ body: |
; GFX908-NEXT: [[V_CVT_I32_F32_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF28]], implicit $exec, implicit $mode
; GFX908-NEXT: [[DEF29:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF30:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF29]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF30]], implicit $exec, implicit $mode
; GFX908-NEXT: S_BRANCH %bb.1
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.1:
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF29]], implicit $exec, implicit $mode
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_1]], implicit [[V_CVT_I32_F32_e32_30]], implicit [[DEF29]]
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF30]], implicit $exec, implicit $mode
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_2]], implicit [[V_CVT_I32_F32_e32_31]], implicit [[DEF30]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_3]], implicit [[V_CVT_I32_F32_e32_11]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_4]], implicit [[V_CVT_I32_F32_e32_12]]
@@ -6274,3 +6274,1012 @@ body: |
S_ENDPGM 0
...
+---
+name: remat_virtual_vgpr_occ_6
+tracksRegLiveness: true
+machineFunctionInfo:
+ isEntryFunction: true
+body: |
+ ; GFX908-LABEL: name: remat_virtual_vgpr_occ_6
+ ; GFX908: bb.0:
+ ; GFX908-NEXT: successors: %bb.1(0x80000000)
+ ; GFX908-NEXT: liveins: $vgpr0, $sgpr0_sgpr1
+ ; GFX908-NEXT: {{ $}}
+ ; GFX908-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr0_sgpr1
+ ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+ ; GFX908-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 52, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4)
+ ; GFX908-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF4:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF5:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF7:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF10:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF11:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF13:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF14:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF15:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF17:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF18:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: dead [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF18]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF19:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF20:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF1]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_3:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF2]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_4:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF3]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_5:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF4]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_6:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF5]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_7:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF6]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_8:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF7]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_9:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF8]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_10:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF9]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_11:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF10]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_12:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF11]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_13:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF12]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_14:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF13]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_15:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF14]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_16:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF15]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_17:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF16]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_18:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF17]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_19:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF19]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_20:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF20]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF21:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_21:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF21]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF22:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF23:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF24:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF25:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF26:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF27:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF28:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF29:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF30:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF31:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF24]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF25]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF26]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF27]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_26:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF28]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF29]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF30]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF31]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: undef [[S_MOV_B32_:%[0-9]+]].sub1:sreg_64 = S_MOV_B32 0
+ ; GFX908-NEXT: dead [[V_CMP_GT_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_GT_U32_e64 [[S_LOAD_DWORDX2_IMM]].sub0, [[COPY1]](s32), implicit $exec
+ ; GFX908-NEXT: dead undef [[S_MOV_B32_:%[0-9]+]].sub0:sreg_64 = COPY [[S_LOAD_DWORDX2_IMM]].sub1
+ ; GFX908-NEXT: S_BRANCH %bb.1
+ ; GFX908-NEXT: {{ $}}
+ ; GFX908-NEXT: bb.1:
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF22]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_30]], implicit [[V_CVT_I32_F32_e32_25]], implicit [[DEF22]], implicit [[DEF27]]
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF23]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_31]], implicit [[V_CVT_I32_F32_e32_26]], implicit [[DEF23]], implicit [[DEF28]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_22]], implicit [[V_CVT_I32_F32_e32_27]], implicit [[DEF24]], implicit [[DEF29]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_23]], implicit [[V_CVT_I32_F32_e32_28]], implicit [[DEF25]], implicit [[DEF30]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_24]], implicit [[V_CVT_I32_F32_e32_29]], implicit [[DEF26]], implicit [[DEF31]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_1]], implicit [[V_CVT_I32_F32_e32_6]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_2]], implicit [[V_CVT_I32_F32_e32_7]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_3]], implicit [[V_CVT_I32_F32_e32_8]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_4]], implicit [[V_CVT_I32_F32_e32_9]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_5]], implicit [[V_CVT_I32_F32_e32_10]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_11]], implicit [[V_CVT_I32_F32_e32_12]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_13]], implicit [[V_CVT_I32_F32_e32_14]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_13]], implicit [[V_CVT_I32_F32_e32_14]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_15]], implicit [[V_CVT_I32_F32_e32_16]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_17]], implicit [[V_CVT_I32_F32_e32_18]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_19]], implicit [[V_CVT_I32_F32_e32_20]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_21]]
+ ; GFX908-NEXT: S_ENDPGM 0
+ bb.0:
+ liveins: $vgpr0, $sgpr0_sgpr1
+
+ %1:sgpr_64(p4) = COPY $sgpr0_sgpr1
+ %2:vgpr_32(s32) = COPY $vgpr0
+ %3:sreg_64_xexec = S_LOAD_DWORDX2_IMM %1(p4), 52, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4)
+ %4:sreg_64 = V_CMP_GT_U32_e64 %3.sub0, %2(s32), implicit $exec
+ undef %5.sub1:sreg_64 = S_MOV_B32 0
+ %5.sub0:sreg_64 = COPY %3.sub1
+ %10:vgpr_32 = IMPLICIT_DEF
+ %11:vgpr_32 = IMPLICIT_DEF
+ %12:vgpr_32 = IMPLICIT_DEF
+ %13:vgpr_32 = IMPLICIT_DEF
+ %14:vgpr_32 = IMPLICIT_DEF
+ %15:vgpr_32 = IMPLICIT_DEF
+ %16:vgpr_32 = IMPLICIT_DEF
+ %17:vgpr_32 = IMPLICIT_DEF
+ %18:vgpr_32 = IMPLICIT_DEF
+ %19:vgpr_32 = IMPLICIT_DEF
+ %20:vgpr_32 = IMPLICIT_DEF
+ %21:vgpr_32 = IMPLICIT_DEF
+ %22:vgpr_32 = IMPLICIT_DEF
+ %23:vgpr_32 = IMPLICIT_DEF
+ %24:vgpr_32 = IMPLICIT_DEF
+ %25:vgpr_32 = IMPLICIT_DEF
+ %26:vgpr_32 = IMPLICIT_DEF
+ %27:vgpr_32 = IMPLICIT_DEF
+ %28:vgpr_32 = IMPLICIT_DEF
+ %29:vgpr_32 = IMPLICIT_DEF
+ %30:vgpr_32 = IMPLICIT_DEF
+ %31:vgpr_32 = IMPLICIT_DEF
+ %32:vgpr_32 = IMPLICIT_DEF
+ %33:vgpr_32 = IMPLICIT_DEF
+ %34:vgpr_32 = IMPLICIT_DEF
+ %35:vgpr_32 = IMPLICIT_DEF
+ %36:vgpr_32 = IMPLICIT_DEF
+ %37:vgpr_32 = IMPLICIT_DEF
+ %38:vgpr_32 = IMPLICIT_DEF
+ %39:vgpr_32 = IMPLICIT_DEF
+ %40:vgpr_32 = IMPLICIT_DEF
+ %41:vgpr_32 = IMPLICIT_DEF
+ %50:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %10, implicit $exec, implicit $mode
+ %51:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %11, implicit $exec, implicit $mode
+ %52:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %12, implicit $exec, implicit $mode
+ %53:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %13, implicit $exec, implicit $mode
+ %54:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %14, implicit $exec, implicit $mode
+ %55:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %15, implicit $exec, implicit $mode
+ %56:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %16, implicit $exec, implicit $mode
+ %57:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %17, implicit $exec, implicit $mode
+ %58:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %18, implicit $exec, implicit $mode
+ %59:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %19, implicit $exec, implicit $mode
+ %60:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %20, implicit $exec, implicit $mode
+ %61:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %21, implicit $exec, implicit $mode
+ %62:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %22, implicit $exec, implicit $mode
+ %63:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %23, implicit $exec, implicit $mode
+ %64:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %24, implicit $exec, implicit $mode
+ %65:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %25, implicit $exec, implicit $mode
+ %66:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %26, implicit $exec, implicit $mode
+ %67:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %27, implicit $exec, implicit $mode
+ %68:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %28, implicit $exec, implicit $mode
+ %69:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %29, implicit $exec, implicit $mode
+ %70:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %30, implicit $exec, implicit $mode
+ %71:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %31, implicit $exec, implicit $mode
+ %72:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %32, implicit $exec, implicit $mode
+ %73:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %33, implicit $exec, implicit $mode
+ %74:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %34, implicit $exec, implicit $mode
+ %75:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %35, implicit $exec, implicit $mode
+ %76:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %36, implicit $exec, implicit $mode
+ %77:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %37, implicit $exec, implicit $mode
+ %78:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %38, implicit $exec, implicit $mode
+ %79:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %39, implicit $exec, implicit $mode
+ %80:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %40, implicit $exec, implicit $mode
+ %81:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %41, implicit $exec, implicit $mode
+ S_BRANCH %bb.4
+
+ bb.4:
+ S_NOP 0, implicit %50, implicit %60, implicit %10, implicit %20
+ S_NOP 0, implicit %51, implicit %61, implicit %11, implicit %21
+ S_NOP 0, implicit %52, implicit %62, implicit %12, implicit %22
+ S_NOP 0, implicit %53, implicit %63, implicit %13, implicit %23
+ S_NOP 0, implicit %54, implicit %64, implicit %14, implicit %24
+ S_NOP 0, implicit %55, implicit %65
+ S_NOP 0, implicit %56, implicit %66
+ S_NOP 0, implicit %57, implicit %67
+ S_NOP 0, implicit %58, implicit %68
+ S_NOP 0, implicit %59, implicit %69
+ S_NOP 0, implicit %70, implicit %71
+ S_NOP 0, implicit %72, implicit %73
+ S_NOP 0, implicit %72, implicit %73
+ S_NOP 0, implicit %74, implicit %75
+ S_NOP 0, implicit %76, implicit %77
+ S_NOP 0, implicit %78, implicit %79
+ S_NOP 0, implicit %80
+ S_ENDPGM 0
+...
+
+---
+name: remat_virtual_vgpr_uses_not_available
+tracksRegLiveness: true
+machineFunctionInfo:
+ isEntryFunction: true
+body: |
+ ; GFX908-LABEL: name: remat_virtual_vgpr_uses_not_available
+ ; GFX908: bb.0:
+ ; GFX908-NEXT: successors: %bb.1(0x80000000)
+ ; GFX908-NEXT: liveins: $vgpr0, $sgpr0_sgpr1
+ ; GFX908-NEXT: {{ $}}
+ ; GFX908-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr0_sgpr1
+ ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+ ; GFX908-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 52, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4)
+ ; GFX908-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF4:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF5:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF7:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF10:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF11:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF13:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF14:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF15:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF17:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF18:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: dead [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF18]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF19:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF20:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF1]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_3:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF2]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_4:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF3]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_5:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF4]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_6:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF5]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_7:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF6]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_8:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF7]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_9:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF8]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_10:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF9]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_11:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF10]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_12:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF11]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_13:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF12]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_14:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF13]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_15:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF14]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_16:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF15]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_17:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF16]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_18:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF17]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_19:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF19]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_20:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF20]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF21:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_21:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF21]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF22:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF23:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF24:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF25:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF26:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF27:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF28:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF29:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF30:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF31:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF22]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF23]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF24]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF25]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_26:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF26]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF27]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF28]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF29]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF30]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF31]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: undef [[S_MOV_B32_:%[0-9]+]].sub1:sreg_64 = S_MOV_B32 0
+ ; GFX908-NEXT: dead [[V_CMP_GT_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_GT_U32_e64 [[S_LOAD_DWORDX2_IMM]].sub0, [[COPY1]](s32), implicit $exec
+ ; GFX908-NEXT: dead undef [[S_MOV_B32_:%[0-9]+]].sub0:sreg_64 = COPY [[S_LOAD_DWORDX2_IMM]].sub1
+ ; GFX908-NEXT: S_BRANCH %bb.1
+ ; GFX908-NEXT: {{ $}}
+ ; GFX908-NEXT: bb.1:
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_1]], implicit [[V_CVT_I32_F32_e32_6]], implicit [[DEF22]], implicit [[DEF27]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_22]], implicit [[V_CVT_I32_F32_e32_27]], implicit [[DEF23]], implicit [[DEF28]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_23]], implicit [[V_CVT_I32_F32_e32_28]], implicit [[DEF24]], implicit [[DEF29]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_24]], implicit [[V_CVT_I32_F32_e32_29]], implicit [[DEF25]], implicit [[DEF30]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_25]], implicit [[V_CVT_I32_F32_e32_30]], implicit [[DEF26]], implicit [[DEF31]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_26]], implicit [[V_CVT_I32_F32_e32_31]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_2]], implicit [[V_CVT_I32_F32_e32_7]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_3]], implicit [[V_CVT_I32_F32_e32_8]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_4]], implicit [[V_CVT_I32_F32_e32_9]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_5]], implicit [[V_CVT_I32_F32_e32_10]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_11]], implicit [[V_CVT_I32_F32_e32_12]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_13]], implicit [[V_CVT_I32_F32_e32_14]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_13]], implicit [[V_CVT_I32_F32_e32_14]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_15]], implicit [[V_CVT_I32_F32_e32_16]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_17]], implicit [[V_CVT_I32_F32_e32_18]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_19]], implicit [[V_CVT_I32_F32_e32_20]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_21]]
+ ; GFX908-NEXT: S_ENDPGM 0
+ bb.0:
+ liveins: $vgpr0, $sgpr0_sgpr1
+
+ %1:sgpr_64(p4) = COPY $sgpr0_sgpr1
+ %2:vgpr_32(s32) = COPY $vgpr0
+ %3:sreg_64_xexec = S_LOAD_DWORDX2_IMM %1(p4), 52, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4)
+ %4:sreg_64 = V_CMP_GT_U32_e64 %3.sub0, %2(s32), implicit $exec
+ undef %5.sub1:sreg_64 = S_MOV_B32 0
+ %5.sub0:sreg_64 = COPY %3.sub1
+ %10:vgpr_32 = IMPLICIT_DEF
+ %11:vgpr_32 = IMPLICIT_DEF
+ %12:vgpr_32 = IMPLICIT_DEF
+ %13:vgpr_32 = IMPLICIT_DEF
+ %14:vgpr_32 = IMPLICIT_DEF
+ %15:vgpr_32 = IMPLICIT_DEF
+ %16:vgpr_32 = IMPLICIT_DEF
+ %17:vgpr_32 = IMPLICIT_DEF
+ %18:vgpr_32 = IMPLICIT_DEF
+ %19:vgpr_32 = IMPLICIT_DEF
+ %20:vgpr_32 = IMPLICIT_DEF
+ %21:vgpr_32 = IMPLICIT_DEF
+ %22:vgpr_32 = IMPLICIT_DEF
+ %23:vgpr_32 = IMPLICIT_DEF
+ %24:vgpr_32 = IMPLICIT_DEF
+ %25:vgpr_32 = IMPLICIT_DEF
+ %26:vgpr_32 = IMPLICIT_DEF
+ %27:vgpr_32 = IMPLICIT_DEF
+ %28:vgpr_32 = IMPLICIT_DEF
+ %29:vgpr_32 = IMPLICIT_DEF
+ %30:vgpr_32 = IMPLICIT_DEF
+ %31:vgpr_32 = IMPLICIT_DEF
+ %32:vgpr_32 = IMPLICIT_DEF
+ %33:vgpr_32 = IMPLICIT_DEF
+ %34:vgpr_32 = IMPLICIT_DEF
+ %35:vgpr_32 = IMPLICIT_DEF
+ %36:vgpr_32 = IMPLICIT_DEF
+ %37:vgpr_32 = IMPLICIT_DEF
+ %38:vgpr_32 = IMPLICIT_DEF
+ %39:vgpr_32 = IMPLICIT_DEF
+ %40:vgpr_32 = IMPLICIT_DEF
+ %41:vgpr_32 = IMPLICIT_DEF
+ %50:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %10, implicit $exec, implicit $mode
+ %51:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %11, implicit $exec, implicit $mode
+ %52:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %12, implicit $exec, implicit $mode
+ %53:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %13, implicit $exec, implicit $mode
+ %54:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %14, implicit $exec, implicit $mode
+ %55:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %15, implicit $exec, implicit $mode
+ %56:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %16, implicit $exec, implicit $mode
+ %57:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %17, implicit $exec, implicit $mode
+ %58:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %18, implicit $exec, implicit $mode
+ %59:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %19, implicit $exec, implicit $mode
+ %60:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %20, implicit $exec, implicit $mode
+ %61:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %21, implicit $exec, implicit $mode
+ %62:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %22, implicit $exec, implicit $mode
+ %63:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %23, implicit $exec, implicit $mode
+ %64:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %24, implicit $exec, implicit $mode
+ %65:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %25, implicit $exec, implicit $mode
+ %66:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %26, implicit $exec, implicit $mode
+ %67:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %27, implicit $exec, implicit $mode
+ %68:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %28, implicit $exec, implicit $mode
+ %69:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %29, implicit $exec, implicit $mode
+ %70:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %30, implicit $exec, implicit $mode
+ %71:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %31, implicit $exec, implicit $mode
+ %72:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %32, implicit $exec, implicit $mode
+ %73:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %33, implicit $exec, implicit $mode
+ %74:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %34, implicit $exec, implicit $mode
+ %75:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %35, implicit $exec, implicit $mode
+ %76:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %36, implicit $exec, implicit $mode
+ %77:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %37, implicit $exec, implicit $mode
+ %78:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %38, implicit $exec, implicit $mode
+ %79:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %39, implicit $exec, implicit $mode
+ %80:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %40, implicit $exec, implicit $mode
+ %81:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %41, implicit $exec, implicit $mode
+ S_BRANCH %bb.4
+
+ bb.4:
+ S_NOP 0, implicit %50, implicit %60, implicit %11, implicit %21
+ S_NOP 0, implicit %51, implicit %61, implicit %12, implicit %22
+ S_NOP 0, implicit %52, implicit %62, implicit %13, implicit %23
+ S_NOP 0, implicit %53, implicit %63, implicit %14, implicit %24
+ S_NOP 0, implicit %54, implicit %64, implicit %15, implicit %25
+ S_NOP 0, implicit %55, implicit %65
+ S_NOP 0, implicit %56, implicit %66
+ S_NOP 0, implicit %57, implicit %67
+ S_NOP 0, implicit %58, implicit %68
+ S_NOP 0, implicit %59, implicit %69
+ S_NOP 0, implicit %70, implicit %71
+ S_NOP 0, implicit %72, implicit %73
+ S_NOP 0, implicit %72, implicit %73
+ S_NOP 0, implicit %74, implicit %75
+ S_NOP 0, implicit %76, implicit %77
+ S_NOP 0, implicit %78, implicit %79
+ S_NOP 0, implicit %80
+ S_ENDPGM 0
+...
+
+---
+name: remat_virtual_vgpr_subreg_occ_6
+tracksRegLiveness: true
+machineFunctionInfo:
+ isEntryFunction: true
+body: |
+ ; GFX908-LABEL: name: remat_virtual_vgpr_subreg_occ_6
+ ; GFX908: bb.0:
+ ; GFX908-NEXT: successors: %bb.1(0x80000000)
+ ; GFX908-NEXT: liveins: $vgpr0, $sgpr0_sgpr1
+ ; GFX908-NEXT: {{ $}}
+ ; GFX908-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr0_sgpr1
+ ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+ ; GFX908-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 52, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4)
+ ; GFX908-NEXT: undef [[S_MOV_B32_:%[0-9]+]].sub1:sreg_64 = S_MOV_B32 0
+ ; GFX908-NEXT: undef [[DEF:%[0-9]+]].sub0:vreg_512 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF:%[0-9]+]].sub1:vreg_512 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF:%[0-9]+]].sub2:vreg_512 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF:%[0-9]+]].sub3:vreg_512 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: dead [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF3]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF4:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF5:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: dead [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: dead [[V_CMP_GT_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_GT_U32_e64 [[S_LOAD_DWORDX2_IMM]].sub0, [[COPY1]](s32), implicit $exec
+ ; GFX908-NEXT: dead undef [[S_MOV_B32_:%[0-9]+]].sub0:sreg_64 = COPY [[S_LOAD_DWORDX2_IMM]].sub1
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF1]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF2]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_3:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF4]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_4:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF5]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF7:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_5:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF7]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_6:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF8]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_7:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF9]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF10:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_8:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF10]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF11:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_9:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF11]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_10:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF12]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF13:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_11:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF13]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF14:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_12:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF14]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF15:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_13:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF15]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_14:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF16]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF17:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_15:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF17]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF18:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_16:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF18]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF19:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_17:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF19]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF20:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_18:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF20]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF21:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_19:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF21]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF22:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_20:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF22]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF23:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_21:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF23]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF24:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF25:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF26:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF27:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF28:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF29:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF30:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF31:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF25]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF26]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF27]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF]].sub2, implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_26:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF28]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF29]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF30]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF31]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: S_BRANCH %bb.1
+ ; GFX908-NEXT: {{ $}}
+ ; GFX908-NEXT: bb.1:
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF]].sub0, implicit $exec, implicit $mode
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_30]], implicit [[V_CVT_I32_F32_e32_25]], implicit [[DEF]].sub0, implicit [[DEF]].sub2
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF24]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_31]], implicit [[V_CVT_I32_F32_e32_26]], implicit [[DEF24]], implicit [[DEF28]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_22]], implicit [[V_CVT_I32_F32_e32_27]], implicit [[DEF25]], implicit [[DEF29]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_23]], implicit [[V_CVT_I32_F32_e32_28]], implicit [[DEF26]], implicit [[DEF30]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_24]], implicit [[V_CVT_I32_F32_e32_29]], implicit [[DEF27]], implicit [[DEF31]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_1]], implicit [[V_CVT_I32_F32_e32_6]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_2]], implicit [[V_CVT_I32_F32_e32_7]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_3]], implicit [[V_CVT_I32_F32_e32_8]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_4]], implicit [[V_CVT_I32_F32_e32_9]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_5]], implicit [[V_CVT_I32_F32_e32_10]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_11]], implicit [[V_CVT_I32_F32_e32_12]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_13]], implicit [[V_CVT_I32_F32_e32_14]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_13]], implicit [[V_CVT_I32_F32_e32_14]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_15]], implicit [[V_CVT_I32_F32_e32_16]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_17]], implicit [[V_CVT_I32_F32_e32_18]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_19]], implicit [[V_CVT_I32_F32_e32_20]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_21]]
+ ; GFX908-NEXT: S_ENDPGM 0
+ bb.0:
+ liveins: $vgpr0, $sgpr0_sgpr1
+
+ %1:sgpr_64(p4) = COPY $sgpr0_sgpr1
+ %2:vgpr_32(s32) = COPY $vgpr0
+ %3:sreg_64_xexec = S_LOAD_DWORDX2_IMM %1(p4), 52, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4)
+ %4:sreg_64 = V_CMP_GT_U32_e64 %3.sub0, %2(s32), implicit $exec
+ undef %5.sub1:sreg_64 = S_MOV_B32 0
+ %5.sub0:sreg_64 = COPY %3.sub1
+ undef %10.sub0:vreg_512 = IMPLICIT_DEF
+ %10.sub1:vreg_512 = IMPLICIT_DEF
+ %10.sub2:vreg_512 = IMPLICIT_DEF
+ %10.sub3:vreg_512 = IMPLICIT_DEF
+ %11:vgpr_32 = IMPLICIT_DEF
+ %12:vgpr_32 = IMPLICIT_DEF
+ %13:vgpr_32 = IMPLICIT_DEF
+ %14:vgpr_32 = IMPLICIT_DEF
+ %15:vgpr_32 = IMPLICIT_DEF
+ %16:vgpr_32 = IMPLICIT_DEF
+ %17:vgpr_32 = IMPLICIT_DEF
+ %18:vgpr_32 = IMPLICIT_DEF
+ %19:vgpr_32 = IMPLICIT_DEF
+ %20:vgpr_32 = IMPLICIT_DEF
+ %21:vgpr_32 = IMPLICIT_DEF
+ %22:vgpr_32 = IMPLICIT_DEF
+ %23:vgpr_32 = IMPLICIT_DEF
+ %24:vgpr_32 = IMPLICIT_DEF
+ %25:vgpr_32 = IMPLICIT_DEF
+ %26:vgpr_32 = IMPLICIT_DEF
+ %27:vgpr_32 = IMPLICIT_DEF
+ %28:vgpr_32 = IMPLICIT_DEF
+ %29:vgpr_32 = IMPLICIT_DEF
+ %30:vgpr_32 = IMPLICIT_DEF
+ %31:vgpr_32 = IMPLICIT_DEF
+ %32:vgpr_32 = IMPLICIT_DEF
+ %33:vgpr_32 = IMPLICIT_DEF
+ %34:vgpr_32 = IMPLICIT_DEF
+ %35:vgpr_32 = IMPLICIT_DEF
+ %36:vgpr_32 = IMPLICIT_DEF
+ %37:vgpr_32 = IMPLICIT_DEF
+ %38:vgpr_32 = IMPLICIT_DEF
+ %39:vgpr_32 = IMPLICIT_DEF
+ %40:vgpr_32 = IMPLICIT_DEF
+ %41:vgpr_32 = IMPLICIT_DEF
+ %50:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %10.sub0, implicit $exec, implicit $mode
+ %51:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %11, implicit $exec, implicit $mode
+ %52:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %12, implicit $exec, implicit $mode
+ %53:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %13, implicit $exec, implicit $mode
+ %54:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %14, implicit $exec, implicit $mode
+ %55:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %15, implicit $exec, implicit $mode
+ %56:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %16, implicit $exec, implicit $mode
+ %57:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %17, implicit $exec, implicit $mode
+ %58:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %18, implicit $exec, implicit $mode
+ %59:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %19, implicit $exec, implicit $mode
+ %60:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %10.sub2, implicit $exec, implicit $mode
+ %61:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %21, implicit $exec, implicit $mode
+ %62:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %22, implicit $exec, implicit $mode
+ %63:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %23, implicit $exec, implicit $mode
+ %64:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %24, implicit $exec, implicit $mode
+ %65:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %25, implicit $exec, implicit $mode
+ %66:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %26, implicit $exec, implicit $mode
+ %67:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %27, implicit $exec, implicit $mode
+ %68:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %28, implicit $exec, implicit $mode
+ %69:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %29, implicit $exec, implicit $mode
+ %70:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %30, implicit $exec, implicit $mode
+ %71:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %31, implicit $exec, implicit $mode
+ %72:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %32, implicit $exec, implicit $mode
+ %73:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %33, implicit $exec, implicit $mode
+ %74:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %34, implicit $exec, implicit $mode
+ %75:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %35, implicit $exec, implicit $mode
+ %76:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %36, implicit $exec, implicit $mode
+ %77:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %37, implicit $exec, implicit $mode
+ %78:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %38, implicit $exec, implicit $mode
+ %79:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %39, implicit $exec, implicit $mode
+ %80:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %40, implicit $exec, implicit $mode
+ %81:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %41, implicit $exec, implicit $mode
+ S_BRANCH %bb.4
+
+ bb.4:
+ S_NOP 0, implicit %50, implicit %60, implicit %10.sub0, implicit %10.sub2
+ S_NOP 0, implicit %51, implicit %61, implicit %11, implicit %21
+ S_NOP 0, implicit %52, implicit %62, implicit %12, implicit %22
+ S_NOP 0, implicit %53, implicit %63, implicit %13, implicit %23
+ S_NOP 0, implicit %54, implicit %64, implicit %14, implicit %24
+ S_NOP 0, implicit %55, implicit %65
+ S_NOP 0, implicit %56, implicit %66
+ S_NOP 0, implicit %57, implicit %67
+ S_NOP 0, implicit %58, implicit %68
+ S_NOP 0, implicit %59, implicit %69
+ S_NOP 0, implicit %70, implicit %71
+ S_NOP 0, implicit %72, implicit %73
+ S_NOP 0, implicit %72, implicit %73
+ S_NOP 0, implicit %74, implicit %75
+ S_NOP 0, implicit %76, implicit %77
+ S_NOP 0, implicit %78, implicit %79
+ S_NOP 0, implicit %80
+ S_ENDPGM 0
+...
+
+---
+name: remat_virtual_vgpr_subreg_uses_not_available
+tracksRegLiveness: true
+machineFunctionInfo:
+ isEntryFunction: true
+body: |
+ ; GFX908-LABEL: name: remat_virtual_vgpr_subreg_uses_not_available
+ ; GFX908: bb.0:
+ ; GFX908-NEXT: successors: %bb.1(0x80000000)
+ ; GFX908-NEXT: liveins: $vgpr0, $sgpr0_sgpr1
+ ; GFX908-NEXT: {{ $}}
+ ; GFX908-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr0_sgpr1
+ ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+ ; GFX908-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 52, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4)
+ ; GFX908-NEXT: undef [[S_MOV_B32_:%[0-9]+]].sub1:sreg_64 = S_MOV_B32 0
+ ; GFX908-NEXT: undef [[DEF:%[0-9]+]].sub0:vreg_512 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF:%[0-9]+]].sub1:vreg_512 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF:%[0-9]+]].sub2:vreg_512 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF:%[0-9]+]].sub3:vreg_512 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: dead [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF3]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF4:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF5:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: dead [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: dead [[V_CMP_GT_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_GT_U32_e64 [[S_LOAD_DWORDX2_IMM]].sub0, [[COPY1]](s32), implicit $exec
+ ; GFX908-NEXT: dead undef [[S_MOV_B32_:%[0-9]+]].sub0:sreg_64 = COPY [[S_LOAD_DWORDX2_IMM]].sub1
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF1]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF2]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_3:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF4]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_4:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF5]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF7:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_5:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF7]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_6:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF8]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_7:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF9]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF10:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_8:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF10]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF11:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_9:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF11]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_10:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF12]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF13:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_11:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF13]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF14:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_12:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF14]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF15:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_13:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF15]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_14:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF16]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF17:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_15:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF17]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF18:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_16:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF18]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF19:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_17:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF19]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF20:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_18:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF20]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF21:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_19:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF21]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF22:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_20:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF22]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF23:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_21:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF23]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF24:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF25:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF26:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF27:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF28:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF29:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF30:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF31:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF]].sub0, implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF24]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF25]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF26]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_26:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF27]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF]].sub2, implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF28]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF29]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF30]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF31]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: S_BRANCH %bb.1
+ ; GFX908-NEXT: {{ $}}
+ ; GFX908-NEXT: bb.1:
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_22]], implicit [[V_CVT_I32_F32_e32_27]], implicit [[DEF]].sub1, implicit [[DEF]].sub3
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_1]], implicit [[V_CVT_I32_F32_e32_6]], implicit [[DEF24]], implicit [[DEF28]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_23]], implicit [[V_CVT_I32_F32_e32_28]], implicit [[DEF25]], implicit [[DEF29]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_24]], implicit [[V_CVT_I32_F32_e32_29]], implicit [[DEF26]], implicit [[DEF30]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_25]], implicit [[V_CVT_I32_F32_e32_30]], implicit [[DEF27]], implicit [[DEF31]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_26]], implicit [[V_CVT_I32_F32_e32_31]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_2]], implicit [[V_CVT_I32_F32_e32_7]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_3]], implicit [[V_CVT_I32_F32_e32_8]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_4]], implicit [[V_CVT_I32_F32_e32_9]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_5]], implicit [[V_CVT_I32_F32_e32_10]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_11]], implicit [[V_CVT_I32_F32_e32_12]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_13]], implicit [[V_CVT_I32_F32_e32_14]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_13]], implicit [[V_CVT_I32_F32_e32_14]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_15]], implicit [[V_CVT_I32_F32_e32_16]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_17]], implicit [[V_CVT_I32_F32_e32_18]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_19]], implicit [[V_CVT_I32_F32_e32_20]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_21]]
+ ; GFX908-NEXT: S_ENDPGM 0
+ bb.0:
+ liveins: $vgpr0, $sgpr0_sgpr1
+
+ %1:sgpr_64(p4) = COPY $sgpr0_sgpr1
+ %2:vgpr_32(s32) = COPY $vgpr0
+ %3:sreg_64_xexec = S_LOAD_DWORDX2_IMM %1(p4), 52, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4)
+ %4:sreg_64 = V_CMP_GT_U32_e64 %3.sub0, %2(s32), implicit $exec
+ undef %5.sub1:sreg_64 = S_MOV_B32 0
+ %5.sub0:sreg_64 = COPY %3.sub1
+ undef %10.sub0:vreg_512 = IMPLICIT_DEF
+ %10.sub1:vreg_512 = IMPLICIT_DEF
+ %10.sub2:vreg_512 = IMPLICIT_DEF
+ %10.sub3:vreg_512 = IMPLICIT_DEF
+ %11:vgpr_32 = IMPLICIT_DEF
+ %12:vgpr_32 = IMPLICIT_DEF
+ %13:vgpr_32 = IMPLICIT_DEF
+ %14:vgpr_32 = IMPLICIT_DEF
+ %15:vgpr_32 = IMPLICIT_DEF
+ %16:vgpr_32 = IMPLICIT_DEF
+ %17:vgpr_32 = IMPLICIT_DEF
+ %18:vgpr_32 = IMPLICIT_DEF
+ %19:vgpr_32 = IMPLICIT_DEF
+ %20:vgpr_32 = IMPLICIT_DEF
+ %21:vgpr_32 = IMPLICIT_DEF
+ %22:vgpr_32 = IMPLICIT_DEF
+ %23:vgpr_32 = IMPLICIT_DEF
+ %24:vgpr_32 = IMPLICIT_DEF
+ %25:vgpr_32 = IMPLICIT_DEF
+ %26:vgpr_32 = IMPLICIT_DEF
+ %27:vgpr_32 = IMPLICIT_DEF
+ %28:vgpr_32 = IMPLICIT_DEF
+ %29:vgpr_32 = IMPLICIT_DEF
+ %30:vgpr_32 = IMPLICIT_DEF
+ %31:vgpr_32 = IMPLICIT_DEF
+ %32:vgpr_32 = IMPLICIT_DEF
+ %33:vgpr_32 = IMPLICIT_DEF
+ %34:vgpr_32 = IMPLICIT_DEF
+ %35:vgpr_32 = IMPLICIT_DEF
+ %36:vgpr_32 = IMPLICIT_DEF
+ %37:vgpr_32 = IMPLICIT_DEF
+ %38:vgpr_32 = IMPLICIT_DEF
+ %39:vgpr_32 = IMPLICIT_DEF
+ %40:vgpr_32 = IMPLICIT_DEF
+ %41:vgpr_32 = IMPLICIT_DEF
+ %50:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %10.sub0, implicit $exec, implicit $mode
+ %51:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %11, implicit $exec, implicit $mode
+ %52:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %12, implicit $exec, implicit $mode
+ %53:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %13, implicit $exec, implicit $mode
+ %54:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %14, implicit $exec, implicit $mode
+ %55:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %15, implicit $exec, implicit $mode
+ %56:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %16, implicit $exec, implicit $mode
+ %57:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %17, implicit $exec, implicit $mode
+ %58:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %18, implicit $exec, implicit $mode
+ %59:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %19, implicit $exec, implicit $mode
+ %60:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %10.sub2, implicit $exec, implicit $mode
+ %61:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %21, implicit $exec, implicit $mode
+ %62:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %22, implicit $exec, implicit $mode
+ %63:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %23, implicit $exec, implicit $mode
+ %64:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %24, implicit $exec, implicit $mode
+ %65:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %25, implicit $exec, implicit $mode
+ %66:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %26, implicit $exec, implicit $mode
+ %67:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %27, implicit $exec, implicit $mode
+ %68:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %28, implicit $exec, implicit $mode
+ %69:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %29, implicit $exec, implicit $mode
+ %70:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %30, implicit $exec, implicit $mode
+ %71:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %31, implicit $exec, implicit $mode
+ %72:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %32, implicit $exec, implicit $mode
+ %73:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %33, implicit $exec, implicit $mode
+ %74:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %34, implicit $exec, implicit $mode
+ %75:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %35, implicit $exec, implicit $mode
+ %76:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %36, implicit $exec, implicit $mode
+ %77:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %37, implicit $exec, implicit $mode
+ %78:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %38, implicit $exec, implicit $mode
+ %79:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %39, implicit $exec, implicit $mode
+ %80:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %40, implicit $exec, implicit $mode
+ %81:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %41, implicit $exec, implicit $mode
+ S_BRANCH %bb.4
+
+ bb.4:
+ S_NOP 0, implicit %50, implicit %60, implicit %10.sub1, implicit %10.sub3
+ S_NOP 0, implicit %51, implicit %61, implicit %12, implicit %22
+ S_NOP 0, implicit %52, implicit %62, implicit %13, implicit %23
+ S_NOP 0, implicit %53, implicit %63, implicit %14, implicit %24
+ S_NOP 0, implicit %54, implicit %64, implicit %15, implicit %25
+ S_NOP 0, implicit %55, implicit %65
+ S_NOP 0, implicit %56, implicit %66
+ S_NOP 0, implicit %57, implicit %67
+ S_NOP 0, implicit %58, implicit %68
+ S_NOP 0, implicit %59, implicit %69
+ S_NOP 0, implicit %70, implicit %71
+ S_NOP 0, implicit %72, implicit %73
+ S_NOP 0, implicit %72, implicit %73
+ S_NOP 0, implicit %74, implicit %75
+ S_NOP 0, implicit %76, implicit %77
+ S_NOP 0, implicit %78, implicit %79
+ S_NOP 0, implicit %80
+ S_ENDPGM 0
+...
+
+
+---
+name: remat_virtual_vgpr_subreg_def
+tracksRegLiveness: true
+machineFunctionInfo:
+ isEntryFunction: true
+body: |
+ ; GFX908-LABEL: name: remat_virtual_vgpr_subreg_def
+ ; GFX908: bb.0:
+ ; GFX908-NEXT: successors: %bb.1(0x80000000)
+ ; GFX908-NEXT: liveins: $vgpr0, $sgpr0_sgpr1
+ ; GFX908-NEXT: {{ $}}
+ ; GFX908-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr0_sgpr1
+ ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+ ; GFX908-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 52, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4)
+ ; GFX908-NEXT: undef [[S_MOV_B32_:%[0-9]+]].sub1:sreg_64 = S_MOV_B32 0
+ ; GFX908-NEXT: undef [[DEF:%[0-9]+]].sub0:vreg_512 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF:%[0-9]+]].sub1:vreg_512 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF:%[0-9]+]].sub2:vreg_512 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF:%[0-9]+]].sub3:vreg_512 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: dead [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF3]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF4:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF5:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: dead [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: dead [[V_CMP_GT_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_GT_U32_e64 [[S_LOAD_DWORDX2_IMM]].sub0, [[COPY1]](s32), implicit $exec
+ ; GFX908-NEXT: dead undef [[S_MOV_B32_:%[0-9]+]].sub0:sreg_64 = COPY [[S_LOAD_DWORDX2_IMM]].sub1
+ ; GFX908-NEXT: undef [[V_CVT_I32_F32_e32_1:%[0-9]+]].sub1:vreg_64 = nofpexcept V_CVT_I32_F32_e32 [[DEF1]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF2]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_3:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF4]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_4:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF5]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF7:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_5:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF7]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_6:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF8]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_7:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF9]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF10:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_8:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF10]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF11:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_9:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF11]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_10:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF12]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF13:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_11:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF13]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF14:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_12:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF14]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF15:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_13:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF15]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_14:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF16]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF17:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_15:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF17]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF18:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_16:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF18]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF19:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_17:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF19]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF20:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_18:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF20]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF21:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_19:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF21]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF22:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_20:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF22]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF23:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_21:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF23]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF24:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF24]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF25:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF26:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF27:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF28:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF29:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF30:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF31:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF25]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF30]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF31]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: undef [[V_CVT_I32_F32_e32_1:%[0-9]+]].sub0:vreg_64 = nofpexcept V_CVT_I32_F32_e32 [[DEF]].sub0, implicit $exec, implicit $mode
+ ; GFX908-NEXT: S_BRANCH %bb.1
+ ; GFX908-NEXT: {{ $}}
+ ; GFX908-NEXT: bb.1:
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_26:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF]].sub2, implicit $exec, implicit $mode
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_1]].sub0, implicit [[V_CVT_I32_F32_e32_26]], implicit [[DEF]].sub0, implicit [[DEF]].sub2
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF28]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_23]], implicit [[V_CVT_I32_F32_e32_27]], implicit [[DEF1]], implicit [[DEF28]]
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF29]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_23]], implicit [[V_CVT_I32_F32_e32_28]], implicit [[DEF25]], implicit [[DEF29]]
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF26]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF27]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_29]], implicit [[V_CVT_I32_F32_e32_24]], implicit [[DEF26]], implicit [[DEF30]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_30]], implicit [[V_CVT_I32_F32_e32_25]], implicit [[DEF27]], implicit [[DEF31]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_2]], implicit [[V_CVT_I32_F32_e32_7]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_3]], implicit [[V_CVT_I32_F32_e32_8]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_4]], implicit [[V_CVT_I32_F32_e32_9]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_5]], implicit [[V_CVT_I32_F32_e32_10]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_6]], implicit [[V_CVT_I32_F32_e32_11]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_12]], implicit [[V_CVT_I32_F32_e32_13]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_14]], implicit [[V_CVT_I32_F32_e32_15]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_14]], implicit [[V_CVT_I32_F32_e32_15]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_16]], implicit [[V_CVT_I32_F32_e32_17]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_18]], implicit [[V_CVT_I32_F32_e32_19]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_20]], implicit [[V_CVT_I32_F32_e32_21]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_22]]
+ ; GFX908-NEXT: S_ENDPGM 0
+ bb.0:
+ liveins: $vgpr0, $sgpr0_sgpr1
+
+ %1:sgpr_64(p4) = COPY $sgpr0_sgpr1
+ %2:vgpr_32(s32) = COPY $vgpr0
+ %3:sreg_64_xexec = S_LOAD_DWORDX2_IMM %1(p4), 52, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4)
+ %4:sreg_64 = V_CMP_GT_U32_e64 %3.sub0, %2(s32), implicit $exec
+ undef %5.sub1:sreg_64 = S_MOV_B32 0
+ %5.sub0:sreg_64 = COPY %3.sub1
+ undef %10.sub0:vreg_512 = IMPLICIT_DEF
+ %10.sub1:vreg_512 = IMPLICIT_DEF
+ %10.sub2:vreg_512 = IMPLICIT_DEF
+ %10.sub3:vreg_512 = IMPLICIT_DEF
+ %11:vgpr_32 = IMPLICIT_DEF
+ %12:vgpr_32 = IMPLICIT_DEF
+ %13:vgpr_32 = IMPLICIT_DEF
+ %14:vgpr_32 = IMPLICIT_DEF
+ %15:vgpr_32 = IMPLICIT_DEF
+ %16:vgpr_32 = IMPLICIT_DEF
+ %17:vgpr_32 = IMPLICIT_DEF
+ %18:vgpr_32 = IMPLICIT_DEF
+ %19:vgpr_32 = IMPLICIT_DEF
+ %20:vgpr_32 = IMPLICIT_DEF
+ %21:vgpr_32 = IMPLICIT_DEF
+ %22:vgpr_32 = IMPLICIT_DEF
+ %23:vgpr_32 = IMPLICIT_DEF
+ %24:vgpr_32 = IMPLICIT_DEF
+ %25:vgpr_32 = IMPLICIT_DEF
+ %26:vgpr_32 = IMPLICIT_DEF
+ %27:vgpr_32 = IMPLICIT_DEF
+ %28:vgpr_32 = IMPLICIT_DEF
+ %29:vgpr_32 = IMPLICIT_DEF
+ %30:vgpr_32 = IMPLICIT_DEF
+ %31:vgpr_32 = IMPLICIT_DEF
+ %32:vgpr_32 = IMPLICIT_DEF
+ %33:vgpr_32 = IMPLICIT_DEF
+ %34:vgpr_32 = IMPLICIT_DEF
+ %35:vgpr_32 = IMPLICIT_DEF
+ %36:vgpr_32 = IMPLICIT_DEF
+ %37:vgpr_32 = IMPLICIT_DEF
+ %38:vgpr_32 = IMPLICIT_DEF
+ %39:vgpr_32 = IMPLICIT_DEF
+ %40:vgpr_32 = IMPLICIT_DEF
+ %41:vgpr_32 = IMPLICIT_DEF
+ undef %50.sub0:vreg_64 = nofpexcept V_CVT_I32_F32_e32 %10.sub0, implicit $exec, implicit $mode
+ %50.sub1:vreg_64 = nofpexcept V_CVT_I32_F32_e32 %11, implicit $exec, implicit $mode
+ %52:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %12, implicit $exec, implicit $mode
+ %53:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %13, implicit $exec, implicit $mode
+ %54:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %14, implicit $exec, implicit $mode
+ %55:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %15, implicit $exec, implicit $mode
+ %56:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %16, implicit $exec, implicit $mode
+ %57:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %17, implicit $exec, implicit $mode
+ %58:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %18, implicit $exec, implicit $mode
+ %59:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %19, implicit $exec, implicit $mode
+ %60:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %10.sub2, implicit $exec, implicit $mode
+ %61:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %21, implicit $exec, implicit $mode
+ %62:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %22, implicit $exec, implicit $mode
+ %63:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %23, implicit $exec, implicit $mode
+ %64:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %24, implicit $exec, implicit $mode
+ %65:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %25, implicit $exec, implicit $mode
+ %66:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %26, implicit $exec, implicit $mode
+ %67:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %27, implicit $exec, implicit $mode
+ %68:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %28, implicit $exec, implicit $mode
+ %69:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %29, implicit $exec, implicit $mode
+ %70:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %30, implicit $exec, implicit $mode
+ %71:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %31, implicit $exec, implicit $mode
+ %72:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %32, implicit $exec, implicit $mode
+ %73:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %33, implicit $exec, implicit $mode
+ %74:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %34, implicit $exec, implicit $mode
+ %75:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %35, implicit $exec, implicit $mode
+ %76:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %36, implicit $exec, implicit $mode
+ %77:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %37, implicit $exec, implicit $mode
+ %78:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %38, implicit $exec, implicit $mode
+ %79:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %39, implicit $exec, implicit $mode
+ %80:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %40, implicit $exec, implicit $mode
+ %81:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %41, implicit $exec, implicit $mode
+ S_BRANCH %bb.4
+
+ bb.4:
+ S_NOP 0, implicit %50.sub0, implicit %60, implicit %10.sub0, implicit %10.sub2
+ S_NOP 0, implicit %52, implicit %61, implicit %11, implicit %21
+ S_NOP 0, implicit %52, implicit %62, implicit %12, implicit %22
+ S_NOP 0, implicit %53, implicit %63, implicit %13, implicit %23
+ S_NOP 0, implicit %54, implicit %64, implicit %14, implicit %24
+ S_NOP 0, implicit %55, implicit %65
+ S_NOP 0, implicit %56, implicit %66
+ S_NOP 0, implicit %57, implicit %67
+ S_NOP 0, implicit %58, implicit %68
+ S_NOP 0, implicit %59, implicit %69
+ S_NOP 0, implicit %70, implicit %71
+ S_NOP 0, implicit %72, implicit %73
+ S_NOP 0, implicit %72, implicit %73
+ S_NOP 0, implicit %74, implicit %75
+ S_NOP 0, implicit %76, implicit %77
+ S_NOP 0, implicit %78, implicit %79
+ S_NOP 0, implicit %80
+ S_ENDPGM 0
+...
+
diff --git a/llvm/test/CodeGen/AMDGPU/sched-assert-dead-def-subreg-use-other-subreg.mir b/llvm/test/CodeGen/AMDGPU/sched-assert-dead-def-subreg-use-other-subreg.mir
index 3f88f98e343712f..c933fb0de586407 100644
--- a/llvm/test/CodeGen/AMDGPU/sched-assert-dead-def-subreg-use-other-subreg.mir
+++ b/llvm/test/CodeGen/AMDGPU/sched-assert-dead-def-subreg-use-other-subreg.mir
@@ -23,7 +23,7 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: undef [[V_MOV_B32_e32_:%[0-9]+]].sub3:vreg_512 = V_MOV_B32_e32 0, implicit $exec
; CHECK-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- ; CHECK-NEXT: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 0, [[V_MOV_B32_e32_1]], implicit $exec
+ ; CHECK-NEXT: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_512 = COPY [[V_MOV_B32_e32_]]
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
@@ -32,17 +32,17 @@ body: |
; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN [[V_MOV_B32_e32_]].sub3, undef %5:vgpr_32, $sgpr24_sgpr25_sgpr26_sgpr27, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32), align 8, addrspace 5)
; CHECK-NEXT: dead [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 undef %7:vgpr_32, 0, 0, implicit $exec
; CHECK-NEXT: dead [[DS_READ_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_READ_B64_gfx9 [[V_MOV_B32_e32_1]], 0, 0, implicit $exec
+ ; CHECK-NEXT: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 0, [[V_MOV_B32_e32_1]], implicit $exec
; CHECK-NEXT: dead [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128 = DS_READ_B128_gfx9 [[V_ADD_U32_e32_]], 0, 0, implicit $exec
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
; CHECK-NEXT: undef [[COPY2:%[0-9]+]].sub1:vreg_512 = COPY [[COPY]].sub1
; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, 851978 /* regdef:VGPR_16 */, def dead [[COPY1]], 851978 /* regdef:VGPR_16 */, def dead [[COPY]].sub1, 2147483657 /* reguse tiedto:$0 */, [[COPY1]], 2147549193 /* reguse tiedto:$1 */, [[COPY]].sub1
; CHECK-NEXT: [[COPY2:%[0-9]+]].sub0:vreg_512 = COPY [[COPY]].sub0
; CHECK-NEXT: [[COPY2:%[0-9]+]].sub3:vreg_512 = COPY [[COPY]].sub3
- ; CHECK-NEXT: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- ; CHECK-NEXT: dead [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 4, [[V_MOV_B32_e32_2]], implicit-def dead $vcc, implicit $exec
; CHECK-NEXT: [[COPY2:%[0-9]+]].sub2:vreg_512 = COPY undef [[V_MOV_B32_e32_1]]
; CHECK-NEXT: [[COPY2:%[0-9]+]].sub5:vreg_512 = COPY undef [[V_MOV_B32_e32_1]]
; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_512 = COPY [[COPY2]]
+ ; CHECK-NEXT: dead [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 4, [[V_MOV_B32_e32_2]], implicit-def dead $vcc, implicit $exec
; CHECK-NEXT: S_BRANCH %bb.1
bb.0:
liveins: $sgpr6_sgpr7
>From 7e501d5e7a1648e3b894e53fc418b74cf4b8df77 Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Fri, 31 Jan 2025 15:50:54 -0800
Subject: [PATCH 2/5] Do not allow remat for instructions with dependencies
Change-Id: I69ea68a41b86026e600ac2a4a6b5c351c6753893
---
llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp | 41 +++++++++++++++++++++
1 file changed, 41 insertions(+)
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index 23f7eee863d95c1..3904926f18c8038 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -1691,6 +1691,47 @@ void PreRARematStage::collectRematerializableInstructions() {
if (Def->getParent() == UseI->getParent())
continue;
+ bool HasRematDependency = false;
+ // Check if this instruction uses any registers that are planned to be
+ // rematerialized
+ for (auto &RematEntry : RematerializableInsts) {
+ if (find_if(RematEntry.second,
+ [&Def](std::pair<MachineInstr *, MachineInstr *> &Remat) {
+ for (MachineOperand &MO : Def->operands()) {
+ if (!MO.isReg())
+ continue;
+ if (MO.getReg() == Remat.first->getOperand(0).getReg())
+ return true;
+ }
+ return false;
+ }) != RematEntry.second.end()) {
+ HasRematDependency = true;
+ break;
+ }
+ }
+ // Do not rematerialize an instruction if it uses an instruction that we
+ // have designated for rematerialization.
+ // FIXME: Allow for rematerialization chains: this requires 1. updating
+ // remat points to account for uses that are rematerialized, and 2. either
+ // rematerializing the candidates in careful ordering, or deferring the MBB
+ // RP walk until the entire chain has been rematerialized.
+ if (HasRematDependency)
+ continue;
+
+ // Similarly, check if the UseI is planned to be remat.
+ for (auto &RematEntry : RematerializableInsts) {
+ if (find_if(RematEntry.second,
+ [&UseI](std::pair<MachineInstr *, MachineInstr *> &Remat) {
+ return Remat.first == UseI;
+ }) != RematEntry.second.end()) {
+ HasRematDependency = true;
+ break;
+ }
+ }
+
+ if (HasRematDependency)
+ break;
+
// We are only collecting defs that are defined in another block and are
// live-through or used inside regions at MinOccupancy. This means that the
// register must be in the live-in set for the region.
>From 10d53b0b65d21e60c4fc7e5482ad36df9cebf091 Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Mon, 3 Feb 2025 19:28:35 -0800
Subject: [PATCH 3/5] Fix asserts
Change-Id: I2131d6510e74dc1a7c58ebe8007455cb3540a0f3
---
llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp | 16 ++++++++--------
1 file changed, 8 insertions(+), 8 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index 3904926f18c8038..b76747b2231b686 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -1627,10 +1627,14 @@ bool PreRARematStage::allUsesAvailableAt(const MachineInstr *InstToRemat,
if (!MO.isReg() || !MO.getReg() || !MO.readsReg())
continue;
- // Do not attempt to reason about PhysRegs
if (!MO.getReg().isVirtual()) {
- assert(DAG.MRI.isConstantPhysReg(MO.getReg()) ||
- DAG.TII->isIgnorableUse(MO));
+ // Do not attempt to reason about PhysRegs
+ // TODO: better analysis of PhysReg livness
+ if (!DAG.MRI.isConstantPhysReg(MO.getReg()) &&
+ !DAG.TII->isIgnorableUse(MO))
+ return false;
+
+ // Constant PhysRegs and IgnorableUses are okay
continue;
}
@@ -1664,7 +1668,6 @@ bool PreRARematStage::allUsesAvailableAt(const MachineInstr *InstToRemat,
if (LM.none())
break;
}
- assert(LM.none());
}
}
return true;
@@ -1856,10 +1859,7 @@ bool PreRARematStage::sinkTriviallyRematInsts(const GCNSubtarget &ST,
MachineBasicBlock::iterator InsertPos =
MachineBasicBlock::iterator(It.second);
Register Reg = Def->getOperand(0).getReg();
- // Rematerialize MI to its use block. Since we are only rematerializing
- // instructions that do not have any virtual reg uses, we do not need to
- // call LiveRangeEdit::allUsesAvailableAt() and
- // LiveRangeEdit::canRematerializeAt().
+ // Rematerialize MI to its use block.
TII->reMaterialize(*InsertPos->getParent(), InsertPos, Reg,
Def->getOperand(0).getSubReg(), *Def, *DAG.TRI);
MachineInstr *NewMI = &*std::prev(InsertPos);
>From 389932b9c11fe958104429b51644f4ed052404a4 Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Wed, 5 Feb 2025 09:31:31 -0800
Subject: [PATCH 4/5] Add test for omitted subrange
Change-Id: I644283266b49b0ccfeb0c6d7b7646865e78db643
---
.../machine-scheduler-sink-trivial-remats.mir | 251 ++++++++++++++++++
1 file changed, 251 insertions(+)
diff --git a/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats.mir b/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats.mir
index e8dd5eaccd2dba0..533222125757c74 100644
--- a/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats.mir
+++ b/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats.mir
@@ -7283,3 +7283,254 @@ body: |
S_ENDPGM 0
...
+# The remat candidate (instruction defining %3) uses a register (%2) which does not have a subrange for every subregister (sub1 is undefined).
+# Be sure that when checking if we can rematerialize %3, that we handle the liveness checking of %2 properly.
+
+---
+name: omitted_subrange
+tracksRegLiveness: true
+body: |
+ ; GFX908-LABEL: name: omitted_subrange
+ ; GFX908: bb.0:
+ ; GFX908-NEXT: successors: %bb.1(0x80000000)
+ ; GFX908-NEXT: liveins: $sgpr3, $sgpr4
+ ; GFX908-NEXT: {{ $}}
+ ; GFX908-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF4:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF5:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF7:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF10:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF11:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF13:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF14:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF15:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF17:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF18:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF19:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: dead [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF19]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF20:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF1]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_3:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF2]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_4:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF3]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_5:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF4]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_6:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF5]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_7:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF6]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_8:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF7]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_9:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF8]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_10:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF9]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_11:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF10]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_12:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF11]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_13:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF12]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_14:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF13]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_15:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF14]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_16:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF15]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_17:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF16]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_18:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF17]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_19:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF18]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_20:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF20]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF21:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_21:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF21]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF22:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF23:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF24:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF25:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF26:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF27:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF28:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF29:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF30:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF31:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF25]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF26]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF27]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF28]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_26:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF29]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF30]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF32:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+ ; GFX908-NEXT: undef [[V_RCP_F32_e32_:%[0-9]+]].sub0:vreg_64_align2 = nnan ninf nsz arcp contract afn reassoc nofpexcept V_RCP_F32_e32 [[DEF32]].sub0, implicit $mode, implicit $exec
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF31]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: dead [[DEF33:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_PK_MUL_F32_:%[0-9]+]]:vreg_64_align2 = nnan ninf nsz arcp contract afn reassoc nofpexcept V_PK_MUL_F32 0, [[V_RCP_F32_e32_]], 8, [[DEF32]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+ ; GFX908-NEXT: [[DEF34:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+ ; GFX908-NEXT: S_BRANCH %bb.1
+ ; GFX908-NEXT: {{ $}}
+ ; GFX908-NEXT: bb.1:
+ ; GFX908-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000)
+ ; GFX908-NEXT: liveins: $sgpr3, $sgpr4
+ ; GFX908-NEXT: {{ $}}
+ ; GFX908-NEXT: %temp:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: S_CMP_LG_U32 $sgpr3, $sgpr4, implicit-def $scc
+ ; GFX908-NEXT: [[DEF34:%[0-9]+]].sub0:vreg_64_align2 = nnan ninf nsz arcp contract afn reassoc nofpexcept V_MUL_F32_e64 0, [[DEF32]].sub0, 1, %temp, 0, 0, implicit $mode, implicit $exec
+ ; GFX908-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc
+ ; GFX908-NEXT: S_BRANCH %bb.3
+ ; GFX908-NEXT: {{ $}}
+ ; GFX908-NEXT: bb.2:
+ ; GFX908-NEXT: successors: %bb.3(0x80000000)
+ ; GFX908-NEXT: liveins: $sgpr3, $sgpr4
+ ; GFX908-NEXT: {{ $}}
+ ; GFX908-NEXT: undef [[V_FMA_F32_e64_:%[0-9]+]].sub0:vreg_64_align2 = nnan ninf nsz arcp contract afn reassoc nofpexcept V_FMA_F32_e64 0, [[DEF34]].sub1, 0, [[V_RCP_F32_e32_]].sub0, 0, [[DEF34]].sub0, 0, 0, implicit $mode, implicit $exec
+ ; GFX908-NEXT: %temp2:vreg_64_align2 = IMPLICIT_DEF
+ ; GFX908-NEXT: dead [[V_PK_FMA_F32_:%[0-9]+]]:vreg_64_align2 = nnan ninf nsz arcp contract afn reassoc nofpexcept V_PK_FMA_F32 0, [[V_FMA_F32_e64_]], 8, %temp2, 11, [[V_PK_MUL_F32_]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF22]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_29]], implicit [[V_CVT_I32_F32_e32_24]], implicit [[DEF22]], implicit [[DEF27]]
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF23]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_30]], implicit [[V_CVT_I32_F32_e32_25]], implicit [[DEF23]], implicit [[DEF28]]
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF24]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_31]], implicit [[V_CVT_I32_F32_e32_26]], implicit [[DEF24]], implicit [[DEF29]]
+ ; GFX908-NEXT: dead [[DEF35:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_22]], implicit [[V_CVT_I32_F32_e32_27]], implicit [[DEF25]], implicit [[DEF30]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_23]], implicit [[V_CVT_I32_F32_e32_28]], implicit [[DEF26]], implicit [[DEF31]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_1]], implicit [[V_CVT_I32_F32_e32_6]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_2]], implicit [[V_CVT_I32_F32_e32_7]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_3]], implicit [[V_CVT_I32_F32_e32_8]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_4]], implicit [[V_CVT_I32_F32_e32_9]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_5]], implicit [[V_CVT_I32_F32_e32_10]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_11]], implicit [[V_CVT_I32_F32_e32_12]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_13]], implicit [[V_CVT_I32_F32_e32_14]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_13]], implicit [[V_CVT_I32_F32_e32_14]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_15]], implicit [[V_CVT_I32_F32_e32_16]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_17]], implicit [[V_CVT_I32_F32_e32_18]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_19]], implicit [[V_CVT_I32_F32_e32_20]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_21]]
+ ; GFX908-NEXT: S_BRANCH %bb.3
+ ; GFX908-NEXT: {{ $}}
+ ; GFX908-NEXT: bb.3:
+ ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.4(0x40000000)
+ ; GFX908-NEXT: liveins: $sgpr3, $sgpr4
+ ; GFX908-NEXT: {{ $}}
+ ; GFX908-NEXT: S_CMP_LG_U32 $sgpr4, 0, implicit-def $scc
+ ; GFX908-NEXT: S_CBRANCH_SCC1 %bb.1, implicit killed $scc
+ ; GFX908-NEXT: S_BRANCH %bb.4
+ ; GFX908-NEXT: {{ $}}
+ ; GFX908-NEXT: bb.4:
+ ; GFX908-NEXT: S_ENDPGM 0
+ bb.0:
+ liveins: $sgpr3, $sgpr4
+
+ %0:vreg_64 = IMPLICIT_DEF
+ %1:vreg_64_align2 = IMPLICIT_DEF
+ undef %2.sub0:vreg_64_align2 = nnan ninf nsz arcp contract afn reassoc nofpexcept V_RCP_F32_e32 %1.sub0:vreg_64_align2, implicit $mode, implicit $exec
+ %3:vreg_64_align2 = nnan ninf nsz arcp contract afn reassoc nofpexcept V_PK_MUL_F32 0, %2, 8, %1, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+ %5:vreg_64_align2 = IMPLICIT_DEF
+ %10:vgpr_32 = IMPLICIT_DEF
+ %11:vgpr_32 = IMPLICIT_DEF
+ %12:vgpr_32 = IMPLICIT_DEF
+ %13:vgpr_32 = IMPLICIT_DEF
+ %14:vgpr_32 = IMPLICIT_DEF
+ %15:vgpr_32 = IMPLICIT_DEF
+ %16:vgpr_32 = IMPLICIT_DEF
+ %17:vgpr_32 = IMPLICIT_DEF
+ %18:vgpr_32 = IMPLICIT_DEF
+ %19:vgpr_32 = IMPLICIT_DEF
+ %20:vgpr_32 = IMPLICIT_DEF
+ %21:vgpr_32 = IMPLICIT_DEF
+ %22:vgpr_32 = IMPLICIT_DEF
+ %23:vgpr_32 = IMPLICIT_DEF
+ %24:vgpr_32 = IMPLICIT_DEF
+ %25:vgpr_32 = IMPLICIT_DEF
+ %26:vgpr_32 = IMPLICIT_DEF
+ %27:vgpr_32 = IMPLICIT_DEF
+ %28:vgpr_32 = IMPLICIT_DEF
+ %29:vgpr_32 = IMPLICIT_DEF
+ %30:vgpr_32 = IMPLICIT_DEF
+ %31:vgpr_32 = IMPLICIT_DEF
+ %32:vgpr_32 = IMPLICIT_DEF
+ %33:vgpr_32 = IMPLICIT_DEF
+ %34:vgpr_32 = IMPLICIT_DEF
+ %35:vgpr_32 = IMPLICIT_DEF
+ %36:vgpr_32 = IMPLICIT_DEF
+ %37:vgpr_32 = IMPLICIT_DEF
+ %38:vgpr_32 = IMPLICIT_DEF
+ %39:vgpr_32 = IMPLICIT_DEF
+ %40:vgpr_32 = IMPLICIT_DEF
+ %41:vgpr_32 = IMPLICIT_DEF
+ %50:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %10, implicit $exec, implicit $mode
+ %51:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %11, implicit $exec, implicit $mode
+ %52:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %12, implicit $exec, implicit $mode
+ %53:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %13, implicit $exec, implicit $mode
+ %54:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %14, implicit $exec, implicit $mode
+ %55:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %15, implicit $exec, implicit $mode
+ %56:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %16, implicit $exec, implicit $mode
+ %57:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %17, implicit $exec, implicit $mode
+ %58:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %18, implicit $exec, implicit $mode
+ %59:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %19, implicit $exec, implicit $mode
+ %60:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %20, implicit $exec, implicit $mode
+ %61:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %21, implicit $exec, implicit $mode
+ %62:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %22, implicit $exec, implicit $mode
+ %63:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %23, implicit $exec, implicit $mode
+ %64:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %24, implicit $exec, implicit $mode
+ %65:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %25, implicit $exec, implicit $mode
+ %66:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %26, implicit $exec, implicit $mode
+ %67:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %27, implicit $exec, implicit $mode
+ %68:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %28, implicit $exec, implicit $mode
+ %69:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %29, implicit $exec, implicit $mode
+ %70:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %30, implicit $exec, implicit $mode
+ %71:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %31, implicit $exec, implicit $mode
+ %72:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %32, implicit $exec, implicit $mode
+ %73:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %33, implicit $exec, implicit $mode
+ %74:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %34, implicit $exec, implicit $mode
+ %75:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %35, implicit $exec, implicit $mode
+ %76:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %36, implicit $exec, implicit $mode
+ %77:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %37, implicit $exec, implicit $mode
+ %78:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %38, implicit $exec, implicit $mode
+ %79:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %39, implicit $exec, implicit $mode
+ %80:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %40, implicit $exec, implicit $mode
+ %81:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %41, implicit $exec, implicit $mode
+ S_BRANCH %bb.1
+
+ bb.1:
+ liveins: $sgpr3, $sgpr4
+
+ %temp:vgpr_32 = IMPLICIT_DEF
+ %5.sub0:vreg_64_align2 = nnan ninf nsz arcp contract afn reassoc nofpexcept V_MUL_F32_e64 0, %1.sub0:vreg_64_align2, 1, %temp, 0, 0, implicit $mode, implicit $exec
+
+ S_CMP_LG_U32 killed $sgpr3, $sgpr4, implicit-def $scc
+ S_CBRANCH_SCC1 %bb.3, implicit killed $scc
+ S_BRANCH %bb.4
+
+ bb.3:
+ liveins: $sgpr3, $sgpr4
+
+ %6:vreg_64_align2 = IMPLICIT_DEF
+ undef %7.sub0:vreg_64_align2 = nnan ninf nsz arcp contract afn reassoc nofpexcept V_FMA_F32_e64 0, %5.sub1, 0, %2.sub0:vreg_64_align2, 0, %5.sub0:vreg_64_align2, 0, 0, implicit $mode, implicit $exec
+ %temp2:vreg_64_align2 = IMPLICIT_DEF
+ %8:vreg_64_align2 = nnan ninf nsz arcp contract afn reassoc nofpexcept V_PK_FMA_F32 0, %7:vreg_64_align2, 8, %temp2, 11, %3:vreg_64_align2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+ S_NOP 0, implicit %50, implicit %60, implicit %10, implicit %20
+ S_NOP 0, implicit %51, implicit %61, implicit %11, implicit %21
+ S_NOP 0, implicit %52, implicit %62, implicit %12, implicit %22
+ S_NOP 0, implicit %53, implicit %63, implicit %13, implicit %23
+ S_NOP 0, implicit %54, implicit %64, implicit %14, implicit %24
+ S_NOP 0, implicit %55, implicit %65
+ S_NOP 0, implicit %56, implicit %66
+ S_NOP 0, implicit %57, implicit %67
+ S_NOP 0, implicit %58, implicit %68
+ S_NOP 0, implicit %59, implicit %69
+ S_NOP 0, implicit %70, implicit %71
+ S_NOP 0, implicit %72, implicit %73
+ S_NOP 0, implicit %72, implicit %73
+ S_NOP 0, implicit %74, implicit %75
+ S_NOP 0, implicit %76, implicit %77
+ S_NOP 0, implicit %78, implicit %79
+ S_NOP 0, implicit %80
+ S_BRANCH %bb.4
+
+ bb.4:
+ liveins: $sgpr3, $sgpr4
+
+ S_CMP_LG_U32 $sgpr4, 0, implicit-def $scc
+ S_CBRANCH_SCC1 %bb.1, implicit killed $scc
+ S_BRANCH %bb.2
+
+ bb.2:
+ S_ENDPGM 0
+
+...
>From ebfde36a5b2bf89bec5934f44ad557405ff26422 Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Wed, 5 Feb 2025 17:27:57 -0800
Subject: [PATCH 5/5] Fix expensive check
Change-Id: I89e1100f69662c750c90383c64af1f066a117a59
---
llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp | 10 +++++-
.../machine-scheduler-sink-trivial-remats.mir | 36 +++++++++----------
2 files changed, 27 insertions(+), 19 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index b76747b2231b686..176586e3fbbb60d 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -1841,7 +1841,15 @@ bool PreRARematStage::sinkTriviallyRematInsts(const GCNSubtarget &ST,
assert(NewLiveIns[I].contains(UseReg));
LaneBitmask LiveInMask = NewLiveIns[I][UseReg];
- assert((LiveInMask & LM) == LM);
+ LaneBitmask UncoveredLanes = LM & ~(LiveInMask & LM);
+ // If this register has lanes not covered by the LiveIns, be sure they
+ // do not map to any subrange. ref:
+ // machine-scheduler-sink-trivial-remats.mir::omitted_subrange
+ if (UncoveredLanes.any()) {
+ assert(LI.hasSubRanges());
+ for (LiveInterval::SubRange &SR : LI.subranges())
+ assert((SR.LaneMask & UncoveredLanes).none());
+ }
}
#endif
}
diff --git a/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats.mir b/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats.mir
index 533222125757c74..fb65d80c46e06f7 100644
--- a/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats.mir
+++ b/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats.mir
@@ -7349,17 +7349,15 @@ body: |
; GFX908-NEXT: [[DEF29:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF30:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF31:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF25]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF26]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF27]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF28]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_26:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF29]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF30]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF26]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF27]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF28]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF29]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_26:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF30]], implicit $exec, implicit $mode
; GFX908-NEXT: [[DEF32:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
; GFX908-NEXT: undef [[V_RCP_F32_e32_:%[0-9]+]].sub0:vreg_64_align2 = nnan ninf nsz arcp contract afn reassoc nofpexcept V_RCP_F32_e32 [[DEF32]].sub0, implicit $mode, implicit $exec
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF31]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF31]], implicit $exec, implicit $mode
; GFX908-NEXT: dead [[DEF33:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_PK_MUL_F32_:%[0-9]+]]:vreg_64_align2 = nnan ninf nsz arcp contract afn reassoc nofpexcept V_PK_MUL_F32 0, [[V_RCP_F32_e32_]], 8, [[DEF32]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec
; GFX908-NEXT: [[DEF34:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
; GFX908-NEXT: S_BRANCH %bb.1
; GFX908-NEXT: {{ $}}
@@ -7379,17 +7377,19 @@ body: |
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: undef [[V_FMA_F32_e64_:%[0-9]+]].sub0:vreg_64_align2 = nnan ninf nsz arcp contract afn reassoc nofpexcept V_FMA_F32_e64 0, [[DEF34]].sub1, 0, [[V_RCP_F32_e32_]].sub0, 0, [[DEF34]].sub0, 0, 0, implicit $mode, implicit $exec
; GFX908-NEXT: %temp2:vreg_64_align2 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_PK_MUL_F32_:%[0-9]+]]:vreg_64_align2 = nnan ninf nsz arcp contract afn reassoc nofpexcept V_PK_MUL_F32 0, [[V_RCP_F32_e32_]], 8, [[DEF32]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec
; GFX908-NEXT: dead [[V_PK_FMA_F32_:%[0-9]+]]:vreg_64_align2 = nnan ninf nsz arcp contract afn reassoc nofpexcept V_PK_FMA_F32 0, [[V_FMA_F32_e64_]], 8, %temp2, 11, [[V_PK_MUL_F32_]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF22]], implicit $exec, implicit $mode
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_29]], implicit [[V_CVT_I32_F32_e32_24]], implicit [[DEF22]], implicit [[DEF27]]
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF23]], implicit $exec, implicit $mode
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_30]], implicit [[V_CVT_I32_F32_e32_25]], implicit [[DEF23]], implicit [[DEF28]]
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF24]], implicit $exec, implicit $mode
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_31]], implicit [[V_CVT_I32_F32_e32_26]], implicit [[DEF24]], implicit [[DEF29]]
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF22]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_28]], implicit [[V_CVT_I32_F32_e32_23]], implicit [[DEF22]], implicit [[DEF27]]
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF23]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_29]], implicit [[V_CVT_I32_F32_e32_24]], implicit [[DEF23]], implicit [[DEF28]]
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF24]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_30]], implicit [[V_CVT_I32_F32_e32_25]], implicit [[DEF24]], implicit [[DEF29]]
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF25]], implicit $exec, implicit $mode
; GFX908-NEXT: dead [[DEF35:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_22]], implicit [[V_CVT_I32_F32_e32_27]], implicit [[DEF25]], implicit [[DEF30]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_23]], implicit [[V_CVT_I32_F32_e32_28]], implicit [[DEF26]], implicit [[DEF31]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_1]], implicit [[V_CVT_I32_F32_e32_6]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_31]], implicit [[V_CVT_I32_F32_e32_26]], implicit [[DEF25]], implicit [[DEF30]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_22]], implicit [[V_CVT_I32_F32_e32_27]], implicit [[DEF26]], implicit [[DEF31]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_1]], implicit [[V_CVT_I32_F32_e32_6]], implicit [[DEF32]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_2]], implicit [[V_CVT_I32_F32_e32_7]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_3]], implicit [[V_CVT_I32_F32_e32_8]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_4]], implicit [[V_CVT_I32_F32_e32_9]]
@@ -7509,7 +7509,7 @@ body: |
S_NOP 0, implicit %52, implicit %62, implicit %12, implicit %22
S_NOP 0, implicit %53, implicit %63, implicit %13, implicit %23
S_NOP 0, implicit %54, implicit %64, implicit %14, implicit %24
- S_NOP 0, implicit %55, implicit %65
+ S_NOP 0, implicit %55, implicit %65, implicit %1
S_NOP 0, implicit %56, implicit %66
S_NOP 0, implicit %57, implicit %67
S_NOP 0, implicit %58, implicit %68
More information about the llvm-commits
mailing list