[llvm] [AMDGPU][Scheduler] Refactor ArchVGPR rematerialization during scheduling (PR #125885)
Lucas Ramirez via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 24 07:36:31 PDT 2025
https://github.com/lucas-rami updated https://github.com/llvm/llvm-project/pull/125885
>From e75789af053e3bab953f88916a1b7033fdbe50af Mon Sep 17 00:00:00 2001
From: Lucas Ramirez <lucas.rami at proton.me>
Date: Wed, 4 Dec 2024 15:49:33 +0100
Subject: [PATCH 01/10] Integrate #124327
---
.../llvm/CodeGen/MachineRegisterInfo.h | 4 +
llvm/lib/CodeGen/MachineRegisterInfo.cpp | 5 +
llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp | 754 +++--
llvm/lib/Target/AMDGPU/GCNSchedStrategy.h | 105 +-
llvm/lib/Target/AMDGPU/GCNSubtarget.cpp | 53 +-
llvm/lib/Target/AMDGPU/GCNSubtarget.h | 600 +---
.../Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 40 +-
llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h | 15 +
...ine-scheduler-sink-trivial-remats-attr.mir | 221 ++
...ne-scheduler-sink-trivial-remats-debug.mir | 4 +-
.../machine-scheduler-sink-trivial-remats.mir | 2752 +++++++++--------
11 files changed, 2367 insertions(+), 2186 deletions(-)
create mode 100644 llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats-attr.mir
diff --git a/llvm/include/llvm/CodeGen/MachineRegisterInfo.h b/llvm/include/llvm/CodeGen/MachineRegisterInfo.h
index 1c465741cb462..cbf0347017d81 100644
--- a/llvm/include/llvm/CodeGen/MachineRegisterInfo.h
+++ b/llvm/include/llvm/CodeGen/MachineRegisterInfo.h
@@ -23,6 +23,7 @@
#include "llvm/ADT/iterator_range.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBundle.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/RegisterBank.h"
@@ -592,6 +593,9 @@ class MachineRegisterInfo {
/// multiple uses.
bool hasOneNonDBGUser(Register RegNo) const;
+ /// If the register has a single non-Debug instruction using the specified
+ /// register, returns it; otherwise returns nullptr.
+ MachineInstr *getOneNonDBGUser(Register RegNo) const;
/// hasAtMostUses - Return true if the given register has at most \p MaxUsers
/// non-debug user instructions.
diff --git a/llvm/lib/CodeGen/MachineRegisterInfo.cpp b/llvm/lib/CodeGen/MachineRegisterInfo.cpp
index 937f63f6c5e00..b7135251781ad 100644
--- a/llvm/lib/CodeGen/MachineRegisterInfo.cpp
+++ b/llvm/lib/CodeGen/MachineRegisterInfo.cpp
@@ -432,6 +432,11 @@ bool MachineRegisterInfo::hasOneNonDBGUser(Register RegNo) const {
return hasSingleElement(use_nodbg_instructions(RegNo));
}
+MachineInstr *MachineRegisterInfo::getOneNonDBGUser(Register RegNo) const {
+ auto RegNoDbgUsers = use_nodbg_instructions(RegNo);
+ return hasSingleElement(RegNoDbgUsers) ? &*RegNoDbgUsers.begin() : nullptr;
+}
+
bool MachineRegisterInfo::hasAtMostUserInstrs(Register Reg,
unsigned MaxUsers) const {
return hasNItemsOrLess(use_instr_nodbg_begin(Reg), use_instr_nodbg_end(),
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index 176586e3fbbb6..01cbd0aefba39 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -25,8 +25,13 @@
#include "GCNSchedStrategy.h"
#include "AMDGPUIGroupLP.h"
+#include "GCNRegPressure.h"
#include "SIMachineFunctionInfo.h"
+#include "Utils/AMDGPUBaseInfo.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/MC/LaneBitmask.h"
+#include "llvm/Support/ErrorHandling.h"
#define DEBUG_TYPE "machine-scheduler"
@@ -307,11 +312,11 @@ void GCNSchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU,
HasHighPressure = true;
if (SGPRDelta > VGPRDelta) {
Cand.RPDelta.CriticalMax =
- PressureChange(AMDGPU::RegisterPressureSets::SReg_32);
+ PressureChange(AMDGPU::RegisterPressureSets::SReg_32);
Cand.RPDelta.CriticalMax.setUnitInc(SGPRDelta);
} else {
Cand.RPDelta.CriticalMax =
- PressureChange(AMDGPU::RegisterPressureSets::VGPR_32);
+ PressureChange(AMDGPU::RegisterPressureSets::VGPR_32);
Cand.RPDelta.CriticalMax.setUnitInc(VGPRDelta);
}
}
@@ -324,7 +329,7 @@ void GCNSchedStrategy::pickNodeFromQueue(SchedBoundary &Zone,
const RegPressureTracker &RPTracker,
SchedCandidate &Cand,
bool IsBottomUp) {
- const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo*>(TRI);
+ const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo *>(TRI);
ArrayRef<unsigned> Pressure = RPTracker.getRegSetPressureAtPos();
unsigned SGPRPressure = 0;
unsigned VGPRPressure = 0;
@@ -420,7 +425,7 @@ SUnit *GCNSchedStrategy::pickNodeBidirectional(bool &IsTopNode) {
pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TCand,
/*IsBottomUp=*/false);
assert(TCand.SU == TopCand.SU &&
- "Last pick result should correspond to re-picking right now");
+ "Last pick result should correspond to re-picking right now");
}
#endif
}
@@ -890,13 +895,13 @@ GCNScheduleDAGMILive::getRegionLiveInMap() const {
std::vector<MachineInstr *> RegionFirstMIs;
RegionFirstMIs.reserve(Regions.size());
auto I = Regions.rbegin(), E = Regions.rend();
- auto *BB = I->first->getParent();
do {
+ const MachineBasicBlock *MBB = I->first->getParent();
auto *MI = &*skipDebugInstructionsForward(I->first, I->second);
RegionFirstMIs.push_back(MI);
do {
++I;
- } while (I != E && I->first->getParent() == BB);
+ } while (I != E && I->first->getParent() == MBB);
} while (I != E);
return getLiveRegMap(RegionFirstMIs, /*After=*/false, *LIS);
}
@@ -1081,31 +1086,32 @@ bool ClusteredLowOccStage::initGCNSchedStage() {
return true;
}
-bool PreRARematStage::initGCNSchedStage() {
- if (!GCNSchedStage::initGCNSchedStage())
- return false;
-
- if (DAG.RegionsWithMinOcc.none() || DAG.Regions.size() == 1)
- return false;
-
- const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
- // Rematerialization will not help if occupancy is not limited by reg usage.
- if (ST.getOccupancyWithWorkGroupSizes(MF).second == DAG.MinOccupancy)
- return false;
+/// Allows to easily filter for this stage's debug output.
+#define REMAT_DEBUG(X) LLVM_DEBUG(dbgs() << "[PreRARemat] "; X;)
- // FIXME: This pass will invalidate cached MBBLiveIns for regions
- // inbetween the defs and region we sinked the def to. Cached pressure
- // for regions where a def is sinked from will also be invalidated. Will
- // need to be fixed if there is another pass after this pass.
+bool PreRARematStage::initGCNSchedStage() {
+ // FIXME: This pass will invalidate cached BBLiveInMap and MBBLiveIns for
+ // regions in between the defs and region we sunk the def to. Will need to be
+ // fixed if there is another pass after this pass.
assert(!S.hasNextStage());
- collectRematerializableInstructions();
- if (RematerializableInsts.empty() || !sinkTriviallyRematInsts(ST, TII))
+ if (!GCNSchedStage::initGCNSchedStage() || DAG.RegionsWithMinOcc.none() ||
+ DAG.Regions.size() == 1 || !canIncreaseOccupancyOrReduceSpill())
return false;
- LLVM_DEBUG(
- dbgs() << "Retrying function scheduling with improved occupancy of "
- << DAG.MinOccupancy << " from rematerializing\n");
+ // Rematerialize identified instructions and update scheduler's state.
+ rematerialize();
+ if (GCNTrackers)
+ DAG.RegionLiveOuts.buildLiveRegMap();
+ REMAT_DEBUG(
+ dbgs() << "Retrying function scheduling with new min. occupancy of "
+ << AchievedOcc << " from rematerializing (original was "
+ << DAG.MinOccupancy << ", target was " << TargetOcc << ")\n");
+ if (AchievedOcc > DAG.MinOccupancy) {
+ DAG.MinOccupancy = AchievedOcc;
+ SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
+ MFI.increaseOccupancy(MF, DAG.MinOccupancy);
+ }
return true;
}
@@ -1397,8 +1403,7 @@ GCNSchedStage::getScheduleMetrics(const std::vector<SUnit> &InputSchedule) {
#ifndef NDEBUG
LLVM_DEBUG(
printScheduleModel(ReadyCyclesSorted);
- dbgs() << "\n\t"
- << "Metric: "
+ dbgs() << "\n\t" << "Metric: "
<< (SumBubbles
? (SumBubbles * ScheduleMetrics::ScaleFactor) / CurrCycle
: 1)
@@ -1433,8 +1438,7 @@ GCNSchedStage::getScheduleMetrics(const GCNScheduleDAGMILive &DAG) {
#ifndef NDEBUG
LLVM_DEBUG(
printScheduleModel(ReadyCyclesSorted);
- dbgs() << "\n\t"
- << "Metric: "
+ dbgs() << "\n\t" << "Metric: "
<< (SumBubbles
? (SumBubbles * ScheduleMetrics::ScaleFactor) / CurrCycle
: 1)
@@ -1482,8 +1486,7 @@ bool UnclusteredHighRPStage::shouldRevertScheduling(unsigned WavesAfter) {
dbgs()
<< "\n\t *** In shouldRevertScheduling ***\n"
<< " *********** BEFORE UnclusteredHighRPStage ***********\n");
- ScheduleMetrics MBefore =
- getScheduleMetrics(DAG.SUnits);
+ ScheduleMetrics MBefore = getScheduleMetrics(DAG.SUnits);
LLVM_DEBUG(
dbgs()
<< "\n *********** AFTER UnclusteredHighRPStage ***********\n");
@@ -1516,13 +1519,9 @@ bool ClusteredLowOccStage::shouldRevertScheduling(unsigned WavesAfter) {
}
bool PreRARematStage::shouldRevertScheduling(unsigned WavesAfter) {
- if (GCNSchedStage::shouldRevertScheduling(WavesAfter))
- return true;
-
- if (mayCauseSpilling(WavesAfter))
- return true;
-
- return false;
+ return GCNSchedStage::shouldRevertScheduling(WavesAfter) ||
+ mayCauseSpilling(WavesAfter) ||
+ (IncreaseOccupancy && WavesAfter < TargetOcc);
}
bool ILPInitialScheduleStage::shouldRevertScheduling(unsigned WavesAfter) {
@@ -1673,160 +1672,235 @@ bool PreRARematStage::allUsesAvailableAt(const MachineInstr *InstToRemat,
return true;
}
-void PreRARematStage::collectRematerializableInstructions() {
+bool PreRARematStage::canIncreaseOccupancyOrReduceSpill() {
const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo *>(DAG.TRI);
- for (unsigned I = 0, E = DAG.MRI.getNumVirtRegs(); I != E; ++I) {
- Register Reg = Register::index2VirtReg(I);
- if (!DAG.LIS->hasInterval(Reg))
- continue;
-
- // TODO: Handle AGPR and SGPR rematerialization
- if (!SRI->isVGPRClass(DAG.MRI.getRegClass(Reg)) ||
- !DAG.MRI.hasOneDef(Reg) || !DAG.MRI.hasOneNonDBGUse(Reg))
- continue;
-
- MachineOperand *Op = DAG.MRI.getOneDef(Reg);
- MachineInstr *Def = Op->getParent();
- if (Op->getSubReg() != 0 || !isTriviallyReMaterializable(*Def))
- continue;
-
- MachineInstr *UseI = &*DAG.MRI.use_instr_nodbg_begin(Reg);
- if (Def->getParent() == UseI->getParent())
- continue;
- bool HasRematDependency = false;
- // Check if this instruction uses any registers that are planned to be
- // rematerialized
- for (auto &RematEntry : RematerializableInsts) {
- if (find_if(RematEntry.second,
- [&Def](std::pair<MachineInstr *, MachineInstr *> &Remat) {
- for (MachineOperand &MO : Def->operands()) {
- if (!MO.isReg())
- continue;
- if (MO.getReg() == Remat.first->getOperand(0).getReg())
- return true;
- }
- return false;
- }) != RematEntry.second.end()) {
- HasRematDependency = true;
- break;
+ REMAT_DEBUG(dbgs() << "Collecting rematerializable instructions in "
+ << MF.getFunction().getName() << '\n');
+
+ // Maps optimizable regions (i.e., regions at minimum and VGPR-limited
+ // occupancy, or regions with VGPR spilling) to their excess RP.
+ DenseMap<unsigned, unsigned> OptRegions;
+
+ // Note that the maximum number of VGPRs to use to eliminate spill may be
+ // lower than the maximum number to increase occupancy when the function has
+ // the "amdgpu-num-vgpr" attribute.
+ const std::pair<unsigned, unsigned> OccBounds =
+ ST.getOccupancyWithWorkGroupSizes(MF);
+ // FIXME: we should be able to just call ST.getMaxNumArchVGPRs() but that
+ // would use the occupancy bounds as determined by
+ // MF.getFunction().getWavesPerEU(), which look incorrect in some cases.
+ const unsigned MaxVGPRsNoSpill =
+ ST.getBaseMaxNumVGPRs(MF.getFunction(),
+ {ST.getMinNumArchVGPRs(OccBounds.second),
+ ST.getMaxNumArchVGPRs(OccBounds.first)},
+ false);
+ const unsigned MaxVGPRsIncOcc = ST.getMaxNumArchVGPRs(DAG.MinOccupancy + 1);
+ IncreaseOccupancy = OccBounds.second > DAG.MinOccupancy;
+
+ // Collect optimizable regions. If there is spilling in any region we will
+ // just try to reduce it. Otherwise we will try to increase occupancy by one.
+ for (unsigned I = 0, E = DAG.Regions.size(); I != E; ++I) {
+ GCNRegPressure &RP = DAG.Pressure[I];
+ unsigned NumVGPRs = RP.getArchVGPRNum();
+ unsigned ExcessRP = 0;
+ if (NumVGPRs > MaxVGPRsNoSpill) {
+ if (IncreaseOccupancy) {
+ // We won't try to increase occupancy.
+ IncreaseOccupancy = false;
+ OptRegions.clear();
}
- }
- // Do not rematerialize an instruction if it uses an instruction that we
- // have designated for rematerialization.
- // FIXME: Allow for rematerialization chains: this requires 1. updating
- // remat points to account for uses that are rematerialized, and 2. either
- // rematerializing the candidates in careful ordering, or deferring the MBB
- // RP walk until the entire chain has been rematerialized.
- if (HasRematDependency)
- continue;
-
- // Similarly, check if the UseI is planned to be remat.
- for (auto &RematEntry : RematerializableInsts) {
- if (find_if(RematEntry.second,
- [&UseI](std::pair<MachineInstr *, MachineInstr *> &Remat) {
- return Remat.first == UseI;
- }) != RematEntry.second.end()) {
- HasRematDependency = true;
- break;
- }
- }
-
- if (HasRematDependency)
- break;
-
- // We are only collecting defs that are defined in another block and are
- // live-through or used inside regions at MinOccupancy. This means that the
- // register must be in the live-in set for the region.
- bool AddedToRematList = false;
- for (unsigned I = 0, E = DAG.Regions.size(); I != E; ++I) {
- auto It = DAG.LiveIns[I].find(Reg);
- if (It != DAG.LiveIns[I].end() && !It->second.none()) {
- if (DAG.RegionsWithMinOcc[I]) {
- SlotIndex DefIdx = DAG.LIS->getInstructionIndex(*Def);
- SlotIndex UseIdx =
- DAG.LIS->getInstructionIndex(*UseI).getRegSlot(true);
- if (allUsesAvailableAt(Def, DefIdx, UseIdx)) {
- RematerializableInsts[I][Def] = UseI;
- AddedToRematList = true;
- }
- }
-
- // Collect regions with rematerializable reg as live-in to avoid
- // searching later when updating RP.
- RematDefToLiveInRegions[Def].push_back(I);
+ // Region has VGPR spilling, we will try to reduce spilling as much as
+ // possible.
+ ExcessRP = NumVGPRs - MaxVGPRsNoSpill;
+ REMAT_DEBUG(dbgs() << "Region " << I << " is spilling VGPRs, save "
+ << ExcessRP << " VGPR(s) to eliminate spilling\n");
+ } else if (IncreaseOccupancy) {
+ if (ST.getOccupancyWithNumSGPRs(RP.getSGPRNum()) == DAG.MinOccupancy) {
+ // Occupancy is SGPR-limited in the region, no point in trying to
+ // increase it through VGPR usage.
+ IncreaseOccupancy = false;
+ OptRegions.clear();
+ } else if (NumVGPRs > MaxVGPRsIncOcc) {
+ // Occupancy is VGPR-limited.
+ ExcessRP = NumVGPRs - MaxVGPRsIncOcc;
+ REMAT_DEBUG(dbgs() << "Region " << I << " has min. occupancy: save "
+ << ExcessRP << " VGPR(s) to improve occupancy\n");
}
}
- if (!AddedToRematList)
- RematDefToLiveInRegions.erase(Def);
+ if (ExcessRP)
+ OptRegions.insert({I, ExcessRP});
}
-}
+ if (OptRegions.empty())
+ return false;
-bool PreRARematStage::sinkTriviallyRematInsts(const GCNSubtarget &ST,
- const TargetInstrInfo *TII) {
- // Temporary copies of cached variables we will be modifying and replacing if
- // sinking succeeds.
- SmallVector<
- std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator>, 32>
- NewRegions;
- DenseMap<unsigned, GCNRPTracker::LiveRegSet> NewLiveIns;
- DenseMap<unsigned, GCNRegPressure> NewPressure;
- BitVector NewRescheduleRegions;
- LiveIntervals *LIS = DAG.LIS;
+ // When we are reducing spilling, the target is the minimum achievable
+ // occupancy implied by workgroup sizes.
+ TargetOcc = IncreaseOccupancy ? DAG.MinOccupancy + 1 : OccBounds.first;
+
+ // Accounts for a reduction in RP in an optimizable region. Returns whether we
+ // estimate that we have identified enough rematerialization opportunities to
+ // achieve our goal.
+ auto ReduceRPInRegion = [&](auto OptIt, LaneBitmask Mask) -> bool {
+ auto NumRegs = SIRegisterInfo::getNumCoveredRegs(Mask);
+ unsigned I = OptIt->getFirst();
+ unsigned &Excess = OptIt->getSecond();
+ if (NumRegs >= Excess)
+ OptRegions.erase(I);
+ else
+ Excess -= NumRegs;
+ return OptRegions.empty();
+ };
+
+ // We need up-to-date live-out info. to query live-out register masks in
+ // regions containing rematerializable instructions.
+ DAG.RegionLiveOuts.buildLiveRegMap();
+
+ // Cache set of registers that are going to be rematerialized.
+ DenseSet<unsigned> RematRegs;
+
+ // Identify rematerializable instructions in the function.
+ for (unsigned I = 0, E = DAG.Regions.size(); I != E; ++I) {
+ auto Region = DAG.Regions[I];
+ for (auto MI = Region.first; MI != Region.second; ++MI) {
+ // The instruction must be trivially rematerializable.
+ MachineInstr &DefMI = *MI;
+ if (!isTriviallyReMaterializable(DefMI))
+ continue;
- NewRegions.resize(DAG.Regions.size());
- NewRescheduleRegions.resize(DAG.Regions.size());
+ // We only support rematerializing virtual VGPRs with one definition.
+ Register Reg = DefMI.getOperand(0).getReg();
+ if (!Reg.isVirtual() || !DAG.LIS->hasInterval(Reg) ||
+ !SRI->isVGPRClass(DAG.MRI.getRegClass(Reg)) ||
+ !DAG.MRI.hasOneDef(Reg))
+ continue;
- // Collect only regions that has a rematerializable def as a live-in.
- SmallSet<unsigned, 16> ImpactedRegions;
- for (const auto &It : RematDefToLiveInRegions)
- ImpactedRegions.insert(It.second.begin(), It.second.end());
+ // We only care to rematerialize the instruction if it has a single
+ // non-debug user in a different block.
+ MachineInstr *UseMI = DAG.MRI.getOneNonDBGUser(Reg);
+ if (!UseMI || DefMI.getParent() == UseMI->getParent())
+ continue;
- // Make copies of register pressure and live-ins cache that will be updated
- // as we rematerialize.
- for (auto Idx : ImpactedRegions) {
- NewPressure[Idx] = DAG.Pressure[Idx];
- NewLiveIns[Idx] = DAG.LiveIns[Idx];
- }
- NewRegions = DAG.Regions;
- NewRescheduleRegions.reset();
+ // Do not rematerialize an instruction if it uses or is used by an
+ // instruction that we have designated for rematerialization.
+ // FIXME: Allow for rematerialization chains: this requires 1. updating
+ // remat points to account for uses that are rematerialized, and 2. either
+ // rematerializing the candidates in careful ordering, or deferring the
+ // MBB RP walk until the entire chain has been rematerialized.
+ if (Rematerializations.contains(UseMI) ||
+ llvm::any_of(DefMI.operands(), [&RematRegs](MachineOperand &MO) {
+ return MO.isReg() && RematRegs.contains(MO.getReg());
+ }))
+ continue;
- DenseMap<MachineInstr *, MachineInstr *> InsertedMIToOldDef;
- bool Improved = false;
- for (auto I : ImpactedRegions) {
- if (!DAG.RegionsWithMinOcc[I])
- continue;
+ // Do not rematerialize an instruction if it uses registers that aren't
+ // available at its use. This ensures that we are not extending any live
+ // range while rematerializing.
+ SlotIndex DefIdx = DAG.LIS->getInstructionIndex(DefMI);
+ SlotIndex UseIdx = DAG.LIS->getInstructionIndex(*UseMI).getRegSlot(true);
+ if (!allUsesAvailableAt(&DefMI, DefIdx, UseIdx))
+ continue;
- Improved = false;
- int VGPRUsage = NewPressure[I].getVGPRNum(ST.hasGFX90AInsts());
- int SGPRUsage = NewPressure[I].getSGPRNum();
+ REMAT_DEBUG(dbgs() << "Region " << I << ": remat instruction " << DefMI);
+ RematInstruction &Remat =
+ Rematerializations.try_emplace(&DefMI, I, UseMI).first->second;
+
+ bool RematUseful = false;
+ if (auto It = OptRegions.find(I); It != OptRegions.end()) {
+ // Optimistically consider that moving the instruction out of its
+ // defining region will reduce RP in the latter; this assumes that
+ // maximum RP in the region is reached somewhere between the defining
+ // instruction and the end of the region.
+ REMAT_DEBUG(dbgs() << " Defining region is optimizable\n");
+ RematUseful = true;
+ LaneBitmask Mask = DAG.RegionLiveOuts.getLiveRegsForRegionIdx(I)[Reg];
+ if (ReduceRPInRegion(It, Mask))
+ return true;
+ }
- // TODO: Handle occupancy drop due to AGPR and SGPR.
- // Check if cause of occupancy drop is due to VGPR usage and not SGPR.
- if (ST.getOccupancyWithNumSGPRs(SGPRUsage) == DAG.MinOccupancy)
- break;
+ for (unsigned LIRegion = 0; LIRegion != E; ++LIRegion) {
+ // We are only collecting regions in which the register is a live-in
+ // (and may be live-through).
+ auto It = DAG.LiveIns[LIRegion].find(Reg);
+ if (It == DAG.LiveIns[LIRegion].end() || It->second.none())
+ continue;
+ Remat.LiveInRegions.insert(LIRegion);
+
+ // Account for the reduction in RP due to the rematerialization in an
+ // optimizable region in which the defined register is a live-in. This
+ // is exact for live-through region but optimistic in the using region,
+ // where RP is actually reduced only if maximum RP is reached somewhere
+ // between the beginning of the region and the rematerializable
+ // instruction's use.
+ if (auto It = OptRegions.find(LIRegion); It != OptRegions.end()) {
+ REMAT_DEBUG(dbgs() << " Live-in in region " << LIRegion << '\n');
+ RematUseful = true;
+ if (ReduceRPInRegion(It, DAG.LiveIns[LIRegion][Reg]))
+ return true;
+ }
+ }
- // The occupancy of this region could have been improved by a previous
- // iteration's sinking of defs.
- if (NewPressure[I].getOccupancy(ST) > DAG.MinOccupancy) {
- NewRescheduleRegions[I] = true;
- Improved = true;
- continue;
+ // If the instruction is not a live-in or live-out in any optimizable
+ // region then there is no point in rematerializing it.
+ if (!RematUseful) {
+ Rematerializations.pop_back();
+ REMAT_DEBUG(dbgs() << " No impact, not rematerializing instruction\n");
+ } else {
+ RematRegs.insert(Reg);
+ }
}
+ }
+
+ if (IncreaseOccupancy) {
+ // We were trying to increase occupancy but failed, abort the stage.
+ REMAT_DEBUG(dbgs() << "Cannot increase occupancy\n");
+ Rematerializations.clear();
+ return false;
+ }
+ REMAT_DEBUG(dbgs() << "Can reduce but not eliminate spilling\n");
+ return !Rematerializations.empty();
+}
+
+void PreRARematStage::rematerialize() {
+ const auto *TII =
+ static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo());
+
+ // Collect regions whose RP changes in unpredictable way; we will have to
+ // fully recompute their RP after all rematerializations.
+ DenseSet<unsigned> RecomputeRP;
+ SlotIndexes *Slots = DAG.LIS->getSlotIndexes();
+
+ // Rematerialize all instructions.
+ for (auto &[DefMI, Remat] : Rematerializations) {
+ MachineBasicBlock::iterator InsertPos(Remat.UseMI);
+ Register Reg = DefMI->getOperand(0).getReg();
+ unsigned SubReg = DefMI->getOperand(0).getSubReg();
+
+ // Rematerialize DefMI to its use block.
+ TII->reMaterialize(*InsertPos->getParent(), InsertPos, Reg, SubReg, *DefMI,
+ *DAG.TRI);
+ Remat.RematMI = &*std::prev(InsertPos);
+ Remat.RematMI->getOperand(0).setSubReg(SubReg);
+ DAG.LIS->InsertMachineInstrInMaps(*Remat.RematMI);
+
+ // Update region boundaries in regions we sunk from (remove defining MI)
+ // and to (insert MI rematerialized in use block). Only then we can erase
+ // the original MI.
+ DAG.updateRegionBoundaries(DAG.Regions, DefMI, nullptr);
+ DAG.updateRegionBoundaries(DAG.Regions, InsertPos, Remat.RematMI);
+ DefMI->eraseFromParent();
+ DAG.LIS->RemoveMachineInstrFromMaps(*DefMI);
+
+ // Collect all regions impacted by the rematerialization and update their
+ // live-in/RP information.
+ for (unsigned I : Remat.LiveInRegions) {
+ ImpactedRegions.insert({I, DAG.Pressure[I]});
+ GCNRPTracker::LiveRegSet &RegionLiveIns = DAG.LiveIns[I];
- // First check if we have enough trivially rematerializable instructions to
- // improve occupancy. Optimistically assume all instructions we are able to
- // sink decreased RP.
- int TotalSinkableRegs = 0;
- for (const auto &It : RematerializableInsts[I]) {
- MachineInstr *Def = It.first;
- Register DefReg = Def->getOperand(0).getReg();
- TotalSinkableRegs +=
- SIRegisterInfo::getNumCoveredRegs(NewLiveIns[I][DefReg]);
#ifdef EXPENSIVE_CHECKS
// All uses are known to be available / live at the remat point. Thus, the
// uses should already be live in to the region.
- for (MachineOperand &MO : Def->operands()) {
+ for (MachineOperand &MO : DefMI->operands()) {
if (!MO.isReg() || !MO.getReg() || !MO.readsReg())
continue;
@@ -1834,13 +1908,13 @@ bool PreRARematStage::sinkTriviallyRematInsts(const GCNSubtarget &ST,
if (!UseReg.isVirtual())
continue;
- LiveInterval &LI = LIS->getInterval(UseReg);
+ LiveInterval &LI = DAG.LIS->getInterval(UseReg);
LaneBitmask LM = DAG.MRI.getMaxLaneMaskForVReg(MO.getReg());
if (LI.hasSubRanges() && MO.getSubReg())
LM = DAG.TRI->getSubRegIndexLaneMask(MO.getSubReg());
- assert(NewLiveIns[I].contains(UseReg));
- LaneBitmask LiveInMask = NewLiveIns[I][UseReg];
+ assert(RegionLiveIns.contains(UseReg));
+ LaneBitmask LiveInMask = RegionLiveIns[UseReg];
LaneBitmask UncoveredLanes = LM & ~(LiveInMask & LM);
// If this register has lanes not covered by the LiveIns, be sure they
// do not map to any subrange. ref:
@@ -1852,130 +1926,104 @@ bool PreRARematStage::sinkTriviallyRematInsts(const GCNSubtarget &ST,
}
}
#endif
- }
- int VGPRsAfterSink = VGPRUsage - TotalSinkableRegs;
- unsigned OptimisticOccupancy = ST.getOccupancyWithNumVGPRs(VGPRsAfterSink);
- // If in the most optimistic scenario, we cannot improve occupancy, then do
- // not attempt to sink any instructions.
- if (OptimisticOccupancy <= DAG.MinOccupancy)
- break;
- unsigned ImproveOccupancy = 0;
- SmallVector<MachineInstr *, 4> SinkedDefs;
- for (auto &It : RematerializableInsts[I]) {
- MachineInstr *Def = It.first;
- MachineBasicBlock::iterator InsertPos =
- MachineBasicBlock::iterator(It.second);
- Register Reg = Def->getOperand(0).getReg();
- // Rematerialize MI to its use block.
- TII->reMaterialize(*InsertPos->getParent(), InsertPos, Reg,
- Def->getOperand(0).getSubReg(), *Def, *DAG.TRI);
- MachineInstr *NewMI = &*std::prev(InsertPos);
- LIS->InsertMachineInstrInMaps(*NewMI);
- LIS->removeInterval(Reg);
- LIS->createAndComputeVirtRegInterval(Reg);
- InsertedMIToOldDef[NewMI] = Def;
-
- // Update region boundaries in scheduling region we sinked from since we
- // may sink an instruction that was at the beginning or end of its region
- DAG.updateRegionBoundaries(NewRegions, Def, /*NewMI =*/nullptr,
- /*Removing =*/true);
-
- // Update region boundaries in region we sinked to.
- DAG.updateRegionBoundaries(NewRegions, InsertPos, NewMI);
-
- LaneBitmask PrevMask = NewLiveIns[I][Reg];
- // FIXME: Also update cached pressure for where the def was sinked from.
- // Update RP for all regions that has this reg as a live-in and remove
- // the reg from all regions as a live-in.
- for (auto Idx : RematDefToLiveInRegions[Def]) {
- NewLiveIns[Idx].erase(Reg);
- if (InsertPos->getParent() != DAG.Regions[Idx].first->getParent()) {
- // Def is live-through and not used in this block.
- NewPressure[Idx].inc(Reg, PrevMask, LaneBitmask::getNone(), DAG.MRI);
- } else {
- // Def is used and rematerialized into this block.
- GCNDownwardRPTracker RPT(*LIS);
- auto *NonDbgMI = &*skipDebugInstructionsForward(
- NewRegions[Idx].first, NewRegions[Idx].second);
- RPT.reset(*NonDbgMI, &NewLiveIns[Idx]);
- RPT.advance(NewRegions[Idx].second);
- NewPressure[Idx] = RPT.moveMaxPressure();
- }
- }
-
- SinkedDefs.push_back(Def);
- ImproveOccupancy = NewPressure[I].getOccupancy(ST);
- if (ImproveOccupancy > DAG.MinOccupancy)
- break;
+ // The register is no longer a live-in in all regions but the one that
+ // contains the single use. In live-through regions, maximum register
+ // pressure decreases predictably so we can directly update it. In the
+ // using region, maximum RP may or may not decrease, so we will mark it
+ // for re-computation after all materializations have taken place.
+ LaneBitmask PrevMask = RegionLiveIns[Reg];
+ RegionLiveIns.erase(Reg);
+ RegMasks.insert({{I, Remat.RematMI->getOperand(0).getReg()}, PrevMask});
+ if (Remat.UseMI->getParent() != DAG.Regions[I].first->getParent())
+ DAG.Pressure[I].inc(Reg, PrevMask, LaneBitmask::getNone(), DAG.MRI);
+ else
+ RecomputeRP.insert(I);
}
-
- // Remove defs we just sinked from all regions' list of sinkable defs
- for (auto &Def : SinkedDefs)
- for (auto TrackedIdx : RematDefToLiveInRegions[Def])
- RematerializableInsts[TrackedIdx].erase(Def);
-
- if (ImproveOccupancy <= DAG.MinOccupancy)
- break;
-
- NewRescheduleRegions[I] = true;
- Improved = true;
- }
-
- if (!Improved) {
- // Occupancy was not improved for all regions that were at MinOccupancy.
- // Undo sinking and remove newly rematerialized instructions.
- for (auto &Entry : InsertedMIToOldDef) {
- MachineInstr *MI = Entry.first;
- MachineInstr *OldMI = Entry.second;
- Register Reg = MI->getOperand(0).getReg();
- LIS->RemoveMachineInstrFromMaps(*MI);
- MI->eraseFromParent();
- OldMI->clearRegisterDeads(Reg);
- LIS->removeInterval(Reg);
- LIS->createAndComputeVirtRegInterval(Reg);
+ // RP in the region from which the instruction was rematerialized may or may
+ // not decrease.
+ ImpactedRegions.insert({Remat.DefRegion, DAG.Pressure[Remat.DefRegion]});
+ RecomputeRP.insert(Remat.DefRegion);
+
+ // Update the register's live interval manually. This is aligned with the
+ // instruction collection phase in that it assumes a single def and use
+ // (possibly subreg).
+ SlotIndex NewDef = Slots->getInstructionIndex(*Remat.RematMI).getRegSlot();
+ SlotIndex NewKill = Slots->getInstructionIndex(*Remat.UseMI).getRegSlot();
+
+ // Update the live range to reflect the register's rematerialization. We
+ // will have a single segment (we only remat regs with one use) and at most
+ // a single value number (we only remat virtual regs with a single def).
+ auto UpdateLiveRange = [&](LiveRange &LR) -> void {
+ assert(LR.segments.size() && "expected at least one segment");
+ assert(LR.valnos.size() < 2 && "expected at most one value number");
+
+ // Fix up the first segment and remove the others which have become
+ // unnecessary by construction.
+ LR.segments.truncate(1);
+ LiveRange::Segment &Seg = LR.segments.front();
+ Seg.start = NewDef;
+ Seg.end = NewKill;
+ LR.valnos.clear();
+ if (Seg.valno) {
+ // If present, update the segment's value number as well.
+ Seg.valno->def = NewDef;
+ LR.valnos.push_back(Seg.valno);
+ }
+ };
+
+ Register RematReg = Remat.RematMI->getOperand(0).getReg();
+ LiveInterval &RegLI = DAG.LIS->getInterval(RematReg);
+ UpdateLiveRange(RegLI);
+ if (RegLI.hasSubRanges()) {
+ LiveInterval::SubRange &SubRange = *RegLI.subrange_begin();
+ assert(!SubRange.Next && "expected at most one subrange");
+ UpdateLiveRange(SubRange);
}
- return false;
}
- // Occupancy was improved for all regions.
- for (auto &Entry : InsertedMIToOldDef) {
- MachineInstr *MI = Entry.first;
- MachineInstr *OldMI = Entry.second;
-
- // Remove OldMI from BBLiveInMap since we are sinking it from its MBB.
- DAG.BBLiveInMap.erase(OldMI);
-
- // Remove OldMI and update LIS
- Register Reg = MI->getOperand(0).getReg();
- LIS->RemoveMachineInstrFromMaps(*OldMI);
- OldMI->eraseFromParent();
- LIS->removeInterval(Reg);
- LIS->createAndComputeVirtRegInterval(Reg);
- }
+ // All regions impacted by at least one rematerialization must be rescheduled.
+ // Maximum pressure must also be recomputed for all regions where it changed
+ // non-predictably and checked against the target occupancy.
+ AchievedOcc = TargetOcc;
+ for (auto &[I, OriginalRP] : ImpactedRegions) {
+ bool IsEmptyRegion = DAG.Regions[I].first == DAG.Regions[I].second;
+ DAG.RescheduleRegions[I] = !IsEmptyRegion;
+ if (!RecomputeRP.contains(I))
+ continue;
- // Update live-ins, register pressure, and regions caches.
- for (auto Idx : ImpactedRegions) {
- DAG.LiveIns[Idx] = NewLiveIns[Idx];
- DAG.Pressure[Idx] = NewPressure[Idx];
- DAG.MBBLiveIns.erase(DAG.Regions[Idx].first->getParent());
+ GCNRegPressure RP;
+ if (IsEmptyRegion) {
+ RP = getRegPressure(DAG.MRI, DAG.LiveIns[I]);
+ } else {
+ GCNDownwardRPTracker RPT(*DAG.LIS);
+ auto *NonDbgMI = &*skipDebugInstructionsForward(DAG.Regions[I].first,
+ DAG.Regions[I].second);
+ if (NonDbgMI == DAG.Regions[I].second) {
+ // Region is non-empty but contains only debug instructions.
+ RP = getRegPressure(DAG.MRI, DAG.LiveIns[I]);
+ } else {
+ RPT.reset(*NonDbgMI, &DAG.LiveIns[I]);
+ RPT.advance(DAG.Regions[I].second);
+ RP = RPT.moveMaxPressure();
+ }
+ }
+ DAG.Pressure[I] = RP;
+ AchievedOcc = std::min(AchievedOcc, RP.getOccupancy(ST));
}
- DAG.Regions = NewRegions;
- DAG.RescheduleRegions = NewRescheduleRegions;
-
- if (GCNTrackers)
- DAG.RegionLiveOuts.buildLiveRegMap();
-
- SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
- MFI.increaseOccupancy(MF, ++DAG.MinOccupancy);
-
- return true;
+ REMAT_DEBUG(dbgs() << "Achieved occupancy " << AchievedOcc << "\n");
}
+// Copied from MachineLICM
bool PreRARematStage::isTriviallyReMaterializable(const MachineInstr &MI) {
if (!DAG.TII->isTriviallyReMaterializable(MI))
return false;
+ // Even though TargetInstrInfo::isReallyTriviallyReMaterializable already
+ // ensures that the instruction has no virtual register uses,
+ // SIInstrInfo::isReallyTriviallyReMaterializable may consider an instruction
+ // rematerializable and return before calling its parent's method, so we need
+ // to double-check here.
for (const MachineOperand &MO : MI.all_uses()) {
// We can't remat physreg uses, unless it is a constant or an ignorable
// use (e.g. implicit exec use on VALU instructions)
@@ -1989,45 +2037,115 @@ bool PreRARematStage::isTriviallyReMaterializable(const MachineInstr &MI) {
return true;
}
-// When removing, we will have to check both beginning and ending of the region.
-// When inserting, we will only have to check if we are inserting NewMI in front
-// of a scheduling region and do not need to check the ending since we will only
-// ever be inserting before an already existing MI.
+/// Identifies the parent MBB of a \p Region. For an empty region, this runs in
+/// linear time in the number of MBBs in \p MF; otherwise it runs in constant
+/// time.
+static MachineBasicBlock *getRegionMBB(MachineFunction &MF,
+ RegionBoundaries &Region) {
+ if (Region.first != Region.second)
+ return Region.first->getParent();
+ // The boundaries of an empty region may not point to a valid MI from which we
+ // can get the parent MBB, so we have to iterate over all the MF's MBBs.
+ for (MachineBasicBlock &MBB : MF)
+ if (MBB.end() == Region.second)
+ return &MBB;
+ llvm_unreachable("region's parent MBB cannot be identified");
+}
+
+void PreRARematStage::finalizeGCNSchedStage() {
+ // We consider that reducing spilling is always beneficial so we never
+ // rollback rematerializations in such cases. It's also possible that
+ // rescheduling lowers occupancy over the one achieved just through remats, in
+ // which case we do not want to rollback either.
+ if (!IncreaseOccupancy || AchievedOcc == TargetOcc)
+ return;
+
+ REMAT_DEBUG(dbgs() << "Rolling back all rematerializations\n");
+ const auto *TII =
+ static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo());
+
+ // Rollback the rematerializations.
+ for (const auto &[_, Remat] : Rematerializations) {
+ MachineInstr &RematMI = *Remat.RematMI;
+ MachineBasicBlock::iterator InsertPos(DAG.Regions[Remat.DefRegion].second);
+ MachineBasicBlock *MBB = getRegionMBB(MF, DAG.Regions[Remat.DefRegion]);
+ Register Reg = RematMI.getOperand(0).getReg();
+ unsigned SubReg = RematMI.getOperand(0).getSubReg();
+
+ // Re-rematerialize MI at the end of its original region. Note that it may
+ // not be rematerialized exactly in the same position as originally within
+ // the region, but it should not matter much. Since we are only
+ // rematerializing instructions that do not have any virtual reg uses, we
+ // do not need to call LiveRangeEdit::allUsesAvailableAt() and
+ // LiveRangeEdit::canRematerializeAt().
+ TII->reMaterialize(*MBB, InsertPos, Reg, SubReg, RematMI, *DAG.TRI);
+ MachineInstr *NewMI = &*std::prev(InsertPos);
+ NewMI->getOperand(0).setSubReg(SubReg);
+ DAG.LIS->InsertMachineInstrInMaps(*NewMI);
+
+ // Erase rematerialized MI. It must be removed from the LIS maps before
+ // being erased from its parent, since it is not valid afterwards.
+ DAG.LIS->RemoveMachineInstrFromMaps(RematMI);
+ RematMI.eraseFromParent();
+
+ // Recompute live interval for the re-rematerialized register
+ DAG.LIS->removeInterval(Reg);
+ DAG.LIS->createAndComputeVirtRegInterval(Reg);
+
+ // Re-add the register as a live-in in all regions it used to be one in.
+ for (unsigned LIRegion : Remat.LiveInRegions)
+ DAG.LiveIns[LIRegion].insert({Reg, RegMasks.at({LIRegion, Reg})});
+ }
+
+ // Reset RP in all impacted regions.
+ for (auto &[I, OriginalRP] : ImpactedRegions)
+ DAG.Pressure[I] = OriginalRP;
+
+ GCNSchedStage::finalizeGCNSchedStage();
+}
+
void GCNScheduleDAGMILive::updateRegionBoundaries(
- SmallVectorImpl<std::pair<MachineBasicBlock::iterator,
- MachineBasicBlock::iterator>> &RegionBoundaries,
- MachineBasicBlock::iterator MI, MachineInstr *NewMI, bool Removing) {
+ SmallVectorImpl<RegionBoundaries> &RegionBoundaries,
+ MachineBasicBlock::iterator MI, MachineInstr *NewMI) {
unsigned I = 0, E = RegionBoundaries.size();
- // Search for first region of the block where MI is located
- while (I != E && MI->getParent() != RegionBoundaries[I].first->getParent())
+ // Search for first region of the block where MI is located. We may encounter
+ // an empty region if all instructions from an initially non-empty region were
+ // removed.
+ while (I != E && RegionBoundaries[I].first != RegionBoundaries[I].second &&
+ MI->getParent() != RegionBoundaries[I].first->getParent())
++I;
for (; I != E; ++I) {
- if (MI->getParent() != RegionBoundaries[I].first->getParent())
+ auto &Bounds = RegionBoundaries[I];
+ assert(MI != Bounds.second && "cannot insert at region end");
+ assert((!NewMI || NewMI != Bounds.second) && "cannot remove at region end");
+
+ // We may encounter an empty region if all of the region's instructions were
+ // previously removed.
+ if (Bounds.first == Bounds.second) {
+ if (MI->getParent()->end() != Bounds.second)
+ return;
+ continue;
+ }
+ if (MI->getParent() != Bounds.first->getParent())
return;
- if (Removing && MI == RegionBoundaries[I].first &&
- MI == RegionBoundaries[I].second) {
- // MI is in a region with size 1, after removing, the region will be
- // size 0, set RegionBegin and RegionEnd to pass end of block iterator.
- RegionBoundaries[I] =
- std::pair(MI->getParent()->end(), MI->getParent()->end());
- return;
- }
- if (MI == RegionBoundaries[I].first) {
- if (Removing)
- RegionBoundaries[I] =
- std::pair(std::next(MI), RegionBoundaries[I].second);
+ // We only care for modifications at the beginning of the region since the
+ // upper region boundary is exclusive.
+ if (MI != Bounds.first)
+ continue;
+ if (!NewMI) {
+ // This is an MI removal, which may leave the region empty; in such cases
+ // set both boundaries to the removed instruction's MBB's end.
+ MachineBasicBlock::iterator NextMI = std::next(MI);
+ if (NextMI != Bounds.second)
+ Bounds.first = NextMI;
else
- // Inserted NewMI in front of region, set new RegionBegin to NewMI
- RegionBoundaries[I] = std::pair(MachineBasicBlock::iterator(NewMI),
- RegionBoundaries[I].second);
- return;
- }
- if (Removing && MI == RegionBoundaries[I].second) {
- RegionBoundaries[I] = std::pair(RegionBoundaries[I].first, std::prev(MI));
- return;
+ Bounds.first = Bounds.second;
+ } else {
+ // This is an MI insertion at the beginning of the region.
+ Bounds.first = NewMI;
}
+ return;
}
}
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
index 7d3e63df43da6..2d6440b1b0730 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
@@ -14,7 +14,9 @@
#define LLVM_LIB_TARGET_AMDGPU_GCNSCHEDSTRATEGY_H
#include "GCNRegPressure.h"
+#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/MapVector.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineScheduler.h"
namespace llvm {
@@ -214,6 +216,11 @@ class RegionPressureMap {
}
};
+/// A region's boundaries i.e. a pair of instruction bundle iterators. The lower
+/// boundary is inclusive, the upper boundary is exclusive.
+using RegionBoundaries =
+ std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator>;
+
class GCNScheduleDAGMILive final : public ScheduleDAGMILive {
friend class GCNSchedStage;
friend class OccInitialScheduleStage;
@@ -234,8 +241,7 @@ class GCNScheduleDAGMILive final : public ScheduleDAGMILive {
unsigned MinOccupancy;
// Vector of regions recorder for later rescheduling
- SmallVector<std::pair<MachineBasicBlock::iterator,
- MachineBasicBlock::iterator>, 32> Regions;
+ SmallVector<RegionBoundaries, 32> Regions;
// Records if a region is not yet scheduled, or schedule has been reverted,
// or we generally desire to reschedule it.
@@ -286,12 +292,14 @@ class GCNScheduleDAGMILive final : public ScheduleDAGMILive {
// Compute and cache live-ins and pressure for all regions in block.
void computeBlockPressure(unsigned RegionIdx, const MachineBasicBlock *MBB);
- // Update region boundaries when removing MI or inserting NewMI before MI.
- void updateRegionBoundaries(
- SmallVectorImpl<std::pair<MachineBasicBlock::iterator,
- MachineBasicBlock::iterator>> &RegionBoundaries,
- MachineBasicBlock::iterator MI, MachineInstr *NewMI,
- bool Removing = false);
+ /// If necessary, updates a region's boundaries following insertion ( \p NewMI
+ /// != nullptr) or removal ( \p NewMI == nullptr) of a \p MI in the region.
+ /// For an MI removal, this must be called before the MI is actually erased
+ /// from its parent MBB. If a region is left empty by a removal, both
+ /// boundaries are set to the last removed MI's MBB's end.
+ void
+ updateRegionBoundaries(SmallVectorImpl<RegionBoundaries> &RegionBoundaries,
+ MachineBasicBlock::iterator MI, MachineInstr *NewMI);
void runSchedStages();
@@ -431,30 +439,71 @@ class ClusteredLowOccStage : public GCNSchedStage {
: GCNSchedStage(StageID, DAG) {}
};
+/// Attempts to reduce function spilling or, if there is no spilling, to
+/// increase function occupancy by one with respect to ArchVGPR usage by sinking
+/// trivially rematerializable instructions to their use. When the stage
+/// estimates reducing spilling or increasing occupancy is possible, as few
+/// instructions as possible are rematerialized to reduce potential negative
+/// effects on function latency.
+///
+/// TODO: We should extend this to work on SGPRs and AGPRs as well.
class PreRARematStage : public GCNSchedStage {
private:
- // Each region at MinOccupancy will have their own list of trivially
- // rematerializable instructions we can remat to reduce RP. The list maps an
- // instruction to the position we should remat before, usually the MI using
- // the rematerializable instruction.
- MapVector<unsigned, MapVector<MachineInstr *, MachineInstr *>>
- RematerializableInsts;
-
- // Map a trivially rematerializable def to a list of regions at MinOccupancy
- // that has the defined reg as a live-in.
- DenseMap<MachineInstr *, SmallVector<unsigned, 4>> RematDefToLiveInRegions;
-
- // Collect all trivially rematerializable VGPR instructions with a single def
- // and single use outside the defining block into RematerializableInsts.
- void collectRematerializableInstructions();
-
+ /// Useful information about a rematerializable instruction.
+ struct RematInstruction {
+ /// Single use of the rematerializable instruction's defined register,
+ /// located in a different block.
+ MachineInstr *UseMI;
+ /// Rematerialized version of the rematerializable instruction, set in
+ /// PreRARematStage::rematerialize. Used for reverting rematerializations.
+ MachineInstr *RematMI;
+ /// Set of regions in which the rematerializable instruction's defined
+ /// register is a live-in.
+ SmallDenseSet<unsigned, 4> LiveInRegions;
+ /// Region containing the rematerializable instruction.
+ unsigned DefRegion;
+
+ RematInstruction(unsigned DefRegion, MachineInstr *UseMI)
+ : UseMI(UseMI), DefRegion(DefRegion) {}
+ };
+
+ /// Collects instructions to rematerialize.
+ MapVector<MachineInstr *, RematInstruction> Rematerializations;
+ /// Collect regions whose live-ins or register pressure will change due to
+ /// rematerializations.
+ DenseMap<unsigned, GCNRegPressure> ImpactedRegions;
+ /// In case we need to rollback rematerializations, save lane masks for all
+ /// rematerialized registers in all regions in which they are live-ins.
+ DenseMap<std::pair<unsigned, Register>, LaneBitmask> RegMasks;
+ /// Target occupancy the stage estimates is reachable through
+ /// rematerialization. Greater than or equal to the pre-stage min occupancy.
+ unsigned TargetOcc;
+ /// Achieved occupancy *only* through rematerializations (pre-rescheduling).
+ /// Smaller than or equal to the target occupancy.
+ unsigned AchievedOcc;
+ /// Whether the stage is attempting to increase occupancy in the absence of
+ /// spilling.
+ bool IncreaseOccupancy;
+
+ /// Returns whether remat can reduce spilling or increase function occupancy
+ /// by 1 through rematerialization. If it can do either, collects
+ /// instructions in PreRARematStage::Rematerializations and sets the target
+ /// occupancy in PreRARematStage::TargetOcc.
+ bool canIncreaseOccupancyOrReduceSpill();
+
+ /// Whether the MI is trivially rematerializable and does not have any virtual
+ /// register use.
bool isTriviallyReMaterializable(const MachineInstr &MI);
- // TODO: Should also attempt to reduce RP of SGPRs and AGPRs
- // Attempt to reduce RP of VGPR by sinking trivially rematerializable
- // instructions. Returns true if we were able to sink instruction(s).
- bool sinkTriviallyRematInsts(const GCNSubtarget &ST,
- const TargetInstrInfo *TII);
+ /// Rematerializes all instructions in PreRARematStage::Rematerializations
+ /// and stores the achieved occupancy after remat in
+ /// PreRARematStage::AchievedOcc.
+ void rematerialize();
+
+ /// If remat alone did not increase occupancy to the target one, rolls back all
+ /// rematerializations and resets live-ins/RP in all regions impacted by the
+ /// stage to their pre-stage values.
+ void finalizeGCNSchedStage() override;
/// \p Returns true if all the uses in \p InstToRemat defined at \p
/// OriginalIdx are live at \p RematIdx. This only checks liveness of virtual
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.cpp b/llvm/lib/Target/AMDGPU/GCNSubtarget.cpp
index b5e8e246825c7..0507b64c46192 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.cpp
@@ -496,44 +496,43 @@ unsigned GCNSubtarget::getMaxNumSGPRs(const Function &F) const {
getReservedNumSGPRs(F));
}
-unsigned GCNSubtarget::getBaseMaxNumVGPRs(
- const Function &F, std::pair<unsigned, unsigned> WavesPerEU) const {
- // Compute maximum number of VGPRs function can use using default/requested
- // minimum number of waves per execution unit.
- unsigned MaxNumVGPRs = getMaxNumVGPRs(WavesPerEU.first);
+unsigned
+GCNSubtarget::getBaseMaxNumVGPRs(const Function &F,
+ std::pair<unsigned, unsigned> NumVGPRBounds,
+ bool UnifiedRF) const {
+ const auto &[Min, Max] = NumVGPRBounds;
// Check if maximum number of VGPRs was explicitly requested using
// "amdgpu-num-vgpr" attribute.
- if (F.hasFnAttribute("amdgpu-num-vgpr")) {
- unsigned Requested =
- F.getFnAttributeAsParsedInteger("amdgpu-num-vgpr", MaxNumVGPRs);
-
- if (hasGFX90AInsts())
- Requested *= 2;
-
- // Make sure requested value is compatible with values implied by
- // default/requested minimum/maximum number of waves per execution unit.
- if (Requested && Requested > getMaxNumVGPRs(WavesPerEU.first))
- Requested = 0;
- if (WavesPerEU.second && Requested &&
- Requested < getMinNumVGPRs(WavesPerEU.second))
- Requested = 0;
+ if (!F.hasFnAttribute("amdgpu-num-vgpr"))
+ return Max;
- if (Requested)
- MaxNumVGPRs = Requested;
- }
+ unsigned Requested = F.getFnAttributeAsParsedInteger("amdgpu-num-vgpr", Max);
+ if (!Requested)
+ return Max;
+ if (UnifiedRF)
+ Requested *= 2;
- return MaxNumVGPRs;
+ // Make sure requested value is inside the range of possible VGPR usage.
+ return std::clamp(Requested, Min, Max);
}
unsigned GCNSubtarget::getMaxNumVGPRs(const Function &F) const {
- return getBaseMaxNumVGPRs(F, getWavesPerEU(F));
+ std::pair<unsigned, unsigned> Waves = getWavesPerEU(F);
+ return getBaseMaxNumVGPRs(
+ F, {getMinNumVGPRs(Waves.second), getMaxNumVGPRs(Waves.first)},
+ hasGFX90AInsts());
+}
+
+unsigned GCNSubtarget::getMaxNumArchVGPRs(const Function &F) const {
+ std::pair<unsigned, unsigned> Waves = getWavesPerEU(F);
+ return getBaseMaxNumVGPRs(
+ F, {getMinNumArchVGPRs(Waves.second), getMaxNumArchVGPRs(Waves.first)},
+ false);
}
unsigned GCNSubtarget::getMaxNumVGPRs(const MachineFunction &MF) const {
- const Function &F = MF.getFunction();
- const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
- return getBaseMaxNumVGPRs(F, MFI.getWavesPerEU());
+ return getMaxNumVGPRs(MF.getFunction());
}
void GCNSubtarget::adjustSchedDependency(
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index 342b211199dca..29acb91afb82c 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -38,12 +38,12 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
// Following 2 enums are documented at:
// - https://llvm.org/docs/AMDGPUUsage.html#trap-handler-abi
enum class TrapHandlerAbi {
- NONE = 0x00,
+ NONE = 0x00,
AMDHSA = 0x01,
};
enum class TrapID {
- LLVMAMDHSATrap = 0x02,
+ LLVMAMDHSATrap = 0x02,
LLVMAMDHSADebugTrap = 0x03,
};
@@ -269,24 +269,20 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
const GCNTargetMachine &TM);
~GCNSubtarget() override;
- GCNSubtarget &initializeSubtargetDependencies(const Triple &TT,
- StringRef GPU, StringRef FS);
+ GCNSubtarget &initializeSubtargetDependencies(const Triple &TT, StringRef GPU,
+ StringRef FS);
/// Diagnose inconsistent subtarget features before attempting to codegen
/// function \p F.
void checkSubtargetFeatures(const Function &F) const;
- const SIInstrInfo *getInstrInfo() const override {
- return &InstrInfo;
- }
+ const SIInstrInfo *getInstrInfo() const override { return &InstrInfo; }
const SIFrameLowering *getFrameLowering() const override {
return &FrameLowering;
}
- const SITargetLowering *getTargetLowering() const override {
- return &TLInfo;
- }
+ const SITargetLowering *getTargetLowering() const override { return &TLInfo; }
const SIRegisterInfo *getRegisterInfo() const override {
return &InstrInfo.getRegisterInfo();
@@ -324,9 +320,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS);
- Generation getGeneration() const {
- return (Generation)Gen;
- }
+ Generation getGeneration() const { return (Generation)Gen; }
unsigned getMaxWaveScratchSize() const {
// See COMPUTE_TMPRING_SIZE.WAVESIZE.
@@ -347,9 +341,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
return llvm::countl_zero(getMaxWaveScratchSize()) + getWavefrontSizeLog2();
}
- int getLDSBankCount() const {
- return LDSBankCount;
- }
+ int getLDSBankCount() const { return LDSBankCount; }
unsigned getMaxPrivateElementSize(bool ForBufferRSrc = false) const {
return (ForBufferRSrc || !enableFlatScratch()) ? MaxPrivateElementSize : 16;
@@ -364,29 +356,17 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
bool supportsWGP() const { return getGeneration() >= GFX10; }
- bool hasIntClamp() const {
- return HasIntClamp;
- }
+ bool hasIntClamp() const { return HasIntClamp; }
- bool hasFP64() const {
- return FP64;
- }
+ bool hasFP64() const { return FP64; }
- bool hasMIMG_R128() const {
- return MIMG_R128;
- }
+ bool hasMIMG_R128() const { return MIMG_R128; }
- bool hasHWFP64() const {
- return FP64;
- }
+ bool hasHWFP64() const { return FP64; }
- bool hasHalfRate64Ops() const {
- return HalfRate64Ops;
- }
+ bool hasHalfRate64Ops() const { return HalfRate64Ops; }
- bool hasFullRate64Ops() const {
- return FullRate64Ops;
- }
+ bool hasFullRate64Ops() const { return FullRate64Ops; }
bool hasAddr64() const {
return (getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS);
@@ -402,65 +382,37 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
return getGeneration() >= VOLCANIC_ISLANDS;
}
- bool hasFractBug() const {
- return getGeneration() == SOUTHERN_ISLANDS;
- }
+ bool hasFractBug() const { return getGeneration() == SOUTHERN_ISLANDS; }
- bool hasBFE() const {
- return true;
- }
+ bool hasBFE() const { return true; }
- bool hasBFI() const {
- return true;
- }
+ bool hasBFI() const { return true; }
- bool hasBFM() const {
- return hasBFE();
- }
+ bool hasBFM() const { return hasBFE(); }
- bool hasBCNT(unsigned Size) const {
- return true;
- }
+ bool hasBCNT(unsigned Size) const { return true; }
- bool hasFFBL() const {
- return true;
- }
+ bool hasFFBL() const { return true; }
- bool hasFFBH() const {
- return true;
- }
+ bool hasFFBH() const { return true; }
- bool hasMed3_16() const {
- return getGeneration() >= AMDGPUSubtarget::GFX9;
- }
+ bool hasMed3_16() const { return getGeneration() >= AMDGPUSubtarget::GFX9; }
bool hasMin3Max3_16() const {
return getGeneration() >= AMDGPUSubtarget::GFX9;
}
- bool hasFmaMixInsts() const {
- return HasFmaMixInsts;
- }
+ bool hasFmaMixInsts() const { return HasFmaMixInsts; }
- bool hasCARRY() const {
- return true;
- }
+ bool hasCARRY() const { return true; }
- bool hasFMA() const {
- return FMA;
- }
+ bool hasFMA() const { return FMA; }
- bool hasSwap() const {
- return GFX9Insts;
- }
+ bool hasSwap() const { return GFX9Insts; }
- bool hasScalarPackInsts() const {
- return GFX9Insts;
- }
+ bool hasScalarPackInsts() const { return GFX9Insts; }
- bool hasScalarMulHiInsts() const {
- return GFX9Insts;
- }
+ bool hasScalarMulHiInsts() const { return GFX9Insts; }
bool hasScalarSubwordLoads() const { return getGeneration() >= GFX12; }
@@ -475,9 +427,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
/// True if the offset field of DS instructions works as expected. On SI, the
/// offset uses a 16-bit adder and does not always wrap properly.
- bool hasUsableDSOffset() const {
- return getGeneration() >= SEA_ISLANDS;
- }
+ bool hasUsableDSOffset() const { return getGeneration() >= SEA_ISLANDS; }
bool unsafeDSOffsetFoldingEnabled() const {
return EnableUnsafeDSOffsetFolding;
@@ -490,14 +440,10 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
/// Extra wait hazard is needed in some cases before
/// s_cbranch_vccnz/s_cbranch_vccz.
- bool hasReadVCCZBug() const {
- return getGeneration() <= SEA_ISLANDS;
- }
+ bool hasReadVCCZBug() const { return getGeneration() <= SEA_ISLANDS; }
/// Writes to VCC_LO/VCC_HI update the VCCZ flag.
- bool partialVCCWritesUpdateVCCZ() const {
- return getGeneration() >= GFX10;
- }
+ bool partialVCCWritesUpdateVCCZ() const { return getGeneration() >= GFX10; }
/// A read of an SGPR by SMRD instruction requires 4 wait states when the SGPR
/// was written by a VALU instruction.
@@ -511,18 +457,14 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
return getGeneration() >= VOLCANIC_ISLANDS;
}
- bool hasRFEHazards() const {
- return getGeneration() >= VOLCANIC_ISLANDS;
- }
+ bool hasRFEHazards() const { return getGeneration() >= VOLCANIC_ISLANDS; }
/// Number of hazard wait states for s_setreg_b32/s_setreg_imm32_b32.
unsigned getSetRegWaitStates() const {
return getGeneration() <= SEA_ISLANDS ? 1 : 2;
}
- bool dumpCode() const {
- return DumpCode;
- }
+ bool dumpCode() const { return DumpCode; }
/// Return the amount of LDS that can be used that will not restrict the
/// occupancy lower than WaveCount.
@@ -538,25 +480,17 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
return getGeneration() >= AMDGPUSubtarget::GFX10;
}
- bool useFlatForGlobal() const {
- return FlatForGlobal;
- }
+ bool useFlatForGlobal() const { return FlatForGlobal; }
/// \returns If target supports ds_read/write_b128 and user enables generation
/// of ds_read/write_b128.
- bool useDS128() const {
- return CIInsts && EnableDS128;
- }
+ bool useDS128() const { return CIInsts && EnableDS128; }
/// \return If target supports ds_read/write_b96/128.
- bool hasDS96AndDS128() const {
- return CIInsts;
- }
+ bool hasDS96AndDS128() const { return CIInsts; }
/// Have v_trunc_f64, v_ceil_f64, v_rndne_f64
- bool haveRoundOpsF64() const {
- return CIInsts;
- }
+ bool haveRoundOpsF64() const { return CIInsts; }
/// \returns If MUBUF instructions always perform range checking, even for
/// buffer resources used for private memory access.
@@ -566,89 +500,55 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
/// \returns If target requires PRT Struct NULL support (zero result registers
/// for sparse texture support).
- bool usePRTStrictNull() const {
- return EnablePRTStrictNull;
- }
+ bool usePRTStrictNull() const { return EnablePRTStrictNull; }
- bool hasAutoWaitcntBeforeBarrier() const {
- return AutoWaitcntBeforeBarrier;
- }
+ bool hasAutoWaitcntBeforeBarrier() const { return AutoWaitcntBeforeBarrier; }
/// \returns true if the target supports backing off of s_barrier instructions
/// when an exception is raised.
- bool supportsBackOffBarrier() const {
- return BackOffBarrier;
- }
+ bool supportsBackOffBarrier() const { return BackOffBarrier; }
- bool hasUnalignedBufferAccess() const {
- return UnalignedBufferAccess;
- }
+ bool hasUnalignedBufferAccess() const { return UnalignedBufferAccess; }
bool hasUnalignedBufferAccessEnabled() const {
return UnalignedBufferAccess && UnalignedAccessMode;
}
- bool hasUnalignedDSAccess() const {
- return UnalignedDSAccess;
- }
+ bool hasUnalignedDSAccess() const { return UnalignedDSAccess; }
bool hasUnalignedDSAccessEnabled() const {
return UnalignedDSAccess && UnalignedAccessMode;
}
- bool hasUnalignedScratchAccess() const {
- return UnalignedScratchAccess;
- }
+ bool hasUnalignedScratchAccess() const { return UnalignedScratchAccess; }
bool hasUnalignedScratchAccessEnabled() const {
return UnalignedScratchAccess && UnalignedAccessMode;
}
- bool hasUnalignedAccessMode() const {
- return UnalignedAccessMode;
- }
+ bool hasUnalignedAccessMode() const { return UnalignedAccessMode; }
- bool hasApertureRegs() const {
- return HasApertureRegs;
- }
+ bool hasApertureRegs() const { return HasApertureRegs; }
- bool isTrapHandlerEnabled() const {
- return TrapHandler;
- }
+ bool isTrapHandlerEnabled() const { return TrapHandler; }
- bool isXNACKEnabled() const {
- return TargetID.isXnackOnOrAny();
- }
+ bool isXNACKEnabled() const { return TargetID.isXnackOnOrAny(); }
- bool isTgSplitEnabled() const {
- return EnableTgSplit;
- }
+ bool isTgSplitEnabled() const { return EnableTgSplit; }
- bool isCuModeEnabled() const {
- return EnableCuMode;
- }
+ bool isCuModeEnabled() const { return EnableCuMode; }
bool isPreciseMemoryEnabled() const { return EnablePreciseMemory; }
- bool hasFlatAddressSpace() const {
- return FlatAddressSpace;
- }
+ bool hasFlatAddressSpace() const { return FlatAddressSpace; }
- bool hasFlatScrRegister() const {
- return hasFlatAddressSpace();
- }
+ bool hasFlatScrRegister() const { return hasFlatAddressSpace(); }
- bool hasFlatInstOffsets() const {
- return FlatInstOffsets;
- }
+ bool hasFlatInstOffsets() const { return FlatInstOffsets; }
- bool hasFlatGlobalInsts() const {
- return FlatGlobalInsts;
- }
+ bool hasFlatGlobalInsts() const { return FlatGlobalInsts; }
- bool hasFlatScratchInsts() const {
- return FlatScratchInsts;
- }
+ bool hasFlatScratchInsts() const { return FlatScratchInsts; }
// Check if target supports ST addressing mode with FLAT scratch instructions.
// The ST addressing mode means no registers are used, either VGPR or SGPR,
@@ -659,30 +559,20 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
bool hasFlatScratchSVSMode() const { return GFX940Insts || GFX11Insts; }
- bool hasScalarFlatScratchInsts() const {
- return ScalarFlatScratchInsts;
- }
+ bool hasScalarFlatScratchInsts() const { return ScalarFlatScratchInsts; }
bool enableFlatScratch() const {
return flatScratchIsArchitected() ||
(EnableFlatScratch && hasFlatScratchInsts());
}
- bool hasGlobalAddTidInsts() const {
- return GFX10_BEncoding;
- }
+ bool hasGlobalAddTidInsts() const { return GFX10_BEncoding; }
- bool hasAtomicCSub() const {
- return GFX10_BEncoding;
- }
+ bool hasAtomicCSub() const { return GFX10_BEncoding; }
- bool hasExportInsts() const {
- return !hasGFX940Insts();
- }
+ bool hasExportInsts() const { return !hasGFX940Insts(); }
- bool hasVINTERPEncoding() const {
- return GFX11Insts;
- }
+ bool hasVINTERPEncoding() const { return GFX11Insts; }
// DS_ADD_F64/DS_ADD_RTN_F64
bool hasLdsAtomicAddF64() const { return hasGFX90AInsts(); }
@@ -691,162 +581,98 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
return getGeneration() >= GFX9;
}
- bool hasFlatSegmentOffsetBug() const {
- return HasFlatSegmentOffsetBug;
- }
+ bool hasFlatSegmentOffsetBug() const { return HasFlatSegmentOffsetBug; }
- bool hasFlatLgkmVMemCountInOrder() const {
- return getGeneration() > GFX9;
- }
+ bool hasFlatLgkmVMemCountInOrder() const { return getGeneration() > GFX9; }
- bool hasD16LoadStore() const {
- return getGeneration() >= GFX9;
- }
+ bool hasD16LoadStore() const { return getGeneration() >= GFX9; }
bool d16PreservesUnusedBits() const {
return hasD16LoadStore() && !TargetID.isSramEccOnOrAny();
}
- bool hasD16Images() const {
- return getGeneration() >= VOLCANIC_ISLANDS;
- }
+ bool hasD16Images() const { return getGeneration() >= VOLCANIC_ISLANDS; }
/// Return if most LDS instructions have an m0 use that require m0 to be
/// initialized.
- bool ldsRequiresM0Init() const {
- return getGeneration() < GFX9;
- }
+ bool ldsRequiresM0Init() const { return getGeneration() < GFX9; }
// True if the hardware rewinds and replays GWS operations if a wave is
// preempted.
//
// If this is false, a GWS operation requires testing if a nack set the
// MEM_VIOL bit, and repeating if so.
- bool hasGWSAutoReplay() const {
- return getGeneration() >= GFX9;
- }
+ bool hasGWSAutoReplay() const { return getGeneration() >= GFX9; }
/// \returns if target has ds_gws_sema_release_all instruction.
- bool hasGWSSemaReleaseAll() const {
- return CIInsts;
- }
+ bool hasGWSSemaReleaseAll() const { return CIInsts; }
/// \returns true if the target has integer add/sub instructions that do not
/// produce a carry-out. This includes v_add_[iu]32, v_sub_[iu]32,
/// v_add_[iu]16, and v_sub_[iu]16, all of which support the clamp modifier
/// for saturation.
- bool hasAddNoCarry() const {
- return AddNoCarryInsts;
- }
+ bool hasAddNoCarry() const { return AddNoCarryInsts; }
bool hasScalarAddSub64() const { return getGeneration() >= GFX12; }
bool hasScalarSMulU64() const { return getGeneration() >= GFX12; }
- bool hasUnpackedD16VMem() const {
- return HasUnpackedD16VMem;
- }
+ bool hasUnpackedD16VMem() const { return HasUnpackedD16VMem; }
// Covers VS/PS/CS graphics shaders
bool isMesaGfxShader(const Function &F) const {
return isMesa3DOS() && AMDGPU::isShader(F.getCallingConv());
}
- bool hasMad64_32() const {
- return getGeneration() >= SEA_ISLANDS;
- }
+ bool hasMad64_32() const { return getGeneration() >= SEA_ISLANDS; }
- bool hasSDWAOmod() const {
- return HasSDWAOmod;
- }
+ bool hasSDWAOmod() const { return HasSDWAOmod; }
- bool hasSDWAScalar() const {
- return HasSDWAScalar;
- }
+ bool hasSDWAScalar() const { return HasSDWAScalar; }
- bool hasSDWASdst() const {
- return HasSDWASdst;
- }
+ bool hasSDWASdst() const { return HasSDWASdst; }
- bool hasSDWAMac() const {
- return HasSDWAMac;
- }
+ bool hasSDWAMac() const { return HasSDWAMac; }
- bool hasSDWAOutModsVOPC() const {
- return HasSDWAOutModsVOPC;
- }
+ bool hasSDWAOutModsVOPC() const { return HasSDWAOutModsVOPC; }
- bool hasDLInsts() const {
- return HasDLInsts;
- }
+ bool hasDLInsts() const { return HasDLInsts; }
bool hasFmacF64Inst() const { return HasFmacF64Inst; }
- bool hasDot1Insts() const {
- return HasDot1Insts;
- }
+ bool hasDot1Insts() const { return HasDot1Insts; }
- bool hasDot2Insts() const {
- return HasDot2Insts;
- }
+ bool hasDot2Insts() const { return HasDot2Insts; }
- bool hasDot3Insts() const {
- return HasDot3Insts;
- }
+ bool hasDot3Insts() const { return HasDot3Insts; }
- bool hasDot4Insts() const {
- return HasDot4Insts;
- }
+ bool hasDot4Insts() const { return HasDot4Insts; }
- bool hasDot5Insts() const {
- return HasDot5Insts;
- }
+ bool hasDot5Insts() const { return HasDot5Insts; }
- bool hasDot6Insts() const {
- return HasDot6Insts;
- }
+ bool hasDot6Insts() const { return HasDot6Insts; }
- bool hasDot7Insts() const {
- return HasDot7Insts;
- }
+ bool hasDot7Insts() const { return HasDot7Insts; }
- bool hasDot8Insts() const {
- return HasDot8Insts;
- }
+ bool hasDot8Insts() const { return HasDot8Insts; }
- bool hasDot9Insts() const {
- return HasDot9Insts;
- }
+ bool hasDot9Insts() const { return HasDot9Insts; }
- bool hasDot10Insts() const {
- return HasDot10Insts;
- }
+ bool hasDot10Insts() const { return HasDot10Insts; }
- bool hasDot11Insts() const {
- return HasDot11Insts;
- }
+ bool hasDot11Insts() const { return HasDot11Insts; }
- bool hasDot12Insts() const {
- return HasDot12Insts;
- }
+ bool hasDot12Insts() const { return HasDot12Insts; }
- bool hasDot13Insts() const {
- return HasDot13Insts;
- }
+ bool hasDot13Insts() const { return HasDot13Insts; }
- bool hasMAIInsts() const {
- return HasMAIInsts;
- }
+ bool hasMAIInsts() const { return HasMAIInsts; }
- bool hasFP8Insts() const {
- return HasFP8Insts;
- }
+ bool hasFP8Insts() const { return HasFP8Insts; }
bool hasFP8ConversionInsts() const { return HasFP8ConversionInsts; }
- bool hasPkFmacF16Inst() const {
- return HasPkFmacF16Inst;
- }
+ bool hasPkFmacF16Inst() const { return HasPkFmacF16Inst; }
bool hasAtomicFMinFMaxF32GlobalInsts() const {
return HasAtomicFMinFMaxF32GlobalInsts;
@@ -919,37 +745,23 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
return HasDefaultComponentBroadcast;
}
- bool hasNoSdstCMPX() const {
- return HasNoSdstCMPX;
- }
+ bool hasNoSdstCMPX() const { return HasNoSdstCMPX; }
- bool hasVscnt() const {
- return HasVscnt;
- }
+ bool hasVscnt() const { return HasVscnt; }
- bool hasGetWaveIdInst() const {
- return HasGetWaveIdInst;
- }
+ bool hasGetWaveIdInst() const { return HasGetWaveIdInst; }
- bool hasSMemTimeInst() const {
- return HasSMemTimeInst;
- }
+ bool hasSMemTimeInst() const { return HasSMemTimeInst; }
- bool hasShaderCyclesRegister() const {
- return HasShaderCyclesRegister;
- }
+ bool hasShaderCyclesRegister() const { return HasShaderCyclesRegister; }
bool hasShaderCyclesHiLoRegisters() const {
return HasShaderCyclesHiLoRegisters;
}
- bool hasVOP3Literal() const {
- return HasVOP3Literal;
- }
+ bool hasVOP3Literal() const { return HasVOP3Literal; }
- bool hasNoDataDepHazard() const {
- return HasNoDataDepHazard;
- }
+ bool hasNoDataDepHazard() const { return HasNoDataDepHazard; }
bool vmemWriteNeedsExpWaitcnt() const {
return getGeneration() < SEA_ISLANDS;
@@ -974,15 +786,11 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
// dynamic realignment in common cases.
Align getStackAlignment() const { return Align(16); }
- bool enableMachineScheduler() const override {
- return true;
- }
+ bool enableMachineScheduler() const override { return true; }
bool useAA() const override;
- bool enableSubRegLiveness() const override {
- return true;
- }
+ bool enableSubRegLiveness() const override { return true; }
void setScalarizeGlobalBehavior(bool b) { ScalarizeGlobal = b; }
bool getScalarizeGlobalBehavior() const { return ScalarizeGlobal; }
@@ -991,9 +799,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
static bool hasHalfRate64Ops(const TargetSubtargetInfo &STI);
// XXX - Why is this here if it isn't in the default pass set?
- bool enableEarlyIfConversion() const override {
- return true;
- }
+ bool enableEarlyIfConversion() const override { return true; }
void overrideSchedPolicy(MachineSchedPolicy &Policy,
unsigned NumRegionInstrs) const override;
@@ -1004,17 +810,11 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
return AMDGPU::getMaxNumUserSGPRs(*this);
}
- bool hasSMemRealTime() const {
- return HasSMemRealTime;
- }
+ bool hasSMemRealTime() const { return HasSMemRealTime; }
- bool hasMovrel() const {
- return HasMovrel;
- }
+ bool hasMovrel() const { return HasMovrel; }
- bool hasVGPRIndexMode() const {
- return HasVGPRIndexMode;
- }
+ bool hasVGPRIndexMode() const { return HasVGPRIndexMode; }
bool useVGPRIndexMode() const;
@@ -1024,13 +824,9 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
bool hasScalarDwordx3Loads() const { return HasScalarDwordx3Loads; }
- bool hasScalarStores() const {
- return HasScalarStores;
- }
+ bool hasScalarStores() const { return HasScalarStores; }
- bool hasScalarAtomics() const {
- return HasScalarAtomics;
- }
+ bool hasScalarAtomics() const { return HasScalarAtomics; }
bool hasLDSFPAtomicAddF32() const { return GFX8Insts; }
bool hasLDSFPAtomicAddF64() const { return GFX90AInsts; }
@@ -1041,60 +837,40 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
/// \returns true if the subtarget has the v_permlane64_b32 instruction.
bool hasPermLane64() const { return getGeneration() >= GFX11; }
- bool hasDPP() const {
- return HasDPP;
- }
+ bool hasDPP() const { return HasDPP; }
- bool hasDPPBroadcasts() const {
- return HasDPP && getGeneration() < GFX10;
- }
+ bool hasDPPBroadcasts() const { return HasDPP && getGeneration() < GFX10; }
bool hasDPPWavefrontShifts() const {
return HasDPP && getGeneration() < GFX10;
}
- bool hasDPP8() const {
- return HasDPP8;
- }
+ bool hasDPP8() const { return HasDPP8; }
- bool hasDPALU_DPP() const {
- return HasDPALU_DPP;
- }
+ bool hasDPALU_DPP() const { return HasDPALU_DPP; }
bool hasDPPSrc1SGPR() const { return HasDPPSrc1SGPR; }
- bool hasPackedFP32Ops() const {
- return HasPackedFP32Ops;
- }
+ bool hasPackedFP32Ops() const { return HasPackedFP32Ops; }
// Has V_PK_MOV_B32 opcode
- bool hasPkMovB32() const {
- return GFX90AInsts;
- }
+ bool hasPkMovB32() const { return GFX90AInsts; }
bool hasFmaakFmamkF32Insts() const {
return getGeneration() >= GFX10 || hasGFX940Insts();
}
- bool hasImageInsts() const {
- return HasImageInsts;
- }
+ bool hasImageInsts() const { return HasImageInsts; }
- bool hasExtendedImageInsts() const {
- return HasExtendedImageInsts;
- }
+ bool hasExtendedImageInsts() const { return HasExtendedImageInsts; }
- bool hasR128A16() const {
- return HasR128A16;
- }
+ bool hasR128A16() const { return HasR128A16; }
bool hasA16() const { return HasA16; }
bool hasG16() const { return HasG16; }
- bool hasOffset3fBug() const {
- return HasOffset3fBug;
- }
+ bool hasOffset3fBug() const { return HasOffset3fBug; }
bool hasImageStoreD16Bug() const { return HasImageStoreD16Bug; }
@@ -1116,17 +892,11 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
return AMDGPU::getNSAMaxSize(*this, HasSampler);
}
- bool hasGFX10_AEncoding() const {
- return GFX10_AEncoding;
- }
+ bool hasGFX10_AEncoding() const { return GFX10_AEncoding; }
- bool hasGFX10_BEncoding() const {
- return GFX10_BEncoding;
- }
+ bool hasGFX10_BEncoding() const { return GFX10_BEncoding; }
- bool hasGFX10_3Insts() const {
- return GFX10_3Insts;
- }
+ bool hasGFX10_3Insts() const { return GFX10_3Insts; }
bool hasMadF16() const;
@@ -1134,21 +904,13 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
bool hasLshlAddB64() const { return GFX940Insts; }
- bool enableSIScheduler() const {
- return EnableSIScheduler;
- }
+ bool enableSIScheduler() const { return EnableSIScheduler; }
- bool loadStoreOptEnabled() const {
- return EnableLoadStoreOpt;
- }
+ bool loadStoreOptEnabled() const { return EnableLoadStoreOpt; }
- bool hasSGPRInitBug() const {
- return SGPRInitBug;
- }
+ bool hasSGPRInitBug() const { return SGPRInitBug; }
- bool hasUserSGPRInit16Bug() const {
- return UserSGPRInit16Bug && isWave32();
- }
+ bool hasUserSGPRInit16Bug() const { return UserSGPRInit16Bug && isWave32(); }
bool hasNegativeScratchOffsetBug() const { return NegativeScratchOffsetBug; }
@@ -1156,18 +918,14 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
return NegativeUnalignedScratchOffsetBug;
}
- bool hasMFMAInlineLiteralBug() const {
- return HasMFMAInlineLiteralBug;
- }
+ bool hasMFMAInlineLiteralBug() const { return HasMFMAInlineLiteralBug; }
bool has12DWordStoreHazard() const {
return getGeneration() != AMDGPUSubtarget::SOUTHERN_ISLANDS;
}
// \returns true if the subtarget supports DWORDX3 load/store instructions.
- bool hasDwordx3LoadStores() const {
- return CIInsts;
- }
+ bool hasDwordx3LoadStores() const { return CIInsts; }
bool hasReadM0MovRelInterpHazard() const {
return getGeneration() == AMDGPUSubtarget::GFX9;
@@ -1186,39 +944,23 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
return getGeneration() == AMDGPUSubtarget::GFX9;
}
- bool hasVcmpxPermlaneHazard() const {
- return HasVcmpxPermlaneHazard;
- }
+ bool hasVcmpxPermlaneHazard() const { return HasVcmpxPermlaneHazard; }
- bool hasVMEMtoScalarWriteHazard() const {
- return HasVMEMtoScalarWriteHazard;
- }
+ bool hasVMEMtoScalarWriteHazard() const { return HasVMEMtoScalarWriteHazard; }
- bool hasSMEMtoVectorWriteHazard() const {
- return HasSMEMtoVectorWriteHazard;
- }
+ bool hasSMEMtoVectorWriteHazard() const { return HasSMEMtoVectorWriteHazard; }
- bool hasLDSMisalignedBug() const {
- return LDSMisalignedBug && !EnableCuMode;
- }
+ bool hasLDSMisalignedBug() const { return LDSMisalignedBug && !EnableCuMode; }
- bool hasInstFwdPrefetchBug() const {
- return HasInstFwdPrefetchBug;
- }
+ bool hasInstFwdPrefetchBug() const { return HasInstFwdPrefetchBug; }
- bool hasVcmpxExecWARHazard() const {
- return HasVcmpxExecWARHazard;
- }
+ bool hasVcmpxExecWARHazard() const { return HasVcmpxExecWARHazard; }
- bool hasLdsBranchVmemWARHazard() const {
- return HasLdsBranchVmemWARHazard;
- }
+ bool hasLdsBranchVmemWARHazard() const { return HasLdsBranchVmemWARHazard; }
// Shift amount of a 64 bit shift cannot be a highest allocated register
// if also at the end of the allocation block.
- bool hasShift64HighRegBug() const {
- return GFX90AInsts && !GFX940Insts;
- }
+ bool hasShift64HighRegBug() const { return GFX90AInsts && !GFX940Insts; }
// Has one cycle hazard on transcendental instruction feeding a
// non transcendental VALU.
@@ -1232,13 +974,9 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
bool hasDOTOpSelHazard() const { return GFX940Insts || GFX11Insts; }
// Does not have HW interlocs for VALU writing and then reading SGPRs.
- bool hasVDecCoExecHazard() const {
- return GFX940Insts;
- }
+ bool hasVDecCoExecHazard() const { return GFX940Insts; }
- bool hasNSAtoVMEMBug() const {
- return HasNSAtoVMEMBug;
- }
+ bool hasNSAtoVMEMBug() const { return HasNSAtoVMEMBug; }
bool hasNSAClauseBug() const { return HasNSAClauseBug; }
@@ -1308,9 +1046,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
/// Returns true if the target supports
/// global_load_lds_dwordx3/global_load_lds_dwordx4 or
/// buffer_load_dwordx3/buffer_load_dwordx4 with the lds bit.
- bool hasLDSLoadB96_B128() const {
- return hasGFX950Insts();
- }
+ bool hasLDSLoadB96_B128() const { return hasGFX950Insts(); }
bool hasSALUFloatInsts() const { return HasSALUFloatInsts; }
@@ -1341,17 +1077,11 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
bool hasPermlane32Swap() const { return HasPermlane32Swap; }
bool hasAshrPkInsts() const { return HasAshrPkInsts; }
- bool hasMinimum3Maximum3F32() const {
- return HasMinimum3Maximum3F32;
- }
+ bool hasMinimum3Maximum3F32() const { return HasMinimum3Maximum3F32; }
- bool hasMinimum3Maximum3F16() const {
- return HasMinimum3Maximum3F16;
- }
+ bool hasMinimum3Maximum3F16() const { return HasMinimum3Maximum3F16; }
- bool hasMinimum3Maximum3PKF16() const {
- return HasMinimum3Maximum3PKF16;
- }
+ bool hasMinimum3Maximum3PKF16() const { return HasMinimum3Maximum3PKF16; }
/// \returns The maximum number of instructions that can be enclosed in an
/// S_CLAUSE on the given subtarget, or 0 for targets that do not support that
@@ -1368,13 +1098,9 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
/// VGPRs
unsigned getOccupancyWithNumVGPRs(unsigned VGPRs) const;
- /// Subtarget's minimum/maximum occupancy, in number of waves per EU, that can
- /// be achieved when the only function running on a CU is \p F, each workgroup
- /// uses \p LDSSize bytes of LDS, and each wave uses \p NumSGPRs SGPRs and \p
- /// NumVGPRs VGPRs. The flat workgroup sizes associated to the function are a
- /// range, so this returns a range as well.
- ///
- /// Note that occupancy can be affected by the scratch allocation as well, but
+  /// Return occupancy for the given function. Uses LDS and a number of
+ /// registers if provided.
+ /// Note, occupancy can be affected by the scratch allocation as well, but
/// we do not have enough information to compute it.
std::pair<unsigned, unsigned> computeOccupancy(const Function &F,
unsigned LDSSize = 0,
@@ -1402,9 +1128,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
/// \returns true if the machine has merged shaders in which s0-s7 are
/// reserved by the hardware and user SGPRs start at s8
- bool hasMergedShaders() const {
- return getGeneration() >= GFX9;
- }
+ bool hasMergedShaders() const { return getGeneration() >= GFX9; }
// \returns true if the target supports the pre-NGG legacy geometry path.
bool hasLegacyGeometry() const { return getGeneration() < GFX11; }
@@ -1547,16 +1271,30 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
return AMDGPU::IsaInfo::getMinNumVGPRs(this, WavesPerEU);
}
+ /// \returns the minimum number of ArchVGPRs that will prevent achieving more
+ /// than the specified number of waves \p WavesPerEU.
+ unsigned getMinNumArchVGPRs(unsigned WavesPerEU) const {
+ return AMDGPU::IsaInfo::getMinNumArchVGPRs(this, WavesPerEU);
+ }
+
/// \returns the maximum number of VGPRs that can be used and still achieved
/// at least the specified number of waves \p WavesPerEU.
unsigned getMaxNumVGPRs(unsigned WavesPerEU) const {
return AMDGPU::IsaInfo::getMaxNumVGPRs(this, WavesPerEU);
}
- /// \returns max num VGPRs. This is the common utility function
- /// called by MachineFunction and Function variants of getMaxNumVGPRs.
+ /// \returns the maximum number of ArchVGPRs that can be used and still
+ /// achieve at least the specified number of waves \p WavesPerEU.
+  unsigned getMaxNumArchVGPRs(unsigned WavesPerEU) const {
+    return AMDGPU::IsaInfo::getMaxNumArchVGPRs(this, WavesPerEU);
+  }
+
+ /// \returns max num VGPRs. This is the common utility function called by
+ /// MachineFunction and Function variants of getMaxNum[Arch]VGPRs.
unsigned getBaseMaxNumVGPRs(const Function &F,
- std::pair<unsigned, unsigned> WavesPerEU) const;
+ std::pair<unsigned, unsigned> NumVGPRBounds,
+ bool UnifiedRF) const;
+
/// \returns Maximum number of VGPRs that meets number of waves per execution
/// unit requirement for function \p F, or number of VGPRs explicitly
/// requested using "amdgpu-num-vgpr" attribute attached to function \p F.
@@ -1567,9 +1305,13 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
/// unit requirement.
unsigned getMaxNumVGPRs(const Function &F) const;
- unsigned getMaxNumAGPRs(const Function &F) const {
- return getMaxNumVGPRs(F);
- }
+ /// Returns the maximum number of ArchVGPRs that meets number of waves per
+ /// execution unit requirement for function \p F, or number of ArchVGPRs
+ /// explicitly requested using "amdgpu-num-vgpr" attribute attached to
+ /// function \p F.
+ unsigned getMaxNumArchVGPRs(const Function &F) const;
+
+ unsigned getMaxNumAGPRs(const Function &F) const { return getMaxNumVGPRs(F); }
/// \returns Maximum number of VGPRs that meets number of waves per execution
/// unit requirement for function \p MF, or number of VGPRs explicitly
@@ -1581,13 +1323,9 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
/// unit requirement.
unsigned getMaxNumVGPRs(const MachineFunction &MF) const;
- bool isWave32() const {
- return getWavefrontSize() == 32;
- }
+ bool isWave32() const { return getWavefrontSize() == 32; }
- bool isWave64() const {
- return getWavefrontSize() == 64;
- }
+ bool isWave64() const { return getWavefrontSize() == 64; }
/// Returns if the wavesize of this subtarget is known reliable. This is false
/// only for the a default target-cpu that does not have an explicit
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 59afcbed35294..5f229b310f686 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -1240,28 +1240,36 @@ unsigned getOccupancyWithNumSGPRs(unsigned SGPRs, unsigned MaxWaves,
return 5;
}
-unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
+unsigned getBaseMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
+ unsigned TotalNumVGPRs,
+ unsigned NumAddressableVGPRs) {
assert(WavesPerEU != 0);
-
unsigned MaxWavesPerEU = getMaxWavesPerEU(STI);
if (WavesPerEU >= MaxWavesPerEU)
return 0;
+ unsigned MinWavesPerEU =
+ getNumWavesPerEUWithNumVGPRs(STI, NumAddressableVGPRs);
+ if (WavesPerEU < MinWavesPerEU)
+ return getMinNumVGPRs(STI, MinWavesPerEU);
- unsigned TotNumVGPRs = getTotalNumVGPRs(STI);
- unsigned AddrsableNumVGPRs = getAddressableNumVGPRs(STI);
unsigned Granule = getVGPRAllocGranule(STI);
- unsigned MaxNumVGPRs = alignDown(TotNumVGPRs / WavesPerEU, Granule);
-
- if (MaxNumVGPRs == alignDown(TotNumVGPRs / MaxWavesPerEU, Granule))
+ unsigned MaxNumVGPRs = alignDown(TotalNumVGPRs / WavesPerEU, Granule);
+ if (MaxNumVGPRs == alignDown(TotalNumVGPRs / MaxWavesPerEU, Granule))
return 0;
+ unsigned MaxNumVGPRsNext =
+ alignDown(TotalNumVGPRs / (WavesPerEU + 1), Granule);
+ unsigned MinNumVGPRs = 1 + std::min(MaxNumVGPRs - Granule, MaxNumVGPRsNext);
+ return std::min(MinNumVGPRs, NumAddressableVGPRs);
+}
- unsigned MinWavesPerEU = getNumWavesPerEUWithNumVGPRs(STI, AddrsableNumVGPRs);
- if (WavesPerEU < MinWavesPerEU)
- return getMinNumVGPRs(STI, MinWavesPerEU);
+unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
+ return getBaseMinNumVGPRs(STI, WavesPerEU, getTotalNumVGPRs(STI),
+ getAddressableNumVGPRs(STI));
+}
- unsigned MaxNumVGPRsNext = alignDown(TotNumVGPRs / (WavesPerEU + 1), Granule);
- unsigned MinNumVGPRs = 1 + std::min(MaxNumVGPRs - Granule, MaxNumVGPRsNext);
- return std::min(MinNumVGPRs, AddrsableNumVGPRs);
+unsigned getMinNumArchVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
+ unsigned TotNumArchVGPRs = getAddressableNumArchVGPRs(STI);
+ return getBaseMinNumVGPRs(STI, WavesPerEU, TotNumArchVGPRs, TotNumArchVGPRs);
}
unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
@@ -1273,6 +1281,12 @@ unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
return std::min(MaxNumVGPRs, AddressableNumVGPRs);
}
+unsigned getMaxNumArchVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
+ assert(WavesPerEU != 0);
+ return alignDown(getAddressableNumArchVGPRs(STI) / WavesPerEU,
+ getVGPRAllocGranule(STI));
+}
+
unsigned getEncodedNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs,
std::optional<bool> EnableWavefrontSize32) {
return getGranulatedNumRegisterBlocks(
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index 67bebfb3418d5..ebcbd0c07506b 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -318,14 +318,29 @@ unsigned getAddressableNumArchVGPRs(const MCSubtargetInfo *STI);
/// \returns Addressable number of VGPRs for given subtarget \p STI.
unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI);
+/// Returns the minimum number of VGPRs that meets given number of waves per
+/// execution unit requirement for given subtarget \p STI with \p TotalNumVGPRs
+/// VGPRs available and \p NumAddressableVGPRs addressable VGPRs.
+unsigned getBaseMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
+ unsigned TotalNumVGPRs,
+ unsigned NumAddressableVGPRs);
+
/// \returns Minimum number of VGPRs that meets given number of waves per
/// execution unit requirement for given subtarget \p STI.
unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);
+/// \returns Minimum number of ArchVGPRs that meets given number of waves per
+/// execution unit requirement for given subtarget \p STI.
+unsigned getMinNumArchVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);
+
/// \returns Maximum number of VGPRs that meets given number of waves per
/// execution unit requirement for given subtarget \p STI.
unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);
+/// \returns Maximum number of ArchVGPRs that meets given number of waves per
+/// execution unit requirement for given subtarget \p STI.
+unsigned getMaxNumArchVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);
+
/// \returns Number of waves reachable for a given \p NumVGPRs usage for given
/// subtarget \p STI.
unsigned getNumWavesPerEUWithNumVGPRs(const MCSubtargetInfo *STI,
diff --git a/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats-attr.mir b/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats-attr.mir
new file mode 100644
index 0000000000000..3c15201cd698e
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats-attr.mir
@@ -0,0 +1,221 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn -mcpu=gfx908 -run-pass=machine-scheduler -amdgpu-disable-unclustered-high-rp-reschedule -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX908 %s
+
+--- |
+ ; User-requested maximum number of VGPRs need to be taken into account by
+ ; the scheduler's rematerialization stage. Register usage above that number
+ ; is considered like spill; occupancy is "inadvertently" increased when
+ ; eliminating spill.
+ define void @small_num_vgprs_as_spill() "amdgpu-num-vgpr"="28" {
+ ret void
+ }
+
+ ; Min/Max occupancy is 8, the scheduler's rematerialization stage should not
+ ; try to rematerialize instructions.
+ define void @dont_remat_at_max_occ() "amdgpu-flat-work-group-size"="1024,1024" {
+ ret void
+ }
+---
+name: small_num_vgprs_as_spill
+tracksRegLiveness: true
+machineFunctionInfo:
+ isEntryFunction: true
+body: |
+ ; GFX908-LABEL: name: small_num_vgprs_as_spill
+ ; GFX908: bb.0:
+ ; GFX908-NEXT: successors: %bb.1(0x80000000)
+ ; GFX908-NEXT: {{ $}}
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 0, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_3:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_4:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 4, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_5:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 5, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_6:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 6, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_7:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 7, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_8:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 8, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_9:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 9, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_10:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 10, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_11:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 11, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_12:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 12, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_13:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 13, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_14:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 14, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_15:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 15, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_16:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 16, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_17:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 17, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_18:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 18, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_19:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 19, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_20:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 20, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_21:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 21, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 25, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_26:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 26, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 33, implicit $exec, implicit $mode
+ ; GFX908-NEXT: {{ $}}
+ ; GFX908-NEXT: bb.1:
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_]], implicit [[V_CVT_I32_F64_e32_1]], implicit [[V_CVT_I32_F64_e32_2]], implicit [[V_CVT_I32_F64_e32_3]], implicit [[V_CVT_I32_F64_e32_4]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_5]], implicit [[V_CVT_I32_F64_e32_6]], implicit [[V_CVT_I32_F64_e32_7]], implicit [[V_CVT_I32_F64_e32_8]], implicit [[V_CVT_I32_F64_e32_9]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_10]], implicit [[V_CVT_I32_F64_e32_11]], implicit [[V_CVT_I32_F64_e32_12]], implicit [[V_CVT_I32_F64_e32_13]], implicit [[V_CVT_I32_F64_e32_14]]
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 27, implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 28, implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 29, implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 30, implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_32:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 31, implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_33:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 32, implicit $exec, implicit $mode
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_15]], implicit [[V_CVT_I32_F64_e32_16]], implicit [[V_CVT_I32_F64_e32_17]], implicit [[V_CVT_I32_F64_e32_18]], implicit [[V_CVT_I32_F64_e32_19]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_20]], implicit [[V_CVT_I32_F64_e32_21]], implicit [[V_CVT_I32_F64_e32_22]], implicit [[V_CVT_I32_F64_e32_23]], implicit [[V_CVT_I32_F64_e32_24]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_25]], implicit [[V_CVT_I32_F64_e32_26]], implicit [[V_CVT_I32_F64_e32_28]], implicit [[V_CVT_I32_F64_e32_29]], implicit [[V_CVT_I32_F64_e32_30]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_31]], implicit [[V_CVT_I32_F64_e32_32]], implicit [[V_CVT_I32_F64_e32_33]], implicit [[V_CVT_I32_F64_e32_27]]
+ ; GFX908-NEXT: S_ENDPGM 0
+ bb.0:
+ successors: %bb.1
+
+ %0:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 0, implicit $exec, implicit $mode, implicit-def $m0
+ %1:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode, implicit-def $m0
+ %2:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode, implicit-def $m0
+ %3:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode, implicit-def $m0
+ %4:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 4, implicit $exec, implicit $mode, implicit-def $m0
+ %5:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 5, implicit $exec, implicit $mode, implicit-def $m0
+ %6:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 6, implicit $exec, implicit $mode, implicit-def $m0
+ %7:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 7, implicit $exec, implicit $mode, implicit-def $m0
+ %8:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 8, implicit $exec, implicit $mode, implicit-def $m0
+ %9:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 9, implicit $exec, implicit $mode, implicit-def $m0
+ %10:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 10, implicit $exec, implicit $mode, implicit-def $m0
+ %11:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 11, implicit $exec, implicit $mode, implicit-def $m0
+ %12:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 12, implicit $exec, implicit $mode, implicit-def $m0
+ %13:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 13, implicit $exec, implicit $mode, implicit-def $m0
+ %14:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 14, implicit $exec, implicit $mode, implicit-def $m0
+ %15:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 15, implicit $exec, implicit $mode, implicit-def $m0
+ %16:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 16, implicit $exec, implicit $mode, implicit-def $m0
+ %17:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 17, implicit $exec, implicit $mode, implicit-def $m0
+ %18:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 18, implicit $exec, implicit $mode, implicit-def $m0
+ %19:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 19, implicit $exec, implicit $mode, implicit-def $m0
+ %20:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 20, implicit $exec, implicit $mode, implicit-def $m0
+ %21:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 21, implicit $exec, implicit $mode, implicit-def $m0
+ %22:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode, implicit-def $m0
+ %23:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode, implicit-def $m0
+ %24:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
+ %25:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 25, implicit $exec, implicit $mode, implicit-def $m0
+ %26:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 26, implicit $exec, implicit $mode, implicit-def $m0
+ %27:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 27, implicit $exec, implicit $mode
+ %28:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 28, implicit $exec, implicit $mode
+ %29:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 29, implicit $exec, implicit $mode
+ %30:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 30, implicit $exec, implicit $mode
+ %31:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 31, implicit $exec, implicit $mode
+ %32:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 32, implicit $exec, implicit $mode
+ %33:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 33, implicit $exec, implicit $mode
+
+ bb.1:
+
+ S_NOP 0, implicit %0, implicit %1, implicit %2, implicit %3, implicit %4
+ S_NOP 0, implicit %5, implicit %6, implicit %7, implicit %8, implicit %9
+ S_NOP 0, implicit %10, implicit %11, implicit %12, implicit %13, implicit %14
+ S_NOP 0, implicit %15, implicit %16, implicit %17, implicit %18, implicit %19
+ S_NOP 0, implicit %20, implicit %21, implicit %22, implicit %23, implicit %24
+ S_NOP 0, implicit %25, implicit %26, implicit %27, implicit %28, implicit %29
+ S_NOP 0, implicit %30, implicit %31, implicit %32, implicit %33
+
+ S_ENDPGM 0
+...
+---
+name: dont_remat_at_max_occ
+tracksRegLiveness: true
+machineFunctionInfo:
+ isEntryFunction: true
+body: |
+ ; GFX908-LABEL: name: dont_remat_at_max_occ
+ ; GFX908: bb.0:
+ ; GFX908-NEXT: successors: %bb.1(0x80000000)
+ ; GFX908-NEXT: {{ $}}
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 0, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_3:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_4:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 4, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_5:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 5, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_6:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 6, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_7:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 7, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_8:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 8, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_9:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 9, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_10:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 10, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_11:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 11, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_12:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 12, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_13:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 13, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_14:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 14, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_15:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 15, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_16:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 16, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_17:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 17, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_18:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 18, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_19:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 19, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_20:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 20, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_21:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 21, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 25, implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_26:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 26, implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 27, implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 28, implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 29, implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 30, implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 31, implicit $exec, implicit $mode
+ ; GFX908-NEXT: {{ $}}
+ ; GFX908-NEXT: bb.1:
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_]], implicit [[V_CVT_I32_F64_e32_1]], implicit [[V_CVT_I32_F64_e32_2]], implicit [[V_CVT_I32_F64_e32_3]], implicit [[V_CVT_I32_F64_e32_4]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_5]], implicit [[V_CVT_I32_F64_e32_6]], implicit [[V_CVT_I32_F64_e32_7]], implicit [[V_CVT_I32_F64_e32_8]], implicit [[V_CVT_I32_F64_e32_9]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_10]], implicit [[V_CVT_I32_F64_e32_11]], implicit [[V_CVT_I32_F64_e32_12]], implicit [[V_CVT_I32_F64_e32_13]], implicit [[V_CVT_I32_F64_e32_14]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_15]], implicit [[V_CVT_I32_F64_e32_16]], implicit [[V_CVT_I32_F64_e32_17]], implicit [[V_CVT_I32_F64_e32_18]], implicit [[V_CVT_I32_F64_e32_19]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_20]], implicit [[V_CVT_I32_F64_e32_21]], implicit [[V_CVT_I32_F64_e32_22]], implicit [[V_CVT_I32_F64_e32_23]], implicit [[V_CVT_I32_F64_e32_24]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_25]], implicit [[V_CVT_I32_F64_e32_26]], implicit [[V_CVT_I32_F64_e32_27]], implicit [[V_CVT_I32_F64_e32_28]], implicit [[V_CVT_I32_F64_e32_29]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_30]], implicit [[V_CVT_I32_F64_e32_31]]
+ ; GFX908-NEXT: S_ENDPGM 0
+ bb.0:
+ successors: %bb.1
+
+ %0:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 0, implicit $exec, implicit $mode, implicit-def $m0
+ %1:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode, implicit-def $m0
+ %2:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode, implicit-def $m0
+ %3:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode, implicit-def $m0
+ %4:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 4, implicit $exec, implicit $mode, implicit-def $m0
+ %5:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 5, implicit $exec, implicit $mode, implicit-def $m0
+ %6:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 6, implicit $exec, implicit $mode, implicit-def $m0
+ %7:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 7, implicit $exec, implicit $mode, implicit-def $m0
+ %8:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 8, implicit $exec, implicit $mode, implicit-def $m0
+ %9:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 9, implicit $exec, implicit $mode, implicit-def $m0
+ %10:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 10, implicit $exec, implicit $mode, implicit-def $m0
+ %11:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 11, implicit $exec, implicit $mode, implicit-def $m0
+ %12:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 12, implicit $exec, implicit $mode, implicit-def $m0
+ %13:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 13, implicit $exec, implicit $mode, implicit-def $m0
+ %14:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 14, implicit $exec, implicit $mode, implicit-def $m0
+ %15:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 15, implicit $exec, implicit $mode, implicit-def $m0
+ %16:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 16, implicit $exec, implicit $mode, implicit-def $m0
+ %17:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 17, implicit $exec, implicit $mode, implicit-def $m0
+ %18:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 18, implicit $exec, implicit $mode, implicit-def $m0
+ %19:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 19, implicit $exec, implicit $mode, implicit-def $m0
+ %20:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 20, implicit $exec, implicit $mode, implicit-def $m0
+ %21:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 21, implicit $exec, implicit $mode, implicit-def $m0
+ %22:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode, implicit-def $m0
+ %23:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode, implicit-def $m0
+ %24:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode,
+ %25:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 25, implicit $exec, implicit $mode,
+ %26:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 26, implicit $exec, implicit $mode,
+ %27:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 27, implicit $exec, implicit $mode,
+ %28:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 28, implicit $exec, implicit $mode,
+ %29:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 29, implicit $exec, implicit $mode,
+ %30:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 30, implicit $exec, implicit $mode,
+ %31:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 31, implicit $exec, implicit $mode,
+
+ bb.1:
+
+ S_NOP 0, implicit %0, implicit %1, implicit %2, implicit %3, implicit %4
+ S_NOP 0, implicit %5, implicit %6, implicit %7, implicit %8, implicit %9
+ S_NOP 0, implicit %10, implicit %11, implicit %12, implicit %13, implicit %14
+ S_NOP 0, implicit %15, implicit %16, implicit %17, implicit %18, implicit %19
+ S_NOP 0, implicit %20, implicit %21, implicit %22, implicit %23, implicit %24
+ S_NOP 0, implicit %25, implicit %26, implicit %27, implicit %28, implicit %29
+ S_NOP 0, implicit %30, implicit %31
+
+ S_ENDPGM 0
+...
diff --git a/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats-debug.mir b/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats-debug.mir
index 5dc6d2ee8f695..371753801d1a3 100644
--- a/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats-debug.mir
+++ b/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats-debug.mir
@@ -17,7 +17,7 @@ machineFunctionInfo:
isEntryFunction: true
body: |
; DEBUG: Machine code for function sink_and_inc_idx_when_skipping_small_region_1: IsSSA, NoPHIs, TracksLiveness
- ; DEBUG: Retrying function scheduling with improved occupancy of 10 from rematerializing
+ ; DEBUG: [PreRARemat] Retrying function scheduling with new min. occupancy of 10 from rematerializing (original was 9, target was 10)
; DEBUG-NEXT: ********** MI Scheduling **********
; DEBUG-NEXT: sink_and_inc_idx_when_skipping_small_region_1:%bb.2
; DEBUG-NEXT: From: %24:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
@@ -89,7 +89,7 @@ machineFunctionInfo:
isEntryFunction: true
body: |
; DEBUG: Machine code for function sink_and_inc_idx_when_skipping_small_regions_2: IsSSA, NoPHIs, TracksLiveness
- ; DEBUG: Retrying function scheduling with improved occupancy of 10 from rematerializing
+ ; DEBUG: [PreRARemat] Retrying function scheduling with new min. occupancy of 10 from rematerializing (original was 9, target was 10)
; DEBUG-NEXT: ********** MI Scheduling **********
; DEBUG-NEXT: sink_and_inc_idx_when_skipping_small_regions_2:%bb.2
; DEBUG-NEXT: From: %24:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
diff --git a/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats.mir b/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats.mir
index 9991cb1837e01..1170ec0b88124 100644
--- a/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats.mir
+++ b/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats.mir
@@ -725,11 +725,11 @@ body: |
; GFX908-NEXT: [[V_CVT_I32_F64_e32_17:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 17, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_18:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 18, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_19:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 19, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: undef [[S_MOV_B32_:%[0-9]+]].sub1:sreg_64 = S_MOV_B32 0
+ ; GFX908-NEXT: undef [[S_MOV_B32_:%[0-9]+]].sub0:sreg_64 = COPY [[S_LOAD_DWORDX2_IMM]].sub1
; GFX908-NEXT: [[V_CVT_I32_F64_e32_20:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 20, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_21:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 21, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: undef [[S_MOV_B32_:%[0-9]+]].sub1:sreg_64 = S_MOV_B32 0
- ; GFX908-NEXT: undef [[S_MOV_B32_:%[0-9]+]].sub0:sreg_64 = COPY [[S_LOAD_DWORDX2_IMM]].sub1
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.1:
; GFX908-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000)
@@ -1168,18 +1168,18 @@ body: |
; GFX908-NEXT: [[S_MOV_B32_71:%[0-9]+]]:sgpr_32 = S_MOV_B32 70
; GFX908-NEXT: [[S_MOV_B32_72:%[0-9]+]]:sgpr_32 = S_MOV_B32 71
; GFX908-NEXT: [[V_CVT_I32_F64_e32_20:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 20, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[S_MOV_B32_73:%[0-9]+]]:sgpr_32 = S_MOV_B32 72
; GFX908-NEXT: [[V_CVT_I32_F64_e32_21:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 21, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[S_MOV_B32_74:%[0-9]+]]:sgpr_32 = S_MOV_B32 73
; GFX908-NEXT: [[V_CVT_I32_F64_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[S_MOV_B32_75:%[0-9]+]]:sgpr_32 = S_MOV_B32 74
; GFX908-NEXT: [[V_CVT_I32_F64_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[S_MOV_B32_76:%[0-9]+]]:sgpr_32 = S_MOV_B32 75
; GFX908-NEXT: [[V_CVT_I32_F64_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[S_MOV_B32_77:%[0-9]+]]:sgpr_32 = S_MOV_B32 76
+ ; GFX908-NEXT: [[S_MOV_B32_73:%[0-9]+]]:sgpr_32 = S_MOV_B32 72
; GFX908-NEXT: [[V_CVT_I32_F64_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 25, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[S_MOV_B32_78:%[0-9]+]]:sgpr_32 = S_MOV_B32 77
+ ; GFX908-NEXT: [[S_MOV_B32_74:%[0-9]+]]:sgpr_32 = S_MOV_B32 73
; GFX908-NEXT: [[V_CVT_I32_F64_e32_26:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 26, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[S_MOV_B32_75:%[0-9]+]]:sgpr_32 = S_MOV_B32 74
+ ; GFX908-NEXT: [[S_MOV_B32_76:%[0-9]+]]:sgpr_32 = S_MOV_B32 75
+ ; GFX908-NEXT: [[S_MOV_B32_77:%[0-9]+]]:sgpr_32 = S_MOV_B32 76
+ ; GFX908-NEXT: [[S_MOV_B32_78:%[0-9]+]]:sgpr_32 = S_MOV_B32 77
; GFX908-NEXT: [[S_MOV_B32_79:%[0-9]+]]:sgpr_32 = S_MOV_B32 78
; GFX908-NEXT: [[S_MOV_B32_80:%[0-9]+]]:sgpr_32 = S_MOV_B32 79
; GFX908-NEXT: [[S_MOV_B32_81:%[0-9]+]]:sgpr_32 = S_MOV_B32 80
@@ -2956,11 +2956,11 @@ body: |
; GFX908-NEXT: [[V_CVT_I32_F64_e32_21:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 21, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: undef [[S_MOV_B32_:%[0-9]+]].sub1:sreg_64 = S_MOV_B32 0
+ ; GFX908-NEXT: undef [[S_MOV_B32_:%[0-9]+]].sub0:sreg_64 = COPY [[S_LOAD_DWORDX2_IMM]].sub1
; GFX908-NEXT: [[V_CVT_I32_F64_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 25, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_26:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 26, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: undef [[S_MOV_B32_:%[0-9]+]].sub1:sreg_64 = S_MOV_B32 0
- ; GFX908-NEXT: undef [[S_MOV_B32_:%[0-9]+]].sub0:sreg_64 = COPY [[S_LOAD_DWORDX2_IMM]].sub1
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.1:
; GFX908-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000)
@@ -3134,11 +3134,11 @@ body: |
; GFX908-NEXT: [[V_CVT_I32_F64_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 25, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_26:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 26, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 27, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: undef [[S_MOV_B32_:%[0-9]+]].sub1:sreg_64 = S_MOV_B32 0
+ ; GFX908-NEXT: undef [[S_MOV_B32_:%[0-9]+]].sub0:sreg_64 = COPY [[S_LOAD_DWORDX2_IMM]].sub1
; GFX908-NEXT: [[V_CVT_I32_F64_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 28, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 29, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 30, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: undef [[S_MOV_B32_:%[0-9]+]].sub1:sreg_64 = S_MOV_B32 0
- ; GFX908-NEXT: undef [[S_MOV_B32_:%[0-9]+]].sub0:sreg_64 = COPY [[S_LOAD_DWORDX2_IMM]].sub1
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.1:
; GFX908-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000)
@@ -3324,11 +3324,11 @@ body: |
; GFX908-NEXT: [[V_CVT_I32_F64_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 29, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 30, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 31, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: undef [[S_MOV_B32_:%[0-9]+]].sub1:sreg_64 = S_MOV_B32 0
+ ; GFX908-NEXT: undef [[S_MOV_B32_:%[0-9]+]].sub0:sreg_64 = COPY [[S_LOAD_DWORDX2_IMM]].sub1
; GFX908-NEXT: [[V_CVT_I32_F64_e32_32:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 32, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_33:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 33, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_34:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 34, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: undef [[S_MOV_B32_:%[0-9]+]].sub1:sreg_64 = S_MOV_B32 0
- ; GFX908-NEXT: undef [[S_MOV_B32_:%[0-9]+]].sub0:sreg_64 = COPY [[S_LOAD_DWORDX2_IMM]].sub1
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.1:
; GFX908-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000)
@@ -4920,6 +4920,604 @@ body: |
S_ENDPGM 0
...
---
+name: test_occ_1_sink_for_spill
+tracksRegLiveness: true
+machineFunctionInfo:
+ isEntryFunction: true
+body: |
+ ; GFX908-LABEL: name: test_occ_1_sink_for_spill
+ ; GFX908: bb.0:
+ ; GFX908-NEXT: successors: %bb.1(0x80000000)
+ ; GFX908-NEXT: {{ $}}
+ ; GFX908-NEXT: bb.1:
+ ; GFX908-NEXT: successors: %bb.2(0x80000000)
+ ; GFX908-NEXT: {{ $}}
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 0, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_3:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_4:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 4, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_5:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 5, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_6:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 6, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_7:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 7, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_8:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 8, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_9:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 9, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_10:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 10, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_11:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 11, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_12:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 12, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_13:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 13, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_14:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 14, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_15:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 15, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_16:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 16, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_17:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 17, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_18:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 18, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_19:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 19, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_20:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 20, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_21:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 21, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 25, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_26:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 26, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 27, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 28, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 29, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 30, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 31, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_32:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 32, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_33:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 33, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_34:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 34, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_35:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 35, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_36:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 36, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_37:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 37, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_38:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 38, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_39:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 39, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_40:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 40, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_41:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 41, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_42:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 42, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_43:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 43, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_44:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 44, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_45:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 45, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_46:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 46, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_47:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 47, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_48:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 48, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_49:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 49, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_50:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 50, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_51:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 51, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_52:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 52, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_53:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 53, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_54:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 54, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_55:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 55, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_56:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 56, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_57:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 57, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_58:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 58, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_59:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 59, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_60:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 60, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_61:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 61, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_62:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 62, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_63:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 63, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_64:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 64, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_65:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 65, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_66:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 66, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_67:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 67, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_68:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 68, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_69:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 69, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_70:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 70, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_71:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 71, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_72:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 72, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_73:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 73, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_74:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 74, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_75:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 75, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_76:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 76, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_77:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 77, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_78:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 78, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_79:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 79, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_80:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 80, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_81:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 81, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_82:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 82, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_83:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 83, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_84:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 84, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_85:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 85, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_86:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 86, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_87:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 87, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_88:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 88, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_89:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 89, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_90:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 90, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_91:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 91, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_92:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 92, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_93:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 93, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_94:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 94, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_95:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 95, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_96:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 96, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_97:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 97, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_98:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 98, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_99:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 99, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_100:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 100, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_101:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 101, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_102:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 102, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_103:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 103, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_104:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 104, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_105:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 105, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_106:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 106, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_107:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 107, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_108:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 108, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_109:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 109, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_110:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 110, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_111:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 111, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_112:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 112, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_113:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 113, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_114:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 114, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_115:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 115, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_116:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 116, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_117:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 117, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_118:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 118, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_119:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 119, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_120:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 120, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_121:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 121, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_122:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 122, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_123:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 123, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_124:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 124, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_125:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 125, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_126:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 126, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_127:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 127, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_128:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 128, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_129:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 129, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_130:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 130, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_131:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 131, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_132:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 132, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_133:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 133, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_134:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 134, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_135:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 135, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_136:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 136, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_137:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 137, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_138:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 138, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_139:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 139, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_140:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 140, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_141:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 141, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_142:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 142, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_143:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 143, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_144:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 144, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_145:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 145, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_146:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 146, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_147:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 147, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_148:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 148, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_149:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 149, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_150:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 150, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_151:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 151, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_152:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 152, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_153:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 153, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_154:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 154, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_155:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 155, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_156:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 156, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_157:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 157, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_158:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 158, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_159:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 159, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_160:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 160, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_161:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 161, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_162:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 162, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_163:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 163, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_164:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 164, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_165:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 165, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_166:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 166, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_167:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 167, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_168:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 168, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_169:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 169, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_170:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 170, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_171:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 171, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_172:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 172, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_173:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 173, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_174:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 174, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_175:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 175, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_176:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 176, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_177:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 177, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_178:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 178, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_179:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 179, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_180:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 180, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_181:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 181, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_182:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 182, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_183:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 183, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_184:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 184, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_185:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 185, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_186:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 186, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_187:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 187, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_188:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 188, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_189:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 189, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_190:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 190, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_191:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 191, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_192:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 192, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_193:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 193, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_194:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 194, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_195:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 195, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_196:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 196, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_197:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 197, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_198:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 198, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_199:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 199, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_200:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 200, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_201:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 201, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_202:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 202, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_203:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 203, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_204:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 204, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_205:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 205, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_206:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 206, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_207:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 207, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_208:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 208, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_209:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 209, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_210:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 210, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_211:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 211, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_212:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 212, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_213:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 213, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_214:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 214, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_215:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 215, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_216:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 216, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_217:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 217, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_218:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 218, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_219:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 219, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_220:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 220, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_221:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 221, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_222:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 222, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_223:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 223, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_224:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 224, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_225:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 225, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_226:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 226, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_227:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 227, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_228:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 228, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_229:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 229, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_230:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 230, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_231:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 231, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_232:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 232, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_233:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 233, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_234:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 234, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_235:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 235, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_236:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 236, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_237:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 237, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_238:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 238, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_239:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 239, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_240:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 240, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_241:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 241, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_242:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 242, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_243:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 243, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_244:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 244, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_245:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 245, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_246:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 246, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_247:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 247, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_248:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 248, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_249:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 249, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_250:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 250, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_251:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 251, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_252:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 252, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_253:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 253, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_254:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 254, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: {{ $}}
+ ; GFX908-NEXT: bb.2:
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_]], implicit [[V_CVT_I32_F64_e32_1]], implicit [[V_CVT_I32_F64_e32_2]], implicit [[V_CVT_I32_F64_e32_3]], implicit [[V_CVT_I32_F64_e32_4]], implicit [[V_CVT_I32_F64_e32_5]], implicit [[V_CVT_I32_F64_e32_6]], implicit [[V_CVT_I32_F64_e32_7]], implicit [[V_CVT_I32_F64_e32_8]], implicit [[V_CVT_I32_F64_e32_9]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_10]], implicit [[V_CVT_I32_F64_e32_11]], implicit [[V_CVT_I32_F64_e32_12]], implicit [[V_CVT_I32_F64_e32_13]], implicit [[V_CVT_I32_F64_e32_14]], implicit [[V_CVT_I32_F64_e32_15]], implicit [[V_CVT_I32_F64_e32_16]], implicit [[V_CVT_I32_F64_e32_17]], implicit [[V_CVT_I32_F64_e32_18]], implicit [[V_CVT_I32_F64_e32_19]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_20]], implicit [[V_CVT_I32_F64_e32_21]], implicit [[V_CVT_I32_F64_e32_22]], implicit [[V_CVT_I32_F64_e32_23]], implicit [[V_CVT_I32_F64_e32_24]], implicit [[V_CVT_I32_F64_e32_25]], implicit [[V_CVT_I32_F64_e32_26]], implicit [[V_CVT_I32_F64_e32_27]], implicit [[V_CVT_I32_F64_e32_28]], implicit [[V_CVT_I32_F64_e32_29]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_30]], implicit [[V_CVT_I32_F64_e32_31]], implicit [[V_CVT_I32_F64_e32_32]], implicit [[V_CVT_I32_F64_e32_33]], implicit [[V_CVT_I32_F64_e32_34]], implicit [[V_CVT_I32_F64_e32_35]], implicit [[V_CVT_I32_F64_e32_36]], implicit [[V_CVT_I32_F64_e32_37]], implicit [[V_CVT_I32_F64_e32_38]], implicit [[V_CVT_I32_F64_e32_39]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_40]], implicit [[V_CVT_I32_F64_e32_41]], implicit [[V_CVT_I32_F64_e32_42]], implicit [[V_CVT_I32_F64_e32_43]], implicit [[V_CVT_I32_F64_e32_44]], implicit [[V_CVT_I32_F64_e32_45]], implicit [[V_CVT_I32_F64_e32_46]], implicit [[V_CVT_I32_F64_e32_47]], implicit [[V_CVT_I32_F64_e32_48]], implicit [[V_CVT_I32_F64_e32_49]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_50]], implicit [[V_CVT_I32_F64_e32_51]], implicit [[V_CVT_I32_F64_e32_52]], implicit [[V_CVT_I32_F64_e32_53]], implicit [[V_CVT_I32_F64_e32_54]], implicit [[V_CVT_I32_F64_e32_55]], implicit [[V_CVT_I32_F64_e32_56]], implicit [[V_CVT_I32_F64_e32_57]], implicit [[V_CVT_I32_F64_e32_58]], implicit [[V_CVT_I32_F64_e32_59]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_60]], implicit [[V_CVT_I32_F64_e32_61]], implicit [[V_CVT_I32_F64_e32_62]], implicit [[V_CVT_I32_F64_e32_63]], implicit [[V_CVT_I32_F64_e32_64]], implicit [[V_CVT_I32_F64_e32_65]], implicit [[V_CVT_I32_F64_e32_66]], implicit [[V_CVT_I32_F64_e32_67]], implicit [[V_CVT_I32_F64_e32_68]], implicit [[V_CVT_I32_F64_e32_69]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_70]], implicit [[V_CVT_I32_F64_e32_71]], implicit [[V_CVT_I32_F64_e32_72]], implicit [[V_CVT_I32_F64_e32_73]], implicit [[V_CVT_I32_F64_e32_74]], implicit [[V_CVT_I32_F64_e32_75]], implicit [[V_CVT_I32_F64_e32_76]], implicit [[V_CVT_I32_F64_e32_77]], implicit [[V_CVT_I32_F64_e32_78]], implicit [[V_CVT_I32_F64_e32_79]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_80]], implicit [[V_CVT_I32_F64_e32_81]], implicit [[V_CVT_I32_F64_e32_82]], implicit [[V_CVT_I32_F64_e32_83]], implicit [[V_CVT_I32_F64_e32_84]], implicit [[V_CVT_I32_F64_e32_85]], implicit [[V_CVT_I32_F64_e32_86]], implicit [[V_CVT_I32_F64_e32_87]], implicit [[V_CVT_I32_F64_e32_88]], implicit [[V_CVT_I32_F64_e32_89]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_90]], implicit [[V_CVT_I32_F64_e32_91]], implicit [[V_CVT_I32_F64_e32_92]], implicit [[V_CVT_I32_F64_e32_93]], implicit [[V_CVT_I32_F64_e32_94]], implicit [[V_CVT_I32_F64_e32_95]], implicit [[V_CVT_I32_F64_e32_96]], implicit [[V_CVT_I32_F64_e32_97]], implicit [[V_CVT_I32_F64_e32_98]], implicit [[V_CVT_I32_F64_e32_99]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_100]], implicit [[V_CVT_I32_F64_e32_101]], implicit [[V_CVT_I32_F64_e32_102]], implicit [[V_CVT_I32_F64_e32_103]], implicit [[V_CVT_I32_F64_e32_104]], implicit [[V_CVT_I32_F64_e32_105]], implicit [[V_CVT_I32_F64_e32_106]], implicit [[V_CVT_I32_F64_e32_107]], implicit [[V_CVT_I32_F64_e32_108]], implicit [[V_CVT_I32_F64_e32_109]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_110]], implicit [[V_CVT_I32_F64_e32_111]], implicit [[V_CVT_I32_F64_e32_112]], implicit [[V_CVT_I32_F64_e32_113]], implicit [[V_CVT_I32_F64_e32_114]], implicit [[V_CVT_I32_F64_e32_115]], implicit [[V_CVT_I32_F64_e32_116]], implicit [[V_CVT_I32_F64_e32_117]], implicit [[V_CVT_I32_F64_e32_118]], implicit [[V_CVT_I32_F64_e32_119]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_120]], implicit [[V_CVT_I32_F64_e32_121]], implicit [[V_CVT_I32_F64_e32_122]], implicit [[V_CVT_I32_F64_e32_123]], implicit [[V_CVT_I32_F64_e32_124]], implicit [[V_CVT_I32_F64_e32_125]], implicit [[V_CVT_I32_F64_e32_126]], implicit [[V_CVT_I32_F64_e32_127]], implicit [[V_CVT_I32_F64_e32_128]], implicit [[V_CVT_I32_F64_e32_129]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_130]], implicit [[V_CVT_I32_F64_e32_131]], implicit [[V_CVT_I32_F64_e32_132]], implicit [[V_CVT_I32_F64_e32_133]], implicit [[V_CVT_I32_F64_e32_134]], implicit [[V_CVT_I32_F64_e32_135]], implicit [[V_CVT_I32_F64_e32_136]], implicit [[V_CVT_I32_F64_e32_137]], implicit [[V_CVT_I32_F64_e32_138]], implicit [[V_CVT_I32_F64_e32_139]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_140]], implicit [[V_CVT_I32_F64_e32_141]], implicit [[V_CVT_I32_F64_e32_142]], implicit [[V_CVT_I32_F64_e32_143]], implicit [[V_CVT_I32_F64_e32_144]], implicit [[V_CVT_I32_F64_e32_145]], implicit [[V_CVT_I32_F64_e32_146]], implicit [[V_CVT_I32_F64_e32_147]], implicit [[V_CVT_I32_F64_e32_148]], implicit [[V_CVT_I32_F64_e32_149]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_150]], implicit [[V_CVT_I32_F64_e32_151]], implicit [[V_CVT_I32_F64_e32_152]], implicit [[V_CVT_I32_F64_e32_153]], implicit [[V_CVT_I32_F64_e32_154]], implicit [[V_CVT_I32_F64_e32_155]], implicit [[V_CVT_I32_F64_e32_156]], implicit [[V_CVT_I32_F64_e32_157]], implicit [[V_CVT_I32_F64_e32_158]], implicit [[V_CVT_I32_F64_e32_159]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_160]], implicit [[V_CVT_I32_F64_e32_161]], implicit [[V_CVT_I32_F64_e32_162]], implicit [[V_CVT_I32_F64_e32_163]], implicit [[V_CVT_I32_F64_e32_164]], implicit [[V_CVT_I32_F64_e32_165]], implicit [[V_CVT_I32_F64_e32_166]], implicit [[V_CVT_I32_F64_e32_167]], implicit [[V_CVT_I32_F64_e32_168]], implicit [[V_CVT_I32_F64_e32_169]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_170]], implicit [[V_CVT_I32_F64_e32_171]], implicit [[V_CVT_I32_F64_e32_172]], implicit [[V_CVT_I32_F64_e32_173]], implicit [[V_CVT_I32_F64_e32_174]], implicit [[V_CVT_I32_F64_e32_175]], implicit [[V_CVT_I32_F64_e32_176]], implicit [[V_CVT_I32_F64_e32_177]], implicit [[V_CVT_I32_F64_e32_178]], implicit [[V_CVT_I32_F64_e32_179]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_180]], implicit [[V_CVT_I32_F64_e32_181]], implicit [[V_CVT_I32_F64_e32_182]], implicit [[V_CVT_I32_F64_e32_183]], implicit [[V_CVT_I32_F64_e32_184]], implicit [[V_CVT_I32_F64_e32_185]], implicit [[V_CVT_I32_F64_e32_186]], implicit [[V_CVT_I32_F64_e32_187]], implicit [[V_CVT_I32_F64_e32_188]], implicit [[V_CVT_I32_F64_e32_189]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_190]], implicit [[V_CVT_I32_F64_e32_191]], implicit [[V_CVT_I32_F64_e32_192]], implicit [[V_CVT_I32_F64_e32_193]], implicit [[V_CVT_I32_F64_e32_194]], implicit [[V_CVT_I32_F64_e32_195]], implicit [[V_CVT_I32_F64_e32_196]], implicit [[V_CVT_I32_F64_e32_197]], implicit [[V_CVT_I32_F64_e32_198]], implicit [[V_CVT_I32_F64_e32_199]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_200]], implicit [[V_CVT_I32_F64_e32_201]], implicit [[V_CVT_I32_F64_e32_202]], implicit [[V_CVT_I32_F64_e32_203]], implicit [[V_CVT_I32_F64_e32_204]], implicit [[V_CVT_I32_F64_e32_205]], implicit [[V_CVT_I32_F64_e32_206]], implicit [[V_CVT_I32_F64_e32_207]], implicit [[V_CVT_I32_F64_e32_208]], implicit [[V_CVT_I32_F64_e32_209]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_210]], implicit [[V_CVT_I32_F64_e32_211]], implicit [[V_CVT_I32_F64_e32_212]], implicit [[V_CVT_I32_F64_e32_213]], implicit [[V_CVT_I32_F64_e32_214]], implicit [[V_CVT_I32_F64_e32_215]], implicit [[V_CVT_I32_F64_e32_216]], implicit [[V_CVT_I32_F64_e32_217]], implicit [[V_CVT_I32_F64_e32_218]], implicit [[V_CVT_I32_F64_e32_219]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_220]], implicit [[V_CVT_I32_F64_e32_221]], implicit [[V_CVT_I32_F64_e32_222]], implicit [[V_CVT_I32_F64_e32_223]], implicit [[V_CVT_I32_F64_e32_224]], implicit [[V_CVT_I32_F64_e32_225]], implicit [[V_CVT_I32_F64_e32_226]], implicit [[V_CVT_I32_F64_e32_227]], implicit [[V_CVT_I32_F64_e32_228]], implicit [[V_CVT_I32_F64_e32_229]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_230]], implicit [[V_CVT_I32_F64_e32_231]], implicit [[V_CVT_I32_F64_e32_232]], implicit [[V_CVT_I32_F64_e32_233]], implicit [[V_CVT_I32_F64_e32_234]], implicit [[V_CVT_I32_F64_e32_235]], implicit [[V_CVT_I32_F64_e32_236]], implicit [[V_CVT_I32_F64_e32_237]], implicit [[V_CVT_I32_F64_e32_238]], implicit [[V_CVT_I32_F64_e32_239]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_240]], implicit [[V_CVT_I32_F64_e32_241]], implicit [[V_CVT_I32_F64_e32_242]], implicit [[V_CVT_I32_F64_e32_243]], implicit [[V_CVT_I32_F64_e32_244]], implicit [[V_CVT_I32_F64_e32_245]], implicit [[V_CVT_I32_F64_e32_246]], implicit [[V_CVT_I32_F64_e32_247]], implicit [[V_CVT_I32_F64_e32_248]], implicit [[V_CVT_I32_F64_e32_249]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_250]], implicit [[V_CVT_I32_F64_e32_251]], implicit [[V_CVT_I32_F64_e32_252]], implicit [[V_CVT_I32_F64_e32_253]], implicit [[V_CVT_I32_F64_e32_254]]
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_255:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 255, implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_256:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 256, implicit $exec, implicit $mode
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_255]], implicit [[V_CVT_I32_F64_e32_256]]
+ ; GFX908-NEXT: S_ENDPGM 0
+ bb.0:
+ successors: %bb.1
+
+ %255:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 255, implicit $exec, implicit $mode
+ %256:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 256, implicit $exec, implicit $mode
+
+ bb.1:
+ successors: %bb.2
+
+ %0:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 0, implicit $exec, implicit $mode, implicit-def $m0
+ %1:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode, implicit-def $m0
+ %2:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode, implicit-def $m0
+ %3:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode, implicit-def $m0
+ %4:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 4, implicit $exec, implicit $mode, implicit-def $m0
+ %5:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 5, implicit $exec, implicit $mode, implicit-def $m0
+ %6:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 6, implicit $exec, implicit $mode, implicit-def $m0
+ %7:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 7, implicit $exec, implicit $mode, implicit-def $m0
+ %8:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 8, implicit $exec, implicit $mode, implicit-def $m0
+ %9:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 9, implicit $exec, implicit $mode, implicit-def $m0
+ %10:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 10, implicit $exec, implicit $mode, implicit-def $m0
+ %11:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 11, implicit $exec, implicit $mode, implicit-def $m0
+ %12:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 12, implicit $exec, implicit $mode, implicit-def $m0
+ %13:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 13, implicit $exec, implicit $mode, implicit-def $m0
+ %14:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 14, implicit $exec, implicit $mode, implicit-def $m0
+ %15:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 15, implicit $exec, implicit $mode, implicit-def $m0
+ %16:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 16, implicit $exec, implicit $mode, implicit-def $m0
+ %17:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 17, implicit $exec, implicit $mode, implicit-def $m0
+ %18:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 18, implicit $exec, implicit $mode, implicit-def $m0
+ %19:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 19, implicit $exec, implicit $mode, implicit-def $m0
+ %20:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 20, implicit $exec, implicit $mode, implicit-def $m0
+ %21:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 21, implicit $exec, implicit $mode, implicit-def $m0
+ %22:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode, implicit-def $m0
+ %23:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode, implicit-def $m0
+ %24:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
+ %25:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 25, implicit $exec, implicit $mode, implicit-def $m0
+ %26:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 26, implicit $exec, implicit $mode, implicit-def $m0
+ %27:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 27, implicit $exec, implicit $mode, implicit-def $m0
+ %28:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 28, implicit $exec, implicit $mode, implicit-def $m0
+ %29:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 29, implicit $exec, implicit $mode, implicit-def $m0
+ %30:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 30, implicit $exec, implicit $mode, implicit-def $m0
+ %31:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 31, implicit $exec, implicit $mode, implicit-def $m0
+ %32:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 32, implicit $exec, implicit $mode, implicit-def $m0
+ %33:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 33, implicit $exec, implicit $mode, implicit-def $m0
+ %34:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 34, implicit $exec, implicit $mode, implicit-def $m0
+ %35:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 35, implicit $exec, implicit $mode, implicit-def $m0
+ %36:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 36, implicit $exec, implicit $mode, implicit-def $m0
+ %37:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 37, implicit $exec, implicit $mode, implicit-def $m0
+ %38:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 38, implicit $exec, implicit $mode, implicit-def $m0
+ %39:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 39, implicit $exec, implicit $mode, implicit-def $m0
+ %40:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 40, implicit $exec, implicit $mode, implicit-def $m0
+ %41:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 41, implicit $exec, implicit $mode, implicit-def $m0
+ %42:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 42, implicit $exec, implicit $mode, implicit-def $m0
+ %43:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 43, implicit $exec, implicit $mode, implicit-def $m0
+ %44:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 44, implicit $exec, implicit $mode, implicit-def $m0
+ %45:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 45, implicit $exec, implicit $mode, implicit-def $m0
+ %46:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 46, implicit $exec, implicit $mode, implicit-def $m0
+ %47:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 47, implicit $exec, implicit $mode, implicit-def $m0
+ %48:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 48, implicit $exec, implicit $mode, implicit-def $m0
+ %49:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 49, implicit $exec, implicit $mode, implicit-def $m0
+ %50:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 50, implicit $exec, implicit $mode, implicit-def $m0
+ %51:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 51, implicit $exec, implicit $mode, implicit-def $m0
+ %52:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 52, implicit $exec, implicit $mode, implicit-def $m0
+ %53:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 53, implicit $exec, implicit $mode, implicit-def $m0
+ %54:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 54, implicit $exec, implicit $mode, implicit-def $m0
+ %55:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 55, implicit $exec, implicit $mode, implicit-def $m0
+ %56:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 56, implicit $exec, implicit $mode, implicit-def $m0
+ %57:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 57, implicit $exec, implicit $mode, implicit-def $m0
+ %58:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 58, implicit $exec, implicit $mode, implicit-def $m0
+ %59:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 59, implicit $exec, implicit $mode, implicit-def $m0
+ %60:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 60, implicit $exec, implicit $mode, implicit-def $m0
+ %61:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 61, implicit $exec, implicit $mode, implicit-def $m0
+ %62:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 62, implicit $exec, implicit $mode, implicit-def $m0
+ %63:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 63, implicit $exec, implicit $mode, implicit-def $m0
+ %64:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 64, implicit $exec, implicit $mode, implicit-def $m0
+ %65:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 65, implicit $exec, implicit $mode, implicit-def $m0
+ %66:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 66, implicit $exec, implicit $mode, implicit-def $m0
+ %67:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 67, implicit $exec, implicit $mode, implicit-def $m0
+ %68:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 68, implicit $exec, implicit $mode, implicit-def $m0
+ %69:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 69, implicit $exec, implicit $mode, implicit-def $m0
+ %70:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 70, implicit $exec, implicit $mode, implicit-def $m0
+ %71:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 71, implicit $exec, implicit $mode, implicit-def $m0
+ %72:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 72, implicit $exec, implicit $mode, implicit-def $m0
+ %73:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 73, implicit $exec, implicit $mode, implicit-def $m0
+ %74:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 74, implicit $exec, implicit $mode, implicit-def $m0
+ %75:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 75, implicit $exec, implicit $mode, implicit-def $m0
+ %76:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 76, implicit $exec, implicit $mode, implicit-def $m0
+ %77:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 77, implicit $exec, implicit $mode, implicit-def $m0
+ %78:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 78, implicit $exec, implicit $mode, implicit-def $m0
+ %79:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 79, implicit $exec, implicit $mode, implicit-def $m0
+ %80:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 80, implicit $exec, implicit $mode, implicit-def $m0
+ %81:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 81, implicit $exec, implicit $mode, implicit-def $m0
+ %82:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 82, implicit $exec, implicit $mode, implicit-def $m0
+ %83:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 83, implicit $exec, implicit $mode, implicit-def $m0
+ %84:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 84, implicit $exec, implicit $mode, implicit-def $m0
+ %85:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 85, implicit $exec, implicit $mode, implicit-def $m0
+ %86:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 86, implicit $exec, implicit $mode, implicit-def $m0
+ %87:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 87, implicit $exec, implicit $mode, implicit-def $m0
+ %88:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 88, implicit $exec, implicit $mode, implicit-def $m0
+ %89:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 89, implicit $exec, implicit $mode, implicit-def $m0
+ %90:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 90, implicit $exec, implicit $mode, implicit-def $m0
+ %91:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 91, implicit $exec, implicit $mode, implicit-def $m0
+ %92:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 92, implicit $exec, implicit $mode, implicit-def $m0
+ %93:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 93, implicit $exec, implicit $mode, implicit-def $m0
+ %94:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 94, implicit $exec, implicit $mode, implicit-def $m0
+ %95:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 95, implicit $exec, implicit $mode, implicit-def $m0
+ %96:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 96, implicit $exec, implicit $mode, implicit-def $m0
+ %97:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 97, implicit $exec, implicit $mode, implicit-def $m0
+ %98:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 98, implicit $exec, implicit $mode, implicit-def $m0
+ %99:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 99, implicit $exec, implicit $mode, implicit-def $m0
+ %100:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 100, implicit $exec, implicit $mode, implicit-def $m0
+ %101:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 101, implicit $exec, implicit $mode, implicit-def $m0
+ %102:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 102, implicit $exec, implicit $mode, implicit-def $m0
+ %103:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 103, implicit $exec, implicit $mode, implicit-def $m0
+ %104:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 104, implicit $exec, implicit $mode, implicit-def $m0
+ %105:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 105, implicit $exec, implicit $mode, implicit-def $m0
+ %106:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 106, implicit $exec, implicit $mode, implicit-def $m0
+ %107:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 107, implicit $exec, implicit $mode, implicit-def $m0
+ %108:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 108, implicit $exec, implicit $mode, implicit-def $m0
+ %109:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 109, implicit $exec, implicit $mode, implicit-def $m0
+ %110:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 110, implicit $exec, implicit $mode, implicit-def $m0
+ %111:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 111, implicit $exec, implicit $mode, implicit-def $m0
+ %112:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 112, implicit $exec, implicit $mode, implicit-def $m0
+ %113:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 113, implicit $exec, implicit $mode, implicit-def $m0
+ %114:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 114, implicit $exec, implicit $mode, implicit-def $m0
+ %115:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 115, implicit $exec, implicit $mode, implicit-def $m0
+ %116:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 116, implicit $exec, implicit $mode, implicit-def $m0
+ %117:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 117, implicit $exec, implicit $mode, implicit-def $m0
+ %118:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 118, implicit $exec, implicit $mode, implicit-def $m0
+ %119:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 119, implicit $exec, implicit $mode, implicit-def $m0
+ %120:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 120, implicit $exec, implicit $mode, implicit-def $m0
+ %121:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 121, implicit $exec, implicit $mode, implicit-def $m0
+ %122:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 122, implicit $exec, implicit $mode, implicit-def $m0
+ %123:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 123, implicit $exec, implicit $mode, implicit-def $m0
+ %124:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 124, implicit $exec, implicit $mode, implicit-def $m0
+ %125:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 125, implicit $exec, implicit $mode, implicit-def $m0
+ %126:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 126, implicit $exec, implicit $mode, implicit-def $m0
+ %127:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 127, implicit $exec, implicit $mode, implicit-def $m0
+ %128:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 128, implicit $exec, implicit $mode, implicit-def $m0
+ %129:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 129, implicit $exec, implicit $mode, implicit-def $m0
+ %130:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 130, implicit $exec, implicit $mode, implicit-def $m0
+ %131:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 131, implicit $exec, implicit $mode, implicit-def $m0
+ %132:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 132, implicit $exec, implicit $mode, implicit-def $m0
+ %133:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 133, implicit $exec, implicit $mode, implicit-def $m0
+ %134:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 134, implicit $exec, implicit $mode, implicit-def $m0
+ %135:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 135, implicit $exec, implicit $mode, implicit-def $m0
+ %136:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 136, implicit $exec, implicit $mode, implicit-def $m0
+ %137:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 137, implicit $exec, implicit $mode, implicit-def $m0
+ %138:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 138, implicit $exec, implicit $mode, implicit-def $m0
+ %139:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 139, implicit $exec, implicit $mode, implicit-def $m0
+ %140:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 140, implicit $exec, implicit $mode, implicit-def $m0
+ %141:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 141, implicit $exec, implicit $mode, implicit-def $m0
+ %142:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 142, implicit $exec, implicit $mode, implicit-def $m0
+ %143:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 143, implicit $exec, implicit $mode, implicit-def $m0
+ %144:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 144, implicit $exec, implicit $mode, implicit-def $m0
+ %145:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 145, implicit $exec, implicit $mode, implicit-def $m0
+ %146:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 146, implicit $exec, implicit $mode, implicit-def $m0
+ %147:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 147, implicit $exec, implicit $mode, implicit-def $m0
+ %148:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 148, implicit $exec, implicit $mode, implicit-def $m0
+ %149:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 149, implicit $exec, implicit $mode, implicit-def $m0
+ %150:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 150, implicit $exec, implicit $mode, implicit-def $m0
+ %151:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 151, implicit $exec, implicit $mode, implicit-def $m0
+ %152:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 152, implicit $exec, implicit $mode, implicit-def $m0
+ %153:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 153, implicit $exec, implicit $mode, implicit-def $m0
+ %154:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 154, implicit $exec, implicit $mode, implicit-def $m0
+ %155:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 155, implicit $exec, implicit $mode, implicit-def $m0
+ %156:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 156, implicit $exec, implicit $mode, implicit-def $m0
+ %157:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 157, implicit $exec, implicit $mode, implicit-def $m0
+ %158:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 158, implicit $exec, implicit $mode, implicit-def $m0
+ %159:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 159, implicit $exec, implicit $mode, implicit-def $m0
+ %160:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 160, implicit $exec, implicit $mode, implicit-def $m0
+ %161:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 161, implicit $exec, implicit $mode, implicit-def $m0
+ %162:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 162, implicit $exec, implicit $mode, implicit-def $m0
+ %163:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 163, implicit $exec, implicit $mode, implicit-def $m0
+ %164:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 164, implicit $exec, implicit $mode, implicit-def $m0
+ %165:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 165, implicit $exec, implicit $mode, implicit-def $m0
+ %166:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 166, implicit $exec, implicit $mode, implicit-def $m0
+ %167:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 167, implicit $exec, implicit $mode, implicit-def $m0
+ %168:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 168, implicit $exec, implicit $mode, implicit-def $m0
+ %169:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 169, implicit $exec, implicit $mode, implicit-def $m0
+ %170:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 170, implicit $exec, implicit $mode, implicit-def $m0
+ %171:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 171, implicit $exec, implicit $mode, implicit-def $m0
+ %172:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 172, implicit $exec, implicit $mode, implicit-def $m0
+ %173:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 173, implicit $exec, implicit $mode, implicit-def $m0
+ %174:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 174, implicit $exec, implicit $mode, implicit-def $m0
+ %175:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 175, implicit $exec, implicit $mode, implicit-def $m0
+ %176:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 176, implicit $exec, implicit $mode, implicit-def $m0
+ %177:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 177, implicit $exec, implicit $mode, implicit-def $m0
+ %178:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 178, implicit $exec, implicit $mode, implicit-def $m0
+ %179:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 179, implicit $exec, implicit $mode, implicit-def $m0
+ %180:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 180, implicit $exec, implicit $mode, implicit-def $m0
+ %181:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 181, implicit $exec, implicit $mode, implicit-def $m0
+ %182:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 182, implicit $exec, implicit $mode, implicit-def $m0
+ %183:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 183, implicit $exec, implicit $mode, implicit-def $m0
+ %184:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 184, implicit $exec, implicit $mode, implicit-def $m0
+ %185:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 185, implicit $exec, implicit $mode, implicit-def $m0
+ %186:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 186, implicit $exec, implicit $mode, implicit-def $m0
+ %187:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 187, implicit $exec, implicit $mode, implicit-def $m0
+ %188:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 188, implicit $exec, implicit $mode, implicit-def $m0
+ %189:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 189, implicit $exec, implicit $mode, implicit-def $m0
+ %190:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 190, implicit $exec, implicit $mode, implicit-def $m0
+ %191:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 191, implicit $exec, implicit $mode, implicit-def $m0
+ %192:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 192, implicit $exec, implicit $mode, implicit-def $m0
+ %193:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 193, implicit $exec, implicit $mode, implicit-def $m0
+ %194:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 194, implicit $exec, implicit $mode, implicit-def $m0
+ %195:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 195, implicit $exec, implicit $mode, implicit-def $m0
+ %196:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 196, implicit $exec, implicit $mode, implicit-def $m0
+ %197:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 197, implicit $exec, implicit $mode, implicit-def $m0
+ %198:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 198, implicit $exec, implicit $mode, implicit-def $m0
+ %199:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 199, implicit $exec, implicit $mode, implicit-def $m0
+ %200:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 200, implicit $exec, implicit $mode, implicit-def $m0
+ %201:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 201, implicit $exec, implicit $mode, implicit-def $m0
+ %202:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 202, implicit $exec, implicit $mode, implicit-def $m0
+ %203:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 203, implicit $exec, implicit $mode, implicit-def $m0
+ %204:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 204, implicit $exec, implicit $mode, implicit-def $m0
+ %205:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 205, implicit $exec, implicit $mode, implicit-def $m0
+ %206:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 206, implicit $exec, implicit $mode, implicit-def $m0
+ %207:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 207, implicit $exec, implicit $mode, implicit-def $m0
+ %208:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 208, implicit $exec, implicit $mode, implicit-def $m0
+ %209:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 209, implicit $exec, implicit $mode, implicit-def $m0
+ %210:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 210, implicit $exec, implicit $mode, implicit-def $m0
+ %211:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 211, implicit $exec, implicit $mode, implicit-def $m0
+ %212:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 212, implicit $exec, implicit $mode, implicit-def $m0
+ %213:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 213, implicit $exec, implicit $mode, implicit-def $m0
+ %214:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 214, implicit $exec, implicit $mode, implicit-def $m0
+ %215:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 215, implicit $exec, implicit $mode, implicit-def $m0
+ %216:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 216, implicit $exec, implicit $mode, implicit-def $m0
+ %217:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 217, implicit $exec, implicit $mode, implicit-def $m0
+ %218:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 218, implicit $exec, implicit $mode, implicit-def $m0
+ %219:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 219, implicit $exec, implicit $mode, implicit-def $m0
+ %220:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 220, implicit $exec, implicit $mode, implicit-def $m0
+ %221:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 221, implicit $exec, implicit $mode, implicit-def $m0
+ %222:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 222, implicit $exec, implicit $mode, implicit-def $m0
+ %223:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 223, implicit $exec, implicit $mode, implicit-def $m0
+ %224:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 224, implicit $exec, implicit $mode, implicit-def $m0
+ %225:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 225, implicit $exec, implicit $mode, implicit-def $m0
+ %226:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 226, implicit $exec, implicit $mode, implicit-def $m0
+ %227:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 227, implicit $exec, implicit $mode, implicit-def $m0
+ %228:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 228, implicit $exec, implicit $mode, implicit-def $m0
+ %229:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 229, implicit $exec, implicit $mode, implicit-def $m0
+ %230:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 230, implicit $exec, implicit $mode, implicit-def $m0
+ %231:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 231, implicit $exec, implicit $mode, implicit-def $m0
+ %232:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 232, implicit $exec, implicit $mode, implicit-def $m0
+ %233:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 233, implicit $exec, implicit $mode, implicit-def $m0
+ %234:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 234, implicit $exec, implicit $mode, implicit-def $m0
+ %235:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 235, implicit $exec, implicit $mode, implicit-def $m0
+ %236:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 236, implicit $exec, implicit $mode, implicit-def $m0
+ %237:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 237, implicit $exec, implicit $mode, implicit-def $m0
+ %238:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 238, implicit $exec, implicit $mode, implicit-def $m0
+ %239:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 239, implicit $exec, implicit $mode, implicit-def $m0
+ %240:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 240, implicit $exec, implicit $mode, implicit-def $m0
+ %241:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 241, implicit $exec, implicit $mode, implicit-def $m0
+ %242:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 242, implicit $exec, implicit $mode, implicit-def $m0
+ %243:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 243, implicit $exec, implicit $mode, implicit-def $m0
+ %244:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 244, implicit $exec, implicit $mode, implicit-def $m0
+ %245:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 245, implicit $exec, implicit $mode, implicit-def $m0
+ %246:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 246, implicit $exec, implicit $mode, implicit-def $m0
+ %247:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 247, implicit $exec, implicit $mode, implicit-def $m0
+ %248:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 248, implicit $exec, implicit $mode, implicit-def $m0
+ %249:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 249, implicit $exec, implicit $mode, implicit-def $m0
+ %250:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 250, implicit $exec, implicit $mode, implicit-def $m0
+ %251:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 251, implicit $exec, implicit $mode, implicit-def $m0
+ %252:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 252, implicit $exec, implicit $mode, implicit-def $m0
+ %253:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 253, implicit $exec, implicit $mode, implicit-def $m0
+ %254:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 254, implicit $exec, implicit $mode, implicit-def $m0
+
+ bb.2:
+
+ S_NOP 0, implicit %0, implicit %1, implicit %2, implicit %3, implicit %4, implicit %5, implicit %6, implicit %7, implicit %8, implicit %9
+ S_NOP 0, implicit %10, implicit %11, implicit %12, implicit %13, implicit %14, implicit %15, implicit %16, implicit %17, implicit %18, implicit %19
+ S_NOP 0, implicit %20, implicit %21, implicit %22, implicit %23, implicit %24, implicit %25, implicit %26, implicit %27, implicit %28, implicit %29
+ S_NOP 0, implicit %30, implicit %31, implicit %32, implicit %33, implicit %34, implicit %35, implicit %36, implicit %37, implicit %38, implicit %39
+ S_NOP 0, implicit %40, implicit %41, implicit %42, implicit %43, implicit %44, implicit %45, implicit %46, implicit %47, implicit %48, implicit %49
+ S_NOP 0, implicit %50, implicit %51, implicit %52, implicit %53, implicit %54, implicit %55, implicit %56, implicit %57, implicit %58, implicit %59
+ S_NOP 0, implicit %60, implicit %61, implicit %62, implicit %63, implicit %64, implicit %65, implicit %66, implicit %67, implicit %68, implicit %69
+ S_NOP 0, implicit %70, implicit %71, implicit %72, implicit %73, implicit %74, implicit %75, implicit %76, implicit %77, implicit %78, implicit %79
+ S_NOP 0, implicit %80, implicit %81, implicit %82, implicit %83, implicit %84, implicit %85, implicit %86, implicit %87, implicit %88, implicit %89
+ S_NOP 0, implicit %90, implicit %91, implicit %92, implicit %93, implicit %94, implicit %95, implicit %96, implicit %97, implicit %98, implicit %99
+ S_NOP 0, implicit %100, implicit %101, implicit %102, implicit %103, implicit %104, implicit %105, implicit %106, implicit %107, implicit %108, implicit %109
+ S_NOP 0, implicit %110, implicit %111, implicit %112, implicit %113, implicit %114, implicit %115, implicit %116, implicit %117, implicit %118, implicit %119
+ S_NOP 0, implicit %120, implicit %121, implicit %122, implicit %123, implicit %124, implicit %125, implicit %126, implicit %127, implicit %128, implicit %129
+ S_NOP 0, implicit %130, implicit %131, implicit %132, implicit %133, implicit %134, implicit %135, implicit %136, implicit %137, implicit %138, implicit %139
+ S_NOP 0, implicit %140, implicit %141, implicit %142, implicit %143, implicit %144, implicit %145, implicit %146, implicit %147, implicit %148, implicit %149
+ S_NOP 0, implicit %150, implicit %151, implicit %152, implicit %153, implicit %154, implicit %155, implicit %156, implicit %157, implicit %158, implicit %159
+ S_NOP 0, implicit %160, implicit %161, implicit %162, implicit %163, implicit %164, implicit %165, implicit %166, implicit %167, implicit %168, implicit %169
+ S_NOP 0, implicit %170, implicit %171, implicit %172, implicit %173, implicit %174, implicit %175, implicit %176, implicit %177, implicit %178, implicit %179
+ S_NOP 0, implicit %180, implicit %181, implicit %182, implicit %183, implicit %184, implicit %185, implicit %186, implicit %187, implicit %188, implicit %189
+ S_NOP 0, implicit %190, implicit %191, implicit %192, implicit %193, implicit %194, implicit %195, implicit %196, implicit %197, implicit %198, implicit %199
+ S_NOP 0, implicit %200, implicit %201, implicit %202, implicit %203, implicit %204, implicit %205, implicit %206, implicit %207, implicit %208, implicit %209
+ S_NOP 0, implicit %210, implicit %211, implicit %212, implicit %213, implicit %214, implicit %215, implicit %216, implicit %217, implicit %218, implicit %219
+ S_NOP 0, implicit %220, implicit %221, implicit %222, implicit %223, implicit %224, implicit %225, implicit %226, implicit %227, implicit %228, implicit %229
+ S_NOP 0, implicit %230, implicit %231, implicit %232, implicit %233, implicit %234, implicit %235, implicit %236, implicit %237, implicit %238, implicit %239
+ S_NOP 0, implicit %240, implicit %241, implicit %242, implicit %243, implicit %244, implicit %245, implicit %246, implicit %247, implicit %248, implicit %249
+ S_NOP 0, implicit %250, implicit %251, implicit %252, implicit %253, implicit %254
+
+ S_NOP 0, implicit %255, implicit %256
+
+ S_ENDPGM 0
+...
+---
name: test_no_sink_two_subregs_in_def_block
tracksRegLiveness: true
machineFunctionInfo:
@@ -5544,12 +6142,12 @@ body: |
S_ENDPGM 0
...
---
-name: test_occ_9_no_sink_one_def_of_undef_subreg
+name: test_occ_7_sink_one_def_of_undef_subreg_for_8
tracksRegLiveness: true
machineFunctionInfo:
isEntryFunction: true
body: |
- ; GFX908-LABEL: name: test_occ_9_no_sink_one_def_of_undef_subreg
+ ; GFX908-LABEL: name: test_occ_7_sink_one_def_of_undef_subreg_for_8
; GFX908: bb.0:
; GFX908-NEXT: successors: %bb.1(0x80000000)
; GFX908-NEXT: {{ $}}
@@ -5576,16 +6174,22 @@ body: |
; GFX908-NEXT: [[V_CVT_I32_F64_e32_20:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 20, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_21:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 21, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: undef [[V_MOV_B32_e32_:%[0-9]+]].sub1:vreg_64_align2 = V_MOV_B32_e32 23, implicit $exec
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 25, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_26:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 26, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 27, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 28, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 29, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 30, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 31, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.1:
; GFX908-NEXT: successors: %bb.2(0x80000000)
; GFX908-NEXT: {{ $}}
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_23]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_]]
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.2:
- ; GFX908-NEXT: S_NOP 0, implicit [[V_MOV_B32_e32_]].sub1
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_]], implicit [[V_CVT_I32_F64_e32_1]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_2]], implicit [[V_CVT_I32_F64_e32_3]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_4]], implicit [[V_CVT_I32_F64_e32_5]]
@@ -5597,7 +6201,13 @@ body: |
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_16]], implicit [[V_CVT_I32_F64_e32_17]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_18]], implicit [[V_CVT_I32_F64_e32_19]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_20]], implicit [[V_CVT_I32_F64_e32_21]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_22]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_22]], implicit [[V_CVT_I32_F64_e32_23]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_24]], implicit [[V_CVT_I32_F64_e32_25]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_26]], implicit [[V_CVT_I32_F64_e32_27]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_28]], implicit [[V_CVT_I32_F64_e32_29]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_30]], implicit [[V_CVT_I32_F64_e32_31]]
+ ; GFX908-NEXT: undef [[V_MOV_B32_e32_:%[0-9]+]].sub1:vreg_64_align2 = V_MOV_B32_e32 23, implicit $exec
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_MOV_B32_e32_]].sub1
; GFX908-NEXT: S_ENDPGM 0
bb.0:
successors: %bb.1
@@ -5625,17 +6235,24 @@ body: |
%20:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 20, implicit $exec, implicit $mode, implicit-def $m0
%21:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 21, implicit $exec, implicit $mode, implicit-def $m0
%22:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode, implicit-def $m0
- undef %23.sub1:vreg_64_align2 = V_MOV_B32_e32 23, implicit $exec
+ %23:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode, implicit-def $m0
+ %24:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
+ %25:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 25, implicit $exec, implicit $mode, implicit-def $m0
+ %26:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 26, implicit $exec, implicit $mode, implicit-def $m0
+ %27:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 27, implicit $exec, implicit $mode, implicit-def $m0
+ %28:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 28, implicit $exec, implicit $mode, implicit-def $m0
+ %29:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 29, implicit $exec, implicit $mode, implicit-def $m0
+ %30:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 30, implicit $exec, implicit $mode, implicit-def $m0
+ %31:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 31, implicit $exec, implicit $mode, implicit-def $m0
+ undef %32.sub1:vreg_64_align2 = V_MOV_B32_e32 23, implicit $exec
bb.1:
successors: %bb.2
- %24:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
- S_NOP 0, implicit %24
+ S_NOP 0, implicit %0
bb.2:
- S_NOP 0, implicit %23.sub1
S_NOP 0, implicit %0, implicit %1
S_NOP 0, implicit %2, implicit %3
S_NOP 0, implicit %4, implicit %5
@@ -5647,7 +6264,12 @@ body: |
S_NOP 0, implicit %16, implicit %17
S_NOP 0, implicit %18, implicit %19
S_NOP 0, implicit %20, implicit %21
- S_NOP 0, implicit %22
+ S_NOP 0, implicit %22, implicit %23
+ S_NOP 0, implicit %24, implicit %25
+ S_NOP 0, implicit %26, implicit %27
+ S_NOP 0, implicit %28, implicit %29
+ S_NOP 0, implicit %30, implicit %31
+ S_NOP 0, implicit %32.sub1
S_ENDPGM 0
...
---
@@ -5866,1227 +6488,673 @@ body: |
S_NOP 0, implicit %22
S_ENDPGM 0
...
-
---
-name: test_occ_8_physreg_use
+name: test_live_through_occ_7_sink_for_8
tracksRegLiveness: true
machineFunctionInfo:
isEntryFunction: true
body: |
- ; GFX908-LABEL: name: test_occ_8_physreg_use
+ ; GFX908-LABEL: name: test_live_through_occ_7_sink_for_8
; GFX908: bb.0:
; GFX908-NEXT: successors: %bb.1(0x80000000)
- ; GFX908-NEXT: liveins: $vgpr0, $sgpr0_sgpr1
; GFX908-NEXT: {{ $}}
- ; GFX908-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr0_sgpr1
- ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; GFX908-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 52, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4)
- ; GFX908-NEXT: undef [[S_MOV_B32_:%[0-9]+]].sub1:sreg_64 = S_MOV_B32 0
- ; GFX908-NEXT: $vgpr8 = IMPLICIT_DEF
- ; GFX908-NEXT: $vgpr9 = IMPLICIT_DEF
- ; GFX908-NEXT: dead [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF4:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF5:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF7:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF10:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF11:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF13:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF14:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF15:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF17:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: dead [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF17]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[DEF18:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF19:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: dead [[V_CMP_GT_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_GT_U32_e64 [[S_LOAD_DWORDX2_IMM]].sub0, [[COPY1]](s32), implicit $exec
- ; GFX908-NEXT: dead undef [[S_MOV_B32_:%[0-9]+]].sub0:sreg_64 = COPY [[S_LOAD_DWORDX2_IMM]].sub1
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 $vgpr8, implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 $vgpr9, implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_3:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF1]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_4:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF2]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_5:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF3]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_6:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF4]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_7:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF5]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_8:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF6]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_9:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF7]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_10:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF8]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_11:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF9]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_12:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF10]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_13:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF11]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_14:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF12]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_15:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF13]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_16:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF14]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_17:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF15]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_18:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF16]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_19:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF18]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_20:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF19]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[DEF20:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_21:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF20]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[DEF21:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF21]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[DEF22:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF22]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[DEF23:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF23]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[DEF24:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF24]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[DEF25:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_26:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF25]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[DEF26:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF26]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[DEF27:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF27]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[DEF28:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF28]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[DEF29:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF30:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: S_BRANCH %bb.1
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.1:
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF29]], implicit $exec, implicit $mode
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_1]], implicit [[V_CVT_I32_F32_e32_30]], implicit [[DEF29]]
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF30]], implicit $exec, implicit $mode
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_2]], implicit [[V_CVT_I32_F32_e32_31]], implicit [[DEF30]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_3]], implicit [[V_CVT_I32_F32_e32_11]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_4]], implicit [[V_CVT_I32_F32_e32_12]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_5]], implicit [[V_CVT_I32_F32_e32_13]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_6]], implicit [[V_CVT_I32_F32_e32_14]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_7]], implicit [[V_CVT_I32_F32_e32_15]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_8]], implicit [[V_CVT_I32_F32_e32_16]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_9]], implicit [[V_CVT_I32_F32_e32_17]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_10]], implicit [[V_CVT_I32_F32_e32_18]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_19]], implicit [[V_CVT_I32_F32_e32_20]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_21]], implicit [[V_CVT_I32_F32_e32_22]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_21]], implicit [[V_CVT_I32_F32_e32_22]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_23]], implicit [[V_CVT_I32_F32_e32_24]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_25]], implicit [[V_CVT_I32_F32_e32_26]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_27]], implicit [[V_CVT_I32_F32_e32_28]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_29]]
+ ; GFX908-NEXT: successors: %bb.2(0x80000000)
+ ; GFX908-NEXT: {{ $}}
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_3:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 4, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_4:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 5, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_5:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 6, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_6:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 7, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_7:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 8, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_8:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 9, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_9:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 10, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_10:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 11, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_11:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 12, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_12:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 13, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_13:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 14, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_14:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 15, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_15:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 16, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: dead [[V_CVT_I32_F64_e32_16:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 17, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_17:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 18, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_18:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 19, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_19:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 20, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_20:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 21, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_21:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 25, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 26, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_26:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 27, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 28, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 29, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 30, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 31, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 32, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_32:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 33, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: {{ $}}
+ ; GFX908-NEXT: bb.2:
+ ; GFX908-NEXT: successors: %bb.3(0x80000000)
+ ; GFX908-NEXT: {{ $}}
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_1]], implicit [[V_CVT_I32_F64_e32_17]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_2]], implicit [[V_CVT_I32_F64_e32_18]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_3]], implicit [[V_CVT_I32_F64_e32_19]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_4]], implicit [[V_CVT_I32_F64_e32_20]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_5]], implicit [[V_CVT_I32_F64_e32_21]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_6]], implicit [[V_CVT_I32_F64_e32_22]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_7]], implicit [[V_CVT_I32_F64_e32_23]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_8]], implicit [[V_CVT_I32_F64_e32_24]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_9]], implicit [[V_CVT_I32_F64_e32_25]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_10]], implicit [[V_CVT_I32_F64_e32_26]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_11]], implicit [[V_CVT_I32_F64_e32_27]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_12]], implicit [[V_CVT_I32_F64_e32_28]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_13]], implicit [[V_CVT_I32_F64_e32_29]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_14]], implicit [[V_CVT_I32_F64_e32_30]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_15]], implicit [[V_CVT_I32_F64_e32_31]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_32]]
+ ; GFX908-NEXT: {{ $}}
+ ; GFX908-NEXT: bb.3:
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_33:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 0, implicit $exec, implicit $mode
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_33]], implicit [[V_CVT_I32_F64_e32_]]
; GFX908-NEXT: S_ENDPGM 0
bb.0:
- liveins: $vgpr0, $sgpr0_sgpr1
+ successors: %bb.1
- %1:sgpr_64(p4) = COPY $sgpr0_sgpr1
- %2:vgpr_32(s32) = COPY $vgpr0
- %3:sreg_64_xexec = S_LOAD_DWORDX2_IMM %1(p4), 52, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4)
- %4:sreg_64 = V_CMP_GT_U32_e64 %3.sub0, %2(s32), implicit $exec
- undef %5.sub1:sreg_64 = S_MOV_B32 0
- %5.sub0:sreg_64 = COPY %3.sub1
- $vgpr8 = IMPLICIT_DEF
- $vgpr9 = IMPLICIT_DEF
- %11:vgpr_32 = IMPLICIT_DEF
- %12:vgpr_32 = IMPLICIT_DEF
- %13:vgpr_32 = IMPLICIT_DEF
- %14:vgpr_32 = IMPLICIT_DEF
- %15:vgpr_32 = IMPLICIT_DEF
- %16:vgpr_32 = IMPLICIT_DEF
- %17:vgpr_32 = IMPLICIT_DEF
- %18:vgpr_32 = IMPLICIT_DEF
- %19:vgpr_32 = IMPLICIT_DEF
- %20:vgpr_32 = IMPLICIT_DEF
- %21:vgpr_32 = IMPLICIT_DEF
- %22:vgpr_32 = IMPLICIT_DEF
- %23:vgpr_32 = IMPLICIT_DEF
- %24:vgpr_32 = IMPLICIT_DEF
- %25:vgpr_32 = IMPLICIT_DEF
- %26:vgpr_32 = IMPLICIT_DEF
- %27:vgpr_32 = IMPLICIT_DEF
- %28:vgpr_32 = IMPLICIT_DEF
- %29:vgpr_32 = IMPLICIT_DEF
- %30:vgpr_32 = IMPLICIT_DEF
- %31:vgpr_32 = IMPLICIT_DEF
- %32:vgpr_32 = IMPLICIT_DEF
- %33:vgpr_32 = IMPLICIT_DEF
- %34:vgpr_32 = IMPLICIT_DEF
- %35:vgpr_32 = IMPLICIT_DEF
- %36:vgpr_32 = IMPLICIT_DEF
- %37:vgpr_32 = IMPLICIT_DEF
- %38:vgpr_32 = IMPLICIT_DEF
- %39:vgpr_32 = IMPLICIT_DEF
- %40:vgpr_32 = IMPLICIT_DEF
- %41:vgpr_32 = IMPLICIT_DEF
- %50:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 $vgpr8, implicit $exec, implicit $mode
- %51:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 $vgpr9, implicit $exec, implicit $mode
- %52:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %12, implicit $exec, implicit $mode
- %53:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %13, implicit $exec, implicit $mode
- %54:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %14, implicit $exec, implicit $mode
- %55:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %15, implicit $exec, implicit $mode
- %56:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %16, implicit $exec, implicit $mode
- %57:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %17, implicit $exec, implicit $mode
- %58:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %18, implicit $exec, implicit $mode
- %59:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %19, implicit $exec, implicit $mode
- %60:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %20, implicit $exec, implicit $mode
- %61:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %21, implicit $exec, implicit $mode
- %62:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %22, implicit $exec, implicit $mode
- %63:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %23, implicit $exec, implicit $mode
- %64:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %24, implicit $exec, implicit $mode
- %65:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %25, implicit $exec, implicit $mode
- %66:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %26, implicit $exec, implicit $mode
- %67:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %27, implicit $exec, implicit $mode
- %68:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %28, implicit $exec, implicit $mode
- %69:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %29, implicit $exec, implicit $mode
- %70:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %30, implicit $exec, implicit $mode
- %71:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %31, implicit $exec, implicit $mode
- %72:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %32, implicit $exec, implicit $mode
- %73:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %33, implicit $exec, implicit $mode
- %74:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %34, implicit $exec, implicit $mode
- %75:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %35, implicit $exec, implicit $mode
- %76:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %36, implicit $exec, implicit $mode
- %77:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %37, implicit $exec, implicit $mode
- %78:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %38, implicit $exec, implicit $mode
- %79:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %39, implicit $exec, implicit $mode
- %80:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %40, implicit $exec, implicit $mode
- %81:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %41, implicit $exec, implicit $mode
- S_BRANCH %bb.4
+ %0:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 0, implicit $exec, implicit $mode
+ %1:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode
- bb.4:
+ bb.1:
+ successors: %bb.2
- S_NOP 0, implicit %50, implicit %60, implicit %20
- S_NOP 0, implicit %51, implicit %61, implicit %21
- S_NOP 0, implicit %52, implicit %62
- S_NOP 0, implicit %53, implicit %63
- S_NOP 0, implicit %54, implicit %64
- S_NOP 0, implicit %55, implicit %65
- S_NOP 0, implicit %56, implicit %66
- S_NOP 0, implicit %57, implicit %67
- S_NOP 0, implicit %58, implicit %68
- S_NOP 0, implicit %59, implicit %69
- S_NOP 0, implicit %70, implicit %71
- S_NOP 0, implicit %72, implicit %73
- S_NOP 0, implicit %72, implicit %73
- S_NOP 0, implicit %74, implicit %75
- S_NOP 0, implicit %76, implicit %77
- S_NOP 0, implicit %78, implicit %79
- S_NOP 0, implicit %80
+ %2:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode, implicit-def $m0
+ %3:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode, implicit-def $m0
+ %4:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 4, implicit $exec, implicit $mode, implicit-def $m0
+ %5:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 5, implicit $exec, implicit $mode, implicit-def $m0
+ %6:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 6, implicit $exec, implicit $mode, implicit-def $m0
+ %7:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 7, implicit $exec, implicit $mode, implicit-def $m0
+ %8:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 8, implicit $exec, implicit $mode, implicit-def $m0
+ %9:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 9, implicit $exec, implicit $mode, implicit-def $m0
+ %10:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 10, implicit $exec, implicit $mode, implicit-def $m0
+ %11:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 11, implicit $exec, implicit $mode, implicit-def $m0
+ %12:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 12, implicit $exec, implicit $mode, implicit-def $m0
+ %13:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 13, implicit $exec, implicit $mode, implicit-def $m0
+ %14:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 14, implicit $exec, implicit $mode, implicit-def $m0
+ %15:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 15, implicit $exec, implicit $mode, implicit-def $m0
+ %16:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 16, implicit $exec, implicit $mode, implicit-def $m0
+ %17:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 17, implicit $exec, implicit $mode, implicit-def $m0
+ %18:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 18, implicit $exec, implicit $mode, implicit-def $m0
+ %19:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 19, implicit $exec, implicit $mode, implicit-def $m0
+ %20:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 20, implicit $exec, implicit $mode, implicit-def $m0
+ %21:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 21, implicit $exec, implicit $mode, implicit-def $m0
+ %22:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode, implicit-def $m0
+ %23:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode, implicit-def $m0
+ %24:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
+ %25:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 25, implicit $exec, implicit $mode, implicit-def $m0
+ %26:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 26, implicit $exec, implicit $mode, implicit-def $m0
+ %27:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 27, implicit $exec, implicit $mode, implicit-def $m0
+ %28:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 28, implicit $exec, implicit $mode, implicit-def $m0
+ %29:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 29, implicit $exec, implicit $mode, implicit-def $m0
+ %30:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 30, implicit $exec, implicit $mode, implicit-def $m0
+ %31:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 31, implicit $exec, implicit $mode, implicit-def $m0
+ %32:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 32, implicit $exec, implicit $mode, implicit-def $m0
+ %33:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 33, implicit $exec, implicit $mode, implicit-def $m0
+
+ bb.2:
+ successors: %bb.3
+
+ S_NOP 0, implicit %2, implicit %18
+ S_NOP 0, implicit %3, implicit %19
+ S_NOP 0, implicit %4, implicit %20
+ S_NOP 0, implicit %5, implicit %21
+ S_NOP 0, implicit %6, implicit %22
+ S_NOP 0, implicit %7, implicit %23
+ S_NOP 0, implicit %8, implicit %24
+ S_NOP 0, implicit %9, implicit %25
+ S_NOP 0, implicit %10, implicit %26
+ S_NOP 0, implicit %11, implicit %27
+ S_NOP 0, implicit %12, implicit %28
+ S_NOP 0, implicit %13, implicit %29
+ S_NOP 0, implicit %14, implicit %30
+ S_NOP 0, implicit %15, implicit %31
+ S_NOP 0, implicit %16, implicit %32
+ S_NOP 0, implicit %33
+
+ bb.3:
+ S_NOP 0, implicit %0, implicit %1
S_ENDPGM 0
...
-
---
-name: test_occ_8_exec_use
+name: test_remat_over_exec_modif
tracksRegLiveness: true
machineFunctionInfo:
isEntryFunction: true
body: |
- ; GFX908-LABEL: name: test_occ_8_exec_use
+ ; GFX908-LABEL: name: test_remat_over_exec_modif
; GFX908: bb.0:
; GFX908-NEXT: successors: %bb.1(0x80000000)
- ; GFX908-NEXT: liveins: $vgpr0, $sgpr0_sgpr1
+ ; GFX908-NEXT: liveins: $sgpr2_sgpr3
; GFX908-NEXT: {{ $}}
- ; GFX908-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr0_sgpr1
- ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; GFX908-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 52, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4)
- ; GFX908-NEXT: undef [[S_MOV_B32_:%[0-9]+]].sub1:sreg_64 = S_MOV_B32 0
- ; GFX908-NEXT: dead [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF4:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF5:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF7:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF10:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF11:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF13:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF14:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF15:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF17:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: dead [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF17]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[DEF18:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF19:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: dead [[V_CMP_GT_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_GT_U32_e64 [[S_LOAD_DWORDX2_IMM]].sub0, [[COPY1]](s32), implicit $exec
- ; GFX908-NEXT: dead undef [[S_MOV_B32_:%[0-9]+]].sub0:sreg_64 = COPY [[S_LOAD_DWORDX2_IMM]].sub1
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF1]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF2]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_3:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF3]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_4:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF4]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_5:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF5]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_6:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF6]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_7:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF7]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_8:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF8]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_9:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF9]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_10:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF10]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_11:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF11]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_12:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF12]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_13:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF13]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_14:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF14]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_15:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF15]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_16:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF16]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_17:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF18]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_18:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF19]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[DEF20:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_19:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF20]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[DEF21:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_20:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF21]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[DEF22:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_21:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF22]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[DEF23:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF23]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[DEF24:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF24]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[DEF25:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF25]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[DEF26:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF26]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[DEF27:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_26:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF27]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[DEF28:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF28]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[DEF29:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF30:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF29]], implicit $exec, implicit $mode
- ; GFX908-NEXT: $vgpr8 = IMPLICIT_DEF
- ; GFX908-NEXT: $vgpr9 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF30]], implicit $exec, implicit $mode
- ; GFX908-NEXT: S_BRANCH %bb.1
+ ; GFX908-NEXT: %new_exec:sgpr_64 = COPY $sgpr2_sgpr3
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 0, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_3:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_4:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 4, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_5:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 5, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_6:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 6, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_7:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 7, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_8:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 8, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_9:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 9, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_10:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 10, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_11:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 11, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_12:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 12, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_13:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 13, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_14:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 14, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_15:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 15, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_16:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 16, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_17:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 17, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_18:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 18, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_19:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 19, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: %save_exec:sreg_64 = S_MOV_B64 $exec
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_20:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 20, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_21:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 21, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 25, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_26:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 26, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 27, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 28, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 29, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 30, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 31, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: $exec = S_MOV_B64 %new_exec
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.1:
- ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 255
- ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 [[S_MOV_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 1, implicit $exec, implicit $mode
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_30]], implicit [[V_CVT_I32_F32_e32_28]], implicit [[DEF29]]
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 2, implicit $exec, implicit $mode
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_31]], implicit [[V_CVT_I32_F32_e32_29]], implicit [[DEF30]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_1]], implicit [[V_CVT_I32_F32_e32_9]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_2]], implicit [[V_CVT_I32_F32_e32_10]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_3]], implicit [[V_CVT_I32_F32_e32_11]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_4]], implicit [[V_CVT_I32_F32_e32_12]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_5]], implicit [[V_CVT_I32_F32_e32_13]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_6]], implicit [[V_CVT_I32_F32_e32_14]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_7]], implicit [[V_CVT_I32_F32_e32_15]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_8]], implicit [[V_CVT_I32_F32_e32_16]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_17]], implicit [[V_CVT_I32_F32_e32_18]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_19]], implicit [[V_CVT_I32_F32_e32_20]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_19]], implicit [[V_CVT_I32_F32_e32_20]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_21]], implicit [[V_CVT_I32_F32_e32_22]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_23]], implicit [[V_CVT_I32_F32_e32_24]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_25]], implicit [[V_CVT_I32_F32_e32_26]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_27]]
- ; GFX908-NEXT: $exec = S_MOV_B64 [[S_AND_SAVEEXEC_B64_]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_]], implicit [[V_CVT_I32_F64_e32_16]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_1]], implicit [[V_CVT_I32_F64_e32_17]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_2]], implicit [[V_CVT_I32_F64_e32_18]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_3]], implicit [[V_CVT_I32_F64_e32_19]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_4]], implicit [[V_CVT_I32_F64_e32_20]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_5]], implicit [[V_CVT_I32_F64_e32_21]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_6]], implicit [[V_CVT_I32_F64_e32_22]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_7]], implicit [[V_CVT_I32_F64_e32_23]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_8]], implicit [[V_CVT_I32_F64_e32_24]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_9]], implicit [[V_CVT_I32_F64_e32_25]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_10]], implicit [[V_CVT_I32_F64_e32_26]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_11]], implicit [[V_CVT_I32_F64_e32_27]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_12]], implicit [[V_CVT_I32_F64_e32_28]]
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_32:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 32, implicit $exec, implicit $mode
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_13]], implicit [[V_CVT_I32_F64_e32_29]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_14]], implicit [[V_CVT_I32_F64_e32_30]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_15]], implicit [[V_CVT_I32_F64_e32_31]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_32]]
+ ; GFX908-NEXT: $exec = S_MOV_B64 %save_exec
; GFX908-NEXT: S_ENDPGM 0
bb.0:
- liveins: $vgpr0, $sgpr0_sgpr1
+ liveins: $sgpr2_sgpr3
- %1:sgpr_64(p4) = COPY $sgpr0_sgpr1
- %2:vgpr_32(s32) = COPY $vgpr0
- %3:sreg_64_xexec = S_LOAD_DWORDX2_IMM %1(p4), 52, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4)
- %4:sreg_64 = V_CMP_GT_U32_e64 %3.sub0, %2(s32), implicit $exec
- undef %5.sub1:sreg_64 = S_MOV_B32 0
- %5.sub0:sreg_64 = COPY %3.sub1
- $vgpr8 = IMPLICIT_DEF
- $vgpr9 = IMPLICIT_DEF
- %11:vgpr_32 = IMPLICIT_DEF
- %12:vgpr_32 = IMPLICIT_DEF
- %13:vgpr_32 = IMPLICIT_DEF
- %14:vgpr_32 = IMPLICIT_DEF
- %15:vgpr_32 = IMPLICIT_DEF
- %16:vgpr_32 = IMPLICIT_DEF
- %17:vgpr_32 = IMPLICIT_DEF
- %18:vgpr_32 = IMPLICIT_DEF
- %19:vgpr_32 = IMPLICIT_DEF
- %20:vgpr_32 = IMPLICIT_DEF
- %21:vgpr_32 = IMPLICIT_DEF
- %22:vgpr_32 = IMPLICIT_DEF
- %23:vgpr_32 = IMPLICIT_DEF
- %24:vgpr_32 = IMPLICIT_DEF
- %25:vgpr_32 = IMPLICIT_DEF
- %26:vgpr_32 = IMPLICIT_DEF
- %27:vgpr_32 = IMPLICIT_DEF
- %28:vgpr_32 = IMPLICIT_DEF
- %29:vgpr_32 = IMPLICIT_DEF
- %30:vgpr_32 = IMPLICIT_DEF
- %31:vgpr_32 = IMPLICIT_DEF
- %32:vgpr_32 = IMPLICIT_DEF
- %33:vgpr_32 = IMPLICIT_DEF
- %34:vgpr_32 = IMPLICIT_DEF
- %35:vgpr_32 = IMPLICIT_DEF
- %36:vgpr_32 = IMPLICIT_DEF
- %37:vgpr_32 = IMPLICIT_DEF
- %38:vgpr_32 = IMPLICIT_DEF
- %39:vgpr_32 = IMPLICIT_DEF
- %40:vgpr_32 = IMPLICIT_DEF
- %41:vgpr_32 = IMPLICIT_DEF
- %50:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 1, implicit $exec, implicit $mode
- %51:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 2, implicit $exec, implicit $mode
- %52:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %12, implicit $exec, implicit $mode
- %53:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %13, implicit $exec, implicit $mode
- %54:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %14, implicit $exec, implicit $mode
- %55:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %15, implicit $exec, implicit $mode
- %56:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %16, implicit $exec, implicit $mode
- %57:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %17, implicit $exec, implicit $mode
- %58:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %18, implicit $exec, implicit $mode
- %59:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %19, implicit $exec, implicit $mode
- %60:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %20, implicit $exec, implicit $mode
- %61:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %21, implicit $exec, implicit $mode
- %62:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %22, implicit $exec, implicit $mode
- %63:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %23, implicit $exec, implicit $mode
- %64:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %24, implicit $exec, implicit $mode
- %65:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %25, implicit $exec, implicit $mode
- %66:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %26, implicit $exec, implicit $mode
- %67:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %27, implicit $exec, implicit $mode
- %68:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %28, implicit $exec, implicit $mode
- %69:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %29, implicit $exec, implicit $mode
- %70:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %30, implicit $exec, implicit $mode
- %71:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %31, implicit $exec, implicit $mode
- %72:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %32, implicit $exec, implicit $mode
- %73:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %33, implicit $exec, implicit $mode
- %74:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %34, implicit $exec, implicit $mode
- %75:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %35, implicit $exec, implicit $mode
- %76:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %36, implicit $exec, implicit $mode
- %77:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %37, implicit $exec, implicit $mode
- %78:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %38, implicit $exec, implicit $mode
- %79:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %39, implicit $exec, implicit $mode
- %80:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %40, implicit $exec, implicit $mode
- %81:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %41, implicit $exec, implicit $mode
- S_BRANCH %bb.4
+ %0:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 0, implicit $exec, implicit $mode, implicit-def $m0
+ %1:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode, implicit-def $m0
+ %2:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode, implicit-def $m0
+ %3:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode, implicit-def $m0
+ %4:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 4, implicit $exec, implicit $mode, implicit-def $m0
+ %5:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 5, implicit $exec, implicit $mode, implicit-def $m0
+ %6:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 6, implicit $exec, implicit $mode, implicit-def $m0
+ %7:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 7, implicit $exec, implicit $mode, implicit-def $m0
+ %8:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 8, implicit $exec, implicit $mode, implicit-def $m0
+ %9:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 9, implicit $exec, implicit $mode, implicit-def $m0
+ %10:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 10, implicit $exec, implicit $mode, implicit-def $m0
+ %11:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 11, implicit $exec, implicit $mode, implicit-def $m0
+ %12:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 12, implicit $exec, implicit $mode, implicit-def $m0
+ %13:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 13, implicit $exec, implicit $mode, implicit-def $m0
+ %14:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 14, implicit $exec, implicit $mode, implicit-def $m0
+ %15:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 15, implicit $exec, implicit $mode, implicit-def $m0
+ %16:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 16, implicit $exec, implicit $mode, implicit-def $m0
+ %17:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 17, implicit $exec, implicit $mode, implicit-def $m0
+ %18:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 18, implicit $exec, implicit $mode, implicit-def $m0
+ %19:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 19, implicit $exec, implicit $mode, implicit-def $m0
+ %20:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 20, implicit $exec, implicit $mode, implicit-def $m0
+ %21:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 21, implicit $exec, implicit $mode, implicit-def $m0
+ %22:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode, implicit-def $m0
+ %23:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode, implicit-def $m0
+ %24:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
+ %25:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 25, implicit $exec, implicit $mode, implicit-def $m0
+ %26:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 26, implicit $exec, implicit $mode, implicit-def $m0
+ %27:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 27, implicit $exec, implicit $mode, implicit-def $m0
+ %28:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 28, implicit $exec, implicit $mode, implicit-def $m0
+ %29:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 29, implicit $exec, implicit $mode, implicit-def $m0
+ %30:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 30, implicit $exec, implicit $mode, implicit-def $m0
+ %31:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 31, implicit $exec, implicit $mode, implicit-def $m0
+ %32:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 32, implicit $exec, implicit $mode
- bb.4:
+ %save_exec:sreg_64 = S_MOV_B64 $exec
+ %new_exec:sgpr_64 = COPY $sgpr2_sgpr3
+ $exec = S_MOV_B64 %new_exec
- %100:sreg_64 = S_MOV_B64 255
- %101:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed %100, implicit-def $exec, implicit-def $scc, implicit $exec
- S_NOP 0, implicit %50, implicit %60, implicit %20
- S_NOP 0, implicit %51, implicit %61, implicit %21
- S_NOP 0, implicit %52, implicit %62
- S_NOP 0, implicit %53, implicit %63
- S_NOP 0, implicit %54, implicit %64
- S_NOP 0, implicit %55, implicit %65
- S_NOP 0, implicit %56, implicit %66
- S_NOP 0, implicit %57, implicit %67
- S_NOP 0, implicit %58, implicit %68
- S_NOP 0, implicit %59, implicit %69
- S_NOP 0, implicit %70, implicit %71
- S_NOP 0, implicit %72, implicit %73
- S_NOP 0, implicit %72, implicit %73
- S_NOP 0, implicit %74, implicit %75
- S_NOP 0, implicit %76, implicit %77
- S_NOP 0, implicit %78, implicit %79
- S_NOP 0, implicit %80
- $exec = S_MOV_B64 %101:sreg_64_xexec
+ bb.1:
+
+ S_NOP 0, implicit %0, implicit %16
+ S_NOP 0, implicit %1, implicit %17
+ S_NOP 0, implicit %2, implicit %18
+ S_NOP 0, implicit %3, implicit %19
+ S_NOP 0, implicit %4, implicit %20
+ S_NOP 0, implicit %5, implicit %21
+ S_NOP 0, implicit %6, implicit %22
+ S_NOP 0, implicit %7, implicit %23
+ S_NOP 0, implicit %8, implicit %24
+ S_NOP 0, implicit %9, implicit %25
+ S_NOP 0, implicit %10, implicit %26
+ S_NOP 0, implicit %11, implicit %27
+ S_NOP 0, implicit %12, implicit %28
+ S_NOP 0, implicit %13, implicit %29
+ S_NOP 0, implicit %14, implicit %30
+ S_NOP 0, implicit %15, implicit %31
+ S_NOP 0, implicit %32
+
+ $exec = S_MOV_B64 %save_exec
S_ENDPGM 0
...
-
---
-name: remat_virtual_vgpr_occ_6
+name: test_rollback_remat_defregion_above_target
tracksRegLiveness: true
machineFunctionInfo:
isEntryFunction: true
body: |
- ; GFX908-LABEL: name: remat_virtual_vgpr_occ_6
+ ; GFX908-LABEL: name: test_rollback_remat_defregion_above_target
; GFX908: bb.0:
; GFX908-NEXT: successors: %bb.1(0x80000000)
- ; GFX908-NEXT: liveins: $vgpr0, $sgpr0_sgpr1
- ; GFX908-NEXT: {{ $}}
- ; GFX908-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr0_sgpr1
- ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; GFX908-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 52, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4)
- ; GFX908-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF4:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF5:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF7:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF10:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF11:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF13:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF14:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF15:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF17:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF18:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: dead [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF18]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[DEF19:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF20:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF1]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_3:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF2]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_4:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF3]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_5:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF4]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_6:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF5]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_7:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF6]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_8:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF7]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_9:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF8]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_10:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF9]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_11:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF10]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_12:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF11]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_13:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF12]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_14:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF13]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_15:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF14]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_16:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF15]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_17:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF16]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_18:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF17]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_19:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF19]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_20:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF20]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[DEF21:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_21:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF21]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[DEF22:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF23:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF24:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF25:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF26:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF27:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF28:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF29:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF30:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF31:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF24]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF25]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF26]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF27]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_26:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF28]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF29]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF30]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF31]], implicit $exec, implicit $mode
- ; GFX908-NEXT: undef [[S_MOV_B32_:%[0-9]+]].sub1:sreg_64 = S_MOV_B32 0
- ; GFX908-NEXT: dead [[V_CMP_GT_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_GT_U32_e64 [[S_LOAD_DWORDX2_IMM]].sub0, [[COPY1]](s32), implicit $exec
- ; GFX908-NEXT: dead undef [[S_MOV_B32_:%[0-9]+]].sub0:sreg_64 = COPY [[S_LOAD_DWORDX2_IMM]].sub1
- ; GFX908-NEXT: S_BRANCH %bb.1
; GFX908-NEXT: {{ $}}
- ; GFX908-NEXT: bb.1:
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF22]], implicit $exec, implicit $mode
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_30]], implicit [[V_CVT_I32_F32_e32_25]], implicit [[DEF22]], implicit [[DEF27]]
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF23]], implicit $exec, implicit $mode
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_31]], implicit [[V_CVT_I32_F32_e32_26]], implicit [[DEF23]], implicit [[DEF28]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_22]], implicit [[V_CVT_I32_F32_e32_27]], implicit [[DEF24]], implicit [[DEF29]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_23]], implicit [[V_CVT_I32_F32_e32_28]], implicit [[DEF25]], implicit [[DEF30]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_24]], implicit [[V_CVT_I32_F32_e32_29]], implicit [[DEF26]], implicit [[DEF31]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_1]], implicit [[V_CVT_I32_F32_e32_6]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_2]], implicit [[V_CVT_I32_F32_e32_7]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_3]], implicit [[V_CVT_I32_F32_e32_8]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_4]], implicit [[V_CVT_I32_F32_e32_9]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_5]], implicit [[V_CVT_I32_F32_e32_10]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_11]], implicit [[V_CVT_I32_F32_e32_12]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_13]], implicit [[V_CVT_I32_F32_e32_14]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_13]], implicit [[V_CVT_I32_F32_e32_14]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_15]], implicit [[V_CVT_I32_F32_e32_16]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_17]], implicit [[V_CVT_I32_F32_e32_18]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_19]], implicit [[V_CVT_I32_F32_e32_20]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_21]]
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 0, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_3:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_4:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 4, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_5:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 5, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_6:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 6, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_7:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 7, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_8:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 8, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_9:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 9, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_10:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 10, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_11:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 11, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_12:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 12, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_13:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 13, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_14:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 14, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_15:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 15, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_16:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 16, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_17:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 17, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_18:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 18, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_19:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 19, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_20:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 20, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_21:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 21, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 25, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_26:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 26, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 27, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 28, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 29, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 30, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 31, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_32:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 32, implicit $exec, implicit $mode
+ ; GFX908-NEXT: {{ $}}
+ ; GFX908-NEXT: bb.1:
+ ; GFX908-NEXT: successors: %bb.2(0x80000000)
+ ; GFX908-NEXT: {{ $}}
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_]], implicit [[V_CVT_I32_F64_e32_1]], implicit [[V_CVT_I32_F64_e32_2]], implicit [[V_CVT_I32_F64_e32_3]], implicit [[V_CVT_I32_F64_e32_4]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_5]], implicit [[V_CVT_I32_F64_e32_6]], implicit [[V_CVT_I32_F64_e32_7]], implicit [[V_CVT_I32_F64_e32_8]], implicit [[V_CVT_I32_F64_e32_9]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_10]], implicit [[V_CVT_I32_F64_e32_11]], implicit [[V_CVT_I32_F64_e32_12]], implicit [[V_CVT_I32_F64_e32_13]], implicit [[V_CVT_I32_F64_e32_14]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_15]], implicit [[V_CVT_I32_F64_e32_16]], implicit [[V_CVT_I32_F64_e32_17]], implicit [[V_CVT_I32_F64_e32_18]], implicit [[V_CVT_I32_F64_e32_19]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_20]], implicit [[V_CVT_I32_F64_e32_21]], implicit [[V_CVT_I32_F64_e32_22]], implicit [[V_CVT_I32_F64_e32_23]], implicit [[V_CVT_I32_F64_e32_24]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_25]], implicit [[V_CVT_I32_F64_e32_26]], implicit [[V_CVT_I32_F64_e32_27]], implicit [[V_CVT_I32_F64_e32_28]], implicit [[V_CVT_I32_F64_e32_29]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_30]], implicit [[V_CVT_I32_F64_e32_31]], implicit [[V_CVT_I32_F64_e32_32]]
+ ; GFX908-NEXT: {{ $}}
+ ; GFX908-NEXT: bb.2:
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_]], implicit [[V_CVT_I32_F64_e32_1]], implicit [[V_CVT_I32_F64_e32_2]], implicit [[V_CVT_I32_F64_e32_3]], implicit [[V_CVT_I32_F64_e32_4]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_5]], implicit [[V_CVT_I32_F64_e32_6]], implicit [[V_CVT_I32_F64_e32_7]], implicit [[V_CVT_I32_F64_e32_8]], implicit [[V_CVT_I32_F64_e32_9]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_10]], implicit [[V_CVT_I32_F64_e32_11]], implicit [[V_CVT_I32_F64_e32_12]], implicit [[V_CVT_I32_F64_e32_13]], implicit [[V_CVT_I32_F64_e32_14]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_15]], implicit [[V_CVT_I32_F64_e32_16]], implicit [[V_CVT_I32_F64_e32_17]], implicit [[V_CVT_I32_F64_e32_18]], implicit [[V_CVT_I32_F64_e32_19]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_20]], implicit [[V_CVT_I32_F64_e32_21]], implicit [[V_CVT_I32_F64_e32_22]], implicit [[V_CVT_I32_F64_e32_23]], implicit [[V_CVT_I32_F64_e32_24]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_25]], implicit [[V_CVT_I32_F64_e32_26]], implicit [[V_CVT_I32_F64_e32_27]], implicit [[V_CVT_I32_F64_e32_28]], implicit [[V_CVT_I32_F64_e32_29]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_30]], implicit [[V_CVT_I32_F64_e32_31]]
; GFX908-NEXT: S_ENDPGM 0
bb.0:
- liveins: $vgpr0, $sgpr0_sgpr1
+ successors: %bb.1
- %1:sgpr_64(p4) = COPY $sgpr0_sgpr1
- %2:vgpr_32(s32) = COPY $vgpr0
- %3:sreg_64_xexec = S_LOAD_DWORDX2_IMM %1(p4), 52, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4)
- %4:sreg_64 = V_CMP_GT_U32_e64 %3.sub0, %2(s32), implicit $exec
- undef %5.sub1:sreg_64 = S_MOV_B32 0
- %5.sub0:sreg_64 = COPY %3.sub1
- %10:vgpr_32 = IMPLICIT_DEF
- %11:vgpr_32 = IMPLICIT_DEF
- %12:vgpr_32 = IMPLICIT_DEF
- %13:vgpr_32 = IMPLICIT_DEF
- %14:vgpr_32 = IMPLICIT_DEF
- %15:vgpr_32 = IMPLICIT_DEF
- %16:vgpr_32 = IMPLICIT_DEF
- %17:vgpr_32 = IMPLICIT_DEF
- %18:vgpr_32 = IMPLICIT_DEF
- %19:vgpr_32 = IMPLICIT_DEF
- %20:vgpr_32 = IMPLICIT_DEF
- %21:vgpr_32 = IMPLICIT_DEF
- %22:vgpr_32 = IMPLICIT_DEF
- %23:vgpr_32 = IMPLICIT_DEF
- %24:vgpr_32 = IMPLICIT_DEF
- %25:vgpr_32 = IMPLICIT_DEF
- %26:vgpr_32 = IMPLICIT_DEF
- %27:vgpr_32 = IMPLICIT_DEF
- %28:vgpr_32 = IMPLICIT_DEF
- %29:vgpr_32 = IMPLICIT_DEF
- %30:vgpr_32 = IMPLICIT_DEF
- %31:vgpr_32 = IMPLICIT_DEF
- %32:vgpr_32 = IMPLICIT_DEF
- %33:vgpr_32 = IMPLICIT_DEF
- %34:vgpr_32 = IMPLICIT_DEF
- %35:vgpr_32 = IMPLICIT_DEF
- %36:vgpr_32 = IMPLICIT_DEF
- %37:vgpr_32 = IMPLICIT_DEF
- %38:vgpr_32 = IMPLICIT_DEF
- %39:vgpr_32 = IMPLICIT_DEF
- %40:vgpr_32 = IMPLICIT_DEF
- %41:vgpr_32 = IMPLICIT_DEF
- %50:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %10, implicit $exec, implicit $mode
- %51:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %11, implicit $exec, implicit $mode
- %52:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %12, implicit $exec, implicit $mode
- %53:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %13, implicit $exec, implicit $mode
- %54:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %14, implicit $exec, implicit $mode
- %55:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %15, implicit $exec, implicit $mode
- %56:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %16, implicit $exec, implicit $mode
- %57:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %17, implicit $exec, implicit $mode
- %58:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %18, implicit $exec, implicit $mode
- %59:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %19, implicit $exec, implicit $mode
- %60:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %20, implicit $exec, implicit $mode
- %61:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %21, implicit $exec, implicit $mode
- %62:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %22, implicit $exec, implicit $mode
- %63:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %23, implicit $exec, implicit $mode
- %64:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %24, implicit $exec, implicit $mode
- %65:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %25, implicit $exec, implicit $mode
- %66:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %26, implicit $exec, implicit $mode
- %67:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %27, implicit $exec, implicit $mode
- %68:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %28, implicit $exec, implicit $mode
- %69:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %29, implicit $exec, implicit $mode
- %70:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %30, implicit $exec, implicit $mode
- %71:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %31, implicit $exec, implicit $mode
- %72:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %32, implicit $exec, implicit $mode
- %73:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %33, implicit $exec, implicit $mode
- %74:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %34, implicit $exec, implicit $mode
- %75:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %35, implicit $exec, implicit $mode
- %76:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %36, implicit $exec, implicit $mode
- %77:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %37, implicit $exec, implicit $mode
- %78:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %38, implicit $exec, implicit $mode
- %79:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %39, implicit $exec, implicit $mode
- %80:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %40, implicit $exec, implicit $mode
- %81:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %41, implicit $exec, implicit $mode
- S_BRANCH %bb.4
+ %0:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 0, implicit $exec, implicit $mode, implicit-def $m0
+ %1:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode, implicit-def $m0
+ %2:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode, implicit-def $m0
+ %3:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode, implicit-def $m0
+ %4:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 4, implicit $exec, implicit $mode, implicit-def $m0
+ %5:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 5, implicit $exec, implicit $mode, implicit-def $m0
+ %6:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 6, implicit $exec, implicit $mode, implicit-def $m0
+ %7:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 7, implicit $exec, implicit $mode, implicit-def $m0
+ %8:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 8, implicit $exec, implicit $mode, implicit-def $m0
+ %9:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 9, implicit $exec, implicit $mode, implicit-def $m0
+ %10:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 10, implicit $exec, implicit $mode, implicit-def $m0
+ %11:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 11, implicit $exec, implicit $mode, implicit-def $m0
+ %12:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 12, implicit $exec, implicit $mode, implicit-def $m0
+ %13:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 13, implicit $exec, implicit $mode, implicit-def $m0
+ %14:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 14, implicit $exec, implicit $mode, implicit-def $m0
+ %15:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 15, implicit $exec, implicit $mode, implicit-def $m0
+ %16:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 16, implicit $exec, implicit $mode, implicit-def $m0
+ %17:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 17, implicit $exec, implicit $mode, implicit-def $m0
+ %18:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 18, implicit $exec, implicit $mode, implicit-def $m0
+ %19:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 19, implicit $exec, implicit $mode, implicit-def $m0
+ %20:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 20, implicit $exec, implicit $mode, implicit-def $m0
+ %21:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 21, implicit $exec, implicit $mode, implicit-def $m0
+ %22:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode, implicit-def $m0
+ %23:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode, implicit-def $m0
+ %24:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
+ %25:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 25, implicit $exec, implicit $mode, implicit-def $m0
+ %26:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 26, implicit $exec, implicit $mode, implicit-def $m0
+ %27:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 27, implicit $exec, implicit $mode, implicit-def $m0
+ %28:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 28, implicit $exec, implicit $mode, implicit-def $m0
+ %29:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 29, implicit $exec, implicit $mode, implicit-def $m0
+ %30:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 30, implicit $exec, implicit $mode, implicit-def $m0
+ %31:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 31, implicit $exec, implicit $mode, implicit-def $m0
+ %32:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 32, implicit $exec, implicit $mode
+
+ bb.1:
+ successors: %bb.2
+
+ S_NOP 0, implicit %0, implicit %1, implicit %2, implicit %3, implicit %4,
+ S_NOP 0, implicit %5, implicit %6, implicit %7, implicit %8, implicit %9
+ S_NOP 0, implicit %10, implicit %11, implicit %12, implicit %13, implicit %14
+ S_NOP 0, implicit %15, implicit %16, implicit %17, implicit %18, implicit %19
+ S_NOP 0, implicit %20, implicit %21, implicit %22, implicit %23, implicit %24
+ S_NOP 0, implicit %25, implicit %26, implicit %27, implicit %28, implicit %29
+ S_NOP 0, implicit %30, implicit %31, implicit %32
+
+ bb.2:
+
+ S_NOP 0, implicit %0, implicit %1, implicit %2, implicit %3, implicit %4,
+ S_NOP 0, implicit %5, implicit %6, implicit %7, implicit %8, implicit %9
+ S_NOP 0, implicit %10, implicit %11, implicit %12, implicit %13, implicit %14
+ S_NOP 0, implicit %15, implicit %16, implicit %17, implicit %18, implicit %19
+ S_NOP 0, implicit %20, implicit %21, implicit %22, implicit %23, implicit %24
+ S_NOP 0, implicit %25, implicit %26, implicit %27, implicit %28, implicit %29
+ S_NOP 0, implicit %30, implicit %31
- bb.4:
- S_NOP 0, implicit %50, implicit %60, implicit %10, implicit %20
- S_NOP 0, implicit %51, implicit %61, implicit %11, implicit %21
- S_NOP 0, implicit %52, implicit %62, implicit %12, implicit %22
- S_NOP 0, implicit %53, implicit %63, implicit %13, implicit %23
- S_NOP 0, implicit %54, implicit %64, implicit %14, implicit %24
- S_NOP 0, implicit %55, implicit %65
- S_NOP 0, implicit %56, implicit %66
- S_NOP 0, implicit %57, implicit %67
- S_NOP 0, implicit %58, implicit %68
- S_NOP 0, implicit %59, implicit %69
- S_NOP 0, implicit %70, implicit %71
- S_NOP 0, implicit %72, implicit %73
- S_NOP 0, implicit %72, implicit %73
- S_NOP 0, implicit %74, implicit %75
- S_NOP 0, implicit %76, implicit %77
- S_NOP 0, implicit %78, implicit %79
- S_NOP 0, implicit %80
S_ENDPGM 0
...
-
---
-name: remat_virtual_vgpr_uses_not_available
+name: test_rollback_remat_useregion_above_target
tracksRegLiveness: true
machineFunctionInfo:
isEntryFunction: true
body: |
- ; GFX908-LABEL: name: remat_virtual_vgpr_uses_not_available
+ ; GFX908-LABEL: name: test_rollback_remat_useregion_above_target
; GFX908: bb.0:
; GFX908-NEXT: successors: %bb.1(0x80000000)
- ; GFX908-NEXT: liveins: $vgpr0, $sgpr0_sgpr1
; GFX908-NEXT: {{ $}}
- ; GFX908-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr0_sgpr1
- ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; GFX908-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 52, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4)
- ; GFX908-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF4:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF5:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF7:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF10:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF11:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF13:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF14:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF15:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF17:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF18:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: dead [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF18]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[DEF19:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF20:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF1]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_3:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF2]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_4:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF3]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_5:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF4]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_6:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF5]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_7:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF6]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_8:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF7]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_9:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF8]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_10:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF9]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_11:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF10]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_12:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF11]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_13:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF12]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_14:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF13]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_15:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF14]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_16:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF15]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_17:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF16]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_18:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF17]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_19:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF19]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_20:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF20]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[DEF21:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_21:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF21]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[DEF22:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF23:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF24:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF25:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF26:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF27:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF28:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF29:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF30:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF31:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF22]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF23]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF24]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF25]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_26:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF26]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF27]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF28]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF29]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF30]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF31]], implicit $exec, implicit $mode
- ; GFX908-NEXT: undef [[S_MOV_B32_:%[0-9]+]].sub1:sreg_64 = S_MOV_B32 0
- ; GFX908-NEXT: dead [[V_CMP_GT_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_GT_U32_e64 [[S_LOAD_DWORDX2_IMM]].sub0, [[COPY1]](s32), implicit $exec
- ; GFX908-NEXT: dead undef [[S_MOV_B32_:%[0-9]+]].sub0:sreg_64 = COPY [[S_LOAD_DWORDX2_IMM]].sub1
- ; GFX908-NEXT: S_BRANCH %bb.1
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 0, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_3:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_4:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 4, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_5:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 5, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_6:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 6, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_7:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 7, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_8:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 8, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_9:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 9, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_10:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 10, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_11:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 11, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_12:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 12, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_13:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 13, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_14:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 14, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_15:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 15, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_16:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 16, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_17:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 17, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_18:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 18, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_19:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 19, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_20:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 20, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_21:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 21, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 25, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_26:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 26, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 27, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 28, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 29, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 30, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 31, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.1:
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_1]], implicit [[V_CVT_I32_F32_e32_6]], implicit [[DEF22]], implicit [[DEF27]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_22]], implicit [[V_CVT_I32_F32_e32_27]], implicit [[DEF23]], implicit [[DEF28]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_23]], implicit [[V_CVT_I32_F32_e32_28]], implicit [[DEF24]], implicit [[DEF29]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_24]], implicit [[V_CVT_I32_F32_e32_29]], implicit [[DEF25]], implicit [[DEF30]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_25]], implicit [[V_CVT_I32_F32_e32_30]], implicit [[DEF26]], implicit [[DEF31]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_26]], implicit [[V_CVT_I32_F32_e32_31]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_2]], implicit [[V_CVT_I32_F32_e32_7]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_3]], implicit [[V_CVT_I32_F32_e32_8]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_4]], implicit [[V_CVT_I32_F32_e32_9]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_5]], implicit [[V_CVT_I32_F32_e32_10]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_11]], implicit [[V_CVT_I32_F32_e32_12]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_13]], implicit [[V_CVT_I32_F32_e32_14]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_13]], implicit [[V_CVT_I32_F32_e32_14]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_15]], implicit [[V_CVT_I32_F32_e32_16]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_17]], implicit [[V_CVT_I32_F32_e32_18]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_19]], implicit [[V_CVT_I32_F32_e32_20]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_21]]
+ ; GFX908-NEXT: successors: %bb.2(0x80000000)
+ ; GFX908-NEXT: {{ $}}
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_]], implicit [[V_CVT_I32_F64_e32_1]], implicit [[V_CVT_I32_F64_e32_2]], implicit [[V_CVT_I32_F64_e32_3]], implicit [[V_CVT_I32_F64_e32_4]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_5]], implicit [[V_CVT_I32_F64_e32_6]], implicit [[V_CVT_I32_F64_e32_7]], implicit [[V_CVT_I32_F64_e32_8]], implicit [[V_CVT_I32_F64_e32_9]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_10]], implicit [[V_CVT_I32_F64_e32_11]], implicit [[V_CVT_I32_F64_e32_12]], implicit [[V_CVT_I32_F64_e32_13]], implicit [[V_CVT_I32_F64_e32_14]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_15]], implicit [[V_CVT_I32_F64_e32_16]], implicit [[V_CVT_I32_F64_e32_17]], implicit [[V_CVT_I32_F64_e32_18]], implicit [[V_CVT_I32_F64_e32_19]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_20]], implicit [[V_CVT_I32_F64_e32_21]], implicit [[V_CVT_I32_F64_e32_22]], implicit [[V_CVT_I32_F64_e32_23]], implicit [[V_CVT_I32_F64_e32_24]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_25]], implicit [[V_CVT_I32_F64_e32_26]], implicit [[V_CVT_I32_F64_e32_27]], implicit [[V_CVT_I32_F64_e32_28]], implicit [[V_CVT_I32_F64_e32_29]]
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_32:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 32, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_30]], implicit [[V_CVT_I32_F64_e32_31]], implicit [[V_CVT_I32_F64_e32_32]]
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_33:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 33, implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_34:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 34, implicit $exec, implicit $mode
+ ; GFX908-NEXT: {{ $}}
+ ; GFX908-NEXT: bb.2:
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_]], implicit [[V_CVT_I32_F64_e32_1]], implicit [[V_CVT_I32_F64_e32_2]], implicit [[V_CVT_I32_F64_e32_3]], implicit [[V_CVT_I32_F64_e32_4]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_5]], implicit [[V_CVT_I32_F64_e32_6]], implicit [[V_CVT_I32_F64_e32_7]], implicit [[V_CVT_I32_F64_e32_8]], implicit [[V_CVT_I32_F64_e32_9]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_10]], implicit [[V_CVT_I32_F64_e32_11]], implicit [[V_CVT_I32_F64_e32_12]], implicit [[V_CVT_I32_F64_e32_13]], implicit [[V_CVT_I32_F64_e32_14]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_15]], implicit [[V_CVT_I32_F64_e32_16]], implicit [[V_CVT_I32_F64_e32_17]], implicit [[V_CVT_I32_F64_e32_18]], implicit [[V_CVT_I32_F64_e32_19]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_20]], implicit [[V_CVT_I32_F64_e32_21]], implicit [[V_CVT_I32_F64_e32_22]], implicit [[V_CVT_I32_F64_e32_23]], implicit [[V_CVT_I32_F64_e32_24]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_25]], implicit [[V_CVT_I32_F64_e32_26]], implicit [[V_CVT_I32_F64_e32_27]], implicit [[V_CVT_I32_F64_e32_28]], implicit [[V_CVT_I32_F64_e32_29]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_30]], implicit [[V_CVT_I32_F64_e32_31]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_33]], implicit [[V_CVT_I32_F64_e32_34]]
; GFX908-NEXT: S_ENDPGM 0
bb.0:
- liveins: $vgpr0, $sgpr0_sgpr1
+ successors: %bb.1
- %1:sgpr_64(p4) = COPY $sgpr0_sgpr1
- %2:vgpr_32(s32) = COPY $vgpr0
- %3:sreg_64_xexec = S_LOAD_DWORDX2_IMM %1(p4), 52, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4)
- %4:sreg_64 = V_CMP_GT_U32_e64 %3.sub0, %2(s32), implicit $exec
- undef %5.sub1:sreg_64 = S_MOV_B32 0
- %5.sub0:sreg_64 = COPY %3.sub1
- %10:vgpr_32 = IMPLICIT_DEF
- %11:vgpr_32 = IMPLICIT_DEF
- %12:vgpr_32 = IMPLICIT_DEF
- %13:vgpr_32 = IMPLICIT_DEF
- %14:vgpr_32 = IMPLICIT_DEF
- %15:vgpr_32 = IMPLICIT_DEF
- %16:vgpr_32 = IMPLICIT_DEF
- %17:vgpr_32 = IMPLICIT_DEF
- %18:vgpr_32 = IMPLICIT_DEF
- %19:vgpr_32 = IMPLICIT_DEF
- %20:vgpr_32 = IMPLICIT_DEF
- %21:vgpr_32 = IMPLICIT_DEF
- %22:vgpr_32 = IMPLICIT_DEF
- %23:vgpr_32 = IMPLICIT_DEF
- %24:vgpr_32 = IMPLICIT_DEF
- %25:vgpr_32 = IMPLICIT_DEF
- %26:vgpr_32 = IMPLICIT_DEF
- %27:vgpr_32 = IMPLICIT_DEF
- %28:vgpr_32 = IMPLICIT_DEF
- %29:vgpr_32 = IMPLICIT_DEF
- %30:vgpr_32 = IMPLICIT_DEF
- %31:vgpr_32 = IMPLICIT_DEF
- %32:vgpr_32 = IMPLICIT_DEF
- %33:vgpr_32 = IMPLICIT_DEF
- %34:vgpr_32 = IMPLICIT_DEF
- %35:vgpr_32 = IMPLICIT_DEF
- %36:vgpr_32 = IMPLICIT_DEF
- %37:vgpr_32 = IMPLICIT_DEF
- %38:vgpr_32 = IMPLICIT_DEF
- %39:vgpr_32 = IMPLICIT_DEF
- %40:vgpr_32 = IMPLICIT_DEF
- %41:vgpr_32 = IMPLICIT_DEF
- %50:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %10, implicit $exec, implicit $mode
- %51:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %11, implicit $exec, implicit $mode
- %52:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %12, implicit $exec, implicit $mode
- %53:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %13, implicit $exec, implicit $mode
- %54:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %14, implicit $exec, implicit $mode
- %55:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %15, implicit $exec, implicit $mode
- %56:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %16, implicit $exec, implicit $mode
- %57:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %17, implicit $exec, implicit $mode
- %58:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %18, implicit $exec, implicit $mode
- %59:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %19, implicit $exec, implicit $mode
- %60:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %20, implicit $exec, implicit $mode
- %61:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %21, implicit $exec, implicit $mode
- %62:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %22, implicit $exec, implicit $mode
- %63:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %23, implicit $exec, implicit $mode
- %64:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %24, implicit $exec, implicit $mode
- %65:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %25, implicit $exec, implicit $mode
- %66:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %26, implicit $exec, implicit $mode
- %67:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %27, implicit $exec, implicit $mode
- %68:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %28, implicit $exec, implicit $mode
- %69:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %29, implicit $exec, implicit $mode
- %70:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %30, implicit $exec, implicit $mode
- %71:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %31, implicit $exec, implicit $mode
- %72:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %32, implicit $exec, implicit $mode
- %73:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %33, implicit $exec, implicit $mode
- %74:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %34, implicit $exec, implicit $mode
- %75:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %35, implicit $exec, implicit $mode
- %76:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %36, implicit $exec, implicit $mode
- %77:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %37, implicit $exec, implicit $mode
- %78:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %38, implicit $exec, implicit $mode
- %79:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %39, implicit $exec, implicit $mode
- %80:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %40, implicit $exec, implicit $mode
- %81:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %41, implicit $exec, implicit $mode
- S_BRANCH %bb.4
+ %0:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 0, implicit $exec, implicit $mode, implicit-def $m0
+ %1:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode, implicit-def $m0
+ %2:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode, implicit-def $m0
+ %3:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode, implicit-def $m0
+ %4:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 4, implicit $exec, implicit $mode, implicit-def $m0
+ %5:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 5, implicit $exec, implicit $mode, implicit-def $m0
+ %6:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 6, implicit $exec, implicit $mode, implicit-def $m0
+ %7:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 7, implicit $exec, implicit $mode, implicit-def $m0
+ %8:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 8, implicit $exec, implicit $mode, implicit-def $m0
+ %9:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 9, implicit $exec, implicit $mode, implicit-def $m0
+ %10:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 10, implicit $exec, implicit $mode, implicit-def $m0
+ %11:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 11, implicit $exec, implicit $mode, implicit-def $m0
+ %12:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 12, implicit $exec, implicit $mode, implicit-def $m0
+ %13:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 13, implicit $exec, implicit $mode, implicit-def $m0
+ %14:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 14, implicit $exec, implicit $mode, implicit-def $m0
+ %15:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 15, implicit $exec, implicit $mode, implicit-def $m0
+ %16:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 16, implicit $exec, implicit $mode, implicit-def $m0
+ %17:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 17, implicit $exec, implicit $mode, implicit-def $m0
+ %18:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 18, implicit $exec, implicit $mode, implicit-def $m0
+ %19:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 19, implicit $exec, implicit $mode, implicit-def $m0
+ %20:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 20, implicit $exec, implicit $mode, implicit-def $m0
+ %21:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 21, implicit $exec, implicit $mode, implicit-def $m0
+ %22:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode, implicit-def $m0
+ %23:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode, implicit-def $m0
+ %24:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
+ %25:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 25, implicit $exec, implicit $mode, implicit-def $m0
+ %26:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 26, implicit $exec, implicit $mode, implicit-def $m0
+ %27:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 27, implicit $exec, implicit $mode, implicit-def $m0
+ %28:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 28, implicit $exec, implicit $mode, implicit-def $m0
+ %29:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 29, implicit $exec, implicit $mode, implicit-def $m0
+ %30:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 30, implicit $exec, implicit $mode, implicit-def $m0
+ %31:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 31, implicit $exec, implicit $mode, implicit-def $m0
- bb.4:
- S_NOP 0, implicit %50, implicit %60, implicit %11, implicit %21
- S_NOP 0, implicit %51, implicit %61, implicit %12, implicit %22
- S_NOP 0, implicit %52, implicit %62, implicit %13, implicit %23
- S_NOP 0, implicit %53, implicit %63, implicit %14, implicit %24
- S_NOP 0, implicit %54, implicit %64, implicit %15, implicit %25
- S_NOP 0, implicit %55, implicit %65
- S_NOP 0, implicit %56, implicit %66
- S_NOP 0, implicit %57, implicit %67
- S_NOP 0, implicit %58, implicit %68
- S_NOP 0, implicit %59, implicit %69
- S_NOP 0, implicit %70, implicit %71
- S_NOP 0, implicit %72, implicit %73
- S_NOP 0, implicit %72, implicit %73
- S_NOP 0, implicit %74, implicit %75
- S_NOP 0, implicit %76, implicit %77
- S_NOP 0, implicit %78, implicit %79
- S_NOP 0, implicit %80
- S_ENDPGM 0
-...
+ bb.1:
+ successors: %bb.2
----
-name: remat_virtual_vgpr_subreg_occ_6
-tracksRegLiveness: true
-machineFunctionInfo:
- isEntryFunction: true
-body: |
- ; GFX908-LABEL: name: remat_virtual_vgpr_subreg_occ_6
- ; GFX908: bb.0:
- ; GFX908-NEXT: successors: %bb.1(0x80000000)
- ; GFX908-NEXT: liveins: $vgpr0, $sgpr0_sgpr1
- ; GFX908-NEXT: {{ $}}
- ; GFX908-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr0_sgpr1
- ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; GFX908-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 52, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4)
- ; GFX908-NEXT: undef [[S_MOV_B32_:%[0-9]+]].sub1:sreg_64 = S_MOV_B32 0
- ; GFX908-NEXT: undef [[DEF:%[0-9]+]].sub0:vreg_512 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF:%[0-9]+]].sub1:vreg_512 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF:%[0-9]+]].sub2:vreg_512 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF:%[0-9]+]].sub3:vreg_512 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: dead [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF3]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[DEF4:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF5:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: dead [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: dead [[V_CMP_GT_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_GT_U32_e64 [[S_LOAD_DWORDX2_IMM]].sub0, [[COPY1]](s32), implicit $exec
- ; GFX908-NEXT: dead undef [[S_MOV_B32_:%[0-9]+]].sub0:sreg_64 = COPY [[S_LOAD_DWORDX2_IMM]].sub1
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF1]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF2]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_3:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF4]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_4:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF5]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[DEF7:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_5:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF7]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_6:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF8]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_7:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF9]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[DEF10:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_8:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF10]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[DEF11:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_9:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF11]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_10:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF12]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[DEF13:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_11:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF13]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[DEF14:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_12:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF14]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[DEF15:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_13:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF15]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_14:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF16]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[DEF17:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_15:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF17]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[DEF18:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_16:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF18]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[DEF19:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_17:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF19]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[DEF20:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_18:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF20]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[DEF21:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_19:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF21]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[DEF22:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_20:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF22]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[DEF23:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_21:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF23]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[DEF24:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF25:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF26:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF27:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF28:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF29:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF30:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF31:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF25]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF26]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF27]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF]].sub2, implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_26:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF28]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF29]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF30]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF31]], implicit $exec, implicit $mode
- ; GFX908-NEXT: S_BRANCH %bb.1
- ; GFX908-NEXT: {{ $}}
- ; GFX908-NEXT: bb.1:
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF]].sub0, implicit $exec, implicit $mode
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_30]], implicit [[V_CVT_I32_F32_e32_25]], implicit [[DEF]].sub0, implicit [[DEF]].sub2
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF24]], implicit $exec, implicit $mode
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_31]], implicit [[V_CVT_I32_F32_e32_26]], implicit [[DEF24]], implicit [[DEF28]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_22]], implicit [[V_CVT_I32_F32_e32_27]], implicit [[DEF25]], implicit [[DEF29]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_23]], implicit [[V_CVT_I32_F32_e32_28]], implicit [[DEF26]], implicit [[DEF30]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_24]], implicit [[V_CVT_I32_F32_e32_29]], implicit [[DEF27]], implicit [[DEF31]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_1]], implicit [[V_CVT_I32_F32_e32_6]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_2]], implicit [[V_CVT_I32_F32_e32_7]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_3]], implicit [[V_CVT_I32_F32_e32_8]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_4]], implicit [[V_CVT_I32_F32_e32_9]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_5]], implicit [[V_CVT_I32_F32_e32_10]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_11]], implicit [[V_CVT_I32_F32_e32_12]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_13]], implicit [[V_CVT_I32_F32_e32_14]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_13]], implicit [[V_CVT_I32_F32_e32_14]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_15]], implicit [[V_CVT_I32_F32_e32_16]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_17]], implicit [[V_CVT_I32_F32_e32_18]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_19]], implicit [[V_CVT_I32_F32_e32_20]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_21]]
- ; GFX908-NEXT: S_ENDPGM 0
- bb.0:
- liveins: $vgpr0, $sgpr0_sgpr1
+ %32:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 32, implicit $exec, implicit $mode, implicit-def $m0
- %1:sgpr_64(p4) = COPY $sgpr0_sgpr1
- %2:vgpr_32(s32) = COPY $vgpr0
- %3:sreg_64_xexec = S_LOAD_DWORDX2_IMM %1(p4), 52, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4)
- %4:sreg_64 = V_CMP_GT_U32_e64 %3.sub0, %2(s32), implicit $exec
- undef %5.sub1:sreg_64 = S_MOV_B32 0
- %5.sub0:sreg_64 = COPY %3.sub1
- undef %10.sub0:vreg_512 = IMPLICIT_DEF
- %10.sub1:vreg_512 = IMPLICIT_DEF
- %10.sub2:vreg_512 = IMPLICIT_DEF
- %10.sub3:vreg_512 = IMPLICIT_DEF
- %11:vgpr_32 = IMPLICIT_DEF
- %12:vgpr_32 = IMPLICIT_DEF
- %13:vgpr_32 = IMPLICIT_DEF
- %14:vgpr_32 = IMPLICIT_DEF
- %15:vgpr_32 = IMPLICIT_DEF
- %16:vgpr_32 = IMPLICIT_DEF
- %17:vgpr_32 = IMPLICIT_DEF
- %18:vgpr_32 = IMPLICIT_DEF
- %19:vgpr_32 = IMPLICIT_DEF
- %20:vgpr_32 = IMPLICIT_DEF
- %21:vgpr_32 = IMPLICIT_DEF
- %22:vgpr_32 = IMPLICIT_DEF
- %23:vgpr_32 = IMPLICIT_DEF
- %24:vgpr_32 = IMPLICIT_DEF
- %25:vgpr_32 = IMPLICIT_DEF
- %26:vgpr_32 = IMPLICIT_DEF
- %27:vgpr_32 = IMPLICIT_DEF
- %28:vgpr_32 = IMPLICIT_DEF
- %29:vgpr_32 = IMPLICIT_DEF
- %30:vgpr_32 = IMPLICIT_DEF
- %31:vgpr_32 = IMPLICIT_DEF
- %32:vgpr_32 = IMPLICIT_DEF
- %33:vgpr_32 = IMPLICIT_DEF
- %34:vgpr_32 = IMPLICIT_DEF
- %35:vgpr_32 = IMPLICIT_DEF
- %36:vgpr_32 = IMPLICIT_DEF
- %37:vgpr_32 = IMPLICIT_DEF
- %38:vgpr_32 = IMPLICIT_DEF
- %39:vgpr_32 = IMPLICIT_DEF
- %40:vgpr_32 = IMPLICIT_DEF
- %41:vgpr_32 = IMPLICIT_DEF
- %50:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %10.sub0, implicit $exec, implicit $mode
- %51:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %11, implicit $exec, implicit $mode
- %52:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %12, implicit $exec, implicit $mode
- %53:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %13, implicit $exec, implicit $mode
- %54:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %14, implicit $exec, implicit $mode
- %55:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %15, implicit $exec, implicit $mode
- %56:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %16, implicit $exec, implicit $mode
- %57:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %17, implicit $exec, implicit $mode
- %58:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %18, implicit $exec, implicit $mode
- %59:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %19, implicit $exec, implicit $mode
- %60:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %10.sub2, implicit $exec, implicit $mode
- %61:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %21, implicit $exec, implicit $mode
- %62:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %22, implicit $exec, implicit $mode
- %63:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %23, implicit $exec, implicit $mode
- %64:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %24, implicit $exec, implicit $mode
- %65:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %25, implicit $exec, implicit $mode
- %66:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %26, implicit $exec, implicit $mode
- %67:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %27, implicit $exec, implicit $mode
- %68:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %28, implicit $exec, implicit $mode
- %69:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %29, implicit $exec, implicit $mode
- %70:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %30, implicit $exec, implicit $mode
- %71:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %31, implicit $exec, implicit $mode
- %72:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %32, implicit $exec, implicit $mode
- %73:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %33, implicit $exec, implicit $mode
- %74:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %34, implicit $exec, implicit $mode
- %75:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %35, implicit $exec, implicit $mode
- %76:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %36, implicit $exec, implicit $mode
- %77:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %37, implicit $exec, implicit $mode
- %78:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %38, implicit $exec, implicit $mode
- %79:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %39, implicit $exec, implicit $mode
- %80:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %40, implicit $exec, implicit $mode
- %81:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %41, implicit $exec, implicit $mode
- S_BRANCH %bb.4
+ S_NOP 0, implicit %0, implicit %1, implicit %2, implicit %3, implicit %4,
+ S_NOP 0, implicit %5, implicit %6, implicit %7, implicit %8, implicit %9
+ S_NOP 0, implicit %10, implicit %11, implicit %12, implicit %13, implicit %14
+ S_NOP 0, implicit %15, implicit %16, implicit %17, implicit %18, implicit %19
+ S_NOP 0, implicit %20, implicit %21, implicit %22, implicit %23, implicit %24
+ S_NOP 0, implicit %25, implicit %26, implicit %27, implicit %28, implicit %29
+ S_NOP 0, implicit %30, implicit %31, implicit %32
+
+ %33:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 33, implicit $exec, implicit $mode
+ %34:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 34, implicit $exec, implicit $mode
+
+ bb.2:
+
+ S_NOP 0, implicit %0, implicit %1, implicit %2, implicit %3, implicit %4,
+ S_NOP 0, implicit %5, implicit %6, implicit %7, implicit %8, implicit %9
+ S_NOP 0, implicit %10, implicit %11, implicit %12, implicit %13, implicit %14
+ S_NOP 0, implicit %15, implicit %16, implicit %17, implicit %18, implicit %19
+ S_NOP 0, implicit %20, implicit %21, implicit %22, implicit %23, implicit %24
+ S_NOP 0, implicit %25, implicit %26, implicit %27, implicit %28, implicit %29
+ S_NOP 0, implicit %30, implicit %31
+ S_NOP 0, implicit %33, implicit %34
- bb.4:
- S_NOP 0, implicit %50, implicit %60, implicit %10.sub0, implicit %10.sub2
- S_NOP 0, implicit %51, implicit %61, implicit %11, implicit %21
- S_NOP 0, implicit %52, implicit %62, implicit %12, implicit %22
- S_NOP 0, implicit %53, implicit %63, implicit %13, implicit %23
- S_NOP 0, implicit %54, implicit %64, implicit %14, implicit %24
- S_NOP 0, implicit %55, implicit %65
- S_NOP 0, implicit %56, implicit %66
- S_NOP 0, implicit %57, implicit %67
- S_NOP 0, implicit %58, implicit %68
- S_NOP 0, implicit %59, implicit %69
- S_NOP 0, implicit %70, implicit %71
- S_NOP 0, implicit %72, implicit %73
- S_NOP 0, implicit %72, implicit %73
- S_NOP 0, implicit %74, implicit %75
- S_NOP 0, implicit %76, implicit %77
- S_NOP 0, implicit %78, implicit %79
- S_NOP 0, implicit %80
S_ENDPGM 0
...
-
---
-name: remat_virtual_vgpr_subreg_uses_not_available
+name: test_rollback_remats_emptydefregion
tracksRegLiveness: true
machineFunctionInfo:
isEntryFunction: true
body: |
- ; GFX908-LABEL: name: remat_virtual_vgpr_subreg_uses_not_available
+ ; GFX908-LABEL: name: test_rollback_remats_emptydefregion
; GFX908: bb.0:
; GFX908-NEXT: successors: %bb.1(0x80000000)
- ; GFX908-NEXT: liveins: $vgpr0, $sgpr0_sgpr1
- ; GFX908-NEXT: {{ $}}
- ; GFX908-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr0_sgpr1
- ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; GFX908-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 52, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4)
- ; GFX908-NEXT: undef [[S_MOV_B32_:%[0-9]+]].sub1:sreg_64 = S_MOV_B32 0
- ; GFX908-NEXT: undef [[DEF:%[0-9]+]].sub0:vreg_512 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF:%[0-9]+]].sub1:vreg_512 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF:%[0-9]+]].sub2:vreg_512 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF:%[0-9]+]].sub3:vreg_512 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: dead [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF3]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[DEF4:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF5:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: dead [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: dead [[V_CMP_GT_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_GT_U32_e64 [[S_LOAD_DWORDX2_IMM]].sub0, [[COPY1]](s32), implicit $exec
- ; GFX908-NEXT: dead undef [[S_MOV_B32_:%[0-9]+]].sub0:sreg_64 = COPY [[S_LOAD_DWORDX2_IMM]].sub1
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF1]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF2]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_3:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF4]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_4:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF5]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[DEF7:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_5:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF7]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_6:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF8]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_7:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF9]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[DEF10:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_8:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF10]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[DEF11:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_9:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF11]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_10:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF12]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[DEF13:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_11:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF13]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[DEF14:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_12:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF14]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[DEF15:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_13:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF15]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_14:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF16]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[DEF17:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_15:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF17]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[DEF18:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_16:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF18]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[DEF19:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_17:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF19]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[DEF20:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_18:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF20]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[DEF21:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_19:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF21]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[DEF22:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_20:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF22]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[DEF23:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_21:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF23]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[DEF24:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF25:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF26:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF27:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF28:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF29:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF30:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF31:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF]].sub0, implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF24]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF25]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF26]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_26:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF27]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF]].sub2, implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF28]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF29]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF30]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF31]], implicit $exec, implicit $mode
- ; GFX908-NEXT: S_BRANCH %bb.1
; GFX908-NEXT: {{ $}}
- ; GFX908-NEXT: bb.1:
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_22]], implicit [[V_CVT_I32_F32_e32_27]], implicit [[DEF]].sub1, implicit [[DEF]].sub3
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_1]], implicit [[V_CVT_I32_F32_e32_6]], implicit [[DEF24]], implicit [[DEF28]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_23]], implicit [[V_CVT_I32_F32_e32_28]], implicit [[DEF25]], implicit [[DEF29]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_24]], implicit [[V_CVT_I32_F32_e32_29]], implicit [[DEF26]], implicit [[DEF30]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_25]], implicit [[V_CVT_I32_F32_e32_30]], implicit [[DEF27]], implicit [[DEF31]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_26]], implicit [[V_CVT_I32_F32_e32_31]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_2]], implicit [[V_CVT_I32_F32_e32_7]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_3]], implicit [[V_CVT_I32_F32_e32_8]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_4]], implicit [[V_CVT_I32_F32_e32_9]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_5]], implicit [[V_CVT_I32_F32_e32_10]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_11]], implicit [[V_CVT_I32_F32_e32_12]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_13]], implicit [[V_CVT_I32_F32_e32_14]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_13]], implicit [[V_CVT_I32_F32_e32_14]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_15]], implicit [[V_CVT_I32_F32_e32_16]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_17]], implicit [[V_CVT_I32_F32_e32_18]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_19]], implicit [[V_CVT_I32_F32_e32_20]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_21]]
- ; GFX908-NEXT: S_ENDPGM 0
- bb.0:
- liveins: $vgpr0, $sgpr0_sgpr1
-
- %1:sgpr_64(p4) = COPY $sgpr0_sgpr1
- %2:vgpr_32(s32) = COPY $vgpr0
- %3:sreg_64_xexec = S_LOAD_DWORDX2_IMM %1(p4), 52, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4)
- %4:sreg_64 = V_CMP_GT_U32_e64 %3.sub0, %2(s32), implicit $exec
- undef %5.sub1:sreg_64 = S_MOV_B32 0
- %5.sub0:sreg_64 = COPY %3.sub1
- undef %10.sub0:vreg_512 = IMPLICIT_DEF
- %10.sub1:vreg_512 = IMPLICIT_DEF
- %10.sub2:vreg_512 = IMPLICIT_DEF
- %10.sub3:vreg_512 = IMPLICIT_DEF
- %11:vgpr_32 = IMPLICIT_DEF
- %12:vgpr_32 = IMPLICIT_DEF
- %13:vgpr_32 = IMPLICIT_DEF
- %14:vgpr_32 = IMPLICIT_DEF
- %15:vgpr_32 = IMPLICIT_DEF
- %16:vgpr_32 = IMPLICIT_DEF
- %17:vgpr_32 = IMPLICIT_DEF
- %18:vgpr_32 = IMPLICIT_DEF
- %19:vgpr_32 = IMPLICIT_DEF
- %20:vgpr_32 = IMPLICIT_DEF
- %21:vgpr_32 = IMPLICIT_DEF
- %22:vgpr_32 = IMPLICIT_DEF
- %23:vgpr_32 = IMPLICIT_DEF
- %24:vgpr_32 = IMPLICIT_DEF
- %25:vgpr_32 = IMPLICIT_DEF
- %26:vgpr_32 = IMPLICIT_DEF
- %27:vgpr_32 = IMPLICIT_DEF
- %28:vgpr_32 = IMPLICIT_DEF
- %29:vgpr_32 = IMPLICIT_DEF
- %30:vgpr_32 = IMPLICIT_DEF
- %31:vgpr_32 = IMPLICIT_DEF
- %32:vgpr_32 = IMPLICIT_DEF
- %33:vgpr_32 = IMPLICIT_DEF
- %34:vgpr_32 = IMPLICIT_DEF
- %35:vgpr_32 = IMPLICIT_DEF
- %36:vgpr_32 = IMPLICIT_DEF
- %37:vgpr_32 = IMPLICIT_DEF
- %38:vgpr_32 = IMPLICIT_DEF
- %39:vgpr_32 = IMPLICIT_DEF
- %40:vgpr_32 = IMPLICIT_DEF
- %41:vgpr_32 = IMPLICIT_DEF
- %50:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %10.sub0, implicit $exec, implicit $mode
- %51:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %11, implicit $exec, implicit $mode
- %52:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %12, implicit $exec, implicit $mode
- %53:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %13, implicit $exec, implicit $mode
- %54:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %14, implicit $exec, implicit $mode
- %55:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %15, implicit $exec, implicit $mode
- %56:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %16, implicit $exec, implicit $mode
- %57:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %17, implicit $exec, implicit $mode
- %58:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %18, implicit $exec, implicit $mode
- %59:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %19, implicit $exec, implicit $mode
- %60:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %10.sub2, implicit $exec, implicit $mode
- %61:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %21, implicit $exec, implicit $mode
- %62:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %22, implicit $exec, implicit $mode
- %63:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %23, implicit $exec, implicit $mode
- %64:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %24, implicit $exec, implicit $mode
- %65:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %25, implicit $exec, implicit $mode
- %66:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %26, implicit $exec, implicit $mode
- %67:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %27, implicit $exec, implicit $mode
- %68:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %28, implicit $exec, implicit $mode
- %69:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %29, implicit $exec, implicit $mode
- %70:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %30, implicit $exec, implicit $mode
- %71:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %31, implicit $exec, implicit $mode
- %72:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %32, implicit $exec, implicit $mode
- %73:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %33, implicit $exec, implicit $mode
- %74:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %34, implicit $exec, implicit $mode
- %75:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %35, implicit $exec, implicit $mode
- %76:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %36, implicit $exec, implicit $mode
- %77:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %37, implicit $exec, implicit $mode
- %78:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %38, implicit $exec, implicit $mode
- %79:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %39, implicit $exec, implicit $mode
- %80:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %40, implicit $exec, implicit $mode
- %81:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %41, implicit $exec, implicit $mode
- S_BRANCH %bb.4
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 0, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_3:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_4:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 4, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_5:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 5, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_6:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 6, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_7:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 7, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_8:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 8, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_9:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 9, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_10:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 10, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_11:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 11, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_12:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 12, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_13:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 13, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_14:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 14, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_15:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 15, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_16:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 16, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_17:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 17, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_18:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 18, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_19:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 19, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_20:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 20, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_21:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 21, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 25, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_26:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 26, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 27, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 28, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 29, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 30, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 31, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: {{ $}}
+ ; GFX908-NEXT: bb.1:
+ ; GFX908-NEXT: successors: %bb.2(0x80000000)
+ ; GFX908-NEXT: {{ $}}
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_32:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 32, implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_33:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 33, implicit $exec, implicit $mode
+ ; GFX908-NEXT: {{ $}}
+ ; GFX908-NEXT: bb.2:
+ ; GFX908-NEXT: successors: %bb.3(0x80000000)
+ ; GFX908-NEXT: {{ $}}
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_]], implicit [[V_CVT_I32_F64_e32_1]], implicit [[V_CVT_I32_F64_e32_2]], implicit [[V_CVT_I32_F64_e32_3]], implicit [[V_CVT_I32_F64_e32_4]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_5]], implicit [[V_CVT_I32_F64_e32_6]], implicit [[V_CVT_I32_F64_e32_7]], implicit [[V_CVT_I32_F64_e32_8]], implicit [[V_CVT_I32_F64_e32_9]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_10]], implicit [[V_CVT_I32_F64_e32_11]], implicit [[V_CVT_I32_F64_e32_12]], implicit [[V_CVT_I32_F64_e32_13]], implicit [[V_CVT_I32_F64_e32_14]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_15]], implicit [[V_CVT_I32_F64_e32_16]], implicit [[V_CVT_I32_F64_e32_17]], implicit [[V_CVT_I32_F64_e32_18]], implicit [[V_CVT_I32_F64_e32_19]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_20]], implicit [[V_CVT_I32_F64_e32_21]], implicit [[V_CVT_I32_F64_e32_22]], implicit [[V_CVT_I32_F64_e32_23]], implicit [[V_CVT_I32_F64_e32_24]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_25]], implicit [[V_CVT_I32_F64_e32_26]], implicit [[V_CVT_I32_F64_e32_27]], implicit [[V_CVT_I32_F64_e32_28]], implicit [[V_CVT_I32_F64_e32_29]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_30]], implicit [[V_CVT_I32_F64_e32_31]], implicit [[V_CVT_I32_F64_e32_32]], implicit [[V_CVT_I32_F64_e32_33]]
+ ; GFX908-NEXT: {{ $}}
+ ; GFX908-NEXT: bb.3:
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_]], implicit [[V_CVT_I32_F64_e32_1]], implicit [[V_CVT_I32_F64_e32_2]], implicit [[V_CVT_I32_F64_e32_3]], implicit [[V_CVT_I32_F64_e32_4]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_5]], implicit [[V_CVT_I32_F64_e32_6]], implicit [[V_CVT_I32_F64_e32_7]], implicit [[V_CVT_I32_F64_e32_8]], implicit [[V_CVT_I32_F64_e32_9]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_10]], implicit [[V_CVT_I32_F64_e32_11]], implicit [[V_CVT_I32_F64_e32_12]], implicit [[V_CVT_I32_F64_e32_13]], implicit [[V_CVT_I32_F64_e32_14]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_15]], implicit [[V_CVT_I32_F64_e32_16]], implicit [[V_CVT_I32_F64_e32_17]], implicit [[V_CVT_I32_F64_e32_18]], implicit [[V_CVT_I32_F64_e32_19]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_20]], implicit [[V_CVT_I32_F64_e32_21]], implicit [[V_CVT_I32_F64_e32_22]], implicit [[V_CVT_I32_F64_e32_23]], implicit [[V_CVT_I32_F64_e32_24]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_25]], implicit [[V_CVT_I32_F64_e32_26]], implicit [[V_CVT_I32_F64_e32_27]], implicit [[V_CVT_I32_F64_e32_28]], implicit [[V_CVT_I32_F64_e32_29]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_30]], implicit [[V_CVT_I32_F64_e32_31]]
+ ; GFX908-NEXT: S_ENDPGM 0
+ bb.0:
+ successors: %bb.1
- bb.4:
- S_NOP 0, implicit %50, implicit %60, implicit %10.sub1, implicit %10.sub3
- S_NOP 0, implicit %51, implicit %61, implicit %12, implicit %22
- S_NOP 0, implicit %52, implicit %62, implicit %13, implicit %23
- S_NOP 0, implicit %53, implicit %63, implicit %14, implicit %24
- S_NOP 0, implicit %54, implicit %64, implicit %15, implicit %25
- S_NOP 0, implicit %55, implicit %65
- S_NOP 0, implicit %56, implicit %66
- S_NOP 0, implicit %57, implicit %67
- S_NOP 0, implicit %58, implicit %68
- S_NOP 0, implicit %59, implicit %69
- S_NOP 0, implicit %70, implicit %71
- S_NOP 0, implicit %72, implicit %73
- S_NOP 0, implicit %72, implicit %73
- S_NOP 0, implicit %74, implicit %75
- S_NOP 0, implicit %76, implicit %77
- S_NOP 0, implicit %78, implicit %79
- S_NOP 0, implicit %80
- S_ENDPGM 0
-...
+ %0:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 0, implicit $exec, implicit $mode, implicit-def $m0
+ %1:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode, implicit-def $m0
+ %2:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode, implicit-def $m0
+ %3:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode, implicit-def $m0
+ %4:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 4, implicit $exec, implicit $mode, implicit-def $m0
+ %5:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 5, implicit $exec, implicit $mode, implicit-def $m0
+ %6:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 6, implicit $exec, implicit $mode, implicit-def $m0
+ %7:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 7, implicit $exec, implicit $mode, implicit-def $m0
+ %8:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 8, implicit $exec, implicit $mode, implicit-def $m0
+ %9:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 9, implicit $exec, implicit $mode, implicit-def $m0
+ %10:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 10, implicit $exec, implicit $mode, implicit-def $m0
+ %11:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 11, implicit $exec, implicit $mode, implicit-def $m0
+ %12:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 12, implicit $exec, implicit $mode, implicit-def $m0
+ %13:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 13, implicit $exec, implicit $mode, implicit-def $m0
+ %14:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 14, implicit $exec, implicit $mode, implicit-def $m0
+ %15:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 15, implicit $exec, implicit $mode, implicit-def $m0
+ %16:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 16, implicit $exec, implicit $mode, implicit-def $m0
+ %17:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 17, implicit $exec, implicit $mode, implicit-def $m0
+ %18:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 18, implicit $exec, implicit $mode, implicit-def $m0
+ %19:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 19, implicit $exec, implicit $mode, implicit-def $m0
+ %20:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 20, implicit $exec, implicit $mode, implicit-def $m0
+ %21:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 21, implicit $exec, implicit $mode, implicit-def $m0
+ %22:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode, implicit-def $m0
+ %23:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode, implicit-def $m0
+ %24:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
+ %25:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 25, implicit $exec, implicit $mode, implicit-def $m0
+ %26:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 26, implicit $exec, implicit $mode, implicit-def $m0
+ %27:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 27, implicit $exec, implicit $mode, implicit-def $m0
+ %28:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 28, implicit $exec, implicit $mode, implicit-def $m0
+ %29:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 29, implicit $exec, implicit $mode, implicit-def $m0
+ %30:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 30, implicit $exec, implicit $mode, implicit-def $m0
+ %31:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 31, implicit $exec, implicit $mode, implicit-def $m0
+
+ bb.1:
+ successors: %bb.2
+
+ %32:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 32, implicit $exec, implicit $mode
+ %33:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 33, implicit $exec, implicit $mode
+
+ bb.2:
+ successors: %bb.3
+
+ S_NOP 0, implicit %0, implicit %1, implicit %2, implicit %3, implicit %4
+ S_NOP 0, implicit %5, implicit %6, implicit %7, implicit %8, implicit %9
+ S_NOP 0, implicit %10, implicit %11, implicit %12, implicit %13, implicit %14
+ S_NOP 0, implicit %15, implicit %16, implicit %17, implicit %18, implicit %19
+ S_NOP 0, implicit %20, implicit %21, implicit %22, implicit %23, implicit %24
+ S_NOP 0, implicit %25, implicit %26, implicit %27, implicit %28, implicit %29
+ S_NOP 0, implicit %30, implicit %31, implicit %32, implicit %33
+
+ bb.3:
+ S_NOP 0, implicit %0, implicit %1, implicit %2, implicit %3, implicit %4
+ S_NOP 0, implicit %5, implicit %6, implicit %7, implicit %8, implicit %9
+ S_NOP 0, implicit %10, implicit %11, implicit %12, implicit %13, implicit %14
+ S_NOP 0, implicit %15, implicit %16, implicit %17, implicit %18, implicit %19
+ S_NOP 0, implicit %20, implicit %21, implicit %22, implicit %23, implicit %24
+ S_NOP 0, implicit %25, implicit %26, implicit %27, implicit %28, implicit %29
+ S_NOP 0, implicit %30, implicit %31
+ S_ENDPGM 0
+...
---
-name: remat_virtual_vgpr_subreg_def
+name: test_occ_8_physreg_use
tracksRegLiveness: true
machineFunctionInfo:
isEntryFunction: true
body: |
- ; GFX908-LABEL: name: remat_virtual_vgpr_subreg_def
+ ; GFX908-LABEL: name: test_occ_8_physreg_use
; GFX908: bb.0:
; GFX908-NEXT: successors: %bb.1(0x80000000)
; GFX908-NEXT: liveins: $vgpr0, $sgpr0_sgpr1
@@ -7095,95 +7163,93 @@ body: |
; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
; GFX908-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 52, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4)
; GFX908-NEXT: undef [[S_MOV_B32_:%[0-9]+]].sub1:sreg_64 = S_MOV_B32 0
- ; GFX908-NEXT: undef [[DEF:%[0-9]+]].sub0:vreg_512 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF:%[0-9]+]].sub1:vreg_512 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF:%[0-9]+]].sub2:vreg_512 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF:%[0-9]+]].sub3:vreg_512 = IMPLICIT_DEF
+ ; GFX908-NEXT: $vgpr8 = IMPLICIT_DEF
+ ; GFX908-NEXT: $vgpr9 = IMPLICIT_DEF
+ ; GFX908-NEXT: dead [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: dead [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF3]], implicit $exec, implicit $mode
; GFX908-NEXT: [[DEF4:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF5:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: dead [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: dead [[V_CMP_GT_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_GT_U32_e64 [[S_LOAD_DWORDX2_IMM]].sub0, [[COPY1]](s32), implicit $exec
- ; GFX908-NEXT: dead undef [[S_MOV_B32_:%[0-9]+]].sub0:sreg_64 = COPY [[S_LOAD_DWORDX2_IMM]].sub1
- ; GFX908-NEXT: undef [[V_CVT_I32_F32_e32_1:%[0-9]+]].sub1:vreg_64 = nofpexcept V_CVT_I32_F32_e32 [[DEF1]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF2]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_3:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF4]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_4:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF5]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF7:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_5:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF7]], implicit $exec, implicit $mode
; GFX908-NEXT: [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_6:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF8]], implicit $exec, implicit $mode
; GFX908-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_7:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF9]], implicit $exec, implicit $mode
; GFX908-NEXT: [[DEF10:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_8:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF10]], implicit $exec, implicit $mode
; GFX908-NEXT: [[DEF11:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_9:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF11]], implicit $exec, implicit $mode
; GFX908-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_10:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF12]], implicit $exec, implicit $mode
; GFX908-NEXT: [[DEF13:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_11:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF13]], implicit $exec, implicit $mode
; GFX908-NEXT: [[DEF14:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_12:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF14]], implicit $exec, implicit $mode
; GFX908-NEXT: [[DEF15:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_13:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF15]], implicit $exec, implicit $mode
; GFX908-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_14:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF16]], implicit $exec, implicit $mode
; GFX908-NEXT: [[DEF17:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_15:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF17]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: dead [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF17]], implicit $exec, implicit $mode
; GFX908-NEXT: [[DEF18:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_16:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF18]], implicit $exec, implicit $mode
; GFX908-NEXT: [[DEF19:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_17:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF19]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: dead [[V_CMP_GT_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_GT_U32_e64 [[S_LOAD_DWORDX2_IMM]].sub0, [[COPY1]](s32), implicit $exec
+ ; GFX908-NEXT: dead undef [[S_MOV_B32_:%[0-9]+]].sub0:sreg_64 = COPY [[S_LOAD_DWORDX2_IMM]].sub1
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 $vgpr8, implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 $vgpr9, implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_3:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF1]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_4:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF2]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_5:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF3]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_6:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF4]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_7:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF5]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_8:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF6]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_9:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF7]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_10:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF8]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_11:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF9]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_12:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF10]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_13:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF11]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_14:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF12]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_15:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF13]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_16:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF14]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_17:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF15]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_18:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF16]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_19:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF18]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_20:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF19]], implicit $exec, implicit $mode
; GFX908-NEXT: [[DEF20:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_18:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF20]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_21:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF20]], implicit $exec, implicit $mode
; GFX908-NEXT: [[DEF21:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_19:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF21]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF21]], implicit $exec, implicit $mode
; GFX908-NEXT: [[DEF22:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_20:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF22]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF22]], implicit $exec, implicit $mode
; GFX908-NEXT: [[DEF23:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_21:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF23]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF23]], implicit $exec, implicit $mode
; GFX908-NEXT: [[DEF24:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF24]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF24]], implicit $exec, implicit $mode
; GFX908-NEXT: [[DEF25:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_26:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF25]], implicit $exec, implicit $mode
; GFX908-NEXT: [[DEF26:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF26]], implicit $exec, implicit $mode
; GFX908-NEXT: [[DEF27:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF27]], implicit $exec, implicit $mode
; GFX908-NEXT: [[DEF28:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF28]], implicit $exec, implicit $mode
; GFX908-NEXT: [[DEF29:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF29]], implicit $exec, implicit $mode
; GFX908-NEXT: [[DEF30:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF31:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF25]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF30]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF31]], implicit $exec, implicit $mode
- ; GFX908-NEXT: undef [[V_CVT_I32_F32_e32_1:%[0-9]+]].sub0:vreg_64 = nofpexcept V_CVT_I32_F32_e32 [[DEF]].sub0, implicit $exec, implicit $mode
; GFX908-NEXT: S_BRANCH %bb.1
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.1:
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_26:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF]].sub2, implicit $exec, implicit $mode
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_1]].sub0, implicit [[V_CVT_I32_F32_e32_26]], implicit [[DEF]].sub0, implicit [[DEF]].sub2
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF28]], implicit $exec, implicit $mode
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_23]], implicit [[V_CVT_I32_F32_e32_27]], implicit [[DEF1]], implicit [[DEF28]]
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF29]], implicit $exec, implicit $mode
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_23]], implicit [[V_CVT_I32_F32_e32_28]], implicit [[DEF25]], implicit [[DEF29]]
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF26]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF27]], implicit $exec, implicit $mode
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_29]], implicit [[V_CVT_I32_F32_e32_24]], implicit [[DEF26]], implicit [[DEF30]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_30]], implicit [[V_CVT_I32_F32_e32_25]], implicit [[DEF27]], implicit [[DEF31]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_2]], implicit [[V_CVT_I32_F32_e32_7]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_3]], implicit [[V_CVT_I32_F32_e32_8]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_4]], implicit [[V_CVT_I32_F32_e32_9]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_5]], implicit [[V_CVT_I32_F32_e32_10]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_6]], implicit [[V_CVT_I32_F32_e32_11]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_12]], implicit [[V_CVT_I32_F32_e32_13]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_14]], implicit [[V_CVT_I32_F32_e32_15]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_14]], implicit [[V_CVT_I32_F32_e32_15]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_16]], implicit [[V_CVT_I32_F32_e32_17]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_18]], implicit [[V_CVT_I32_F32_e32_19]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_20]], implicit [[V_CVT_I32_F32_e32_21]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_22]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_3]], implicit [[V_CVT_I32_F32_e32_11]]
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF30]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_1]], implicit [[V_CVT_I32_F32_e32_31]], implicit [[DEF30]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_2]], implicit [[V_CVT_I32_F32_e32_30]], implicit [[DEF29]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_4]], implicit [[V_CVT_I32_F32_e32_12]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_5]], implicit [[V_CVT_I32_F32_e32_13]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_6]], implicit [[V_CVT_I32_F32_e32_14]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_7]], implicit [[V_CVT_I32_F32_e32_15]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_8]], implicit [[V_CVT_I32_F32_e32_16]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_9]], implicit [[V_CVT_I32_F32_e32_17]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_10]], implicit [[V_CVT_I32_F32_e32_18]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_19]], implicit [[V_CVT_I32_F32_e32_20]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_21]], implicit [[V_CVT_I32_F32_e32_22]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_21]], implicit [[V_CVT_I32_F32_e32_22]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_23]], implicit [[V_CVT_I32_F32_e32_24]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_25]], implicit [[V_CVT_I32_F32_e32_26]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_27]], implicit [[V_CVT_I32_F32_e32_28]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_29]]
; GFX908-NEXT: S_ENDPGM 0
bb.0:
liveins: $vgpr0, $sgpr0_sgpr1
@@ -7194,10 +7260,8 @@ body: |
%4:sreg_64 = V_CMP_GT_U32_e64 %3.sub0, %2(s32), implicit $exec
undef %5.sub1:sreg_64 = S_MOV_B32 0
%5.sub0:sreg_64 = COPY %3.sub1
- undef %10.sub0:vreg_512 = IMPLICIT_DEF
- %10.sub1:vreg_512 = IMPLICIT_DEF
- %10.sub2:vreg_512 = IMPLICIT_DEF
- %10.sub3:vreg_512 = IMPLICIT_DEF
+ $vgpr8 = IMPLICIT_DEF
+ $vgpr9 = IMPLICIT_DEF
%11:vgpr_32 = IMPLICIT_DEF
%12:vgpr_32 = IMPLICIT_DEF
%13:vgpr_32 = IMPLICIT_DEF
@@ -7229,8 +7293,8 @@ body: |
%39:vgpr_32 = IMPLICIT_DEF
%40:vgpr_32 = IMPLICIT_DEF
%41:vgpr_32 = IMPLICIT_DEF
- undef %50.sub0:vreg_64 = nofpexcept V_CVT_I32_F32_e32 %10.sub0, implicit $exec, implicit $mode
- %50.sub1:vreg_64 = nofpexcept V_CVT_I32_F32_e32 %11, implicit $exec, implicit $mode
+ %50:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 $vgpr8, implicit $exec, implicit $mode
+ %51:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 $vgpr9, implicit $exec, implicit $mode
%52:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %12, implicit $exec, implicit $mode
%53:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %13, implicit $exec, implicit $mode
%54:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %14, implicit $exec, implicit $mode
@@ -7239,7 +7303,7 @@ body: |
%57:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %17, implicit $exec, implicit $mode
%58:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %18, implicit $exec, implicit $mode
%59:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %19, implicit $exec, implicit $mode
- %60:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %10.sub2, implicit $exec, implicit $mode
+ %60:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %20, implicit $exec, implicit $mode
%61:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %21, implicit $exec, implicit $mode
%62:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %22, implicit $exec, implicit $mode
%63:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %23, implicit $exec, implicit $mode
@@ -7264,11 +7328,12 @@ body: |
S_BRANCH %bb.4
bb.4:
- S_NOP 0, implicit %50.sub0, implicit %60, implicit %10.sub0, implicit %10.sub2
- S_NOP 0, implicit %52, implicit %61, implicit %11, implicit %21
- S_NOP 0, implicit %52, implicit %62, implicit %12, implicit %22
- S_NOP 0, implicit %53, implicit %63, implicit %13, implicit %23
- S_NOP 0, implicit %54, implicit %64, implicit %14, implicit %24
+
+ S_NOP 0, implicit %52, implicit %62
+ S_NOP 0, implicit %50, implicit %60, implicit %20
+ S_NOP 0, implicit %51, implicit %61, implicit %21
+ S_NOP 0, implicit %53, implicit %63
+ S_NOP 0, implicit %54, implicit %64
S_NOP 0, implicit %55, implicit %65
S_NOP 0, implicit %56, implicit %66
S_NOP 0, implicit %57, implicit %67
@@ -7283,20 +7348,22 @@ body: |
S_NOP 0, implicit %80
S_ENDPGM 0
...
-
-# The remat candidate (instruction defining %3) uses a register (%2) which does not have a subrange for every subregister (sub1 is undefined).
-# Be sure that when checking if we can rematerialize %3, that we handle the liveness checking of %2 properly.
-
---
-name: omitted_subrange
+name: test_occ_8_exec_use
tracksRegLiveness: true
+machineFunctionInfo:
+ isEntryFunction: true
body: |
- ; GFX908-LABEL: name: omitted_subrange
+ ; GFX908-LABEL: name: test_occ_8_exec_use
; GFX908: bb.0:
; GFX908-NEXT: successors: %bb.1(0x80000000)
- ; GFX908-NEXT: liveins: $sgpr3, $sgpr4
+ ; GFX908-NEXT: liveins: $vgpr0, $sgpr0_sgpr1
; GFX908-NEXT: {{ $}}
- ; GFX908-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr0_sgpr1
+ ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+ ; GFX908-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 52, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4)
+ ; GFX908-NEXT: undef [[S_MOV_B32_:%[0-9]+]].sub1:sreg_64 = S_MOV_B32 0
+ ; GFX908-NEXT: dead [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
@@ -7314,115 +7381,89 @@ body: |
; GFX908-NEXT: [[DEF15:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF17:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: dead [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF17]], implicit $exec, implicit $mode
; GFX908-NEXT: [[DEF18:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF19:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: dead [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF19]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: dead [[V_CMP_GT_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_GT_U32_e64 [[S_LOAD_DWORDX2_IMM]].sub0, [[COPY1]](s32), implicit $exec
+ ; GFX908-NEXT: dead undef [[S_MOV_B32_:%[0-9]+]].sub0:sreg_64 = COPY [[S_LOAD_DWORDX2_IMM]].sub1
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF1]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF2]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_3:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF3]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_4:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF4]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_5:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF5]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_6:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF6]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_7:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF7]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_8:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF8]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_9:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF9]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_10:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF10]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_11:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF11]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_12:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF12]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_13:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF13]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_14:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF14]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_15:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF15]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_16:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF16]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_17:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF18]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_18:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF19]], implicit $exec, implicit $mode
; GFX908-NEXT: [[DEF20:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF1]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_3:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF2]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_4:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF3]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_5:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF4]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_6:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF5]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_7:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF6]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_8:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF7]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_9:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF8]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_10:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF9]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_11:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF10]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_12:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF11]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_13:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF12]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_14:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF13]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_15:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF14]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_16:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF15]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_17:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF16]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_18:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF17]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_19:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF18]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_20:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF20]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_19:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF20]], implicit $exec, implicit $mode
; GFX908-NEXT: [[DEF21:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_21:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF21]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_20:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF21]], implicit $exec, implicit $mode
; GFX908-NEXT: [[DEF22:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_21:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF22]], implicit $exec, implicit $mode
; GFX908-NEXT: [[DEF23:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF23]], implicit $exec, implicit $mode
; GFX908-NEXT: [[DEF24:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF24]], implicit $exec, implicit $mode
; GFX908-NEXT: [[DEF25:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF25]], implicit $exec, implicit $mode
; GFX908-NEXT: [[DEF26:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF26]], implicit $exec, implicit $mode
; GFX908-NEXT: [[DEF27:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_26:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF27]], implicit $exec, implicit $mode
; GFX908-NEXT: [[DEF28:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF28]], implicit $exec, implicit $mode
; GFX908-NEXT: [[DEF29:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: $vgpr8 = IMPLICIT_DEF
+ ; GFX908-NEXT: $vgpr9 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF30:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF31:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF26]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF27]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF28]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF29]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_26:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF30]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[DEF32:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
- ; GFX908-NEXT: undef [[V_RCP_F32_e32_:%[0-9]+]].sub0:vreg_64_align2 = nnan ninf nsz arcp contract afn reassoc nofpexcept V_RCP_F32_e32 [[DEF32]].sub0, implicit $mode, implicit $exec
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF31]], implicit $exec, implicit $mode
- ; GFX908-NEXT: dead [[DEF33:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF34:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 2, implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF29]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF30]], implicit $exec, implicit $mode
; GFX908-NEXT: S_BRANCH %bb.1
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.1:
- ; GFX908-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000)
- ; GFX908-NEXT: liveins: $sgpr3, $sgpr4
- ; GFX908-NEXT: {{ $}}
- ; GFX908-NEXT: %temp:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: S_CMP_LG_U32 $sgpr3, $sgpr4, implicit-def $scc
- ; GFX908-NEXT: [[DEF34:%[0-9]+]].sub0:vreg_64_align2 = nnan ninf nsz arcp contract afn reassoc nofpexcept V_MUL_F32_e64 0, [[DEF32]].sub0, 1, %temp, 0, 0, implicit $mode, implicit $exec
- ; GFX908-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc
- ; GFX908-NEXT: S_BRANCH %bb.3
- ; GFX908-NEXT: {{ $}}
- ; GFX908-NEXT: bb.2:
- ; GFX908-NEXT: successors: %bb.3(0x80000000)
- ; GFX908-NEXT: liveins: $sgpr3, $sgpr4
- ; GFX908-NEXT: {{ $}}
- ; GFX908-NEXT: undef [[V_FMA_F32_e64_:%[0-9]+]].sub0:vreg_64_align2 = nnan ninf nsz arcp contract afn reassoc nofpexcept V_FMA_F32_e64 0, [[DEF34]].sub1, 0, [[V_RCP_F32_e32_]].sub0, 0, [[DEF34]].sub0, 0, 0, implicit $mode, implicit $exec
- ; GFX908-NEXT: %temp2:vreg_64_align2 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_PK_MUL_F32_:%[0-9]+]]:vreg_64_align2 = nnan ninf nsz arcp contract afn reassoc nofpexcept V_PK_MUL_F32 0, [[V_RCP_F32_e32_]], 8, [[DEF32]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec
- ; GFX908-NEXT: dead [[V_PK_FMA_F32_:%[0-9]+]]:vreg_64_align2 = nnan ninf nsz arcp contract afn reassoc nofpexcept V_PK_FMA_F32 0, [[V_FMA_F32_e64_]], 8, %temp2, 11, [[V_PK_MUL_F32_]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF22]], implicit $exec, implicit $mode
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_28]], implicit [[V_CVT_I32_F32_e32_23]], implicit [[DEF22]], implicit [[DEF27]]
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF23]], implicit $exec, implicit $mode
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_29]], implicit [[V_CVT_I32_F32_e32_24]], implicit [[DEF23]], implicit [[DEF28]]
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF24]], implicit $exec, implicit $mode
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_30]], implicit [[V_CVT_I32_F32_e32_25]], implicit [[DEF24]], implicit [[DEF29]]
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF25]], implicit $exec, implicit $mode
- ; GFX908-NEXT: dead [[DEF35:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_31]], implicit [[V_CVT_I32_F32_e32_26]], implicit [[DEF25]], implicit [[DEF30]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_22]], implicit [[V_CVT_I32_F32_e32_27]], implicit [[DEF26]], implicit [[DEF31]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_1]], implicit [[V_CVT_I32_F32_e32_6]], implicit [[DEF32]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_2]], implicit [[V_CVT_I32_F32_e32_7]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_3]], implicit [[V_CVT_I32_F32_e32_8]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_4]], implicit [[V_CVT_I32_F32_e32_9]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_5]], implicit [[V_CVT_I32_F32_e32_10]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_11]], implicit [[V_CVT_I32_F32_e32_12]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_13]], implicit [[V_CVT_I32_F32_e32_14]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_13]], implicit [[V_CVT_I32_F32_e32_14]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_15]], implicit [[V_CVT_I32_F32_e32_16]]
+ ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 255
+ ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 [[S_MOV_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_1]], implicit [[V_CVT_I32_F32_e32_9]]
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 1, implicit $exec, implicit $mode
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_31]], implicit [[V_CVT_I32_F32_e32_29]], implicit [[DEF29]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_28]], implicit [[V_CVT_I32_F32_e32_30]], implicit [[DEF30]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_2]], implicit [[V_CVT_I32_F32_e32_10]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_3]], implicit [[V_CVT_I32_F32_e32_11]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_4]], implicit [[V_CVT_I32_F32_e32_12]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_5]], implicit [[V_CVT_I32_F32_e32_13]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_6]], implicit [[V_CVT_I32_F32_e32_14]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_7]], implicit [[V_CVT_I32_F32_e32_15]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_8]], implicit [[V_CVT_I32_F32_e32_16]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_17]], implicit [[V_CVT_I32_F32_e32_18]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_19]], implicit [[V_CVT_I32_F32_e32_20]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_21]]
- ; GFX908-NEXT: S_BRANCH %bb.3
- ; GFX908-NEXT: {{ $}}
- ; GFX908-NEXT: bb.3:
- ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.4(0x40000000)
- ; GFX908-NEXT: liveins: $sgpr3, $sgpr4
- ; GFX908-NEXT: {{ $}}
- ; GFX908-NEXT: S_CMP_LG_U32 $sgpr4, 0, implicit-def $scc
- ; GFX908-NEXT: S_CBRANCH_SCC1 %bb.1, implicit killed $scc
- ; GFX908-NEXT: S_BRANCH %bb.4
- ; GFX908-NEXT: {{ $}}
- ; GFX908-NEXT: bb.4:
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_21]], implicit [[V_CVT_I32_F32_e32_22]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_23]], implicit [[V_CVT_I32_F32_e32_24]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_25]], implicit [[V_CVT_I32_F32_e32_26]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_27]]
+ ; GFX908-NEXT: $exec = S_MOV_B64 [[S_AND_SAVEEXEC_B64_]]
; GFX908-NEXT: S_ENDPGM 0
bb.0:
- liveins: $sgpr3, $sgpr4
-
- %0:vreg_64 = IMPLICIT_DEF
- %1:vreg_64_align2 = IMPLICIT_DEF
- undef %2.sub0:vreg_64_align2 = nnan ninf nsz arcp contract afn reassoc nofpexcept V_RCP_F32_e32 %1.sub0:vreg_64_align2, implicit $mode, implicit $exec
- %3:vreg_64_align2 = nnan ninf nsz arcp contract afn reassoc nofpexcept V_PK_MUL_F32 0, %2, 8, %1, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
- %5:vreg_64_align2 = IMPLICIT_DEF
- %10:vgpr_32 = IMPLICIT_DEF
+ liveins: $vgpr0, $sgpr0_sgpr1
+
+ %1:sgpr_64(p4) = COPY $sgpr0_sgpr1
+ %2:vgpr_32(s32) = COPY $vgpr0
+ %3:sreg_64_xexec = S_LOAD_DWORDX2_IMM %1(p4), 52, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4)
+ %4:sreg_64 = V_CMP_GT_U32_e64 %3.sub0, %2(s32), implicit $exec
+ undef %5.sub1:sreg_64 = S_MOV_B32 0
+ %5.sub0:sreg_64 = COPY %3.sub1
+ $vgpr8 = IMPLICIT_DEF
+ $vgpr9 = IMPLICIT_DEF
%11:vgpr_32 = IMPLICIT_DEF
%12:vgpr_32 = IMPLICIT_DEF
%13:vgpr_32 = IMPLICIT_DEF
@@ -7454,8 +7495,8 @@ body: |
%39:vgpr_32 = IMPLICIT_DEF
%40:vgpr_32 = IMPLICIT_DEF
%41:vgpr_32 = IMPLICIT_DEF
- %50:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %10, implicit $exec, implicit $mode
- %51:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %11, implicit $exec, implicit $mode
+ %50:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 1, implicit $exec, implicit $mode
+ %51:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 2, implicit $exec, implicit $mode
%52:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %12, implicit $exec, implicit $mode
%53:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %13, implicit $exec, implicit $mode
%54:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %14, implicit $exec, implicit $mode
@@ -7486,52 +7527,29 @@ body: |
%79:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %39, implicit $exec, implicit $mode
%80:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %40, implicit $exec, implicit $mode
%81:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %41, implicit $exec, implicit $mode
- S_BRANCH %bb.1
-
- bb.1:
- liveins: $sgpr3, $sgpr4
-
- %temp:vgpr_32 = IMPLICIT_DEF
- %5.sub0:vreg_64_align2 = nnan ninf nsz arcp contract afn reassoc nofpexcept V_MUL_F32_e64 0, %1.sub0:vreg_64_align2, 1, %temp, 0, 0, implicit $mode, implicit $exec
-
- S_CMP_LG_U32 killed $sgpr3, $sgpr4, implicit-def $scc
- S_CBRANCH_SCC1 %bb.3, implicit killed $scc
S_BRANCH %bb.4
- bb.3:
- liveins: $sgpr3, $sgpr4
-
- %6:vreg_64_align2 = IMPLICIT_DEF
- undef %7.sub0:vreg_64_align2 = nnan ninf nsz arcp contract afn reassoc nofpexcept V_FMA_F32_e64 0, %5.sub1, 0, %2.sub0:vreg_64_align2, 0, %5.sub0:vreg_64_align2, 0, 0, implicit $mode, implicit $exec
- %temp2:vreg_64_align2 = IMPLICIT_DEF
- %8:vreg_64_align2 = nnan ninf nsz arcp contract afn reassoc nofpexcept V_PK_FMA_F32 0, %7:vreg_64_align2, 8, %temp2, 11, %3:vreg_64_align2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
- S_NOP 0, implicit %50, implicit %60, implicit %10, implicit %20
- S_NOP 0, implicit %51, implicit %61, implicit %11, implicit %21
- S_NOP 0, implicit %52, implicit %62, implicit %12, implicit %22
- S_NOP 0, implicit %53, implicit %63, implicit %13, implicit %23
- S_NOP 0, implicit %54, implicit %64, implicit %14, implicit %24
- S_NOP 0, implicit %55, implicit %65, implicit %1
+ bb.4:
+
+ %100:sreg_64 = S_MOV_B64 255
+ %101:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed %100, implicit-def $exec, implicit-def $scc, implicit $exec
+ S_NOP 0, implicit %52, implicit %62
+ S_NOP 0, implicit %50, implicit %60, implicit %20
+ S_NOP 0, implicit %51, implicit %61, implicit %21
+ S_NOP 0, implicit %53, implicit %63
+ S_NOP 0, implicit %54, implicit %64
+ S_NOP 0, implicit %55, implicit %65
S_NOP 0, implicit %56, implicit %66
S_NOP 0, implicit %57, implicit %67
S_NOP 0, implicit %58, implicit %68
S_NOP 0, implicit %59, implicit %69
S_NOP 0, implicit %70, implicit %71
S_NOP 0, implicit %72, implicit %73
- S_NOP 0, implicit %72, implicit %73
S_NOP 0, implicit %74, implicit %75
S_NOP 0, implicit %76, implicit %77
S_NOP 0, implicit %78, implicit %79
S_NOP 0, implicit %80
- S_BRANCH %bb.4
-
- bb.4:
- liveins: $sgpr3, $sgpr4
-
- S_CMP_LG_U32 $sgpr4, 0, implicit-def $scc
- S_CBRANCH_SCC1 %bb.1, implicit killed $scc
- S_BRANCH %bb.2
-
- bb.2:
+ $exec = S_MOV_B64 %101:sreg_64_xexec
S_ENDPGM 0
-
...
+
>From 51683490d2e4c6661cca52fc83c64778e702f49d Mon Sep 17 00:00:00 2001
From: Lucas Ramirez <lucas.rami at proton.me>
Date: Wed, 5 Feb 2025 18:13:25 +0000
Subject: [PATCH 02/10] Fix spurious auto-formats and typo
The typo in GCNSubtarget::getMaxNumArchVGPRs made the function return
the wrong value in all cases. The returned value is now correct; this
causes some unit tests to break in largely cosmetic ways, since that
value is used to calculate excess RP in the remat stage.
---
llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp | 11 +-
llvm/lib/Target/AMDGPU/GCNSubtarget.h | 576 +++++++++++++-----
.../atomic_optimizations_global_pointer.ll | 4 +-
.../atomic_optimizations_local_pointer.ll | 141 ++---
.../test/CodeGen/AMDGPU/dynamic_stackalloc.ll | 41 +-
.../AMDGPU/global_atomics_scan_fadd.ll | 50 +-
.../AMDGPU/global_atomics_scan_fmax.ll | 22 +-
.../AMDGPU/global_atomics_scan_fmin.ll | 22 +-
.../AMDGPU/global_atomics_scan_fsub.ll | 70 +--
llvm/test/CodeGen/AMDGPU/idiv-licm.ll | 14 +-
.../CodeGen/AMDGPU/llvm.amdgcn.reduce.umax.ll | 4 +-
.../CodeGen/AMDGPU/llvm.amdgcn.reduce.umin.ll | 4 +-
.../llvm.amdgcn.struct.atomic.buffer.load.ll | 7 +-
...vm.amdgcn.struct.ptr.atomic.buffer.load.ll | 7 +-
.../test/CodeGen/AMDGPU/loop-prefetch-data.ll | 8 +-
.../CodeGen/AMDGPU/no-dup-inst-prefetch.ll | 2 +-
.../AMDGPU/pal-metadata-3.0-callable.ll | 2 +-
17 files changed, 636 insertions(+), 349 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index 01cbd0aefba39..fbf1f9dae03a5 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -329,7 +329,7 @@ void GCNSchedStrategy::pickNodeFromQueue(SchedBoundary &Zone,
const RegPressureTracker &RPTracker,
SchedCandidate &Cand,
bool IsBottomUp) {
- const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo *>(TRI);
+ const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo*>(TRI);
ArrayRef<unsigned> Pressure = RPTracker.getRegSetPressureAtPos();
unsigned SGPRPressure = 0;
unsigned VGPRPressure = 0;
@@ -1403,7 +1403,8 @@ GCNSchedStage::getScheduleMetrics(const std::vector<SUnit> &InputSchedule) {
#ifndef NDEBUG
LLVM_DEBUG(
printScheduleModel(ReadyCyclesSorted);
- dbgs() << "\n\t" << "Metric: "
+ dbgs() << "\n\t"
+ << "Metric: "
<< (SumBubbles
? (SumBubbles * ScheduleMetrics::ScaleFactor) / CurrCycle
: 1)
@@ -1438,7 +1439,8 @@ GCNSchedStage::getScheduleMetrics(const GCNScheduleDAGMILive &DAG) {
#ifndef NDEBUG
LLVM_DEBUG(
printScheduleModel(ReadyCyclesSorted);
- dbgs() << "\n\t" << "Metric: "
+ dbgs() << "\n\t"
+ << "Metric: "
<< (SumBubbles
? (SumBubbles * ScheduleMetrics::ScaleFactor) / CurrCycle
: 1)
@@ -1486,7 +1488,8 @@ bool UnclusteredHighRPStage::shouldRevertScheduling(unsigned WavesAfter) {
dbgs()
<< "\n\t *** In shouldRevertScheduling ***\n"
<< " *********** BEFORE UnclusteredHighRPStage ***********\n");
- ScheduleMetrics MBefore = getScheduleMetrics(DAG.SUnits);
+ ScheduleMetrics MBefore =
+ getScheduleMetrics(DAG.SUnits);
LLVM_DEBUG(
dbgs()
<< "\n *********** AFTER UnclusteredHighRPStage ***********\n");
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index 29acb91afb82c..8345a498045f5 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -38,12 +38,12 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
// Following 2 enums are documented at:
// - https://llvm.org/docs/AMDGPUUsage.html#trap-handler-abi
enum class TrapHandlerAbi {
- NONE = 0x00,
+ NONE = 0x00,
AMDHSA = 0x01,
};
enum class TrapID {
- LLVMAMDHSATrap = 0x02,
+ LLVMAMDHSATrap = 0x02,
LLVMAMDHSADebugTrap = 0x03,
};
@@ -269,20 +269,24 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
const GCNTargetMachine &TM);
~GCNSubtarget() override;
- GCNSubtarget &initializeSubtargetDependencies(const Triple &TT, StringRef GPU,
- StringRef FS);
+ GCNSubtarget &initializeSubtargetDependencies(const Triple &TT,
+ StringRef GPU, StringRef FS);
/// Diagnose inconsistent subtarget features before attempting to codegen
/// function \p F.
void checkSubtargetFeatures(const Function &F) const;
- const SIInstrInfo *getInstrInfo() const override { return &InstrInfo; }
+ const SIInstrInfo *getInstrInfo() const override {
+ return &InstrInfo;
+ }
const SIFrameLowering *getFrameLowering() const override {
return &FrameLowering;
}
- const SITargetLowering *getTargetLowering() const override { return &TLInfo; }
+ const SITargetLowering *getTargetLowering() const override {
+ return &TLInfo;
+ }
const SIRegisterInfo *getRegisterInfo() const override {
return &InstrInfo.getRegisterInfo();
@@ -320,7 +324,9 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS);
- Generation getGeneration() const { return (Generation)Gen; }
+ Generation getGeneration() const {
+ return (Generation)Gen;
+ }
unsigned getMaxWaveScratchSize() const {
// See COMPUTE_TMPRING_SIZE.WAVESIZE.
@@ -341,7 +347,9 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
return llvm::countl_zero(getMaxWaveScratchSize()) + getWavefrontSizeLog2();
}
- int getLDSBankCount() const { return LDSBankCount; }
+ int getLDSBankCount() const {
+ return LDSBankCount;
+ }
unsigned getMaxPrivateElementSize(bool ForBufferRSrc = false) const {
return (ForBufferRSrc || !enableFlatScratch()) ? MaxPrivateElementSize : 16;
@@ -356,17 +364,29 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
bool supportsWGP() const { return getGeneration() >= GFX10; }
- bool hasIntClamp() const { return HasIntClamp; }
+ bool hasIntClamp() const {
+ return HasIntClamp;
+ }
- bool hasFP64() const { return FP64; }
+ bool hasFP64() const {
+ return FP64;
+ }
- bool hasMIMG_R128() const { return MIMG_R128; }
+ bool hasMIMG_R128() const {
+ return MIMG_R128;
+ }
- bool hasHWFP64() const { return FP64; }
+ bool hasHWFP64() const {
+ return FP64;
+ }
- bool hasHalfRate64Ops() const { return HalfRate64Ops; }
+ bool hasHalfRate64Ops() const {
+ return HalfRate64Ops;
+ }
- bool hasFullRate64Ops() const { return FullRate64Ops; }
+ bool hasFullRate64Ops() const {
+ return FullRate64Ops;
+ }
bool hasAddr64() const {
return (getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS);
@@ -382,37 +402,65 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
return getGeneration() >= VOLCANIC_ISLANDS;
}
- bool hasFractBug() const { return getGeneration() == SOUTHERN_ISLANDS; }
+ bool hasFractBug() const {
+ return getGeneration() == SOUTHERN_ISLANDS;
+ }
- bool hasBFE() const { return true; }
+ bool hasBFE() const {
+ return true;
+ }
- bool hasBFI() const { return true; }
+ bool hasBFI() const {
+ return true;
+ }
- bool hasBFM() const { return hasBFE(); }
+ bool hasBFM() const {
+ return hasBFE();
+ }
- bool hasBCNT(unsigned Size) const { return true; }
+ bool hasBCNT(unsigned Size) const {
+ return true;
+ }
- bool hasFFBL() const { return true; }
+ bool hasFFBL() const {
+ return true;
+ }
- bool hasFFBH() const { return true; }
+ bool hasFFBH() const {
+ return true;
+ }
- bool hasMed3_16() const { return getGeneration() >= AMDGPUSubtarget::GFX9; }
+ bool hasMed3_16() const {
+ return getGeneration() >= AMDGPUSubtarget::GFX9;
+ }
bool hasMin3Max3_16() const {
return getGeneration() >= AMDGPUSubtarget::GFX9;
}
- bool hasFmaMixInsts() const { return HasFmaMixInsts; }
+ bool hasFmaMixInsts() const {
+ return HasFmaMixInsts;
+ }
- bool hasCARRY() const { return true; }
+ bool hasCARRY() const {
+ return true;
+ }
- bool hasFMA() const { return FMA; }
+ bool hasFMA() const {
+ return FMA;
+ }
- bool hasSwap() const { return GFX9Insts; }
+ bool hasSwap() const {
+ return GFX9Insts;
+ }
- bool hasScalarPackInsts() const { return GFX9Insts; }
+ bool hasScalarPackInsts() const {
+ return GFX9Insts;
+ }
- bool hasScalarMulHiInsts() const { return GFX9Insts; }
+ bool hasScalarMulHiInsts() const {
+ return GFX9Insts;
+ }
bool hasScalarSubwordLoads() const { return getGeneration() >= GFX12; }
@@ -427,7 +475,9 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
/// True if the offset field of DS instructions works as expected. On SI, the
/// offset uses a 16-bit adder and does not always wrap properly.
- bool hasUsableDSOffset() const { return getGeneration() >= SEA_ISLANDS; }
+ bool hasUsableDSOffset() const {
+ return getGeneration() >= SEA_ISLANDS;
+ }
bool unsafeDSOffsetFoldingEnabled() const {
return EnableUnsafeDSOffsetFolding;
@@ -440,10 +490,14 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
/// Extra wait hazard is needed in some cases before
/// s_cbranch_vccnz/s_cbranch_vccz.
- bool hasReadVCCZBug() const { return getGeneration() <= SEA_ISLANDS; }
+ bool hasReadVCCZBug() const {
+ return getGeneration() <= SEA_ISLANDS;
+ }
/// Writes to VCC_LO/VCC_HI update the VCCZ flag.
- bool partialVCCWritesUpdateVCCZ() const { return getGeneration() >= GFX10; }
+ bool partialVCCWritesUpdateVCCZ() const {
+ return getGeneration() >= GFX10;
+ }
/// A read of an SGPR by SMRD instruction requires 4 wait states when the SGPR
/// was written by a VALU instruction.
@@ -457,14 +511,18 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
return getGeneration() >= VOLCANIC_ISLANDS;
}
- bool hasRFEHazards() const { return getGeneration() >= VOLCANIC_ISLANDS; }
+ bool hasRFEHazards() const {
+ return getGeneration() >= VOLCANIC_ISLANDS;
+ }
/// Number of hazard wait states for s_setreg_b32/s_setreg_imm32_b32.
unsigned getSetRegWaitStates() const {
return getGeneration() <= SEA_ISLANDS ? 1 : 2;
}
- bool dumpCode() const { return DumpCode; }
+ bool dumpCode() const {
+ return DumpCode;
+ }
/// Return the amount of LDS that can be used that will not restrict the
/// occupancy lower than WaveCount.
@@ -480,17 +538,25 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
return getGeneration() >= AMDGPUSubtarget::GFX10;
}
- bool useFlatForGlobal() const { return FlatForGlobal; }
+ bool useFlatForGlobal() const {
+ return FlatForGlobal;
+ }
/// \returns If target supports ds_read/write_b128 and user enables generation
/// of ds_read/write_b128.
- bool useDS128() const { return CIInsts && EnableDS128; }
+ bool useDS128() const {
+ return CIInsts && EnableDS128;
+ }
/// \return If target supports ds_read/write_b96/128.
- bool hasDS96AndDS128() const { return CIInsts; }
+ bool hasDS96AndDS128() const {
+ return CIInsts;
+ }
/// Have v_trunc_f64, v_ceil_f64, v_rndne_f64
- bool haveRoundOpsF64() const { return CIInsts; }
+ bool haveRoundOpsF64() const {
+ return CIInsts;
+ }
/// \returns If MUBUF instructions always perform range checking, even for
/// buffer resources used for private memory access.
@@ -500,55 +566,89 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
/// \returns If target requires PRT Struct NULL support (zero result registers
/// for sparse texture support).
- bool usePRTStrictNull() const { return EnablePRTStrictNull; }
+ bool usePRTStrictNull() const {
+ return EnablePRTStrictNull;
+ }
- bool hasAutoWaitcntBeforeBarrier() const { return AutoWaitcntBeforeBarrier; }
+ bool hasAutoWaitcntBeforeBarrier() const {
+ return AutoWaitcntBeforeBarrier;
+ }
/// \returns true if the target supports backing off of s_barrier instructions
/// when an exception is raised.
- bool supportsBackOffBarrier() const { return BackOffBarrier; }
+ bool supportsBackOffBarrier() const {
+ return BackOffBarrier;
+ }
- bool hasUnalignedBufferAccess() const { return UnalignedBufferAccess; }
+ bool hasUnalignedBufferAccess() const {
+ return UnalignedBufferAccess;
+ }
bool hasUnalignedBufferAccessEnabled() const {
return UnalignedBufferAccess && UnalignedAccessMode;
}
- bool hasUnalignedDSAccess() const { return UnalignedDSAccess; }
+ bool hasUnalignedDSAccess() const {
+ return UnalignedDSAccess;
+ }
bool hasUnalignedDSAccessEnabled() const {
return UnalignedDSAccess && UnalignedAccessMode;
}
- bool hasUnalignedScratchAccess() const { return UnalignedScratchAccess; }
+ bool hasUnalignedScratchAccess() const {
+ return UnalignedScratchAccess;
+ }
bool hasUnalignedScratchAccessEnabled() const {
return UnalignedScratchAccess && UnalignedAccessMode;
}
- bool hasUnalignedAccessMode() const { return UnalignedAccessMode; }
+ bool hasUnalignedAccessMode() const {
+ return UnalignedAccessMode;
+ }
- bool hasApertureRegs() const { return HasApertureRegs; }
+ bool hasApertureRegs() const {
+ return HasApertureRegs;
+ }
- bool isTrapHandlerEnabled() const { return TrapHandler; }
+ bool isTrapHandlerEnabled() const {
+ return TrapHandler;
+ }
- bool isXNACKEnabled() const { return TargetID.isXnackOnOrAny(); }
+ bool isXNACKEnabled() const {
+ return TargetID.isXnackOnOrAny();
+ }
- bool isTgSplitEnabled() const { return EnableTgSplit; }
+ bool isTgSplitEnabled() const {
+ return EnableTgSplit;
+ }
- bool isCuModeEnabled() const { return EnableCuMode; }
+ bool isCuModeEnabled() const {
+ return EnableCuMode;
+ }
bool isPreciseMemoryEnabled() const { return EnablePreciseMemory; }
- bool hasFlatAddressSpace() const { return FlatAddressSpace; }
+ bool hasFlatAddressSpace() const {
+ return FlatAddressSpace;
+ }
- bool hasFlatScrRegister() const { return hasFlatAddressSpace(); }
+ bool hasFlatScrRegister() const {
+ return hasFlatAddressSpace();
+ }
- bool hasFlatInstOffsets() const { return FlatInstOffsets; }
+ bool hasFlatInstOffsets() const {
+ return FlatInstOffsets;
+ }
- bool hasFlatGlobalInsts() const { return FlatGlobalInsts; }
+ bool hasFlatGlobalInsts() const {
+ return FlatGlobalInsts;
+ }
- bool hasFlatScratchInsts() const { return FlatScratchInsts; }
+ bool hasFlatScratchInsts() const {
+ return FlatScratchInsts;
+ }
// Check if target supports ST addressing mode with FLAT scratch instructions.
// The ST addressing mode means no registers are used, either VGPR or SGPR,
@@ -559,20 +659,30 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
bool hasFlatScratchSVSMode() const { return GFX940Insts || GFX11Insts; }
- bool hasScalarFlatScratchInsts() const { return ScalarFlatScratchInsts; }
+ bool hasScalarFlatScratchInsts() const {
+ return ScalarFlatScratchInsts;
+ }
bool enableFlatScratch() const {
return flatScratchIsArchitected() ||
(EnableFlatScratch && hasFlatScratchInsts());
}
- bool hasGlobalAddTidInsts() const { return GFX10_BEncoding; }
+ bool hasGlobalAddTidInsts() const {
+ return GFX10_BEncoding;
+ }
- bool hasAtomicCSub() const { return GFX10_BEncoding; }
+ bool hasAtomicCSub() const {
+ return GFX10_BEncoding;
+ }
- bool hasExportInsts() const { return !hasGFX940Insts(); }
+ bool hasExportInsts() const {
+ return !hasGFX940Insts();
+ }
- bool hasVINTERPEncoding() const { return GFX11Insts; }
+ bool hasVINTERPEncoding() const {
+ return GFX11Insts;
+ }
// DS_ADD_F64/DS_ADD_RTN_F64
bool hasLdsAtomicAddF64() const { return hasGFX90AInsts(); }
@@ -581,98 +691,162 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
return getGeneration() >= GFX9;
}
- bool hasFlatSegmentOffsetBug() const { return HasFlatSegmentOffsetBug; }
+ bool hasFlatSegmentOffsetBug() const {
+ return HasFlatSegmentOffsetBug;
+ }
- bool hasFlatLgkmVMemCountInOrder() const { return getGeneration() > GFX9; }
+ bool hasFlatLgkmVMemCountInOrder() const {
+ return getGeneration() > GFX9;
+ }
- bool hasD16LoadStore() const { return getGeneration() >= GFX9; }
+ bool hasD16LoadStore() const {
+ return getGeneration() >= GFX9;
+ }
bool d16PreservesUnusedBits() const {
return hasD16LoadStore() && !TargetID.isSramEccOnOrAny();
}
- bool hasD16Images() const { return getGeneration() >= VOLCANIC_ISLANDS; }
+ bool hasD16Images() const {
+ return getGeneration() >= VOLCANIC_ISLANDS;
+ }
/// Return if most LDS instructions have an m0 use that require m0 to be
/// initialized.
- bool ldsRequiresM0Init() const { return getGeneration() < GFX9; }
+ bool ldsRequiresM0Init() const {
+ return getGeneration() < GFX9;
+ }
// True if the hardware rewinds and replays GWS operations if a wave is
// preempted.
//
// If this is false, a GWS operation requires testing if a nack set the
// MEM_VIOL bit, and repeating if so.
- bool hasGWSAutoReplay() const { return getGeneration() >= GFX9; }
+ bool hasGWSAutoReplay() const {
+ return getGeneration() >= GFX9;
+ }
/// \returns if target has ds_gws_sema_release_all instruction.
- bool hasGWSSemaReleaseAll() const { return CIInsts; }
+ bool hasGWSSemaReleaseAll() const {
+ return CIInsts;
+ }
/// \returns true if the target has integer add/sub instructions that do not
/// produce a carry-out. This includes v_add_[iu]32, v_sub_[iu]32,
/// v_add_[iu]16, and v_sub_[iu]16, all of which support the clamp modifier
/// for saturation.
- bool hasAddNoCarry() const { return AddNoCarryInsts; }
+ bool hasAddNoCarry() const {
+ return AddNoCarryInsts;
+ }
bool hasScalarAddSub64() const { return getGeneration() >= GFX12; }
bool hasScalarSMulU64() const { return getGeneration() >= GFX12; }
- bool hasUnpackedD16VMem() const { return HasUnpackedD16VMem; }
+ bool hasUnpackedD16VMem() const {
+ return HasUnpackedD16VMem;
+ }
// Covers VS/PS/CS graphics shaders
bool isMesaGfxShader(const Function &F) const {
return isMesa3DOS() && AMDGPU::isShader(F.getCallingConv());
}
- bool hasMad64_32() const { return getGeneration() >= SEA_ISLANDS; }
+ bool hasMad64_32() const {
+ return getGeneration() >= SEA_ISLANDS;
+ }
- bool hasSDWAOmod() const { return HasSDWAOmod; }
+ bool hasSDWAOmod() const {
+ return HasSDWAOmod;
+ }
- bool hasSDWAScalar() const { return HasSDWAScalar; }
+ bool hasSDWAScalar() const {
+ return HasSDWAScalar;
+ }
- bool hasSDWASdst() const { return HasSDWASdst; }
+ bool hasSDWASdst() const {
+ return HasSDWASdst;
+ }
- bool hasSDWAMac() const { return HasSDWAMac; }
+ bool hasSDWAMac() const {
+ return HasSDWAMac;
+ }
- bool hasSDWAOutModsVOPC() const { return HasSDWAOutModsVOPC; }
+ bool hasSDWAOutModsVOPC() const {
+ return HasSDWAOutModsVOPC;
+ }
- bool hasDLInsts() const { return HasDLInsts; }
+ bool hasDLInsts() const {
+ return HasDLInsts;
+ }
bool hasFmacF64Inst() const { return HasFmacF64Inst; }
- bool hasDot1Insts() const { return HasDot1Insts; }
+ bool hasDot1Insts() const {
+ return HasDot1Insts;
+ }
- bool hasDot2Insts() const { return HasDot2Insts; }
+ bool hasDot2Insts() const {
+ return HasDot2Insts;
+ }
- bool hasDot3Insts() const { return HasDot3Insts; }
+ bool hasDot3Insts() const {
+ return HasDot3Insts;
+ }
- bool hasDot4Insts() const { return HasDot4Insts; }
+ bool hasDot4Insts() const {
+ return HasDot4Insts;
+ }
- bool hasDot5Insts() const { return HasDot5Insts; }
+ bool hasDot5Insts() const {
+ return HasDot5Insts;
+ }
- bool hasDot6Insts() const { return HasDot6Insts; }
+ bool hasDot6Insts() const {
+ return HasDot6Insts;
+ }
- bool hasDot7Insts() const { return HasDot7Insts; }
+ bool hasDot7Insts() const {
+ return HasDot7Insts;
+ }
- bool hasDot8Insts() const { return HasDot8Insts; }
+ bool hasDot8Insts() const {
+ return HasDot8Insts;
+ }
- bool hasDot9Insts() const { return HasDot9Insts; }
+ bool hasDot9Insts() const {
+ return HasDot9Insts;
+ }
- bool hasDot10Insts() const { return HasDot10Insts; }
+ bool hasDot10Insts() const {
+ return HasDot10Insts;
+ }
- bool hasDot11Insts() const { return HasDot11Insts; }
+ bool hasDot11Insts() const {
+ return HasDot11Insts;
+ }
- bool hasDot12Insts() const { return HasDot12Insts; }
+ bool hasDot12Insts() const {
+ return HasDot12Insts;
+ }
- bool hasDot13Insts() const { return HasDot13Insts; }
+ bool hasDot13Insts() const {
+ return HasDot13Insts;
+ }
- bool hasMAIInsts() const { return HasMAIInsts; }
+ bool hasMAIInsts() const {
+ return HasMAIInsts;
+ }
- bool hasFP8Insts() const { return HasFP8Insts; }
+ bool hasFP8Insts() const {
+ return HasFP8Insts;
+ }
bool hasFP8ConversionInsts() const { return HasFP8ConversionInsts; }
- bool hasPkFmacF16Inst() const { return HasPkFmacF16Inst; }
+ bool hasPkFmacF16Inst() const {
+ return HasPkFmacF16Inst;
+ }
bool hasAtomicFMinFMaxF32GlobalInsts() const {
return HasAtomicFMinFMaxF32GlobalInsts;
@@ -745,23 +919,37 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
return HasDefaultComponentBroadcast;
}
- bool hasNoSdstCMPX() const { return HasNoSdstCMPX; }
+ bool hasNoSdstCMPX() const {
+ return HasNoSdstCMPX;
+ }
- bool hasVscnt() const { return HasVscnt; }
+ bool hasVscnt() const {
+ return HasVscnt;
+ }
- bool hasGetWaveIdInst() const { return HasGetWaveIdInst; }
+ bool hasGetWaveIdInst() const {
+ return HasGetWaveIdInst;
+ }
- bool hasSMemTimeInst() const { return HasSMemTimeInst; }
+ bool hasSMemTimeInst() const {
+ return HasSMemTimeInst;
+ }
- bool hasShaderCyclesRegister() const { return HasShaderCyclesRegister; }
+ bool hasShaderCyclesRegister() const {
+ return HasShaderCyclesRegister;
+ }
bool hasShaderCyclesHiLoRegisters() const {
return HasShaderCyclesHiLoRegisters;
}
- bool hasVOP3Literal() const { return HasVOP3Literal; }
+ bool hasVOP3Literal() const {
+ return HasVOP3Literal;
+ }
- bool hasNoDataDepHazard() const { return HasNoDataDepHazard; }
+ bool hasNoDataDepHazard() const {
+ return HasNoDataDepHazard;
+ }
bool vmemWriteNeedsExpWaitcnt() const {
return getGeneration() < SEA_ISLANDS;
@@ -786,11 +974,15 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
// dynamic realignment in common cases.
Align getStackAlignment() const { return Align(16); }
- bool enableMachineScheduler() const override { return true; }
+ bool enableMachineScheduler() const override {
+ return true;
+ }
bool useAA() const override;
- bool enableSubRegLiveness() const override { return true; }
+ bool enableSubRegLiveness() const override {
+ return true;
+ }
void setScalarizeGlobalBehavior(bool b) { ScalarizeGlobal = b; }
bool getScalarizeGlobalBehavior() const { return ScalarizeGlobal; }
@@ -799,7 +991,9 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
static bool hasHalfRate64Ops(const TargetSubtargetInfo &STI);
// XXX - Why is this here if it isn't in the default pass set?
- bool enableEarlyIfConversion() const override { return true; }
+ bool enableEarlyIfConversion() const override {
+ return true;
+ }
void overrideSchedPolicy(MachineSchedPolicy &Policy,
unsigned NumRegionInstrs) const override;
@@ -810,11 +1004,17 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
return AMDGPU::getMaxNumUserSGPRs(*this);
}
- bool hasSMemRealTime() const { return HasSMemRealTime; }
+ bool hasSMemRealTime() const {
+ return HasSMemRealTime;
+ }
- bool hasMovrel() const { return HasMovrel; }
+ bool hasMovrel() const {
+ return HasMovrel;
+ }
- bool hasVGPRIndexMode() const { return HasVGPRIndexMode; }
+ bool hasVGPRIndexMode() const {
+ return HasVGPRIndexMode;
+ }
bool useVGPRIndexMode() const;
@@ -824,9 +1024,13 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
bool hasScalarDwordx3Loads() const { return HasScalarDwordx3Loads; }
- bool hasScalarStores() const { return HasScalarStores; }
+ bool hasScalarStores() const {
+ return HasScalarStores;
+ }
- bool hasScalarAtomics() const { return HasScalarAtomics; }
+ bool hasScalarAtomics() const {
+ return HasScalarAtomics;
+ }
bool hasLDSFPAtomicAddF32() const { return GFX8Insts; }
bool hasLDSFPAtomicAddF64() const { return GFX90AInsts; }
@@ -837,40 +1041,60 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
/// \returns true if the subtarget has the v_permlane64_b32 instruction.
bool hasPermLane64() const { return getGeneration() >= GFX11; }
- bool hasDPP() const { return HasDPP; }
+ bool hasDPP() const {
+ return HasDPP;
+ }
- bool hasDPPBroadcasts() const { return HasDPP && getGeneration() < GFX10; }
+ bool hasDPPBroadcasts() const {
+ return HasDPP && getGeneration() < GFX10;
+ }
bool hasDPPWavefrontShifts() const {
return HasDPP && getGeneration() < GFX10;
}
- bool hasDPP8() const { return HasDPP8; }
+ bool hasDPP8() const {
+ return HasDPP8;
+ }
- bool hasDPALU_DPP() const { return HasDPALU_DPP; }
+ bool hasDPALU_DPP() const {
+ return HasDPALU_DPP;
+ }
bool hasDPPSrc1SGPR() const { return HasDPPSrc1SGPR; }
- bool hasPackedFP32Ops() const { return HasPackedFP32Ops; }
+ bool hasPackedFP32Ops() const {
+ return HasPackedFP32Ops;
+ }
// Has V_PK_MOV_B32 opcode
- bool hasPkMovB32() const { return GFX90AInsts; }
+ bool hasPkMovB32() const {
+ return GFX90AInsts;
+ }
bool hasFmaakFmamkF32Insts() const {
return getGeneration() >= GFX10 || hasGFX940Insts();
}
- bool hasImageInsts() const { return HasImageInsts; }
+ bool hasImageInsts() const {
+ return HasImageInsts;
+ }
- bool hasExtendedImageInsts() const { return HasExtendedImageInsts; }
+ bool hasExtendedImageInsts() const {
+ return HasExtendedImageInsts;
+ }
- bool hasR128A16() const { return HasR128A16; }
+ bool hasR128A16() const {
+ return HasR128A16;
+ }
bool hasA16() const { return HasA16; }
bool hasG16() const { return HasG16; }
- bool hasOffset3fBug() const { return HasOffset3fBug; }
+ bool hasOffset3fBug() const {
+ return HasOffset3fBug;
+ }
bool hasImageStoreD16Bug() const { return HasImageStoreD16Bug; }
@@ -892,11 +1116,17 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
return AMDGPU::getNSAMaxSize(*this, HasSampler);
}
- bool hasGFX10_AEncoding() const { return GFX10_AEncoding; }
+ bool hasGFX10_AEncoding() const {
+ return GFX10_AEncoding;
+ }
- bool hasGFX10_BEncoding() const { return GFX10_BEncoding; }
+ bool hasGFX10_BEncoding() const {
+ return GFX10_BEncoding;
+ }
- bool hasGFX10_3Insts() const { return GFX10_3Insts; }
+ bool hasGFX10_3Insts() const {
+ return GFX10_3Insts;
+ }
bool hasMadF16() const;
@@ -904,13 +1134,21 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
bool hasLshlAddB64() const { return GFX940Insts; }
- bool enableSIScheduler() const { return EnableSIScheduler; }
+ bool enableSIScheduler() const {
+ return EnableSIScheduler;
+ }
- bool loadStoreOptEnabled() const { return EnableLoadStoreOpt; }
+ bool loadStoreOptEnabled() const {
+ return EnableLoadStoreOpt;
+ }
- bool hasSGPRInitBug() const { return SGPRInitBug; }
+ bool hasSGPRInitBug() const {
+ return SGPRInitBug;
+ }
- bool hasUserSGPRInit16Bug() const { return UserSGPRInit16Bug && isWave32(); }
+ bool hasUserSGPRInit16Bug() const {
+ return UserSGPRInit16Bug && isWave32();
+ }
bool hasNegativeScratchOffsetBug() const { return NegativeScratchOffsetBug; }
@@ -918,14 +1156,18 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
return NegativeUnalignedScratchOffsetBug;
}
- bool hasMFMAInlineLiteralBug() const { return HasMFMAInlineLiteralBug; }
+ bool hasMFMAInlineLiteralBug() const {
+ return HasMFMAInlineLiteralBug;
+ }
bool has12DWordStoreHazard() const {
return getGeneration() != AMDGPUSubtarget::SOUTHERN_ISLANDS;
}
// \returns true if the subtarget supports DWORDX3 load/store instructions.
- bool hasDwordx3LoadStores() const { return CIInsts; }
+ bool hasDwordx3LoadStores() const {
+ return CIInsts;
+ }
bool hasReadM0MovRelInterpHazard() const {
return getGeneration() == AMDGPUSubtarget::GFX9;
@@ -944,23 +1186,39 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
return getGeneration() == AMDGPUSubtarget::GFX9;
}
- bool hasVcmpxPermlaneHazard() const { return HasVcmpxPermlaneHazard; }
+ bool hasVcmpxPermlaneHazard() const {
+ return HasVcmpxPermlaneHazard;
+ }
- bool hasVMEMtoScalarWriteHazard() const { return HasVMEMtoScalarWriteHazard; }
+ bool hasVMEMtoScalarWriteHazard() const {
+ return HasVMEMtoScalarWriteHazard;
+ }
- bool hasSMEMtoVectorWriteHazard() const { return HasSMEMtoVectorWriteHazard; }
+ bool hasSMEMtoVectorWriteHazard() const {
+ return HasSMEMtoVectorWriteHazard;
+ }
- bool hasLDSMisalignedBug() const { return LDSMisalignedBug && !EnableCuMode; }
+ bool hasLDSMisalignedBug() const {
+ return LDSMisalignedBug && !EnableCuMode;
+ }
- bool hasInstFwdPrefetchBug() const { return HasInstFwdPrefetchBug; }
+ bool hasInstFwdPrefetchBug() const {
+ return HasInstFwdPrefetchBug;
+ }
- bool hasVcmpxExecWARHazard() const { return HasVcmpxExecWARHazard; }
+ bool hasVcmpxExecWARHazard() const {
+ return HasVcmpxExecWARHazard;
+ }
- bool hasLdsBranchVmemWARHazard() const { return HasLdsBranchVmemWARHazard; }
+ bool hasLdsBranchVmemWARHazard() const {
+ return HasLdsBranchVmemWARHazard;
+ }
// Shift amount of a 64 bit shift cannot be a highest allocated register
// if also at the end of the allocation block.
- bool hasShift64HighRegBug() const { return GFX90AInsts && !GFX940Insts; }
+ bool hasShift64HighRegBug() const {
+ return GFX90AInsts && !GFX940Insts;
+ }
// Has one cycle hazard on transcendental instruction feeding a
// non transcendental VALU.
@@ -974,9 +1232,13 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
bool hasDOTOpSelHazard() const { return GFX940Insts || GFX11Insts; }
// Does not have HW interlocs for VALU writing and then reading SGPRs.
- bool hasVDecCoExecHazard() const { return GFX940Insts; }
+ bool hasVDecCoExecHazard() const {
+ return GFX940Insts;
+ }
- bool hasNSAtoVMEMBug() const { return HasNSAtoVMEMBug; }
+ bool hasNSAtoVMEMBug() const {
+ return HasNSAtoVMEMBug;
+ }
bool hasNSAClauseBug() const { return HasNSAClauseBug; }
@@ -1046,7 +1308,9 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
/// Returns true if the target supports
/// global_load_lds_dwordx3/global_load_lds_dwordx4 or
/// buffer_load_dwordx3/buffer_load_dwordx4 with the lds bit.
- bool hasLDSLoadB96_B128() const { return hasGFX950Insts(); }
+ bool hasLDSLoadB96_B128() const {
+ return hasGFX950Insts();
+ }
bool hasSALUFloatInsts() const { return HasSALUFloatInsts; }
@@ -1077,11 +1341,17 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
bool hasPermlane32Swap() const { return HasPermlane32Swap; }
bool hasAshrPkInsts() const { return HasAshrPkInsts; }
- bool hasMinimum3Maximum3F32() const { return HasMinimum3Maximum3F32; }
+ bool hasMinimum3Maximum3F32() const {
+ return HasMinimum3Maximum3F32;
+ }
- bool hasMinimum3Maximum3F16() const { return HasMinimum3Maximum3F16; }
+ bool hasMinimum3Maximum3F16() const {
+ return HasMinimum3Maximum3F16;
+ }
- bool hasMinimum3Maximum3PKF16() const { return HasMinimum3Maximum3PKF16; }
+ bool hasMinimum3Maximum3PKF16() const {
+ return HasMinimum3Maximum3PKF16;
+ }
/// \returns The maximum number of instructions that can be enclosed in an
/// S_CLAUSE on the given subtarget, or 0 for targets that do not support that
@@ -1098,9 +1368,13 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
/// VGPRs
unsigned getOccupancyWithNumVGPRs(unsigned VGPRs) const;
- /// Return occupancy for the given function. Used LDS and a number of
- /// registers if provided.
- /// Note, occupancy can be affected by the scratch allocation as well, but
+ /// Subtarget's minimum/maximum occupancy, in number of waves per EU, that can
+ /// be achieved when the only function running on a CU is \p F, each workgroup
+ /// uses \p LDSSize bytes of LDS, and each wave uses \p NumSGPRs SGPRs and \p
+ /// NumVGPRs VGPRs. The flat workgroup sizes associated to the function are a
+ /// range, so this returns a range as well.
+ ///
+ /// Note that occupancy can be affected by the scratch allocation as well, but
/// we do not have enough information to compute it.
std::pair<unsigned, unsigned> computeOccupancy(const Function &F,
unsigned LDSSize = 0,
@@ -1128,7 +1402,9 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
/// \returns true if the machine has merged shaders in which s0-s7 are
/// reserved by the hardware and user SGPRs start at s8
- bool hasMergedShaders() const { return getGeneration() >= GFX9; }
+ bool hasMergedShaders() const {
+ return getGeneration() >= GFX9;
+ }
// \returns true if the target supports the pre-NGG legacy geometry path.
bool hasLegacyGeometry() const { return getGeneration() < GFX11; }
@@ -1286,7 +1562,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
/// \returns the maximum number of ArchVGPRs that can be used and still
/// achieve at least the specified number of waves \p WavesPerEU.
unsigned getMaxNumArchVGPRs(unsigned WavesPerEU) const {
- return AMDGPU::IsaInfo::getMaxNumVGPRs(this, WavesPerEU);
+ return AMDGPU::IsaInfo::getMaxNumArchVGPRs(this, WavesPerEU);
}
/// \returns max num VGPRs. This is the common utility function called by
@@ -1311,7 +1587,9 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
/// function \p F.
unsigned getMaxNumArchVGPRs(const Function &F) const;
- unsigned getMaxNumAGPRs(const Function &F) const { return getMaxNumVGPRs(F); }
+ unsigned getMaxNumAGPRs(const Function &F) const {
+ return getMaxNumVGPRs(F);
+ }
/// \returns Maximum number of VGPRs that meets number of waves per execution
/// unit requirement for function \p MF, or number of VGPRs explicitly
@@ -1323,9 +1601,13 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
/// unit requirement.
unsigned getMaxNumVGPRs(const MachineFunction &MF) const;
- bool isWave32() const { return getWavefrontSize() == 32; }
+ bool isWave32() const {
+ return getWavefrontSize() == 32;
+ }
- bool isWave64() const { return getWavefrontSize() == 64; }
+ bool isWave64() const {
+ return getWavefrontSize() == 64;
+ }
/// Returns if the wavesize of this subtarget is known reliable. This is false
/// only for the a default target-cpu that does not have an explicit
diff --git a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_global_pointer.ll b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_global_pointer.ll
index 8bb8ecb079a34..169cc0c51f4a4 100644
--- a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_global_pointer.ll
+++ b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_global_pointer.ll
@@ -2745,8 +2745,8 @@ define amdgpu_kernel void @add_i64_varying(ptr addrspace(1) %out, ptr addrspace(
; GFX1232_ITERATIVE-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1232_ITERATIVE-NEXT: s_ctz_i32_b32 s1, s0
; GFX1232_ITERATIVE-NEXT: s_wait_alu 0xfffe
-; GFX1232_ITERATIVE-NEXT: v_readlane_b32 s3, v3, s1
; GFX1232_ITERATIVE-NEXT: v_readlane_b32 s2, v2, s1
+; GFX1232_ITERATIVE-NEXT: v_readlane_b32 s3, v3, s1
; GFX1232_ITERATIVE-NEXT: s_lshl_b32 s8, 1, s1
; GFX1232_ITERATIVE-NEXT: v_writelane_b32 v1, s7, s1
; GFX1232_ITERATIVE-NEXT: v_writelane_b32 v0, s6, s1
@@ -6333,8 +6333,8 @@ define amdgpu_kernel void @sub_i64_varying(ptr addrspace(1) %out, ptr addrspace(
; GFX1232_ITERATIVE-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1232_ITERATIVE-NEXT: s_ctz_i32_b32 s1, s0
; GFX1232_ITERATIVE-NEXT: s_wait_alu 0xfffe
-; GFX1232_ITERATIVE-NEXT: v_readlane_b32 s3, v3, s1
; GFX1232_ITERATIVE-NEXT: v_readlane_b32 s2, v2, s1
+; GFX1232_ITERATIVE-NEXT: v_readlane_b32 s3, v3, s1
; GFX1232_ITERATIVE-NEXT: s_lshl_b32 s8, 1, s1
; GFX1232_ITERATIVE-NEXT: v_writelane_b32 v1, s7, s1
; GFX1232_ITERATIVE-NEXT: v_writelane_b32 v0, s6, s1
diff --git a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll
index 3c0646c46efd0..2edfe67fcd56f 100644
--- a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll
+++ b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll
@@ -1026,23 +1026,24 @@ define amdgpu_kernel void @add_i32_varying(ptr addrspace(1) %out) {
; GFX1132_DPP-NEXT: v_mov_b32_dpp v3, v1 row_shr:1 row_mask:0xf bank_mask:0xf
; GFX1132_DPP-NEXT: s_mov_b32 exec_lo, s0
; GFX1132_DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
-; GFX1132_DPP-NEXT: v_mov_b32_e32 v4, 0
; GFX1132_DPP-NEXT: s_or_saveexec_b32 s0, -1
; GFX1132_DPP-NEXT: v_writelane_b32 v3, s1, 16
; GFX1132_DPP-NEXT: s_mov_b32 exec_lo, s0
+; GFX1132_DPP-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX1132_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX1132_DPP-NEXT: s_mov_b32 s0, s2
+; GFX1132_DPP-NEXT: s_mov_b32 s1, s2
; GFX1132_DPP-NEXT: s_mov_b32 s2, -1
; GFX1132_DPP-NEXT: ; implicit-def: $vgpr0
-; GFX1132_DPP-NEXT: s_and_saveexec_b32 s1, vcc_lo
+; GFX1132_DPP-NEXT: s_and_saveexec_b32 s0, vcc_lo
; GFX1132_DPP-NEXT: s_cbranch_execz .LBB2_2
; GFX1132_DPP-NEXT: ; %bb.1:
-; GFX1132_DPP-NEXT: v_mov_b32_e32 v0, s0
+; GFX1132_DPP-NEXT: v_mov_b32_e32 v0, s1
+; GFX1132_DPP-NEXT: v_mov_b32_e32 v4, 0
; GFX1132_DPP-NEXT: ds_add_rtn_u32 v0, v4, v0
; GFX1132_DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132_DPP-NEXT: buffer_gl0_inv
; GFX1132_DPP-NEXT: .LBB2_2:
-; GFX1132_DPP-NEXT: s_or_b32 exec_lo, exec_lo, s1
+; GFX1132_DPP-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX1132_DPP-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX1132_DPP-NEXT: v_readfirstlane_b32 s3, v0
; GFX1132_DPP-NEXT: v_mov_b32_e32 v0, v3
@@ -2806,19 +2807,19 @@ define amdgpu_kernel void @add_i64_varying(ptr addrspace(1) %out) {
; GFX1132_DPP-NEXT: v_mov_b32_dpp v7, v1 row_shr:1 row_mask:0xf bank_mask:0xf
; GFX1132_DPP-NEXT: v_readlane_b32 s6, v1, 15
; GFX1132_DPP-NEXT: s_mov_b32 exec_lo, s2
-; GFX1132_DPP-NEXT: v_mbcnt_lo_u32_b32 v8, exec_lo, 0
-; GFX1132_DPP-NEXT: v_mov_b32_e32 v0, 0
+; GFX1132_DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
; GFX1132_DPP-NEXT: s_or_saveexec_b32 s2, -1
; GFX1132_DPP-NEXT: v_writelane_b32 v6, s3, 16
; GFX1132_DPP-NEXT: v_writelane_b32 v7, s6, 16
; GFX1132_DPP-NEXT: s_mov_b32 exec_lo, s2
-; GFX1132_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v8
; GFX1132_DPP-NEXT: s_mov_b32 s2, -1
+; GFX1132_DPP-NEXT: s_mov_b32 s3, exec_lo
; GFX1132_DPP-NEXT: ; implicit-def: $vgpr8_vgpr9
-; GFX1132_DPP-NEXT: s_and_saveexec_b32 s3, vcc_lo
+; GFX1132_DPP-NEXT: v_cmpx_eq_u32_e32 0, v0
; GFX1132_DPP-NEXT: s_cbranch_execz .LBB6_2
; GFX1132_DPP-NEXT: ; %bb.1:
; GFX1132_DPP-NEXT: v_dual_mov_b32 v9, s1 :: v_dual_mov_b32 v8, s0
+; GFX1132_DPP-NEXT: v_mov_b32_e32 v0, 0
; GFX1132_DPP-NEXT: ds_add_rtn_u64 v[8:9], v0, v[8:9]
; GFX1132_DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132_DPP-NEXT: buffer_gl0_inv
@@ -4448,23 +4449,24 @@ define amdgpu_kernel void @sub_i32_varying(ptr addrspace(1) %out) {
; GFX1132_DPP-NEXT: v_mov_b32_dpp v3, v1 row_shr:1 row_mask:0xf bank_mask:0xf
; GFX1132_DPP-NEXT: s_mov_b32 exec_lo, s0
; GFX1132_DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
-; GFX1132_DPP-NEXT: v_mov_b32_e32 v4, 0
; GFX1132_DPP-NEXT: s_or_saveexec_b32 s0, -1
; GFX1132_DPP-NEXT: v_writelane_b32 v3, s1, 16
; GFX1132_DPP-NEXT: s_mov_b32 exec_lo, s0
+; GFX1132_DPP-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX1132_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX1132_DPP-NEXT: s_mov_b32 s0, s2
+; GFX1132_DPP-NEXT: s_mov_b32 s1, s2
; GFX1132_DPP-NEXT: s_mov_b32 s2, -1
; GFX1132_DPP-NEXT: ; implicit-def: $vgpr0
-; GFX1132_DPP-NEXT: s_and_saveexec_b32 s1, vcc_lo
+; GFX1132_DPP-NEXT: s_and_saveexec_b32 s0, vcc_lo
; GFX1132_DPP-NEXT: s_cbranch_execz .LBB10_2
; GFX1132_DPP-NEXT: ; %bb.1:
-; GFX1132_DPP-NEXT: v_mov_b32_e32 v0, s0
+; GFX1132_DPP-NEXT: v_mov_b32_e32 v0, s1
+; GFX1132_DPP-NEXT: v_mov_b32_e32 v4, 0
; GFX1132_DPP-NEXT: ds_sub_rtn_u32 v0, v4, v0
; GFX1132_DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132_DPP-NEXT: buffer_gl0_inv
; GFX1132_DPP-NEXT: .LBB10_2:
-; GFX1132_DPP-NEXT: s_or_b32 exec_lo, exec_lo, s1
+; GFX1132_DPP-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX1132_DPP-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX1132_DPP-NEXT: v_readfirstlane_b32 s3, v0
; GFX1132_DPP-NEXT: v_mov_b32_e32 v0, v3
@@ -6251,19 +6253,19 @@ define amdgpu_kernel void @sub_i64_varying(ptr addrspace(1) %out) {
; GFX1132_DPP-NEXT: v_mov_b32_dpp v7, v1 row_shr:1 row_mask:0xf bank_mask:0xf
; GFX1132_DPP-NEXT: v_readlane_b32 s6, v1, 15
; GFX1132_DPP-NEXT: s_mov_b32 exec_lo, s2
-; GFX1132_DPP-NEXT: v_mbcnt_lo_u32_b32 v8, exec_lo, 0
-; GFX1132_DPP-NEXT: v_mov_b32_e32 v0, 0
+; GFX1132_DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
; GFX1132_DPP-NEXT: s_or_saveexec_b32 s2, -1
; GFX1132_DPP-NEXT: v_writelane_b32 v6, s3, 16
; GFX1132_DPP-NEXT: v_writelane_b32 v7, s6, 16
; GFX1132_DPP-NEXT: s_mov_b32 exec_lo, s2
-; GFX1132_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v8
; GFX1132_DPP-NEXT: s_mov_b32 s2, -1
+; GFX1132_DPP-NEXT: s_mov_b32 s3, exec_lo
; GFX1132_DPP-NEXT: ; implicit-def: $vgpr8_vgpr9
-; GFX1132_DPP-NEXT: s_and_saveexec_b32 s3, vcc_lo
+; GFX1132_DPP-NEXT: v_cmpx_eq_u32_e32 0, v0
; GFX1132_DPP-NEXT: s_cbranch_execz .LBB14_2
; GFX1132_DPP-NEXT: ; %bb.1:
; GFX1132_DPP-NEXT: v_dual_mov_b32 v9, s1 :: v_dual_mov_b32 v8, s0
+; GFX1132_DPP-NEXT: v_mov_b32_e32 v0, 0
; GFX1132_DPP-NEXT: ds_sub_rtn_u64 v[8:9], v0, v[8:9]
; GFX1132_DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132_DPP-NEXT: buffer_gl0_inv
@@ -7195,8 +7197,8 @@ define amdgpu_kernel void @and_i64_varying(ptr addrspace(1) %out) {
; GFX1132_ITERATIVE-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1132_ITERATIVE-NEXT: s_ctz_i32_b32 s3, s2
; GFX1132_ITERATIVE-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
-; GFX1132_ITERATIVE-NEXT: v_readlane_b32 s7, v3, s3
; GFX1132_ITERATIVE-NEXT: v_readlane_b32 s6, v2, s3
+; GFX1132_ITERATIVE-NEXT: v_readlane_b32 s7, v3, s3
; GFX1132_ITERATIVE-NEXT: s_lshl_b32 s8, 1, s3
; GFX1132_ITERATIVE-NEXT: v_writelane_b32 v1, s1, s3
; GFX1132_ITERATIVE-NEXT: v_writelane_b32 v0, s0, s3
@@ -7616,19 +7618,19 @@ define amdgpu_kernel void @and_i64_varying(ptr addrspace(1) %out) {
; GFX1132_DPP-NEXT: v_readlane_b32 s6, v2, 15
; GFX1132_DPP-NEXT: s_mov_b32 exec_lo, s2
; GFX1132_DPP-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
-; GFX1132_DPP-NEXT: v_mbcnt_lo_u32_b32 v7, exec_lo, 0
-; GFX1132_DPP-NEXT: v_mov_b32_e32 v0, 0
+; GFX1132_DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
; GFX1132_DPP-NEXT: s_or_saveexec_b32 s2, -1
; GFX1132_DPP-NEXT: v_writelane_b32 v6, s3, 16
; GFX1132_DPP-NEXT: v_writelane_b32 v5, s6, 16
; GFX1132_DPP-NEXT: s_mov_b32 exec_lo, s2
-; GFX1132_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v7
; GFX1132_DPP-NEXT: s_mov_b32 s2, -1
+; GFX1132_DPP-NEXT: s_mov_b32 s3, exec_lo
; GFX1132_DPP-NEXT: ; implicit-def: $vgpr7_vgpr8
-; GFX1132_DPP-NEXT: s_and_saveexec_b32 s3, vcc_lo
+; GFX1132_DPP-NEXT: v_cmpx_eq_u32_e32 0, v0
; GFX1132_DPP-NEXT: s_cbranch_execz .LBB16_2
; GFX1132_DPP-NEXT: ; %bb.1:
; GFX1132_DPP-NEXT: v_dual_mov_b32 v8, s1 :: v_dual_mov_b32 v7, s0
+; GFX1132_DPP-NEXT: v_mov_b32_e32 v0, 0
; GFX1132_DPP-NEXT: ds_and_rtn_b64 v[7:8], v0, v[7:8]
; GFX1132_DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132_DPP-NEXT: buffer_gl0_inv
@@ -8230,23 +8232,24 @@ define amdgpu_kernel void @or_i32_varying(ptr addrspace(1) %out) {
; GFX1132_DPP-NEXT: v_mov_b32_dpp v3, v1 row_shr:1 row_mask:0xf bank_mask:0xf
; GFX1132_DPP-NEXT: s_mov_b32 exec_lo, s0
; GFX1132_DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
-; GFX1132_DPP-NEXT: v_mov_b32_e32 v4, 0
; GFX1132_DPP-NEXT: s_or_saveexec_b32 s0, -1
; GFX1132_DPP-NEXT: v_writelane_b32 v3, s1, 16
; GFX1132_DPP-NEXT: s_mov_b32 exec_lo, s0
+; GFX1132_DPP-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX1132_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX1132_DPP-NEXT: s_mov_b32 s0, s2
+; GFX1132_DPP-NEXT: s_mov_b32 s1, s2
; GFX1132_DPP-NEXT: s_mov_b32 s2, -1
; GFX1132_DPP-NEXT: ; implicit-def: $vgpr0
-; GFX1132_DPP-NEXT: s_and_saveexec_b32 s1, vcc_lo
+; GFX1132_DPP-NEXT: s_and_saveexec_b32 s0, vcc_lo
; GFX1132_DPP-NEXT: s_cbranch_execz .LBB17_2
; GFX1132_DPP-NEXT: ; %bb.1:
-; GFX1132_DPP-NEXT: v_mov_b32_e32 v0, s0
+; GFX1132_DPP-NEXT: v_mov_b32_e32 v0, s1
+; GFX1132_DPP-NEXT: v_mov_b32_e32 v4, 0
; GFX1132_DPP-NEXT: ds_or_rtn_b32 v0, v4, v0
; GFX1132_DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132_DPP-NEXT: buffer_gl0_inv
; GFX1132_DPP-NEXT: .LBB17_2:
-; GFX1132_DPP-NEXT: s_or_b32 exec_lo, exec_lo, s1
+; GFX1132_DPP-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX1132_DPP-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX1132_DPP-NEXT: v_readfirstlane_b32 s3, v0
; GFX1132_DPP-NEXT: v_mov_b32_e32 v0, v3
@@ -8559,8 +8562,8 @@ define amdgpu_kernel void @or_i64_varying(ptr addrspace(1) %out) {
; GFX1132_ITERATIVE-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1132_ITERATIVE-NEXT: s_ctz_i32_b32 s3, s2
; GFX1132_ITERATIVE-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
-; GFX1132_ITERATIVE-NEXT: v_readlane_b32 s7, v3, s3
; GFX1132_ITERATIVE-NEXT: v_readlane_b32 s6, v2, s3
+; GFX1132_ITERATIVE-NEXT: v_readlane_b32 s7, v3, s3
; GFX1132_ITERATIVE-NEXT: s_lshl_b32 s8, 1, s3
; GFX1132_ITERATIVE-NEXT: v_writelane_b32 v1, s1, s3
; GFX1132_ITERATIVE-NEXT: v_writelane_b32 v0, s0, s3
@@ -8980,19 +8983,19 @@ define amdgpu_kernel void @or_i64_varying(ptr addrspace(1) %out) {
; GFX1132_DPP-NEXT: v_readlane_b32 s6, v2, 15
; GFX1132_DPP-NEXT: s_mov_b32 exec_lo, s2
; GFX1132_DPP-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
-; GFX1132_DPP-NEXT: v_mbcnt_lo_u32_b32 v7, exec_lo, 0
-; GFX1132_DPP-NEXT: v_mov_b32_e32 v0, 0
+; GFX1132_DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
; GFX1132_DPP-NEXT: s_or_saveexec_b32 s2, -1
; GFX1132_DPP-NEXT: v_writelane_b32 v6, s3, 16
; GFX1132_DPP-NEXT: v_writelane_b32 v5, s6, 16
; GFX1132_DPP-NEXT: s_mov_b32 exec_lo, s2
-; GFX1132_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v7
; GFX1132_DPP-NEXT: s_mov_b32 s2, -1
+; GFX1132_DPP-NEXT: s_mov_b32 s3, exec_lo
; GFX1132_DPP-NEXT: ; implicit-def: $vgpr7_vgpr8
-; GFX1132_DPP-NEXT: s_and_saveexec_b32 s3, vcc_lo
+; GFX1132_DPP-NEXT: v_cmpx_eq_u32_e32 0, v0
; GFX1132_DPP-NEXT: s_cbranch_execz .LBB18_2
; GFX1132_DPP-NEXT: ; %bb.1:
; GFX1132_DPP-NEXT: v_dual_mov_b32 v8, s1 :: v_dual_mov_b32 v7, s0
+; GFX1132_DPP-NEXT: v_mov_b32_e32 v0, 0
; GFX1132_DPP-NEXT: ds_or_rtn_b64 v[7:8], v0, v[7:8]
; GFX1132_DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132_DPP-NEXT: buffer_gl0_inv
@@ -9594,23 +9597,24 @@ define amdgpu_kernel void @xor_i32_varying(ptr addrspace(1) %out) {
; GFX1132_DPP-NEXT: v_mov_b32_dpp v3, v1 row_shr:1 row_mask:0xf bank_mask:0xf
; GFX1132_DPP-NEXT: s_mov_b32 exec_lo, s0
; GFX1132_DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
-; GFX1132_DPP-NEXT: v_mov_b32_e32 v4, 0
; GFX1132_DPP-NEXT: s_or_saveexec_b32 s0, -1
; GFX1132_DPP-NEXT: v_writelane_b32 v3, s1, 16
; GFX1132_DPP-NEXT: s_mov_b32 exec_lo, s0
+; GFX1132_DPP-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX1132_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX1132_DPP-NEXT: s_mov_b32 s0, s2
+; GFX1132_DPP-NEXT: s_mov_b32 s1, s2
; GFX1132_DPP-NEXT: s_mov_b32 s2, -1
; GFX1132_DPP-NEXT: ; implicit-def: $vgpr0
-; GFX1132_DPP-NEXT: s_and_saveexec_b32 s1, vcc_lo
+; GFX1132_DPP-NEXT: s_and_saveexec_b32 s0, vcc_lo
; GFX1132_DPP-NEXT: s_cbranch_execz .LBB19_2
; GFX1132_DPP-NEXT: ; %bb.1:
-; GFX1132_DPP-NEXT: v_mov_b32_e32 v0, s0
+; GFX1132_DPP-NEXT: v_mov_b32_e32 v0, s1
+; GFX1132_DPP-NEXT: v_mov_b32_e32 v4, 0
; GFX1132_DPP-NEXT: ds_xor_rtn_b32 v0, v4, v0
; GFX1132_DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132_DPP-NEXT: buffer_gl0_inv
; GFX1132_DPP-NEXT: .LBB19_2:
-; GFX1132_DPP-NEXT: s_or_b32 exec_lo, exec_lo, s1
+; GFX1132_DPP-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX1132_DPP-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX1132_DPP-NEXT: v_readfirstlane_b32 s3, v0
; GFX1132_DPP-NEXT: v_mov_b32_e32 v0, v3
@@ -9923,8 +9927,8 @@ define amdgpu_kernel void @xor_i64_varying(ptr addrspace(1) %out) {
; GFX1132_ITERATIVE-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1132_ITERATIVE-NEXT: s_ctz_i32_b32 s3, s2
; GFX1132_ITERATIVE-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
-; GFX1132_ITERATIVE-NEXT: v_readlane_b32 s7, v3, s3
; GFX1132_ITERATIVE-NEXT: v_readlane_b32 s6, v2, s3
+; GFX1132_ITERATIVE-NEXT: v_readlane_b32 s7, v3, s3
; GFX1132_ITERATIVE-NEXT: s_lshl_b32 s8, 1, s3
; GFX1132_ITERATIVE-NEXT: v_writelane_b32 v1, s1, s3
; GFX1132_ITERATIVE-NEXT: v_writelane_b32 v0, s0, s3
@@ -10344,19 +10348,19 @@ define amdgpu_kernel void @xor_i64_varying(ptr addrspace(1) %out) {
; GFX1132_DPP-NEXT: v_readlane_b32 s6, v2, 15
; GFX1132_DPP-NEXT: s_mov_b32 exec_lo, s2
; GFX1132_DPP-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
-; GFX1132_DPP-NEXT: v_mbcnt_lo_u32_b32 v7, exec_lo, 0
-; GFX1132_DPP-NEXT: v_mov_b32_e32 v0, 0
+; GFX1132_DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
; GFX1132_DPP-NEXT: s_or_saveexec_b32 s2, -1
; GFX1132_DPP-NEXT: v_writelane_b32 v6, s3, 16
; GFX1132_DPP-NEXT: v_writelane_b32 v5, s6, 16
; GFX1132_DPP-NEXT: s_mov_b32 exec_lo, s2
-; GFX1132_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v7
; GFX1132_DPP-NEXT: s_mov_b32 s2, -1
+; GFX1132_DPP-NEXT: s_mov_b32 s3, exec_lo
; GFX1132_DPP-NEXT: ; implicit-def: $vgpr7_vgpr8
-; GFX1132_DPP-NEXT: s_and_saveexec_b32 s3, vcc_lo
+; GFX1132_DPP-NEXT: v_cmpx_eq_u32_e32 0, v0
; GFX1132_DPP-NEXT: s_cbranch_execz .LBB20_2
; GFX1132_DPP-NEXT: ; %bb.1:
; GFX1132_DPP-NEXT: v_dual_mov_b32 v8, s1 :: v_dual_mov_b32 v7, s0
+; GFX1132_DPP-NEXT: v_mov_b32_e32 v0, 0
; GFX1132_DPP-NEXT: ds_xor_rtn_b64 v[7:8], v0, v[7:8]
; GFX1132_DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132_DPP-NEXT: buffer_gl0_inv
@@ -11569,8 +11573,8 @@ define amdgpu_kernel void @max_i64_varying(ptr addrspace(1) %out) {
; GFX1132_ITERATIVE-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1132_ITERATIVE-NEXT: s_ctz_i32_b32 s3, s2
; GFX1132_ITERATIVE-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(VALU_DEP_3)
-; GFX1132_ITERATIVE-NEXT: v_readlane_b32 s7, v3, s3
; GFX1132_ITERATIVE-NEXT: v_readlane_b32 s6, v2, s3
+; GFX1132_ITERATIVE-NEXT: v_readlane_b32 s7, v3, s3
; GFX1132_ITERATIVE-NEXT: v_writelane_b32 v1, s1, s3
; GFX1132_ITERATIVE-NEXT: v_writelane_b32 v0, s0, s3
; GFX1132_ITERATIVE-NEXT: v_cmp_gt_i64_e64 s8, s[0:1], s[6:7]
@@ -12177,19 +12181,19 @@ define amdgpu_kernel void @max_i64_varying(ptr addrspace(1) %out) {
; GFX1132_DPP-NEXT: v_mov_b32_dpp v4, v1 row_shr:1 row_mask:0xf bank_mask:0xf
; GFX1132_DPP-NEXT: v_readlane_b32 s6, v1, 15
; GFX1132_DPP-NEXT: s_mov_b32 exec_lo, s2
-; GFX1132_DPP-NEXT: v_mbcnt_lo_u32_b32 v7, exec_lo, 0
-; GFX1132_DPP-NEXT: v_mov_b32_e32 v0, 0
+; GFX1132_DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
; GFX1132_DPP-NEXT: s_or_saveexec_b32 s2, -1
; GFX1132_DPP-NEXT: v_writelane_b32 v5, s3, 16
; GFX1132_DPP-NEXT: v_writelane_b32 v4, s6, 16
; GFX1132_DPP-NEXT: s_mov_b32 exec_lo, s2
-; GFX1132_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v7
; GFX1132_DPP-NEXT: s_mov_b32 s2, -1
+; GFX1132_DPP-NEXT: s_mov_b32 s3, exec_lo
; GFX1132_DPP-NEXT: ; implicit-def: $vgpr7_vgpr8
-; GFX1132_DPP-NEXT: s_and_saveexec_b32 s3, vcc_lo
+; GFX1132_DPP-NEXT: v_cmpx_eq_u32_e32 0, v0
; GFX1132_DPP-NEXT: s_cbranch_execz .LBB23_2
; GFX1132_DPP-NEXT: ; %bb.1:
; GFX1132_DPP-NEXT: v_dual_mov_b32 v8, s1 :: v_dual_mov_b32 v7, s0
+; GFX1132_DPP-NEXT: v_mov_b32_e32 v0, 0
; GFX1132_DPP-NEXT: ds_max_rtn_i64 v[7:8], v0, v[7:8]
; GFX1132_DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132_DPP-NEXT: buffer_gl0_inv
@@ -13403,8 +13407,8 @@ define amdgpu_kernel void @min_i64_varying(ptr addrspace(1) %out) {
; GFX1132_ITERATIVE-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1132_ITERATIVE-NEXT: s_ctz_i32_b32 s3, s2
; GFX1132_ITERATIVE-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(VALU_DEP_3)
-; GFX1132_ITERATIVE-NEXT: v_readlane_b32 s7, v3, s3
; GFX1132_ITERATIVE-NEXT: v_readlane_b32 s6, v2, s3
+; GFX1132_ITERATIVE-NEXT: v_readlane_b32 s7, v3, s3
; GFX1132_ITERATIVE-NEXT: v_writelane_b32 v1, s1, s3
; GFX1132_ITERATIVE-NEXT: v_writelane_b32 v0, s0, s3
; GFX1132_ITERATIVE-NEXT: v_cmp_lt_i64_e64 s8, s[0:1], s[6:7]
@@ -14011,19 +14015,19 @@ define amdgpu_kernel void @min_i64_varying(ptr addrspace(1) %out) {
; GFX1132_DPP-NEXT: v_mov_b32_dpp v4, v1 row_shr:1 row_mask:0xf bank_mask:0xf
; GFX1132_DPP-NEXT: v_readlane_b32 s6, v1, 15
; GFX1132_DPP-NEXT: s_mov_b32 exec_lo, s2
-; GFX1132_DPP-NEXT: v_mbcnt_lo_u32_b32 v7, exec_lo, 0
-; GFX1132_DPP-NEXT: v_mov_b32_e32 v0, 0
+; GFX1132_DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
; GFX1132_DPP-NEXT: s_or_saveexec_b32 s2, -1
; GFX1132_DPP-NEXT: v_writelane_b32 v5, s3, 16
; GFX1132_DPP-NEXT: v_writelane_b32 v4, s6, 16
; GFX1132_DPP-NEXT: s_mov_b32 exec_lo, s2
-; GFX1132_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v7
; GFX1132_DPP-NEXT: s_mov_b32 s2, -1
+; GFX1132_DPP-NEXT: s_mov_b32 s3, exec_lo
; GFX1132_DPP-NEXT: ; implicit-def: $vgpr7_vgpr8
-; GFX1132_DPP-NEXT: s_and_saveexec_b32 s3, vcc_lo
+; GFX1132_DPP-NEXT: v_cmpx_eq_u32_e32 0, v0
; GFX1132_DPP-NEXT: s_cbranch_execz .LBB26_2
; GFX1132_DPP-NEXT: ; %bb.1:
; GFX1132_DPP-NEXT: v_dual_mov_b32 v8, s1 :: v_dual_mov_b32 v7, s0
+; GFX1132_DPP-NEXT: v_mov_b32_e32 v0, 0
; GFX1132_DPP-NEXT: ds_min_rtn_i64 v[7:8], v0, v[7:8]
; GFX1132_DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132_DPP-NEXT: buffer_gl0_inv
@@ -14626,23 +14630,24 @@ define amdgpu_kernel void @umax_i32_varying(ptr addrspace(1) %out) {
; GFX1132_DPP-NEXT: v_mov_b32_dpp v3, v1 row_shr:1 row_mask:0xf bank_mask:0xf
; GFX1132_DPP-NEXT: s_mov_b32 exec_lo, s0
; GFX1132_DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
-; GFX1132_DPP-NEXT: v_mov_b32_e32 v4, 0
; GFX1132_DPP-NEXT: s_or_saveexec_b32 s0, -1
; GFX1132_DPP-NEXT: v_writelane_b32 v3, s1, 16
; GFX1132_DPP-NEXT: s_mov_b32 exec_lo, s0
+; GFX1132_DPP-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX1132_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX1132_DPP-NEXT: s_mov_b32 s0, s2
+; GFX1132_DPP-NEXT: s_mov_b32 s1, s2
; GFX1132_DPP-NEXT: s_mov_b32 s2, -1
; GFX1132_DPP-NEXT: ; implicit-def: $vgpr0
-; GFX1132_DPP-NEXT: s_and_saveexec_b32 s1, vcc_lo
+; GFX1132_DPP-NEXT: s_and_saveexec_b32 s0, vcc_lo
; GFX1132_DPP-NEXT: s_cbranch_execz .LBB27_2
; GFX1132_DPP-NEXT: ; %bb.1:
-; GFX1132_DPP-NEXT: v_mov_b32_e32 v0, s0
+; GFX1132_DPP-NEXT: v_mov_b32_e32 v0, s1
+; GFX1132_DPP-NEXT: v_mov_b32_e32 v4, 0
; GFX1132_DPP-NEXT: ds_max_rtn_u32 v0, v4, v0
; GFX1132_DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132_DPP-NEXT: buffer_gl0_inv
; GFX1132_DPP-NEXT: .LBB27_2:
-; GFX1132_DPP-NEXT: s_or_b32 exec_lo, exec_lo, s1
+; GFX1132_DPP-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX1132_DPP-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX1132_DPP-NEXT: v_readfirstlane_b32 s3, v0
; GFX1132_DPP-NEXT: v_mov_b32_e32 v0, v3
@@ -15226,8 +15231,8 @@ define amdgpu_kernel void @umax_i64_varying(ptr addrspace(1) %out) {
; GFX1132_ITERATIVE-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1132_ITERATIVE-NEXT: s_ctz_i32_b32 s3, s2
; GFX1132_ITERATIVE-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
-; GFX1132_ITERATIVE-NEXT: v_readlane_b32 s7, v3, s3
; GFX1132_ITERATIVE-NEXT: v_readlane_b32 s6, v2, s3
+; GFX1132_ITERATIVE-NEXT: v_readlane_b32 s7, v3, s3
; GFX1132_ITERATIVE-NEXT: v_writelane_b32 v1, s1, s3
; GFX1132_ITERATIVE-NEXT: v_writelane_b32 v0, s0, s3
; GFX1132_ITERATIVE-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
@@ -15830,19 +15835,19 @@ define amdgpu_kernel void @umax_i64_varying(ptr addrspace(1) %out) {
; GFX1132_DPP-NEXT: v_mov_b32_dpp v4, v1 row_shr:1 row_mask:0xf bank_mask:0xf
; GFX1132_DPP-NEXT: v_readlane_b32 s6, v1, 15
; GFX1132_DPP-NEXT: s_mov_b32 exec_lo, s2
-; GFX1132_DPP-NEXT: v_mbcnt_lo_u32_b32 v7, exec_lo, 0
-; GFX1132_DPP-NEXT: v_mov_b32_e32 v0, 0
+; GFX1132_DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
; GFX1132_DPP-NEXT: s_or_saveexec_b32 s2, -1
; GFX1132_DPP-NEXT: v_writelane_b32 v5, s3, 16
; GFX1132_DPP-NEXT: v_writelane_b32 v4, s6, 16
; GFX1132_DPP-NEXT: s_mov_b32 exec_lo, s2
-; GFX1132_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v7
; GFX1132_DPP-NEXT: s_mov_b32 s2, -1
+; GFX1132_DPP-NEXT: s_mov_b32 s3, exec_lo
; GFX1132_DPP-NEXT: ; implicit-def: $vgpr7_vgpr8
-; GFX1132_DPP-NEXT: s_and_saveexec_b32 s3, vcc_lo
+; GFX1132_DPP-NEXT: v_cmpx_eq_u32_e32 0, v0
; GFX1132_DPP-NEXT: s_cbranch_execz .LBB29_2
; GFX1132_DPP-NEXT: ; %bb.1:
; GFX1132_DPP-NEXT: v_dual_mov_b32 v8, s1 :: v_dual_mov_b32 v7, s0
+; GFX1132_DPP-NEXT: v_mov_b32_e32 v0, 0
; GFX1132_DPP-NEXT: ds_max_rtn_u64 v[7:8], v0, v[7:8]
; GFX1132_DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132_DPP-NEXT: buffer_gl0_inv
@@ -17046,8 +17051,8 @@ define amdgpu_kernel void @umin_i64_varying(ptr addrspace(1) %out) {
; GFX1132_ITERATIVE-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1132_ITERATIVE-NEXT: s_ctz_i32_b32 s3, s2
; GFX1132_ITERATIVE-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
-; GFX1132_ITERATIVE-NEXT: v_readlane_b32 s7, v3, s3
; GFX1132_ITERATIVE-NEXT: v_readlane_b32 s6, v2, s3
+; GFX1132_ITERATIVE-NEXT: v_readlane_b32 s7, v3, s3
; GFX1132_ITERATIVE-NEXT: v_writelane_b32 v1, s1, s3
; GFX1132_ITERATIVE-NEXT: v_writelane_b32 v0, s0, s3
; GFX1132_ITERATIVE-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
@@ -17648,19 +17653,19 @@ define amdgpu_kernel void @umin_i64_varying(ptr addrspace(1) %out) {
; GFX1132_DPP-NEXT: v_mov_b32_dpp v4, v1 row_shr:1 row_mask:0xf bank_mask:0xf
; GFX1132_DPP-NEXT: v_readlane_b32 s6, v1, 15
; GFX1132_DPP-NEXT: s_mov_b32 exec_lo, s2
-; GFX1132_DPP-NEXT: v_mbcnt_lo_u32_b32 v7, exec_lo, 0
-; GFX1132_DPP-NEXT: v_mov_b32_e32 v0, 0
+; GFX1132_DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
; GFX1132_DPP-NEXT: s_or_saveexec_b32 s2, -1
; GFX1132_DPP-NEXT: v_writelane_b32 v5, s3, 16
; GFX1132_DPP-NEXT: v_writelane_b32 v4, s6, 16
; GFX1132_DPP-NEXT: s_mov_b32 exec_lo, s2
-; GFX1132_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v7
; GFX1132_DPP-NEXT: s_mov_b32 s2, -1
+; GFX1132_DPP-NEXT: s_mov_b32 s3, exec_lo
; GFX1132_DPP-NEXT: ; implicit-def: $vgpr7_vgpr8
-; GFX1132_DPP-NEXT: s_and_saveexec_b32 s3, vcc_lo
+; GFX1132_DPP-NEXT: v_cmpx_eq_u32_e32 0, v0
; GFX1132_DPP-NEXT: s_cbranch_execz .LBB32_2
; GFX1132_DPP-NEXT: ; %bb.1:
; GFX1132_DPP-NEXT: v_dual_mov_b32 v8, s1 :: v_dual_mov_b32 v7, s0
+; GFX1132_DPP-NEXT: v_mov_b32_e32 v0, 0
; GFX1132_DPP-NEXT: ds_min_rtn_u64 v[7:8], v0, v[7:8]
; GFX1132_DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132_DPP-NEXT: buffer_gl0_inv
diff --git a/llvm/test/CodeGen/AMDGPU/dynamic_stackalloc.ll b/llvm/test/CodeGen/AMDGPU/dynamic_stackalloc.ll
index d61c4b46596c0..7c0ae8e726ad4 100644
--- a/llvm/test/CodeGen/AMDGPU/dynamic_stackalloc.ll
+++ b/llvm/test/CodeGen/AMDGPU/dynamic_stackalloc.ll
@@ -2084,15 +2084,14 @@ define void @test_dynamic_stackalloc_device_multiple_allocas(i32 %n, i32 %m) {
; GFX11-SDAG-NEXT: v_readfirstlane_b32 s32, v1
; GFX11-SDAG-NEXT: .LBB14_6: ; %bb.1
; GFX11-SDAG-NEXT: s_or_b32 exec_lo, exec_lo, s1
-; GFX11-SDAG-NEXT: v_lshl_add_u32 v1, v0, 2, 15
-; GFX11-SDAG-NEXT: v_mov_b32_e32 v0, 2
+; GFX11-SDAG-NEXT: v_lshl_add_u32 v0, v0, 2, 15
; GFX11-SDAG-NEXT: s_mov_b32 s1, exec_lo
-; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-SDAG-NEXT: v_and_b32_e32 v1, -16, v1
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-NEXT: v_and_b32_e32 v0, -16, v0
; GFX11-SDAG-NEXT: .LBB14_7: ; =>This Inner Loop Header: Depth=1
; GFX11-SDAG-NEXT: s_ctz_i32_b32 s2, s1
; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
-; GFX11-SDAG-NEXT: v_readlane_b32 s3, v1, s2
+; GFX11-SDAG-NEXT: v_readlane_b32 s3, v0, s2
; GFX11-SDAG-NEXT: s_bitset0_b32 s1, s2
; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-SDAG-NEXT: s_max_u32 s0, s0, s3
@@ -2100,16 +2099,16 @@ define void @test_dynamic_stackalloc_device_multiple_allocas(i32 %n, i32 %m) {
; GFX11-SDAG-NEXT: s_cbranch_scc1 .LBB14_7
; GFX11-SDAG-NEXT: ; %bb.8:
; GFX11-SDAG-NEXT: s_mov_b32 s1, s32
-; GFX11-SDAG-NEXT: v_mov_b32_e32 v2, 1
-; GFX11-SDAG-NEXT: v_lshl_add_u32 v1, s0, 5, s1
-; GFX11-SDAG-NEXT: scratch_store_b32 off, v2, s33 dlc
+; GFX11-SDAG-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_mov_b32 v2, 2
+; GFX11-SDAG-NEXT: v_lshl_add_u32 v0, s0, 5, s1
+; GFX11-SDAG-NEXT: scratch_store_b32 off, v1, s33 dlc
; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX11-SDAG-NEXT: scratch_store_b32 off, v0, s1 dlc
+; GFX11-SDAG-NEXT: scratch_store_b32 off, v2, s1 dlc
; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX11-SDAG-NEXT: v_readfirstlane_b32 s32, v1
+; GFX11-SDAG-NEXT: s_mov_b32 s33, s7
+; GFX11-SDAG-NEXT: v_readfirstlane_b32 s32, v0
; GFX11-SDAG-NEXT: s_mov_b32 s32, s34
; GFX11-SDAG-NEXT: s_mov_b32 s34, s8
-; GFX11-SDAG-NEXT: s_mov_b32 s33, s7
; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-GISEL-LABEL: test_dynamic_stackalloc_device_multiple_allocas:
@@ -2364,15 +2363,14 @@ define void @test_dynamic_stackalloc_device_control_flow(i32 %n, i32 %m) {
; GFX11-SDAG-NEXT: s_xor_b32 s0, exec_lo, s0
; GFX11-SDAG-NEXT: s_cbranch_execz .LBB15_4
; GFX11-SDAG-NEXT: ; %bb.1: ; %bb.1
-; GFX11-SDAG-NEXT: v_lshl_add_u32 v1, v1, 2, 15
-; GFX11-SDAG-NEXT: v_mov_b32_e32 v0, 2
+; GFX11-SDAG-NEXT: v_lshl_add_u32 v0, v1, 2, 15
; GFX11-SDAG-NEXT: s_mov_b32 s2, exec_lo
-; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-SDAG-NEXT: v_and_b32_e32 v1, -16, v1
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-NEXT: v_and_b32_e32 v0, -16, v0
; GFX11-SDAG-NEXT: .LBB15_2: ; =>This Inner Loop Header: Depth=1
; GFX11-SDAG-NEXT: s_ctz_i32_b32 s3, s2
; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
-; GFX11-SDAG-NEXT: v_readlane_b32 s4, v1, s3
+; GFX11-SDAG-NEXT: v_readlane_b32 s4, v0, s3
; GFX11-SDAG-NEXT: s_bitset0_b32 s2, s3
; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-SDAG-NEXT: s_max_u32 s1, s1, s4
@@ -2380,13 +2378,14 @@ define void @test_dynamic_stackalloc_device_control_flow(i32 %n, i32 %m) {
; GFX11-SDAG-NEXT: s_cbranch_scc1 .LBB15_2
; GFX11-SDAG-NEXT: ; %bb.3:
; GFX11-SDAG-NEXT: s_add_i32 s2, s32, 0x7ff
-; GFX11-SDAG-NEXT: ; implicit-def: $vgpr31
-; GFX11-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX11-SDAG-NEXT: v_mov_b32_e32 v1, 2
; GFX11-SDAG-NEXT: s_and_b32 s2, s2, 0xfffff800
-; GFX11-SDAG-NEXT: v_lshl_add_u32 v1, s1, 5, s2
-; GFX11-SDAG-NEXT: scratch_store_b32 off, v0, s2 dlc
+; GFX11-SDAG-NEXT: ; implicit-def: $vgpr31
+; GFX11-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-SDAG-NEXT: v_lshl_add_u32 v0, s1, 5, s2
+; GFX11-SDAG-NEXT: scratch_store_b32 off, v1, s2 dlc
; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX11-SDAG-NEXT: v_readfirstlane_b32 s32, v1
+; GFX11-SDAG-NEXT: v_readfirstlane_b32 s32, v0
; GFX11-SDAG-NEXT: .LBB15_4: ; %Flow
; GFX11-SDAG-NEXT: s_and_not1_saveexec_b32 s0, s0
; GFX11-SDAG-NEXT: s_cbranch_execz .LBB15_8
diff --git a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fadd.ll b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fadd.ll
index 15be44a335a1d..2a17e5733f597 100644
--- a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fadd.ll
+++ b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fadd.ll
@@ -2585,17 +2585,16 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_uni_value_agent_scope_
; GFX1132-NEXT: s_waitcnt vmcnt(0)
; GFX1132-NEXT: v_add_f64 v[0:1], 0xc3300000, v[0:1]
; GFX1132-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132-NEXT: v_mov_b32_e32 v3, 0
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-NEXT: s_load_b32 s3, s[0:1], 0x0
-; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX1132-NEXT: v_cvt_f32_f64_e32 v0, v[0:1]
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-NEXT: v_dual_mov_b32 v1, s3 :: v_dual_mul_f32 v2, 4.0, v0
; GFX1132-NEXT: .LBB4_2: ; %atomicrmw.start
; GFX1132-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1132-NEXT: v_add_f32_e32 v0, v1, v2
+; GFX1132-NEXT: v_dual_add_f32 v0, v1, v2 :: v_dual_mov_b32 v3, 0
; GFX1132-NEXT: global_atomic_cmpswap_b32 v0, v3, v[0:1], s[0:1] glc
; GFX1132-NEXT: s_waitcnt vmcnt(0)
; GFX1132-NEXT: v_cmp_eq_u32_e32 vcc_lo, v0, v1
@@ -2835,17 +2834,16 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_uni_value_agent_scope_
; GFX1132-DPP-NEXT: s_waitcnt vmcnt(0)
; GFX1132-DPP-NEXT: v_add_f64 v[0:1], 0xc3300000, v[0:1]
; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132-DPP-NEXT: v_mov_b32_e32 v3, 0
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-DPP-NEXT: s_load_b32 s3, s[0:1], 0x0
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX1132-DPP-NEXT: v_cvt_f32_f64_e32 v0, v[0:1]
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-DPP-NEXT: v_dual_mov_b32 v1, s3 :: v_dual_mul_f32 v2, 4.0, v0
; GFX1132-DPP-NEXT: .LBB4_2: ; %atomicrmw.start
; GFX1132-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1132-DPP-NEXT: v_add_f32_e32 v0, v1, v2
+; GFX1132-DPP-NEXT: v_dual_add_f32 v0, v1, v2 :: v_dual_mov_b32 v3, 0
; GFX1132-DPP-NEXT: global_atomic_cmpswap_b32 v0, v3, v[0:1], s[0:1] glc
; GFX1132-DPP-NEXT: s_waitcnt vmcnt(0)
; GFX1132-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, v0, v1
@@ -4641,17 +4639,16 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_uni_value_default_scop
; GFX1132-NEXT: s_waitcnt vmcnt(0)
; GFX1132-NEXT: v_add_f64 v[0:1], 0xc3300000, v[0:1]
; GFX1132-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132-NEXT: v_mov_b32_e32 v3, 0
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-NEXT: s_load_b32 s3, s[0:1], 0x0
-; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX1132-NEXT: v_cvt_f32_f64_e32 v0, v[0:1]
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-NEXT: v_dual_mov_b32 v1, s3 :: v_dual_mul_f32 v2, 4.0, v0
; GFX1132-NEXT: .LBB7_2: ; %atomicrmw.start
; GFX1132-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1132-NEXT: v_add_f32_e32 v0, v1, v2
+; GFX1132-NEXT: v_dual_add_f32 v0, v1, v2 :: v_dual_mov_b32 v3, 0
; GFX1132-NEXT: global_atomic_cmpswap_b32 v0, v3, v[0:1], s[0:1] glc
; GFX1132-NEXT: s_waitcnt vmcnt(0)
; GFX1132-NEXT: v_cmp_eq_u32_e32 vcc_lo, v0, v1
@@ -4891,17 +4888,16 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_uni_value_default_scop
; GFX1132-DPP-NEXT: s_waitcnt vmcnt(0)
; GFX1132-DPP-NEXT: v_add_f64 v[0:1], 0xc3300000, v[0:1]
; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132-DPP-NEXT: v_mov_b32_e32 v3, 0
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-DPP-NEXT: s_load_b32 s3, s[0:1], 0x0
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX1132-DPP-NEXT: v_cvt_f32_f64_e32 v0, v[0:1]
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-DPP-NEXT: v_dual_mov_b32 v1, s3 :: v_dual_mul_f32 v2, 4.0, v0
; GFX1132-DPP-NEXT: .LBB7_2: ; %atomicrmw.start
; GFX1132-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1132-DPP-NEXT: v_add_f32_e32 v0, v1, v2
+; GFX1132-DPP-NEXT: v_dual_add_f32 v0, v1, v2 :: v_dual_mov_b32 v3, 0
; GFX1132-DPP-NEXT: global_atomic_cmpswap_b32 v0, v3, v[0:1], s[0:1] glc
; GFX1132-DPP-NEXT: s_waitcnt vmcnt(0)
; GFX1132-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, v0, v1
@@ -7932,16 +7928,17 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX1132-DPP-NEXT: v_add_f64 v[8:9], v[8:9], v[10:11]
; GFX1132-DPP-NEXT: s_mov_b32 exec_lo, s0
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v41, v8
-; GFX1132-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0
+; GFX1132-DPP-NEXT: v_mov_b32_e32 v41, v8
+; GFX1132-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_2)
; GFX1132-DPP-NEXT: v_mov_b32_e32 v42, v9
; GFX1132-DPP-NEXT: s_mov_b32 s46, 0
; GFX1132-DPP-NEXT: s_mov_b32 s0, exec_lo
-; GFX1132-DPP-NEXT: v_cmpx_eq_u32_e32 0, v1
+; GFX1132-DPP-NEXT: v_cmpx_eq_u32_e32 0, v0
; GFX1132-DPP-NEXT: s_cbranch_execz .LBB10_3
; GFX1132-DPP-NEXT: ; %bb.1:
; GFX1132-DPP-NEXT: s_load_b64 s[44:45], s[34:35], 0x24
+; GFX1132-DPP-NEXT: v_mov_b32_e32 v0, 0
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-DPP-NEXT: global_load_b64 v[1:2], v0, s[44:45]
; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x1
@@ -8222,10 +8219,9 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_one_a
; GFX1132-NEXT: s_waitcnt vmcnt(0)
; GFX1132-NEXT: v_add_f64 v[0:1], 0xc3300000, v[0:1]
; GFX1132-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132-NEXT: v_mov_b32_e32 v6, 0
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-NEXT: s_load_b64 s[4:5], s[0:1], 0x0
-; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1132-NEXT: v_mul_f64 v[4:5], 4.0, v[0:1]
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-NEXT: v_dual_mov_b32 v2, s4 :: v_dual_mov_b32 v3, s5
@@ -8233,6 +8229,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_one_a
; GFX1132-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1132-NEXT: v_add_f64 v[0:1], v[2:3], v[4:5]
+; GFX1132-NEXT: v_mov_b32_e32 v6, 0
; GFX1132-NEXT: global_atomic_cmpswap_b64 v[0:1], v6, v[0:3], s[0:1] glc
; GFX1132-NEXT: s_waitcnt vmcnt(0)
; GFX1132-NEXT: v_cmp_eq_u64_e32 vcc_lo, v[0:1], v[2:3]
@@ -8479,10 +8476,9 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_one_a
; GFX1132-DPP-NEXT: s_waitcnt vmcnt(0)
; GFX1132-DPP-NEXT: v_add_f64 v[0:1], 0xc3300000, v[0:1]
; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132-DPP-NEXT: v_mov_b32_e32 v6, 0
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-DPP-NEXT: s_load_b64 s[4:5], s[0:1], 0x0
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1132-DPP-NEXT: v_mul_f64 v[4:5], 4.0, v[0:1]
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-DPP-NEXT: v_dual_mov_b32 v2, s4 :: v_dual_mov_b32 v3, s5
@@ -8490,6 +8486,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_one_a
; GFX1132-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1132-DPP-NEXT: v_add_f64 v[0:1], v[2:3], v[4:5]
+; GFX1132-DPP-NEXT: v_mov_b32_e32 v6, 0
; GFX1132-DPP-NEXT: global_atomic_cmpswap_b64 v[0:1], v6, v[0:3], s[0:1] glc
; GFX1132-DPP-NEXT: s_waitcnt vmcnt(0)
; GFX1132-DPP-NEXT: v_cmp_eq_u64_e32 vcc_lo, v[0:1], v[2:3]
@@ -9655,10 +9652,9 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent
; GFX1132-NEXT: s_waitcnt vmcnt(0)
; GFX1132-NEXT: v_add_f64 v[0:1], 0xc3300000, v[0:1]
; GFX1132-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132-NEXT: v_mov_b32_e32 v6, 0
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-NEXT: s_load_b64 s[4:5], s[0:1], 0x0
-; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1132-NEXT: v_mul_f64 v[4:5], 4.0, v[0:1]
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-NEXT: v_dual_mov_b32 v2, s4 :: v_dual_mov_b32 v3, s5
@@ -9666,6 +9662,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent
; GFX1132-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1132-NEXT: v_add_f64 v[0:1], v[2:3], v[4:5]
+; GFX1132-NEXT: v_mov_b32_e32 v6, 0
; GFX1132-NEXT: global_atomic_cmpswap_b64 v[0:1], v6, v[0:3], s[0:1] glc
; GFX1132-NEXT: s_waitcnt vmcnt(0)
; GFX1132-NEXT: v_cmp_eq_u64_e32 vcc_lo, v[0:1], v[2:3]
@@ -9912,10 +9909,9 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent
; GFX1132-DPP-NEXT: s_waitcnt vmcnt(0)
; GFX1132-DPP-NEXT: v_add_f64 v[0:1], 0xc3300000, v[0:1]
; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132-DPP-NEXT: v_mov_b32_e32 v6, 0
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-DPP-NEXT: s_load_b64 s[4:5], s[0:1], 0x0
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1132-DPP-NEXT: v_mul_f64 v[4:5], 4.0, v[0:1]
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-DPP-NEXT: v_dual_mov_b32 v2, s4 :: v_dual_mov_b32 v3, s5
@@ -9923,6 +9919,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent
; GFX1132-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1132-DPP-NEXT: v_add_f64 v[0:1], v[2:3], v[4:5]
+; GFX1132-DPP-NEXT: v_mov_b32_e32 v6, 0
; GFX1132-DPP-NEXT: global_atomic_cmpswap_b64 v[0:1], v6, v[0:3], s[0:1] glc
; GFX1132-DPP-NEXT: s_waitcnt vmcnt(0)
; GFX1132-DPP-NEXT: v_cmp_eq_u64_e32 vcc_lo, v[0:1], v[2:3]
@@ -13988,16 +13985,17 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau
; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX1132-DPP-NEXT: v_add_f64 v[8:9], v[8:9], v[10:11]
; GFX1132-DPP-NEXT: s_mov_b32 exec_lo, s0
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v41, v8
-; GFX1132-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0
+; GFX1132-DPP-NEXT: v_mov_b32_e32 v41, v8
+; GFX1132-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_2)
; GFX1132-DPP-NEXT: v_mov_b32_e32 v42, v9
; GFX1132-DPP-NEXT: s_mov_b32 s46, 0
; GFX1132-DPP-NEXT: s_mov_b32 s0, exec_lo
-; GFX1132-DPP-NEXT: v_cmpx_eq_u32_e32 0, v1
+; GFX1132-DPP-NEXT: v_cmpx_eq_u32_e32 0, v0
; GFX1132-DPP-NEXT: s_cbranch_execz .LBB17_3
; GFX1132-DPP-NEXT: ; %bb.1:
; GFX1132-DPP-NEXT: s_load_b64 s[44:45], s[34:35], 0x24
+; GFX1132-DPP-NEXT: v_mov_b32_e32 v0, 0
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-DPP-NEXT: global_load_b64 v[1:2], v0, s[44:45]
; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x1
diff --git a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmax.ll b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmax.ll
index a4410bb9ed2d0..2892641017296 100644
--- a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmax.ll
+++ b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmax.ll
@@ -5353,16 +5353,17 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
; GFX1132-DPP-NEXT: v_max_f64 v[8:9], v[8:9], v[10:11]
; GFX1132-DPP-NEXT: s_mov_b32 exec_lo, s0
; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_3)
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v3, v8
-; GFX1132-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0
+; GFX1132-DPP-NEXT: v_mov_b32_e32 v3, v8
+; GFX1132-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
; GFX1132-DPP-NEXT: v_mov_b32_e32 v4, v9
; GFX1132-DPP-NEXT: s_mov_b32 s46, 0
; GFX1132-DPP-NEXT: s_mov_b32 s0, exec_lo
; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX1132-DPP-NEXT: v_cmpx_eq_u32_e32 0, v1
+; GFX1132-DPP-NEXT: v_cmpx_eq_u32_e32 0, v0
; GFX1132-DPP-NEXT: s_cbranch_execz .LBB7_3
; GFX1132-DPP-NEXT: ; %bb.1:
; GFX1132-DPP-NEXT: s_load_b64 s[44:45], s[34:35], 0x24
+; GFX1132-DPP-NEXT: v_mov_b32_e32 v0, 0
; GFX1132-DPP-NEXT: v_max_f64 v[41:42], v[3:4], v[3:4]
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-DPP-NEXT: global_load_b64 v[1:2], v0, s[44:45]
@@ -5556,15 +5557,15 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_one_a
; GFX1132-NEXT: s_cbranch_execz .LBB8_3
; GFX1132-NEXT: ; %bb.1:
; GFX1132-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132-NEXT: v_mov_b32_e32 v4, 0
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-NEXT: s_load_b64 s[4:5], s[0:1], 0x0
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-NEXT: v_dual_mov_b32 v2, s4 :: v_dual_mov_b32 v3, s5
; GFX1132-NEXT: .LBB8_2: ; %atomicrmw.start
; GFX1132-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX1132-NEXT: v_max_f64 v[0:1], v[2:3], v[2:3]
+; GFX1132-NEXT: v_mov_b32_e32 v4, 0
; GFX1132-NEXT: v_max_f64 v[0:1], v[0:1], 4.0
; GFX1132-NEXT: global_atomic_cmpswap_b64 v[0:1], v4, v[0:3], s[0:1] glc
; GFX1132-NEXT: s_waitcnt vmcnt(0)
@@ -5722,15 +5723,15 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_one_a
; GFX1132-DPP-NEXT: s_cbranch_execz .LBB8_3
; GFX1132-DPP-NEXT: ; %bb.1:
; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132-DPP-NEXT: v_mov_b32_e32 v4, 0
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-DPP-NEXT: s_load_b64 s[4:5], s[0:1], 0x0
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-DPP-NEXT: v_dual_mov_b32 v2, s4 :: v_dual_mov_b32 v3, s5
; GFX1132-DPP-NEXT: .LBB8_2: ; %atomicrmw.start
; GFX1132-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX1132-DPP-NEXT: v_max_f64 v[0:1], v[2:3], v[2:3]
+; GFX1132-DPP-NEXT: v_mov_b32_e32 v4, 0
; GFX1132-DPP-NEXT: v_max_f64 v[0:1], v[0:1], 4.0
; GFX1132-DPP-NEXT: global_atomic_cmpswap_b64 v[0:1], v4, v[0:3], s[0:1] glc
; GFX1132-DPP-NEXT: s_waitcnt vmcnt(0)
@@ -8901,16 +8902,17 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
; GFX1132-DPP-NEXT: v_max_f64 v[8:9], v[8:9], v[10:11]
; GFX1132-DPP-NEXT: s_mov_b32 exec_lo, s0
; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_3)
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v3, v8
-; GFX1132-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0
+; GFX1132-DPP-NEXT: v_mov_b32_e32 v3, v8
+; GFX1132-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
; GFX1132-DPP-NEXT: v_mov_b32_e32 v4, v9
; GFX1132-DPP-NEXT: s_mov_b32 s46, 0
; GFX1132-DPP-NEXT: s_mov_b32 s0, exec_lo
; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX1132-DPP-NEXT: v_cmpx_eq_u32_e32 0, v1
+; GFX1132-DPP-NEXT: v_cmpx_eq_u32_e32 0, v0
; GFX1132-DPP-NEXT: s_cbranch_execz .LBB11_3
; GFX1132-DPP-NEXT: ; %bb.1:
; GFX1132-DPP-NEXT: s_load_b64 s[44:45], s[34:35], 0x24
+; GFX1132-DPP-NEXT: v_mov_b32_e32 v0, 0
; GFX1132-DPP-NEXT: v_max_f64 v[41:42], v[3:4], v[3:4]
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-DPP-NEXT: global_load_b64 v[1:2], v0, s[44:45]
diff --git a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmin.ll b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmin.ll
index 68d7dcc60506c..40cca6bfc63cb 100644
--- a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmin.ll
+++ b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmin.ll
@@ -5353,16 +5353,17 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
; GFX1132-DPP-NEXT: v_min_f64 v[8:9], v[8:9], v[10:11]
; GFX1132-DPP-NEXT: s_mov_b32 exec_lo, s0
; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_3)
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v3, v8
-; GFX1132-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0
+; GFX1132-DPP-NEXT: v_mov_b32_e32 v3, v8
+; GFX1132-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
; GFX1132-DPP-NEXT: v_mov_b32_e32 v4, v9
; GFX1132-DPP-NEXT: s_mov_b32 s46, 0
; GFX1132-DPP-NEXT: s_mov_b32 s0, exec_lo
; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX1132-DPP-NEXT: v_cmpx_eq_u32_e32 0, v1
+; GFX1132-DPP-NEXT: v_cmpx_eq_u32_e32 0, v0
; GFX1132-DPP-NEXT: s_cbranch_execz .LBB7_3
; GFX1132-DPP-NEXT: ; %bb.1:
; GFX1132-DPP-NEXT: s_load_b64 s[44:45], s[34:35], 0x24
+; GFX1132-DPP-NEXT: v_mov_b32_e32 v0, 0
; GFX1132-DPP-NEXT: v_max_f64 v[41:42], v[3:4], v[3:4]
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-DPP-NEXT: global_load_b64 v[1:2], v0, s[44:45]
@@ -5556,15 +5557,15 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_one_a
; GFX1132-NEXT: s_cbranch_execz .LBB8_3
; GFX1132-NEXT: ; %bb.1:
; GFX1132-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132-NEXT: v_mov_b32_e32 v4, 0
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-NEXT: s_load_b64 s[4:5], s[0:1], 0x0
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-NEXT: v_dual_mov_b32 v2, s4 :: v_dual_mov_b32 v3, s5
; GFX1132-NEXT: .LBB8_2: ; %atomicrmw.start
; GFX1132-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX1132-NEXT: v_max_f64 v[0:1], v[2:3], v[2:3]
+; GFX1132-NEXT: v_mov_b32_e32 v4, 0
; GFX1132-NEXT: v_min_f64 v[0:1], v[0:1], 4.0
; GFX1132-NEXT: global_atomic_cmpswap_b64 v[0:1], v4, v[0:3], s[0:1] glc
; GFX1132-NEXT: s_waitcnt vmcnt(0)
@@ -5722,15 +5723,15 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_one_a
; GFX1132-DPP-NEXT: s_cbranch_execz .LBB8_3
; GFX1132-DPP-NEXT: ; %bb.1:
; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132-DPP-NEXT: v_mov_b32_e32 v4, 0
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-DPP-NEXT: s_load_b64 s[4:5], s[0:1], 0x0
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-DPP-NEXT: v_dual_mov_b32 v2, s4 :: v_dual_mov_b32 v3, s5
; GFX1132-DPP-NEXT: .LBB8_2: ; %atomicrmw.start
; GFX1132-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX1132-DPP-NEXT: v_max_f64 v[0:1], v[2:3], v[2:3]
+; GFX1132-DPP-NEXT: v_mov_b32_e32 v4, 0
; GFX1132-DPP-NEXT: v_min_f64 v[0:1], v[0:1], 4.0
; GFX1132-DPP-NEXT: global_atomic_cmpswap_b64 v[0:1], v4, v[0:3], s[0:1] glc
; GFX1132-DPP-NEXT: s_waitcnt vmcnt(0)
@@ -8901,16 +8902,17 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
; GFX1132-DPP-NEXT: v_min_f64 v[8:9], v[8:9], v[10:11]
; GFX1132-DPP-NEXT: s_mov_b32 exec_lo, s0
; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_3)
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v3, v8
-; GFX1132-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0
+; GFX1132-DPP-NEXT: v_mov_b32_e32 v3, v8
+; GFX1132-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
; GFX1132-DPP-NEXT: v_mov_b32_e32 v4, v9
; GFX1132-DPP-NEXT: s_mov_b32 s46, 0
; GFX1132-DPP-NEXT: s_mov_b32 s0, exec_lo
; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX1132-DPP-NEXT: v_cmpx_eq_u32_e32 0, v1
+; GFX1132-DPP-NEXT: v_cmpx_eq_u32_e32 0, v0
; GFX1132-DPP-NEXT: s_cbranch_execz .LBB11_3
; GFX1132-DPP-NEXT: ; %bb.1:
; GFX1132-DPP-NEXT: s_load_b64 s[44:45], s[34:35], 0x24
+; GFX1132-DPP-NEXT: v_mov_b32_e32 v0, 0
; GFX1132-DPP-NEXT: v_max_f64 v[41:42], v[3:4], v[3:4]
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-DPP-NEXT: global_load_b64 v[1:2], v0, s[44:45]
diff --git a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fsub.ll b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fsub.ll
index 7126680525b87..e2d58b80e01f7 100644
--- a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fsub.ll
+++ b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fsub.ll
@@ -195,17 +195,16 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_uni_value_agent_scope_
; GFX1132-NEXT: ; %bb.1:
; GFX1132-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX1132-NEXT: s_bcnt1_i32_b32 s3, s3
-; GFX1132-NEXT: v_mov_b32_e32 v3, 0
+; GFX1132-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
; GFX1132-NEXT: v_cvt_f32_ubyte0_e32 v0, s3
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-NEXT: s_load_b32 s4, s[0:1], 0x0
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1132-NEXT: v_dual_mul_f32 v2, 4.0, v0 :: v_dual_mov_b32 v1, s4
; GFX1132-NEXT: .LBB0_2: ; %atomicrmw.start
; GFX1132-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1132-NEXT: v_sub_f32_e32 v0, v1, v2
+; GFX1132-NEXT: v_dual_sub_f32 v0, v1, v2 :: v_dual_mov_b32 v3, 0
; GFX1132-NEXT: global_atomic_cmpswap_b32 v0, v3, v[0:1], s[0:1] glc
; GFX1132-NEXT: s_waitcnt vmcnt(0)
; GFX1132-NEXT: v_cmp_eq_u32_e32 vcc_lo, v0, v1
@@ -396,17 +395,16 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_uni_value_agent_scope_
; GFX1132-DPP-NEXT: ; %bb.1:
; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX1132-DPP-NEXT: s_bcnt1_i32_b32 s3, s3
-; GFX1132-DPP-NEXT: v_mov_b32_e32 v3, 0
+; GFX1132-DPP-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
; GFX1132-DPP-NEXT: v_cvt_f32_ubyte0_e32 v0, s3
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-DPP-NEXT: s_load_b32 s4, s[0:1], 0x0
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1132-DPP-NEXT: v_dual_mul_f32 v2, 4.0, v0 :: v_dual_mov_b32 v1, s4
; GFX1132-DPP-NEXT: .LBB0_2: ; %atomicrmw.start
; GFX1132-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1132-DPP-NEXT: v_sub_f32_e32 v0, v1, v2
+; GFX1132-DPP-NEXT: v_dual_sub_f32 v0, v1, v2 :: v_dual_mov_b32 v3, 0
; GFX1132-DPP-NEXT: global_atomic_cmpswap_b32 v0, v3, v[0:1], s[0:1] glc
; GFX1132-DPP-NEXT: s_waitcnt vmcnt(0)
; GFX1132-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, v0, v1
@@ -1477,17 +1475,16 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_uni_value_one_as_scope
; GFX1132-NEXT: s_waitcnt vmcnt(0)
; GFX1132-NEXT: v_add_f64 v[0:1], 0xc3300000, v[0:1]
; GFX1132-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132-NEXT: v_mov_b32_e32 v3, 0
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-NEXT: s_load_b32 s3, s[0:1], 0x0
-; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX1132-NEXT: v_cvt_f32_f64_e32 v0, v[0:1]
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-NEXT: v_dual_mov_b32 v1, s3 :: v_dual_mul_f32 v2, 4.0, v0
; GFX1132-NEXT: .LBB2_2: ; %atomicrmw.start
; GFX1132-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1132-NEXT: v_sub_f32_e32 v0, v1, v2
+; GFX1132-NEXT: v_dual_sub_f32 v0, v1, v2 :: v_dual_mov_b32 v3, 0
; GFX1132-NEXT: global_atomic_cmpswap_b32 v0, v3, v[0:1], s[0:1] glc
; GFX1132-NEXT: s_waitcnt vmcnt(0)
; GFX1132-NEXT: v_cmp_eq_u32_e32 vcc_lo, v0, v1
@@ -1727,17 +1724,16 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_uni_value_one_as_scope
; GFX1132-DPP-NEXT: s_waitcnt vmcnt(0)
; GFX1132-DPP-NEXT: v_add_f64 v[0:1], 0xc3300000, v[0:1]
; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132-DPP-NEXT: v_mov_b32_e32 v3, 0
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-DPP-NEXT: s_load_b32 s3, s[0:1], 0x0
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX1132-DPP-NEXT: v_cvt_f32_f64_e32 v0, v[0:1]
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-DPP-NEXT: v_dual_mov_b32 v1, s3 :: v_dual_mul_f32 v2, 4.0, v0
; GFX1132-DPP-NEXT: .LBB2_2: ; %atomicrmw.start
; GFX1132-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1132-DPP-NEXT: v_sub_f32_e32 v0, v1, v2
+; GFX1132-DPP-NEXT: v_dual_sub_f32 v0, v1, v2 :: v_dual_mov_b32 v3, 0
; GFX1132-DPP-NEXT: global_atomic_cmpswap_b32 v0, v3, v[0:1], s[0:1] glc
; GFX1132-DPP-NEXT: s_waitcnt vmcnt(0)
; GFX1132-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, v0, v1
@@ -2809,17 +2805,16 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_uni_value_agent_scope_
; GFX1132-NEXT: s_waitcnt vmcnt(0)
; GFX1132-NEXT: v_add_f64 v[0:1], 0xc3300000, v[0:1]
; GFX1132-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132-NEXT: v_mov_b32_e32 v3, 0
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-NEXT: s_load_b32 s3, s[0:1], 0x0
-; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX1132-NEXT: v_cvt_f32_f64_e32 v0, v[0:1]
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-NEXT: v_dual_mov_b32 v1, s3 :: v_dual_mul_f32 v2, 4.0, v0
; GFX1132-NEXT: .LBB4_2: ; %atomicrmw.start
; GFX1132-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1132-NEXT: v_sub_f32_e32 v0, v1, v2
+; GFX1132-NEXT: v_dual_sub_f32 v0, v1, v2 :: v_dual_mov_b32 v3, 0
; GFX1132-NEXT: global_atomic_cmpswap_b32 v0, v3, v[0:1], s[0:1] glc
; GFX1132-NEXT: s_waitcnt vmcnt(0)
; GFX1132-NEXT: v_cmp_eq_u32_e32 vcc_lo, v0, v1
@@ -3059,17 +3054,16 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_uni_value_agent_scope_
; GFX1132-DPP-NEXT: s_waitcnt vmcnt(0)
; GFX1132-DPP-NEXT: v_add_f64 v[0:1], 0xc3300000, v[0:1]
; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132-DPP-NEXT: v_mov_b32_e32 v3, 0
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-DPP-NEXT: s_load_b32 s3, s[0:1], 0x0
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX1132-DPP-NEXT: v_cvt_f32_f64_e32 v0, v[0:1]
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-DPP-NEXT: v_dual_mov_b32 v1, s3 :: v_dual_mul_f32 v2, 4.0, v0
; GFX1132-DPP-NEXT: .LBB4_2: ; %atomicrmw.start
; GFX1132-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1132-DPP-NEXT: v_sub_f32_e32 v0, v1, v2
+; GFX1132-DPP-NEXT: v_dual_sub_f32 v0, v1, v2 :: v_dual_mov_b32 v3, 0
; GFX1132-DPP-NEXT: global_atomic_cmpswap_b32 v0, v3, v[0:1], s[0:1] glc
; GFX1132-DPP-NEXT: s_waitcnt vmcnt(0)
; GFX1132-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, v0, v1
@@ -4969,17 +4963,16 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_uni_value_default_scop
; GFX1132-NEXT: s_waitcnt vmcnt(0)
; GFX1132-NEXT: v_add_f64 v[0:1], 0xc3300000, v[0:1]
; GFX1132-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132-NEXT: v_mov_b32_e32 v3, 0
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-NEXT: s_load_b32 s3, s[0:1], 0x0
-; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX1132-NEXT: v_cvt_f32_f64_e32 v0, v[0:1]
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-NEXT: v_dual_mov_b32 v1, s3 :: v_dual_mul_f32 v2, 4.0, v0
; GFX1132-NEXT: .LBB7_2: ; %atomicrmw.start
; GFX1132-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1132-NEXT: v_sub_f32_e32 v0, v1, v2
+; GFX1132-NEXT: v_dual_sub_f32 v0, v1, v2 :: v_dual_mov_b32 v3, 0
; GFX1132-NEXT: global_atomic_cmpswap_b32 v0, v3, v[0:1], s[0:1] glc
; GFX1132-NEXT: s_waitcnt vmcnt(0)
; GFX1132-NEXT: v_cmp_eq_u32_e32 vcc_lo, v0, v1
@@ -5219,17 +5212,16 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_uni_value_default_scop
; GFX1132-DPP-NEXT: s_waitcnt vmcnt(0)
; GFX1132-DPP-NEXT: v_add_f64 v[0:1], 0xc3300000, v[0:1]
; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132-DPP-NEXT: v_mov_b32_e32 v3, 0
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-DPP-NEXT: s_load_b32 s3, s[0:1], 0x0
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX1132-DPP-NEXT: v_cvt_f32_f64_e32 v0, v[0:1]
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-DPP-NEXT: v_dual_mov_b32 v1, s3 :: v_dual_mul_f32 v2, 4.0, v0
; GFX1132-DPP-NEXT: .LBB7_2: ; %atomicrmw.start
; GFX1132-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1132-DPP-NEXT: v_sub_f32_e32 v0, v1, v2
+; GFX1132-DPP-NEXT: v_dual_sub_f32 v0, v1, v2 :: v_dual_mov_b32 v3, 0
; GFX1132-DPP-NEXT: global_atomic_cmpswap_b32 v0, v3, v[0:1], s[0:1] glc
; GFX1132-DPP-NEXT: s_waitcnt vmcnt(0)
; GFX1132-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, v0, v1
@@ -8260,16 +8252,17 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX1132-DPP-NEXT: v_add_f64 v[8:9], v[8:9], v[10:11]
; GFX1132-DPP-NEXT: s_mov_b32 exec_lo, s0
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v41, v8
-; GFX1132-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0
+; GFX1132-DPP-NEXT: v_mov_b32_e32 v41, v8
+; GFX1132-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_2)
; GFX1132-DPP-NEXT: v_mov_b32_e32 v42, v9
; GFX1132-DPP-NEXT: s_mov_b32 s46, 0
; GFX1132-DPP-NEXT: s_mov_b32 s0, exec_lo
-; GFX1132-DPP-NEXT: v_cmpx_eq_u32_e32 0, v1
+; GFX1132-DPP-NEXT: v_cmpx_eq_u32_e32 0, v0
; GFX1132-DPP-NEXT: s_cbranch_execz .LBB10_3
; GFX1132-DPP-NEXT: ; %bb.1:
; GFX1132-DPP-NEXT: s_load_b64 s[44:45], s[34:35], 0x24
+; GFX1132-DPP-NEXT: v_mov_b32_e32 v0, 0
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-DPP-NEXT: global_load_b64 v[1:2], v0, s[44:45]
; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x1
@@ -8550,10 +8543,9 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_one_a
; GFX1132-NEXT: s_waitcnt vmcnt(0)
; GFX1132-NEXT: v_add_f64 v[0:1], 0xc3300000, v[0:1]
; GFX1132-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132-NEXT: v_mov_b32_e32 v6, 0
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-NEXT: s_load_b64 s[4:5], s[0:1], 0x0
-; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1132-NEXT: v_mul_f64 v[4:5], 4.0, v[0:1]
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-NEXT: v_dual_mov_b32 v2, s4 :: v_dual_mov_b32 v3, s5
@@ -8561,6 +8553,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_one_a
; GFX1132-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1132-NEXT: v_add_f64 v[0:1], v[2:3], -v[4:5]
+; GFX1132-NEXT: v_mov_b32_e32 v6, 0
; GFX1132-NEXT: global_atomic_cmpswap_b64 v[0:1], v6, v[0:3], s[0:1] glc
; GFX1132-NEXT: s_waitcnt vmcnt(0)
; GFX1132-NEXT: v_cmp_eq_u64_e32 vcc_lo, v[0:1], v[2:3]
@@ -8807,10 +8800,9 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_one_a
; GFX1132-DPP-NEXT: s_waitcnt vmcnt(0)
; GFX1132-DPP-NEXT: v_add_f64 v[0:1], 0xc3300000, v[0:1]
; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132-DPP-NEXT: v_mov_b32_e32 v6, 0
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-DPP-NEXT: s_load_b64 s[4:5], s[0:1], 0x0
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1132-DPP-NEXT: v_mul_f64 v[4:5], 4.0, v[0:1]
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-DPP-NEXT: v_dual_mov_b32 v2, s4 :: v_dual_mov_b32 v3, s5
@@ -8818,6 +8810,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_one_a
; GFX1132-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1132-DPP-NEXT: v_add_f64 v[0:1], v[2:3], -v[4:5]
+; GFX1132-DPP-NEXT: v_mov_b32_e32 v6, 0
; GFX1132-DPP-NEXT: global_atomic_cmpswap_b64 v[0:1], v6, v[0:3], s[0:1] glc
; GFX1132-DPP-NEXT: s_waitcnt vmcnt(0)
; GFX1132-DPP-NEXT: v_cmp_eq_u64_e32 vcc_lo, v[0:1], v[2:3]
@@ -9982,10 +9975,9 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent
; GFX1132-NEXT: s_waitcnt vmcnt(0)
; GFX1132-NEXT: v_add_f64 v[0:1], 0xc3300000, v[0:1]
; GFX1132-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132-NEXT: v_mov_b32_e32 v6, 0
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-NEXT: s_load_b64 s[4:5], s[0:1], 0x0
-; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1132-NEXT: v_mul_f64 v[4:5], 4.0, v[0:1]
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-NEXT: v_dual_mov_b32 v2, s4 :: v_dual_mov_b32 v3, s5
@@ -9993,6 +9985,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent
; GFX1132-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1132-NEXT: v_add_f64 v[0:1], v[2:3], -v[4:5]
+; GFX1132-NEXT: v_mov_b32_e32 v6, 0
; GFX1132-NEXT: global_atomic_cmpswap_b64 v[0:1], v6, v[0:3], s[0:1] glc
; GFX1132-NEXT: s_waitcnt vmcnt(0)
; GFX1132-NEXT: v_cmp_eq_u64_e32 vcc_lo, v[0:1], v[2:3]
@@ -10239,10 +10232,9 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent
; GFX1132-DPP-NEXT: s_waitcnt vmcnt(0)
; GFX1132-DPP-NEXT: v_add_f64 v[0:1], 0xc3300000, v[0:1]
; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132-DPP-NEXT: v_mov_b32_e32 v6, 0
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-DPP-NEXT: s_load_b64 s[4:5], s[0:1], 0x0
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1132-DPP-NEXT: v_mul_f64 v[4:5], 4.0, v[0:1]
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-DPP-NEXT: v_dual_mov_b32 v2, s4 :: v_dual_mov_b32 v3, s5
@@ -10250,6 +10242,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent
; GFX1132-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1132-DPP-NEXT: v_add_f64 v[0:1], v[2:3], -v[4:5]
+; GFX1132-DPP-NEXT: v_mov_b32_e32 v6, 0
; GFX1132-DPP-NEXT: global_atomic_cmpswap_b64 v[0:1], v6, v[0:3], s[0:1] glc
; GFX1132-DPP-NEXT: s_waitcnt vmcnt(0)
; GFX1132-DPP-NEXT: v_cmp_eq_u64_e32 vcc_lo, v[0:1], v[2:3]
@@ -14314,16 +14307,17 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau
; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX1132-DPP-NEXT: v_add_f64 v[8:9], v[8:9], v[10:11]
; GFX1132-DPP-NEXT: s_mov_b32 exec_lo, s0
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v41, v8
-; GFX1132-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0
+; GFX1132-DPP-NEXT: v_mov_b32_e32 v41, v8
+; GFX1132-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_2)
; GFX1132-DPP-NEXT: v_mov_b32_e32 v42, v9
; GFX1132-DPP-NEXT: s_mov_b32 s46, 0
; GFX1132-DPP-NEXT: s_mov_b32 s0, exec_lo
-; GFX1132-DPP-NEXT: v_cmpx_eq_u32_e32 0, v1
+; GFX1132-DPP-NEXT: v_cmpx_eq_u32_e32 0, v0
; GFX1132-DPP-NEXT: s_cbranch_execz .LBB17_3
; GFX1132-DPP-NEXT: ; %bb.1:
; GFX1132-DPP-NEXT: s_load_b64 s[44:45], s[34:35], 0x24
+; GFX1132-DPP-NEXT: v_mov_b32_e32 v0, 0
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-DPP-NEXT: global_load_b64 v[1:2], v0, s[44:45]
; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x1
diff --git a/llvm/test/CodeGen/AMDGPU/idiv-licm.ll b/llvm/test/CodeGen/AMDGPU/idiv-licm.ll
index 5fab0c50bbe57..343780fde306f 100644
--- a/llvm/test/CodeGen/AMDGPU/idiv-licm.ll
+++ b/llvm/test/CodeGen/AMDGPU/idiv-licm.ll
@@ -565,22 +565,23 @@ define amdgpu_kernel void @srem32_invariant_denom(ptr addrspace(1) nocapture %ar
;
; GFX11-LABEL: srem32_invariant_denom:
; GFX11: ; %bb.0: ; %bb
-; GFX11-NEXT: s_load_b32 s0, s[4:5], 0x2c
-; GFX11-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-NEXT: s_abs_i32 s2, s0
+; GFX11-NEXT: s_clause 0x1
+; GFX11-NEXT: s_load_b32 s2, s[4:5], 0x2c
; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: s_abs_i32 s2, s2
+; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_cvt_f32_u32_e32 v0, s2
; GFX11-NEXT: s_sub_i32 s3, 0, s2
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_rcp_iflag_f32_e32 v0, v0
; GFX11-NEXT: s_waitcnt_depctr 0xfff
; GFX11-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_cvt_u32_f32_e32 v0, v0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX11-NEXT: v_readfirstlane_b32 s4, v0
; GFX11-NEXT: v_mov_b32_e32 v0, 0
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX11-NEXT: s_mul_i32 s3, s3, s4
-; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: s_mul_hi_u32 s5, s4, s3
; GFX11-NEXT: s_mov_b32 s3, 0
; GFX11-NEXT: s_add_i32 s4, s4, s5
@@ -601,7 +602,6 @@ define amdgpu_kernel void @srem32_invariant_denom(ptr addrspace(1) nocapture %ar
; GFX11-NEXT: s_cselect_b32 s5, s6, s5
; GFX11-NEXT: s_add_i32 s3, s3, 1
; GFX11-NEXT: v_mov_b32_e32 v1, s5
-; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX11-NEXT: s_add_u32 s0, s0, 4
; GFX11-NEXT: s_addc_u32 s1, s1, 0
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.umax.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.umax.ll
index f72f1e52d135f..0317df6f6a044 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.umax.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.umax.ll
@@ -463,7 +463,7 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out, i32 %in) {
; GFX1132DAGISEL-LABEL: divergent_value:
; GFX1132DAGISEL: ; %bb.0: ; %entry
; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0x3ff, v0
+; GFX1132DAGISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX1132DAGISEL-NEXT: s_mov_b32 s3, exec_lo
; GFX1132DAGISEL-NEXT: s_mov_b32 s2, 0
; GFX1132DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
@@ -476,7 +476,7 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out, i32 %in) {
; GFX1132DAGISEL-NEXT: s_cmp_lg_u32 s3, 0
; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1
; GFX1132DAGISEL-NEXT: ; %bb.2:
-; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, s2
+; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 0
; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132DAGISEL-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX1132DAGISEL-NEXT: s_endpgm
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.umin.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.umin.ll
index 4551c60770bdf..5ec0224898bfd 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.umin.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.umin.ll
@@ -464,7 +464,7 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1132DAGISEL-LABEL: divergent_value:
; GFX1132DAGISEL: ; %bb.0: ; %entry
; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0x3ff, v0
+; GFX1132DAGISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX1132DAGISEL-NEXT: s_mov_b32 s3, exec_lo
; GFX1132DAGISEL-NEXT: s_mov_b32 s2, -1
; GFX1132DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
@@ -477,7 +477,7 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1132DAGISEL-NEXT: s_cmp_lg_u32 s3, 0
; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1
; GFX1132DAGISEL-NEXT: ; %bb.2:
-; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, s2
+; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 0
; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132DAGISEL-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX1132DAGISEL-NEXT: s_endpgm
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.atomic.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.atomic.buffer.load.ll
index eb2d95e4db2d5..e16ac07236e30 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.atomic.buffer.load.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.atomic.buffer.load.ll
@@ -38,14 +38,15 @@ define amdgpu_kernel void @struct_atomic_buffer_load_i32_const_idx(<4 x i32> %ad
; CHECK-LABEL: struct_atomic_buffer_load_i32_const_idx:
; CHECK: ; %bb.0: ; %bb
; CHECK-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
-; CHECK-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_and_b32 v0, 0x3ff, v0
+; CHECK-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; CHECK-NEXT: s_mov_b32 s4, 0
; CHECK-NEXT: .LBB1_1: ; %bb1
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: v_mov_b32_e32 v1, 15
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
-; CHECK-NEXT: buffer_load_b32 v2, v1, s[0:3], 0 idxen glc
+; CHECK-NEXT: buffer_load_b32 v1, v1, s[0:3], 0 idxen glc
; CHECK-NEXT: s_waitcnt vmcnt(0)
-; CHECK-NEXT: v_cmp_ne_u32_e32 vcc_lo, v2, v0
+; CHECK-NEXT: v_cmp_ne_u32_e32 vcc_lo, v1, v0
; CHECK-NEXT: s_or_b32 s4, vcc_lo, s4
; CHECK-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; CHECK-NEXT: s_and_not1_b32 exec_lo, exec_lo, s4
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.atomic.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.atomic.buffer.load.ll
index bc50b12b59049..cc34020e0a28a 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.atomic.buffer.load.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.atomic.buffer.load.ll
@@ -38,14 +38,15 @@ define amdgpu_kernel void @struct_ptr_atomic_buffer_load_i32_const_idx(ptr addrs
; CHECK-LABEL: struct_ptr_atomic_buffer_load_i32_const_idx:
; CHECK: ; %bb.0: ; %bb
; CHECK-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
-; CHECK-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_and_b32 v0, 0x3ff, v0
+; CHECK-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; CHECK-NEXT: s_mov_b32 s4, 0
; CHECK-NEXT: .LBB1_1: ; %bb1
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: v_mov_b32_e32 v1, 15
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
-; CHECK-NEXT: buffer_load_b32 v2, v1, s[0:3], 0 idxen glc
+; CHECK-NEXT: buffer_load_b32 v1, v1, s[0:3], 0 idxen glc
; CHECK-NEXT: s_waitcnt vmcnt(0)
-; CHECK-NEXT: v_cmp_ne_u32_e32 vcc_lo, v2, v0
+; CHECK-NEXT: v_cmp_ne_u32_e32 vcc_lo, v1, v0
; CHECK-NEXT: s_or_b32 s4, vcc_lo, s4
; CHECK-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; CHECK-NEXT: s_and_not1_b32 exec_lo, exec_lo, s4
diff --git a/llvm/test/CodeGen/AMDGPU/loop-prefetch-data.ll b/llvm/test/CodeGen/AMDGPU/loop-prefetch-data.ll
index dd77e575e7505..506753f8640e1 100644
--- a/llvm/test/CodeGen/AMDGPU/loop-prefetch-data.ll
+++ b/llvm/test/CodeGen/AMDGPU/loop-prefetch-data.ll
@@ -100,7 +100,7 @@ define amdgpu_kernel void @copy_constant(ptr addrspace(1) nocapture %d, ptr addr
; GCN-NEXT: s_cbranch_scc1 .LBB2_3
; GCN-NEXT: ; %bb.1: ; %for.body.preheader
; GCN-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
-; GCN-NEXT: v_mov_b32_e32 v0, 0
+; GCN-NEXT: v_mov_b32_e32 v4, 0
; GCN-NEXT: .LBB2_2: ; %for.body
; GCN-NEXT: ; =>This Inner Loop Header: Depth=1
; GCN-NEXT: s_wait_kmcnt 0x0
@@ -110,9 +110,9 @@ define amdgpu_kernel void @copy_constant(ptr addrspace(1) nocapture %d, ptr addr
; GCN-NEXT: s_add_nc_u64 s[2:3], s[2:3], 16
; GCN-NEXT: s_cmp_lg_u32 s6, 0
; GCN-NEXT: s_wait_kmcnt 0x0
-; GCN-NEXT: v_dual_mov_b32 v1, s8 :: v_dual_mov_b32 v2, s9
-; GCN-NEXT: v_dual_mov_b32 v3, s10 :: v_dual_mov_b32 v4, s11
-; GCN-NEXT: global_store_b128 v0, v[1:4], s[0:1]
+; GCN-NEXT: v_dual_mov_b32 v0, s8 :: v_dual_mov_b32 v1, s9
+; GCN-NEXT: v_dual_mov_b32 v2, s10 :: v_dual_mov_b32 v3, s11
+; GCN-NEXT: global_store_b128 v4, v[0:3], s[0:1]
; GCN-NEXT: s_add_nc_u64 s[0:1], s[0:1], 16
; GCN-NEXT: s_cbranch_scc1 .LBB2_2
; GCN-NEXT: .LBB2_3: ; %for.end
diff --git a/llvm/test/CodeGen/AMDGPU/no-dup-inst-prefetch.ll b/llvm/test/CodeGen/AMDGPU/no-dup-inst-prefetch.ll
index d62f045674ace..bf1ab9a07d5f6 100644
--- a/llvm/test/CodeGen/AMDGPU/no-dup-inst-prefetch.ll
+++ b/llvm/test/CodeGen/AMDGPU/no-dup-inst-prefetch.ll
@@ -54,7 +54,6 @@ define amdgpu_cs void @_amdgpu_cs_main(float %0, i32 %1) {
;
; GFX12-LABEL: _amdgpu_cs_main:
; GFX12: ; %bb.0: ; %branch1_true
-; GFX12-NEXT: v_mov_b32_e32 v2, 0
; GFX12-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1
; GFX12-NEXT: v_mov_b32_e32 v1, 0
; GFX12-NEXT: s_mov_b32 s4, 0
@@ -77,6 +76,7 @@ define amdgpu_cs void @_amdgpu_cs_main(float %0, i32 %1) {
; GFX12-NEXT: s_cbranch_execz .LBB0_1
; GFX12-NEXT: ; %bb.3: ; %branch2_merge
; GFX12-NEXT: ; in Loop: Header=BB0_2 Depth=1
+; GFX12-NEXT: v_mov_b32_e32 v2, 0
; GFX12-NEXT: s_mov_b32 s5, s4
; GFX12-NEXT: s_mov_b32 s6, s4
; GFX12-NEXT: s_mov_b32 s7, s4
diff --git a/llvm/test/CodeGen/AMDGPU/pal-metadata-3.0-callable.ll b/llvm/test/CodeGen/AMDGPU/pal-metadata-3.0-callable.ll
index 538ce15979de8..ffa0cf5b9b3e0 100644
--- a/llvm/test/CodeGen/AMDGPU/pal-metadata-3.0-callable.ll
+++ b/llvm/test/CodeGen/AMDGPU/pal-metadata-3.0-callable.ll
@@ -76,7 +76,7 @@
; CHECK-NEXT: .lds_size: 0
; CHECK-NEXT: .sgpr_count: 0x22
; CHECK-NEXT: .stack_frame_size_in_bytes: 0x10
-; CHECK-NEXT: .vgpr_count: 0x3
+; CHECK-NEXT: .vgpr_count: 0x2
; CHECK-NEXT: multiple_stack:
; CHECK-NEXT: .backend_stack_size: 0x24
; CHECK-NEXT: .lds_size: 0
>From 9f4968127217ba277d7ced9272a84e3ad980ae7d Mon Sep 17 00:00:00 2001
From: Lucas Ramirez <lucas.rami at proton.me>
Date: Sun, 9 Feb 2025 18:45:49 +0100
Subject: [PATCH 03/10] Fix broken unit test
---
llvm/test/CodeGen/AMDGPU/idiv-licm.ll | 14 +++++++-------
1 file changed, 7 insertions(+), 7 deletions(-)
diff --git a/llvm/test/CodeGen/AMDGPU/idiv-licm.ll b/llvm/test/CodeGen/AMDGPU/idiv-licm.ll
index 343780fde306f..5fab0c50bbe57 100644
--- a/llvm/test/CodeGen/AMDGPU/idiv-licm.ll
+++ b/llvm/test/CodeGen/AMDGPU/idiv-licm.ll
@@ -565,23 +565,22 @@ define amdgpu_kernel void @srem32_invariant_denom(ptr addrspace(1) nocapture %ar
;
; GFX11-LABEL: srem32_invariant_denom:
; GFX11: ; %bb.0: ; %bb
-; GFX11-NEXT: s_clause 0x1
-; GFX11-NEXT: s_load_b32 s2, s[4:5], 0x2c
-; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; GFX11-NEXT: s_load_b32 s0, s[4:5], 0x2c
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-NEXT: s_abs_i32 s2, s2
-; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-NEXT: s_abs_i32 s2, s0
+; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX11-NEXT: v_cvt_f32_u32_e32 v0, s2
; GFX11-NEXT: s_sub_i32 s3, 0, s2
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_rcp_iflag_f32_e32 v0, v0
; GFX11-NEXT: s_waitcnt_depctr 0xfff
; GFX11-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_cvt_u32_f32_e32 v0, v0
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX11-NEXT: v_readfirstlane_b32 s4, v0
; GFX11-NEXT: v_mov_b32_e32 v0, 0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX11-NEXT: s_mul_i32 s3, s3, s4
+; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: s_mul_hi_u32 s5, s4, s3
; GFX11-NEXT: s_mov_b32 s3, 0
; GFX11-NEXT: s_add_i32 s4, s4, s5
@@ -602,6 +601,7 @@ define amdgpu_kernel void @srem32_invariant_denom(ptr addrspace(1) nocapture %ar
; GFX11-NEXT: s_cselect_b32 s5, s6, s5
; GFX11-NEXT: s_add_i32 s3, s3, 1
; GFX11-NEXT: v_mov_b32_e32 v1, s5
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX11-NEXT: s_add_u32 s0, s0, 4
; GFX11-NEXT: s_addc_u32 s1, s1, 0
>From daf47238162a0f6e5465725cd24001b1854d46b9 Mon Sep 17 00:00:00 2001
From: Lucas Ramirez <lucas.rami at proton.me>
Date: Mon, 10 Feb 2025 13:43:44 +0000
Subject: [PATCH 04/10] Reintroduce missing tests and fix existing one
---
llvm/test/CodeGen/AMDGPU/idiv-licm.ll | 123 +-
.../machine-scheduler-sink-trivial-remats.mir | 1260 +++++++++++++++++
2 files changed, 1322 insertions(+), 61 deletions(-)
diff --git a/llvm/test/CodeGen/AMDGPU/idiv-licm.ll b/llvm/test/CodeGen/AMDGPU/idiv-licm.ll
index 5fab0c50bbe57..c71d080b066b0 100644
--- a/llvm/test/CodeGen/AMDGPU/idiv-licm.ll
+++ b/llvm/test/CodeGen/AMDGPU/idiv-licm.ll
@@ -565,22 +565,23 @@ define amdgpu_kernel void @srem32_invariant_denom(ptr addrspace(1) nocapture %ar
;
; GFX11-LABEL: srem32_invariant_denom:
; GFX11: ; %bb.0: ; %bb
-; GFX11-NEXT: s_load_b32 s0, s[4:5], 0x2c
-; GFX11-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-NEXT: s_abs_i32 s2, s0
+; GFX11-NEXT: s_clause 0x1
+; GFX11-NEXT: s_load_b32 s2, s[4:5], 0x2c
; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: s_abs_i32 s2, s2
+; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_cvt_f32_u32_e32 v0, s2
; GFX11-NEXT: s_sub_i32 s3, 0, s2
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_rcp_iflag_f32_e32 v0, v0
; GFX11-NEXT: s_waitcnt_depctr 0xfff
; GFX11-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_cvt_u32_f32_e32 v0, v0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX11-NEXT: v_readfirstlane_b32 s4, v0
; GFX11-NEXT: v_mov_b32_e32 v0, 0
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX11-NEXT: s_mul_i32 s3, s3, s4
-; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: s_mul_hi_u32 s5, s4, s3
; GFX11-NEXT: s_mov_b32 s3, 0
; GFX11-NEXT: s_add_i32 s4, s4, s5
@@ -601,7 +602,6 @@ define amdgpu_kernel void @srem32_invariant_denom(ptr addrspace(1) nocapture %ar
; GFX11-NEXT: s_cselect_b32 s5, s6, s5
; GFX11-NEXT: s_add_i32 s3, s3, 1
; GFX11-NEXT: v_mov_b32_e32 v1, s5
-; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX11-NEXT: s_add_u32 s0, s0, 4
; GFX11-NEXT: s_addc_u32 s1, s1, 0
@@ -694,31 +694,32 @@ define amdgpu_kernel void @udiv16_invariant_denom(ptr addrspace(1) nocapture %ar
; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_and_b32 s2, s2, 0xffff
-; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: v_cvt_f32_u32_e32 v0, s2
; GFX11-NEXT: s_mov_b32 s2, 0
-; GFX11-NEXT: v_rcp_iflag_f32_e32 v1, v0
; GFX11-NEXT: .p2align 6
; GFX11-NEXT: .LBB4_1: ; %bb3
; GFX11-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(SALU_CYCLE_1)
+; GFX11-NEXT: v_rcp_iflag_f32_e32 v1, v0
; GFX11-NEXT: s_and_b32 s3, 0xffff, s2
; GFX11-NEXT: s_add_i32 s2, s2, 1
; GFX11-NEXT: v_cvt_f32_u32_e32 v2, s3
; GFX11-NEXT: s_lshl_b32 s3, s3, 1
-; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_4) | instid1(VALU_DEP_1)
-; GFX11-NEXT: v_mov_b32_e32 v4, s3
+; GFX11-NEXT: v_mov_b32_e32 v3, s3
; GFX11-NEXT: s_and_b32 s3, s2, 0xffff
-; GFX11-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-NEXT: v_mul_f32_e32 v3, v2, v1
+; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
; GFX11-NEXT: s_cmpk_eq_i32 s3, 0x400
-; GFX11-NEXT: v_trunc_f32_e32 v3, v3
+; GFX11-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-NEXT: v_mul_f32_e32 v1, v2, v1
+; GFX11-NEXT: v_trunc_f32_e32 v1, v1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_fma_f32 v2, -v3, v0, v2
-; GFX11-NEXT: v_cvt_u32_f32_e32 v3, v3
+; GFX11-NEXT: v_fma_f32 v2, -v1, v0, v2
+; GFX11-NEXT: v_cvt_u32_f32_e32 v1, v1
; GFX11-NEXT: v_cmp_ge_f32_e64 vcc_lo, |v2|, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NEXT: v_add_co_ci_u32_e32 v2, vcc_lo, 0, v3, vcc_lo
-; GFX11-NEXT: global_store_b16 v4, v2, s[0:1]
+; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX11-NEXT: global_store_b16 v3, v1, s[0:1]
; GFX11-NEXT: s_cbranch_scc0 .LBB4_1
; GFX11-NEXT: ; %bb.2: ; %bb2
; GFX11-NEXT: s_endpgm
@@ -812,33 +813,34 @@ define amdgpu_kernel void @urem16_invariant_denom(ptr addrspace(1) nocapture %ar
; GFX11-NEXT: s_mov_b32 s3, 0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_and_b32 s2, s2, 0xffff
-; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: v_cvt_f32_u32_e32 v0, s2
-; GFX11-NEXT: v_rcp_iflag_f32_e32 v1, v0
; GFX11-NEXT: .p2align 6
; GFX11-NEXT: .LBB5_1: ; %bb3
; GFX11-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_rcp_iflag_f32_e32 v1, v0
; GFX11-NEXT: s_and_b32 s4, 0xffff, s3
; GFX11-NEXT: s_add_i32 s3, s3, 1
; GFX11-NEXT: v_cvt_f32_u32_e32 v2, s4
; GFX11-NEXT: s_lshl_b32 s5, s4, 1
; GFX11-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-NEXT: v_mul_f32_e32 v3, v2, v1
+; GFX11-NEXT: v_mul_f32_e32 v1, v2, v1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT: v_trunc_f32_e32 v3, v3
-; GFX11-NEXT: v_fma_f32 v2, -v3, v0, v2
-; GFX11-NEXT: v_cvt_u32_f32_e32 v3, v3
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT: v_trunc_f32_e32 v1, v1
+; GFX11-NEXT: v_fma_f32 v2, -v1, v0, v2
+; GFX11-NEXT: v_cvt_u32_f32_e32 v1, v1
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3)
; GFX11-NEXT: v_cmp_ge_f32_e64 vcc_lo, |v2|, v0
-; GFX11-NEXT: v_add_co_ci_u32_e32 v2, vcc_lo, 0, v3, vcc_lo
-; GFX11-NEXT: v_mov_b32_e32 v3, s5
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT: v_mul_lo_u32 v2, v2, s2
-; GFX11-NEXT: v_sub_nc_u32_e32 v2, s4, v2
+; GFX11-NEXT: v_mov_b32_e32 v2, s5
+; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_mul_lo_u32 v1, v1, s2
+; GFX11-NEXT: v_sub_nc_u32_e32 v1, s4, v1
; GFX11-NEXT: s_and_b32 s4, s3, 0xffff
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: s_cmpk_eq_i32 s4, 0x400
-; GFX11-NEXT: global_store_b16 v3, v2, s[0:1]
+; GFX11-NEXT: global_store_b16 v2, v1, s[0:1]
; GFX11-NEXT: s_cbranch_scc0 .LBB5_1
; GFX11-NEXT: ; %bb.2: ; %bb2
; GFX11-NEXT: s_endpgm
@@ -940,38 +942,37 @@ define amdgpu_kernel void @sdiv16_invariant_denom(ptr addrspace(1) nocapture %ar
; GFX11-NEXT: s_mov_b32 s3, 0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_sext_i32_i16 s2, s2
-; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: v_cvt_f32_i32_e32 v0, s2
-; GFX11-NEXT: v_rcp_iflag_f32_e32 v1, v0
; GFX11-NEXT: .p2align 6
; GFX11-NEXT: .LBB6_1: ; %bb3
; GFX11-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
+; GFX11-NEXT: v_rcp_iflag_f32_e32 v1, v0
; GFX11-NEXT: s_sext_i32_i16 s4, s3
-; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX11-NEXT: v_cvt_f32_i32_e32 v2, s4
; GFX11-NEXT: s_xor_b32 s4, s4, s2
+; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX11-NEXT: s_ashr_i32 s4, s4, 30
-; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
; GFX11-NEXT: s_or_b32 s4, s4, 1
; GFX11-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-NEXT: v_mul_f32_e32 v3, v2, v1
-; GFX11-NEXT: v_trunc_f32_e32 v3, v3
+; GFX11-NEXT: v_mul_f32_e32 v1, v2, v1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT: v_fma_f32 v2, -v3, v0, v2
+; GFX11-NEXT: v_trunc_f32_e32 v1, v1
+; GFX11-NEXT: v_fma_f32 v2, -v1, v0, v2
+; GFX11-NEXT: v_cvt_i32_f32_e32 v1, v1
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_cmp_ge_f32_e64 s5, |v2|, |v0|
-; GFX11-NEXT: v_cvt_i32_f32_e32 v2, v3
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
; GFX11-NEXT: s_and_b32 s5, s5, exec_lo
; GFX11-NEXT: s_cselect_b32 s4, s4, 0
; GFX11-NEXT: s_and_b32 s5, 0xffff, s3
-; GFX11-NEXT: v_add_nc_u32_e32 v2, s4, v2
-; GFX11-NEXT: s_lshl_b32 s5, s5, 1
; GFX11-NEXT: s_add_i32 s3, s3, 1
-; GFX11-NEXT: v_mov_b32_e32 v3, s5
+; GFX11-NEXT: s_lshl_b32 s5, s5, 1
+; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
+; GFX11-NEXT: v_dual_mov_b32 v2, s5 :: v_dual_add_nc_u32 v1, s4, v1
; GFX11-NEXT: s_and_b32 s4, s3, 0xffff
-; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: s_cmpk_eq_i32 s4, 0x400
-; GFX11-NEXT: global_store_b16 v3, v2, s[0:1]
+; GFX11-NEXT: global_store_b16 v2, v1, s[0:1]
; GFX11-NEXT: s_cbranch_scc0 .LBB6_1
; GFX11-NEXT: ; %bb.2: ; %bb2
; GFX11-NEXT: s_endpgm
@@ -1077,42 +1078,42 @@ define amdgpu_kernel void @srem16_invariant_denom(ptr addrspace(1) nocapture %ar
; GFX11-NEXT: s_mov_b32 s3, 0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_sext_i32_i16 s2, s2
-; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: v_cvt_f32_i32_e32 v0, s2
-; GFX11-NEXT: v_rcp_iflag_f32_e32 v1, v0
; GFX11-NEXT: .p2align 6
; GFX11-NEXT: .LBB7_1: ; %bb3
; GFX11-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
+; GFX11-NEXT: v_rcp_iflag_f32_e32 v1, v0
; GFX11-NEXT: s_sext_i32_i16 s4, s3
-; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX11-NEXT: v_cvt_f32_i32_e32 v2, s4
; GFX11-NEXT: s_xor_b32 s5, s4, s2
+; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX11-NEXT: s_ashr_i32 s5, s5, 30
-; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
; GFX11-NEXT: s_or_b32 s5, s5, 1
; GFX11-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-NEXT: v_mul_f32_e32 v3, v2, v1
-; GFX11-NEXT: v_trunc_f32_e32 v3, v3
+; GFX11-NEXT: v_mul_f32_e32 v1, v2, v1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT: v_fma_f32 v2, -v3, v0, v2
+; GFX11-NEXT: v_trunc_f32_e32 v1, v1
+; GFX11-NEXT: v_fma_f32 v2, -v1, v0, v2
+; GFX11-NEXT: v_cvt_i32_f32_e32 v1, v1
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_cmp_ge_f32_e64 s6, |v2|, |v0|
-; GFX11-NEXT: v_cvt_i32_f32_e32 v2, v3
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-NEXT: s_and_b32 s6, s6, exec_lo
; GFX11-NEXT: s_cselect_b32 s5, s5, 0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
-; GFX11-NEXT: v_add_nc_u32_e32 v2, s5, v2
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instid1(SALU_CYCLE_1)
+; GFX11-NEXT: v_add_nc_u32_e32 v1, s5, v1
; GFX11-NEXT: s_and_b32 s5, 0xffff, s3
; GFX11-NEXT: s_add_i32 s3, s3, 1
; GFX11-NEXT: s_lshl_b32 s5, s5, 1
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_mul_lo_u32 v2, v2, s2
-; GFX11-NEXT: v_mov_b32_e32 v3, s5
-; GFX11-NEXT: v_sub_nc_u32_e32 v2, s4, v2
+; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_mov_b32_e32 v2, s5
+; GFX11-NEXT: v_mul_lo_u32 v1, v1, s2
+; GFX11-NEXT: v_sub_nc_u32_e32 v1, s4, v1
; GFX11-NEXT: s_and_b32 s4, s3, 0xffff
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: s_cmpk_eq_i32 s4, 0x400
-; GFX11-NEXT: global_store_b16 v3, v2, s[0:1]
+; GFX11-NEXT: global_store_b16 v2, v1, s[0:1]
; GFX11-NEXT: s_cbranch_scc0 .LBB7_1
; GFX11-NEXT: ; %bb.2: ; %bb2
; GFX11-NEXT: s_endpgm
diff --git a/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats.mir b/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats.mir
index e5a68dd2e26ef..c3c3cb0bf94ec 100644
--- a/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats.mir
+++ b/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats.mir
@@ -7446,6 +7446,7 @@ body: |
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_8]], implicit [[V_CVT_I32_F32_e32_16]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_17]], implicit [[V_CVT_I32_F32_e32_18]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_19]], implicit [[V_CVT_I32_F32_e32_20]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_19]], implicit [[V_CVT_I32_F32_e32_20]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_21]], implicit [[V_CVT_I32_F32_e32_22]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_23]], implicit [[V_CVT_I32_F32_e32_24]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_25]], implicit [[V_CVT_I32_F32_e32_26]]
@@ -7544,6 +7545,7 @@ body: |
S_NOP 0, implicit %59, implicit %69
S_NOP 0, implicit %70, implicit %71
S_NOP 0, implicit %72, implicit %73
+ S_NOP 0, implicit %72, implicit %73
S_NOP 0, implicit %74, implicit %75
S_NOP 0, implicit %76, implicit %77
S_NOP 0, implicit %78, implicit %79
@@ -7551,4 +7553,1262 @@ body: |
$exec = S_MOV_B64 %101:sreg_64_xexec
S_ENDPGM 0
...
+---
+name: remat_virtual_vgpr_occ_6
+tracksRegLiveness: true
+machineFunctionInfo:
+ isEntryFunction: true
+body: |
+ ; GFX908-LABEL: name: remat_virtual_vgpr_occ_6
+ ; GFX908: bb.0:
+ ; GFX908-NEXT: successors: %bb.1(0x80000000)
+ ; GFX908-NEXT: liveins: $vgpr0, $sgpr0_sgpr1
+ ; GFX908-NEXT: {{ $}}
+ ; GFX908-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr0_sgpr1
+ ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+ ; GFX908-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 52, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4)
+ ; GFX908-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF4:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF5:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF7:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF10:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF11:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF13:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF14:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF15:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF17:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF18:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: dead [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF18]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF19:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF20:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF1]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_3:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF2]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_4:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF3]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_5:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF4]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_6:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF5]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_7:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF6]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_8:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF7]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_9:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF8]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_10:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF9]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_11:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF10]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_12:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF11]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_13:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF12]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_14:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF13]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_15:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF14]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_16:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF15]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_17:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF16]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_18:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF17]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_19:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF19]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_20:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF20]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF21:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_21:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF21]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF22:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF23:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF24:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF25:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF26:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF27:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF28:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF29:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF30:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF31:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF28]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF29]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF30]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF31]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: undef [[S_MOV_B32_:%[0-9]+]].sub1:sreg_64 = S_MOV_B32 0
+ ; GFX908-NEXT: dead [[V_CMP_GT_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_GT_U32_e64 [[S_LOAD_DWORDX2_IMM]].sub0, [[COPY1]](s32), implicit $exec
+ ; GFX908-NEXT: dead undef [[S_MOV_B32_:%[0-9]+]].sub0:sreg_64 = COPY [[S_LOAD_DWORDX2_IMM]].sub1
+ ; GFX908-NEXT: S_BRANCH %bb.1
+ ; GFX908-NEXT: {{ $}}
+ ; GFX908-NEXT: bb.1:
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_26:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF22]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF27]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_26]], implicit [[V_CVT_I32_F32_e32_27]], implicit [[DEF22]], implicit [[DEF27]]
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF23]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_28]], implicit [[V_CVT_I32_F32_e32_22]], implicit [[DEF23]], implicit [[DEF28]]
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF24]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF25]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF26]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_29]], implicit [[V_CVT_I32_F32_e32_23]], implicit [[DEF24]], implicit [[DEF29]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_30]], implicit [[V_CVT_I32_F32_e32_24]], implicit [[DEF25]], implicit [[DEF30]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_31]], implicit [[V_CVT_I32_F32_e32_25]], implicit [[DEF26]], implicit [[DEF31]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_1]], implicit [[V_CVT_I32_F32_e32_6]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_2]], implicit [[V_CVT_I32_F32_e32_7]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_3]], implicit [[V_CVT_I32_F32_e32_8]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_4]], implicit [[V_CVT_I32_F32_e32_9]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_5]], implicit [[V_CVT_I32_F32_e32_10]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_11]], implicit [[V_CVT_I32_F32_e32_12]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_13]], implicit [[V_CVT_I32_F32_e32_14]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_13]], implicit [[V_CVT_I32_F32_e32_14]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_15]], implicit [[V_CVT_I32_F32_e32_16]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_17]], implicit [[V_CVT_I32_F32_e32_18]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_19]], implicit [[V_CVT_I32_F32_e32_20]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_21]]
+ ; GFX908-NEXT: S_ENDPGM 0
+ bb.0:
+ liveins: $vgpr0, $sgpr0_sgpr1
+
+ %1:sgpr_64(p4) = COPY $sgpr0_sgpr1
+ %2:vgpr_32(s32) = COPY $vgpr0
+ %3:sreg_64_xexec = S_LOAD_DWORDX2_IMM %1(p4), 52, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4)
+ %4:sreg_64 = V_CMP_GT_U32_e64 %3.sub0, %2(s32), implicit $exec
+ undef %5.sub1:sreg_64 = S_MOV_B32 0
+ %5.sub0:sreg_64 = COPY %3.sub1
+ %10:vgpr_32 = IMPLICIT_DEF
+ %11:vgpr_32 = IMPLICIT_DEF
+ %12:vgpr_32 = IMPLICIT_DEF
+ %13:vgpr_32 = IMPLICIT_DEF
+ %14:vgpr_32 = IMPLICIT_DEF
+ %15:vgpr_32 = IMPLICIT_DEF
+ %16:vgpr_32 = IMPLICIT_DEF
+ %17:vgpr_32 = IMPLICIT_DEF
+ %18:vgpr_32 = IMPLICIT_DEF
+ %19:vgpr_32 = IMPLICIT_DEF
+ %20:vgpr_32 = IMPLICIT_DEF
+ %21:vgpr_32 = IMPLICIT_DEF
+ %22:vgpr_32 = IMPLICIT_DEF
+ %23:vgpr_32 = IMPLICIT_DEF
+ %24:vgpr_32 = IMPLICIT_DEF
+ %25:vgpr_32 = IMPLICIT_DEF
+ %26:vgpr_32 = IMPLICIT_DEF
+ %27:vgpr_32 = IMPLICIT_DEF
+ %28:vgpr_32 = IMPLICIT_DEF
+ %29:vgpr_32 = IMPLICIT_DEF
+ %30:vgpr_32 = IMPLICIT_DEF
+ %31:vgpr_32 = IMPLICIT_DEF
+ %32:vgpr_32 = IMPLICIT_DEF
+ %33:vgpr_32 = IMPLICIT_DEF
+ %34:vgpr_32 = IMPLICIT_DEF
+ %35:vgpr_32 = IMPLICIT_DEF
+ %36:vgpr_32 = IMPLICIT_DEF
+ %37:vgpr_32 = IMPLICIT_DEF
+ %38:vgpr_32 = IMPLICIT_DEF
+ %39:vgpr_32 = IMPLICIT_DEF
+ %40:vgpr_32 = IMPLICIT_DEF
+ %41:vgpr_32 = IMPLICIT_DEF
+ %50:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %10, implicit $exec, implicit $mode
+ %51:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %11, implicit $exec, implicit $mode
+ %52:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %12, implicit $exec, implicit $mode
+ %53:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %13, implicit $exec, implicit $mode
+ %54:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %14, implicit $exec, implicit $mode
+ %55:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %15, implicit $exec, implicit $mode
+ %56:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %16, implicit $exec, implicit $mode
+ %57:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %17, implicit $exec, implicit $mode
+ %58:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %18, implicit $exec, implicit $mode
+ %59:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %19, implicit $exec, implicit $mode
+ %60:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %20, implicit $exec, implicit $mode
+ %61:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %21, implicit $exec, implicit $mode
+ %62:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %22, implicit $exec, implicit $mode
+ %63:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %23, implicit $exec, implicit $mode
+ %64:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %24, implicit $exec, implicit $mode
+ %65:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %25, implicit $exec, implicit $mode
+ %66:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %26, implicit $exec, implicit $mode
+ %67:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %27, implicit $exec, implicit $mode
+ %68:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %28, implicit $exec, implicit $mode
+ %69:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %29, implicit $exec, implicit $mode
+ %70:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %30, implicit $exec, implicit $mode
+ %71:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %31, implicit $exec, implicit $mode
+ %72:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %32, implicit $exec, implicit $mode
+ %73:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %33, implicit $exec, implicit $mode
+ %74:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %34, implicit $exec, implicit $mode
+ %75:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %35, implicit $exec, implicit $mode
+ %76:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %36, implicit $exec, implicit $mode
+ %77:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %37, implicit $exec, implicit $mode
+ %78:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %38, implicit $exec, implicit $mode
+ %79:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %39, implicit $exec, implicit $mode
+ %80:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %40, implicit $exec, implicit $mode
+ %81:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %41, implicit $exec, implicit $mode
+ S_BRANCH %bb.4
+
+ bb.4:
+ S_NOP 0, implicit %50, implicit %60, implicit %10, implicit %20
+ S_NOP 0, implicit %51, implicit %61, implicit %11, implicit %21
+ S_NOP 0, implicit %52, implicit %62, implicit %12, implicit %22
+ S_NOP 0, implicit %53, implicit %63, implicit %13, implicit %23
+ S_NOP 0, implicit %54, implicit %64, implicit %14, implicit %24
+ S_NOP 0, implicit %55, implicit %65
+ S_NOP 0, implicit %56, implicit %66
+ S_NOP 0, implicit %57, implicit %67
+ S_NOP 0, implicit %58, implicit %68
+ S_NOP 0, implicit %59, implicit %69
+ S_NOP 0, implicit %70, implicit %71
+ S_NOP 0, implicit %72, implicit %73
+ S_NOP 0, implicit %72, implicit %73
+ S_NOP 0, implicit %74, implicit %75
+ S_NOP 0, implicit %76, implicit %77
+ S_NOP 0, implicit %78, implicit %79
+ S_NOP 0, implicit %80
+ S_ENDPGM 0
+...
+
+---
+# Scheduler ArchVGPR rematerialization test. bb.0 creates heavy VGPR pressure:
+# 32 vgpr_32 IMPLICIT_DEFs (%10-%41) each converted by V_CVT_I32_F32 (%50-%81),
+# and bb.4 consumes both the converts and, for five S_NOPs, the original defs.
+# Judging by the test name, this exercises the case where a rematerialization
+# candidate's uses are NOT available (live) at the target point, so those
+# candidates must not be sunk/rematted. NOTE(review): the GFX908 CHECK lines
+# are auto-generated (update_mir_test_checks.py style) -- regenerate rather
+# than hand-edit them.
+name: remat_virtual_vgpr_uses_not_available
+tracksRegLiveness: true
+machineFunctionInfo:
+ isEntryFunction: true
+body: |
+ ; GFX908-LABEL: name: remat_virtual_vgpr_uses_not_available
+ ; GFX908: bb.0:
+ ; GFX908-NEXT: successors: %bb.1(0x80000000)
+ ; GFX908-NEXT: liveins: $vgpr0, $sgpr0_sgpr1
+ ; GFX908-NEXT: {{ $}}
+ ; GFX908-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr0_sgpr1
+ ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+ ; GFX908-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 52, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4)
+ ; GFX908-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF4:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF5:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF7:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF10:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF11:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF13:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF14:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF15:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF17:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF18:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: dead [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF18]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF19:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF20:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF1]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_3:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF2]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_4:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF3]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_5:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF4]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_6:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF5]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_7:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF6]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_8:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF7]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_9:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF8]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_10:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF9]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_11:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF10]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_12:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF11]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_13:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF12]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_14:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF13]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_15:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF14]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_16:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF15]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_17:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF16]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_18:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF17]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_19:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF19]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_20:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF20]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF21:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_21:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF21]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF22:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF23:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF24:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF25:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF26:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF27:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF28:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF29:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF30:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF31:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF22]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF23]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF24]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF25]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_26:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF27]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF28]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF29]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF30]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: undef [[S_MOV_B32_:%[0-9]+]].sub1:sreg_64 = S_MOV_B32 0
+ ; GFX908-NEXT: dead [[V_CMP_GT_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_GT_U32_e64 [[S_LOAD_DWORDX2_IMM]].sub0, [[COPY1]](s32), implicit $exec
+ ; GFX908-NEXT: dead undef [[S_MOV_B32_:%[0-9]+]].sub0:sreg_64 = COPY [[S_LOAD_DWORDX2_IMM]].sub1
+ ; GFX908-NEXT: S_BRANCH %bb.1
+ ; GFX908-NEXT: {{ $}}
+ ; GFX908-NEXT: bb.1:
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF26]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF31]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_30]], implicit [[V_CVT_I32_F32_e32_31]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_1]], implicit [[V_CVT_I32_F32_e32_6]], implicit [[DEF22]], implicit [[DEF27]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_22]], implicit [[V_CVT_I32_F32_e32_26]], implicit [[DEF23]], implicit [[DEF28]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_23]], implicit [[V_CVT_I32_F32_e32_27]], implicit [[DEF24]], implicit [[DEF29]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_24]], implicit [[V_CVT_I32_F32_e32_28]], implicit [[DEF25]], implicit [[DEF30]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_25]], implicit [[V_CVT_I32_F32_e32_29]], implicit [[DEF26]], implicit [[DEF31]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_2]], implicit [[V_CVT_I32_F32_e32_7]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_3]], implicit [[V_CVT_I32_F32_e32_8]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_4]], implicit [[V_CVT_I32_F32_e32_9]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_5]], implicit [[V_CVT_I32_F32_e32_10]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_11]], implicit [[V_CVT_I32_F32_e32_12]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_13]], implicit [[V_CVT_I32_F32_e32_14]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_13]], implicit [[V_CVT_I32_F32_e32_14]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_15]], implicit [[V_CVT_I32_F32_e32_16]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_17]], implicit [[V_CVT_I32_F32_e32_18]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_19]], implicit [[V_CVT_I32_F32_e32_20]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_21]]
+ ; GFX908-NEXT: S_ENDPGM 0
+ ; Hand-written input MIR follows; the GFX908 lines above are the
+ ; auto-generated FileCheck expectations for the scheduled output.
+ bb.0:
+ liveins: $vgpr0, $sgpr0_sgpr1
+ %1:sgpr_64(p4) = COPY $sgpr0_sgpr1
+ %2:vgpr_32(s32) = COPY $vgpr0
+ %3:sreg_64_xexec = S_LOAD_DWORDX2_IMM %1(p4), 52, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4)
+ %4:sreg_64 = V_CMP_GT_U32_e64 %3.sub0, %2(s32), implicit $exec
+ undef %5.sub1:sreg_64 = S_MOV_B32 0
+ %5.sub0:sreg_64 = COPY %3.sub1
+ %10:vgpr_32 = IMPLICIT_DEF
+ %11:vgpr_32 = IMPLICIT_DEF
+ %12:vgpr_32 = IMPLICIT_DEF
+ %13:vgpr_32 = IMPLICIT_DEF
+ %14:vgpr_32 = IMPLICIT_DEF
+ %15:vgpr_32 = IMPLICIT_DEF
+ %16:vgpr_32 = IMPLICIT_DEF
+ %17:vgpr_32 = IMPLICIT_DEF
+ %18:vgpr_32 = IMPLICIT_DEF
+ %19:vgpr_32 = IMPLICIT_DEF
+ %20:vgpr_32 = IMPLICIT_DEF
+ %21:vgpr_32 = IMPLICIT_DEF
+ %22:vgpr_32 = IMPLICIT_DEF
+ %23:vgpr_32 = IMPLICIT_DEF
+ %24:vgpr_32 = IMPLICIT_DEF
+ %25:vgpr_32 = IMPLICIT_DEF
+ %26:vgpr_32 = IMPLICIT_DEF
+ %27:vgpr_32 = IMPLICIT_DEF
+ %28:vgpr_32 = IMPLICIT_DEF
+ %29:vgpr_32 = IMPLICIT_DEF
+ %30:vgpr_32 = IMPLICIT_DEF
+ %31:vgpr_32 = IMPLICIT_DEF
+ %32:vgpr_32 = IMPLICIT_DEF
+ %33:vgpr_32 = IMPLICIT_DEF
+ %34:vgpr_32 = IMPLICIT_DEF
+ %35:vgpr_32 = IMPLICIT_DEF
+ %36:vgpr_32 = IMPLICIT_DEF
+ %37:vgpr_32 = IMPLICIT_DEF
+ %38:vgpr_32 = IMPLICIT_DEF
+ %39:vgpr_32 = IMPLICIT_DEF
+ %40:vgpr_32 = IMPLICIT_DEF
+ %41:vgpr_32 = IMPLICIT_DEF
+ %50:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %10, implicit $exec, implicit $mode
+ %51:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %11, implicit $exec, implicit $mode
+ %52:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %12, implicit $exec, implicit $mode
+ %53:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %13, implicit $exec, implicit $mode
+ %54:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %14, implicit $exec, implicit $mode
+ %55:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %15, implicit $exec, implicit $mode
+ %56:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %16, implicit $exec, implicit $mode
+ %57:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %17, implicit $exec, implicit $mode
+ %58:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %18, implicit $exec, implicit $mode
+ %59:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %19, implicit $exec, implicit $mode
+ %60:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %20, implicit $exec, implicit $mode
+ %61:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %21, implicit $exec, implicit $mode
+ %62:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %22, implicit $exec, implicit $mode
+ %63:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %23, implicit $exec, implicit $mode
+ %64:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %24, implicit $exec, implicit $mode
+ %65:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %25, implicit $exec, implicit $mode
+ %66:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %26, implicit $exec, implicit $mode
+ %67:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %27, implicit $exec, implicit $mode
+ %68:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %28, implicit $exec, implicit $mode
+ %69:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %29, implicit $exec, implicit $mode
+ %70:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %30, implicit $exec, implicit $mode
+ %71:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %31, implicit $exec, implicit $mode
+ %72:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %32, implicit $exec, implicit $mode
+ %73:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %33, implicit $exec, implicit $mode
+ %74:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %34, implicit $exec, implicit $mode
+ %75:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %35, implicit $exec, implicit $mode
+ %76:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %36, implicit $exec, implicit $mode
+ %77:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %37, implicit $exec, implicit $mode
+ %78:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %38, implicit $exec, implicit $mode
+ %79:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %39, implicit $exec, implicit $mode
+ %80:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %40, implicit $exec, implicit $mode
+ %81:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %41, implicit $exec, implicit $mode
+ S_BRANCH %bb.4
+
+ bb.4:
+ S_NOP 0, implicit %55, implicit %65
+ S_NOP 0, implicit %50, implicit %60, implicit %11, implicit %21
+ S_NOP 0, implicit %51, implicit %61, implicit %12, implicit %22
+ S_NOP 0, implicit %52, implicit %62, implicit %13, implicit %23
+ S_NOP 0, implicit %53, implicit %63, implicit %14, implicit %24
+ S_NOP 0, implicit %54, implicit %64, implicit %15, implicit %25
+ S_NOP 0, implicit %56, implicit %66
+ S_NOP 0, implicit %57, implicit %67
+ S_NOP 0, implicit %58, implicit %68
+ S_NOP 0, implicit %59, implicit %69
+ S_NOP 0, implicit %70, implicit %71
+ ; NOTE(review): the next S_NOP is repeated verbatim; mirrored in the CHECK
+ ; lines above, so presumably intentional (extra uses) -- confirm with author.
+ S_NOP 0, implicit %72, implicit %73
+ S_NOP 0, implicit %72, implicit %73
+ S_NOP 0, implicit %74, implicit %75
+ S_NOP 0, implicit %76, implicit %77
+ S_NOP 0, implicit %78, implicit %79
+ S_NOP 0, implicit %80
+ S_ENDPGM 0
+...
+
<parameter>+---
+# Same rematerialization scenario as above, but two of the converted values
+# (%50, %60) read subregisters (.sub0/.sub2) of a vreg_512 tuple %10 rather
+# than plain vgpr_32 defs, so the scheduler must handle subreg-reading remat
+# candidates. The "_occ_6" suffix presumably refers to a target occupancy of
+# 6 for this test -- NOTE(review): confirm against the RUN lines at the top
+# of the file (outside this hunk). CHECK lines are auto-generated.
+name: remat_virtual_vgpr_subreg_occ_6
+tracksRegLiveness: true
+machineFunctionInfo:
+ isEntryFunction: true
+body: |
+ ; GFX908-LABEL: name: remat_virtual_vgpr_subreg_occ_6
+ ; GFX908: bb.0:
+ ; GFX908-NEXT: successors: %bb.1(0x80000000)
+ ; GFX908-NEXT: liveins: $vgpr0, $sgpr0_sgpr1
+ ; GFX908-NEXT: {{ $}}
+ ; GFX908-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr0_sgpr1
+ ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+ ; GFX908-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 52, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4)
+ ; GFX908-NEXT: undef [[S_MOV_B32_:%[0-9]+]].sub1:sreg_64 = S_MOV_B32 0
+ ; GFX908-NEXT: undef [[DEF:%[0-9]+]].sub0:vreg_512 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF:%[0-9]+]].sub1:vreg_512 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF:%[0-9]+]].sub2:vreg_512 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF:%[0-9]+]].sub3:vreg_512 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: dead [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF3]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF4:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF5:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: dead [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: dead [[V_CMP_GT_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_GT_U32_e64 [[S_LOAD_DWORDX2_IMM]].sub0, [[COPY1]](s32), implicit $exec
+ ; GFX908-NEXT: dead undef [[S_MOV_B32_:%[0-9]+]].sub0:sreg_64 = COPY [[S_LOAD_DWORDX2_IMM]].sub1
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF1]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF2]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_3:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF4]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_4:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF5]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF7:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_5:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF7]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_6:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF8]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_7:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF9]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF10:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_8:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF10]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF11:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_9:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF11]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_10:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF12]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF13:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_11:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF13]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF14:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_12:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF14]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF15:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_13:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF15]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_14:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF16]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF17:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_15:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF17]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF18:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_16:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF18]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF19:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_17:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF19]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF20:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_18:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF20]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF21:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_19:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF21]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF22:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_20:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF22]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF23:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_21:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF23]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF24:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF25:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF26:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF27:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF]].sub2, implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF24]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF25]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF26]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_26:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF27]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF28:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF29:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF30:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF31:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: S_BRANCH %bb.1
+ ; GFX908-NEXT: {{ $}}
+ ; GFX908-NEXT: bb.1:
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF]].sub0, implicit $exec, implicit $mode
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_27]], implicit [[V_CVT_I32_F32_e32_22]], implicit [[DEF]].sub0, implicit [[DEF]].sub2
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF28]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF29]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_28]], implicit [[V_CVT_I32_F32_e32_23]], implicit [[DEF28]], implicit [[DEF24]]
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF30]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF31]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_29]], implicit [[V_CVT_I32_F32_e32_24]], implicit [[DEF29]], implicit [[DEF25]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_30]], implicit [[V_CVT_I32_F32_e32_25]], implicit [[DEF30]], implicit [[DEF26]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_31]], implicit [[V_CVT_I32_F32_e32_26]], implicit [[DEF31]], implicit [[DEF27]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_1]], implicit [[V_CVT_I32_F32_e32_6]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_2]], implicit [[V_CVT_I32_F32_e32_7]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_3]], implicit [[V_CVT_I32_F32_e32_8]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_4]], implicit [[V_CVT_I32_F32_e32_9]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_5]], implicit [[V_CVT_I32_F32_e32_10]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_11]], implicit [[V_CVT_I32_F32_e32_12]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_13]], implicit [[V_CVT_I32_F32_e32_14]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_13]], implicit [[V_CVT_I32_F32_e32_14]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_15]], implicit [[V_CVT_I32_F32_e32_16]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_17]], implicit [[V_CVT_I32_F32_e32_18]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_19]], implicit [[V_CVT_I32_F32_e32_20]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_21]]
+ ; GFX908-NEXT: S_ENDPGM 0
+ ; Hand-written input MIR follows; the GFX908 lines above are the
+ ; auto-generated FileCheck expectations for the scheduled output.
+ bb.0:
+ liveins: $vgpr0, $sgpr0_sgpr1
+
+ %1:sgpr_64(p4) = COPY $sgpr0_sgpr1
+ %2:vgpr_32(s32) = COPY $vgpr0
+ %3:sreg_64_xexec = S_LOAD_DWORDX2_IMM %1(p4), 52, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4)
+ %4:sreg_64 = V_CMP_GT_U32_e64 %3.sub0, %2(s32), implicit $exec
+ undef %5.sub1:sreg_64 = S_MOV_B32 0
+ %5.sub0:sreg_64 = COPY %3.sub1
+ undef %10.sub0:vreg_512 = IMPLICIT_DEF
+ %10.sub1:vreg_512 = IMPLICIT_DEF
+ %10.sub2:vreg_512 = IMPLICIT_DEF
+ %10.sub3:vreg_512 = IMPLICIT_DEF
+ %11:vgpr_32 = IMPLICIT_DEF
+ %12:vgpr_32 = IMPLICIT_DEF
+ %13:vgpr_32 = IMPLICIT_DEF
+ %14:vgpr_32 = IMPLICIT_DEF
+ %15:vgpr_32 = IMPLICIT_DEF
+ %16:vgpr_32 = IMPLICIT_DEF
+ %17:vgpr_32 = IMPLICIT_DEF
+ %18:vgpr_32 = IMPLICIT_DEF
+ %19:vgpr_32 = IMPLICIT_DEF
+ %20:vgpr_32 = IMPLICIT_DEF
+ %21:vgpr_32 = IMPLICIT_DEF
+ %22:vgpr_32 = IMPLICIT_DEF
+ %23:vgpr_32 = IMPLICIT_DEF
+ %24:vgpr_32 = IMPLICIT_DEF
+ %25:vgpr_32 = IMPLICIT_DEF
+ %26:vgpr_32 = IMPLICIT_DEF
+ %27:vgpr_32 = IMPLICIT_DEF
+ %28:vgpr_32 = IMPLICIT_DEF
+ %29:vgpr_32 = IMPLICIT_DEF
+ %30:vgpr_32 = IMPLICIT_DEF
+ %31:vgpr_32 = IMPLICIT_DEF
+ %32:vgpr_32 = IMPLICIT_DEF
+ %33:vgpr_32 = IMPLICIT_DEF
+ %34:vgpr_32 = IMPLICIT_DEF
+ %35:vgpr_32 = IMPLICIT_DEF
+ %36:vgpr_32 = IMPLICIT_DEF
+ %37:vgpr_32 = IMPLICIT_DEF
+ %38:vgpr_32 = IMPLICIT_DEF
+ %39:vgpr_32 = IMPLICIT_DEF
+ %40:vgpr_32 = IMPLICIT_DEF
+ %41:vgpr_32 = IMPLICIT_DEF
+ %50:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %10.sub0, implicit $exec, implicit $mode
+ %51:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %11, implicit $exec, implicit $mode
+ %52:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %12, implicit $exec, implicit $mode
+ %53:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %13, implicit $exec, implicit $mode
+ %54:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %14, implicit $exec, implicit $mode
+ %55:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %15, implicit $exec, implicit $mode
+ %56:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %16, implicit $exec, implicit $mode
+ %57:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %17, implicit $exec, implicit $mode
+ %58:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %18, implicit $exec, implicit $mode
+ %59:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %19, implicit $exec, implicit $mode
+ %60:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %10.sub2, implicit $exec, implicit $mode
+ %61:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %21, implicit $exec, implicit $mode
+ %62:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %22, implicit $exec, implicit $mode
+ %63:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %23, implicit $exec, implicit $mode
+ %64:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %24, implicit $exec, implicit $mode
+ %65:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %25, implicit $exec, implicit $mode
+ %66:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %26, implicit $exec, implicit $mode
+ %67:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %27, implicit $exec, implicit $mode
+ %68:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %28, implicit $exec, implicit $mode
+ %69:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %29, implicit $exec, implicit $mode
+ %70:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %30, implicit $exec, implicit $mode
+ %71:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %31, implicit $exec, implicit $mode
+ %72:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %32, implicit $exec, implicit $mode
+ %73:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %33, implicit $exec, implicit $mode
+ %74:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %34, implicit $exec, implicit $mode
+ %75:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %35, implicit $exec, implicit $mode
+ %76:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %36, implicit $exec, implicit $mode
+ %77:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %37, implicit $exec, implicit $mode
+ %78:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %38, implicit $exec, implicit $mode
+ %79:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %39, implicit $exec, implicit $mode
+ %80:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %40, implicit $exec, implicit $mode
+ %81:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %41, implicit $exec, implicit $mode
+ S_BRANCH %bb.4
+
+ bb.4:
+ S_NOP 0, implicit %50, implicit %60, implicit %10.sub0, implicit %10.sub2
+ S_NOP 0, implicit %51, implicit %61, implicit %11, implicit %21
+ S_NOP 0, implicit %52, implicit %62, implicit %12, implicit %22
+ S_NOP 0, implicit %53, implicit %63, implicit %13, implicit %23
+ S_NOP 0, implicit %54, implicit %64, implicit %14, implicit %24
+ S_NOP 0, implicit %55, implicit %65
+ S_NOP 0, implicit %56, implicit %66
+ S_NOP 0, implicit %57, implicit %67
+ S_NOP 0, implicit %58, implicit %68
+ S_NOP 0, implicit %59, implicit %69
+ S_NOP 0, implicit %70, implicit %71
+ S_NOP 0, implicit %72, implicit %73
+ S_NOP 0, implicit %72, implicit %73
+ S_NOP 0, implicit %74, implicit %75
+ S_NOP 0, implicit %76, implicit %77
+ S_NOP 0, implicit %78, implicit %79
+ S_NOP 0, implicit %80
+ S_ENDPGM 0
+...
</parameter>
+
+---
+name: remat_virtual_vgpr_subreg_uses_not_available
+tracksRegLiveness: true
+machineFunctionInfo:
+ isEntryFunction: true
+body: |
+ ; GFX908-LABEL: name: remat_virtual_vgpr_subreg_uses_not_available
+ ; GFX908: bb.0:
+ ; GFX908-NEXT: successors: %bb.1(0x80000000)
+ ; GFX908-NEXT: liveins: $vgpr0, $sgpr0_sgpr1
+ ; GFX908-NEXT: {{ $}}
+ ; GFX908-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr0_sgpr1
+ ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+ ; GFX908-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 52, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4)
+ ; GFX908-NEXT: undef [[S_MOV_B32_:%[0-9]+]].sub1:sreg_64 = S_MOV_B32 0
+ ; GFX908-NEXT: undef [[DEF:%[0-9]+]].sub0:vreg_512 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF:%[0-9]+]].sub1:vreg_512 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF:%[0-9]+]].sub2:vreg_512 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF:%[0-9]+]].sub3:vreg_512 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: dead [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF3]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF4:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF5:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: dead [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: dead [[V_CMP_GT_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_GT_U32_e64 [[S_LOAD_DWORDX2_IMM]].sub0, [[COPY1]](s32), implicit $exec
+ ; GFX908-NEXT: dead undef [[S_MOV_B32_:%[0-9]+]].sub0:sreg_64 = COPY [[S_LOAD_DWORDX2_IMM]].sub1
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF1]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF2]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_3:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF4]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_4:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF5]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF7:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_5:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF7]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_6:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF8]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_7:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF9]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF10:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_8:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF10]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF11:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_9:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF11]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_10:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF12]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF13:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_11:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF13]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF14:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_12:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF14]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF15:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_13:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF15]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_14:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF16]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF17:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_15:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF17]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF18:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_16:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF18]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF19:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_17:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF19]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF20:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_18:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF20]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF21:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_19:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF21]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF22:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_20:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF22]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF23:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_21:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF23]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF24:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF25:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF26:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF27:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF28:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF29:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF30:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF31:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF]].sub0, implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF24]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF25]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF26]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_26:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF27]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF]].sub2, implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF28]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF29]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF30]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF31]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: S_BRANCH %bb.1
+ ; GFX908-NEXT: {{ $}}
+ ; GFX908-NEXT: bb.1:
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_22]], implicit [[V_CVT_I32_F32_e32_27]], implicit [[DEF]].sub1, implicit [[DEF]].sub3
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_1]], implicit [[V_CVT_I32_F32_e32_6]], implicit [[DEF24]], implicit [[DEF28]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_23]], implicit [[V_CVT_I32_F32_e32_28]], implicit [[DEF25]], implicit [[DEF29]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_24]], implicit [[V_CVT_I32_F32_e32_29]], implicit [[DEF26]], implicit [[DEF30]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_25]], implicit [[V_CVT_I32_F32_e32_30]], implicit [[DEF27]], implicit [[DEF31]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_26]], implicit [[V_CVT_I32_F32_e32_31]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_2]], implicit [[V_CVT_I32_F32_e32_7]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_3]], implicit [[V_CVT_I32_F32_e32_8]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_4]], implicit [[V_CVT_I32_F32_e32_9]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_5]], implicit [[V_CVT_I32_F32_e32_10]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_11]], implicit [[V_CVT_I32_F32_e32_12]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_13]], implicit [[V_CVT_I32_F32_e32_14]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_13]], implicit [[V_CVT_I32_F32_e32_14]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_15]], implicit [[V_CVT_I32_F32_e32_16]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_17]], implicit [[V_CVT_I32_F32_e32_18]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_19]], implicit [[V_CVT_I32_F32_e32_20]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_21]]
+ ; GFX908-NEXT: S_ENDPGM 0
+ bb.0:
+ liveins: $vgpr0, $sgpr0_sgpr1
+
+ %1:sgpr_64(p4) = COPY $sgpr0_sgpr1
+ %2:vgpr_32(s32) = COPY $vgpr0
+ %3:sreg_64_xexec = S_LOAD_DWORDX2_IMM %1(p4), 52, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4)
+ %4:sreg_64 = V_CMP_GT_U32_e64 %3.sub0, %2(s32), implicit $exec
+ undef %5.sub1:sreg_64 = S_MOV_B32 0
+ %5.sub0:sreg_64 = COPY %3.sub1
+ undef %10.sub0:vreg_512 = IMPLICIT_DEF
+ %10.sub1:vreg_512 = IMPLICIT_DEF
+ %10.sub2:vreg_512 = IMPLICIT_DEF
+ %10.sub3:vreg_512 = IMPLICIT_DEF
+ %11:vgpr_32 = IMPLICIT_DEF
+ %12:vgpr_32 = IMPLICIT_DEF
+ %13:vgpr_32 = IMPLICIT_DEF
+ %14:vgpr_32 = IMPLICIT_DEF
+ %15:vgpr_32 = IMPLICIT_DEF
+ %16:vgpr_32 = IMPLICIT_DEF
+ %17:vgpr_32 = IMPLICIT_DEF
+ %18:vgpr_32 = IMPLICIT_DEF
+ %19:vgpr_32 = IMPLICIT_DEF
+ %20:vgpr_32 = IMPLICIT_DEF
+ %21:vgpr_32 = IMPLICIT_DEF
+ %22:vgpr_32 = IMPLICIT_DEF
+ %23:vgpr_32 = IMPLICIT_DEF
+ %24:vgpr_32 = IMPLICIT_DEF
+ %25:vgpr_32 = IMPLICIT_DEF
+ %26:vgpr_32 = IMPLICIT_DEF
+ %27:vgpr_32 = IMPLICIT_DEF
+ %28:vgpr_32 = IMPLICIT_DEF
+ %29:vgpr_32 = IMPLICIT_DEF
+ %30:vgpr_32 = IMPLICIT_DEF
+ %31:vgpr_32 = IMPLICIT_DEF
+ %32:vgpr_32 = IMPLICIT_DEF
+ %33:vgpr_32 = IMPLICIT_DEF
+ %34:vgpr_32 = IMPLICIT_DEF
+ %35:vgpr_32 = IMPLICIT_DEF
+ %36:vgpr_32 = IMPLICIT_DEF
+ %37:vgpr_32 = IMPLICIT_DEF
+ %38:vgpr_32 = IMPLICIT_DEF
+ %39:vgpr_32 = IMPLICIT_DEF
+ %40:vgpr_32 = IMPLICIT_DEF
+ %41:vgpr_32 = IMPLICIT_DEF
+ %50:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %10.sub0, implicit $exec, implicit $mode
+ %51:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %11, implicit $exec, implicit $mode
+ %52:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %12, implicit $exec, implicit $mode
+ %53:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %13, implicit $exec, implicit $mode
+ %54:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %14, implicit $exec, implicit $mode
+ %55:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %15, implicit $exec, implicit $mode
+ %56:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %16, implicit $exec, implicit $mode
+ %57:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %17, implicit $exec, implicit $mode
+ %58:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %18, implicit $exec, implicit $mode
+ %59:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %19, implicit $exec, implicit $mode
+ %60:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %10.sub2, implicit $exec, implicit $mode
+ %61:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %21, implicit $exec, implicit $mode
+ %62:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %22, implicit $exec, implicit $mode
+ %63:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %23, implicit $exec, implicit $mode
+ %64:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %24, implicit $exec, implicit $mode
+ %65:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %25, implicit $exec, implicit $mode
+ %66:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %26, implicit $exec, implicit $mode
+ %67:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %27, implicit $exec, implicit $mode
+ %68:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %28, implicit $exec, implicit $mode
+ %69:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %29, implicit $exec, implicit $mode
+ %70:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %30, implicit $exec, implicit $mode
+ %71:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %31, implicit $exec, implicit $mode
+ %72:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %32, implicit $exec, implicit $mode
+ %73:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %33, implicit $exec, implicit $mode
+ %74:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %34, implicit $exec, implicit $mode
+ %75:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %35, implicit $exec, implicit $mode
+ %76:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %36, implicit $exec, implicit $mode
+ %77:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %37, implicit $exec, implicit $mode
+ %78:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %38, implicit $exec, implicit $mode
+ %79:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %39, implicit $exec, implicit $mode
+ %80:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %40, implicit $exec, implicit $mode
+ %81:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %41, implicit $exec, implicit $mode
+ S_BRANCH %bb.4
+
+ bb.4:
+ S_NOP 0, implicit %50, implicit %60, implicit %10.sub1, implicit %10.sub3
+ S_NOP 0, implicit %51, implicit %61, implicit %12, implicit %22
+ S_NOP 0, implicit %52, implicit %62, implicit %13, implicit %23
+ S_NOP 0, implicit %53, implicit %63, implicit %14, implicit %24
+ S_NOP 0, implicit %54, implicit %64, implicit %15, implicit %25
+ S_NOP 0, implicit %55, implicit %65
+ S_NOP 0, implicit %56, implicit %66
+ S_NOP 0, implicit %57, implicit %67
+ S_NOP 0, implicit %58, implicit %68
+ S_NOP 0, implicit %59, implicit %69
+ S_NOP 0, implicit %70, implicit %71
+ S_NOP 0, implicit %72, implicit %73
+ S_NOP 0, implicit %72, implicit %73
+ S_NOP 0, implicit %74, implicit %75
+ S_NOP 0, implicit %76, implicit %77
+ S_NOP 0, implicit %78, implicit %79
+ S_NOP 0, implicit %80
+ S_ENDPGM 0
+...
+
+
+---
+name: remat_virtual_vgpr_subreg_def
+tracksRegLiveness: true
+machineFunctionInfo:
+ isEntryFunction: true
+body: |
+ ; GFX908-LABEL: name: remat_virtual_vgpr_subreg_def
+ ; GFX908: bb.0:
+ ; GFX908-NEXT: successors: %bb.1(0x80000000)
+ ; GFX908-NEXT: liveins: $vgpr0, $sgpr0_sgpr1
+ ; GFX908-NEXT: {{ $}}
+ ; GFX908-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr0_sgpr1
+ ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+ ; GFX908-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 52, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4)
+ ; GFX908-NEXT: undef [[S_MOV_B32_:%[0-9]+]].sub1:sreg_64 = S_MOV_B32 0
+ ; GFX908-NEXT: undef [[DEF:%[0-9]+]].sub0:vreg_512 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF:%[0-9]+]].sub1:vreg_512 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF:%[0-9]+]].sub2:vreg_512 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF:%[0-9]+]].sub3:vreg_512 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: dead [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF3]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF4:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF5:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: dead [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: dead [[V_CMP_GT_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_GT_U32_e64 [[S_LOAD_DWORDX2_IMM]].sub0, [[COPY1]](s32), implicit $exec
+ ; GFX908-NEXT: dead undef [[S_MOV_B32_:%[0-9]+]].sub0:sreg_64 = COPY [[S_LOAD_DWORDX2_IMM]].sub1
+ ; GFX908-NEXT: undef [[V_CVT_I32_F32_e32_1:%[0-9]+]].sub1:vreg_64 = nofpexcept V_CVT_I32_F32_e32 [[DEF1]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF2]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_3:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF4]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_4:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF5]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF7:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_5:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF7]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_6:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF8]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_7:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF9]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF10:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_8:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF10]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF11:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_9:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF11]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_10:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF12]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF13:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_11:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF13]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF14:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_12:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF14]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF15:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_13:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF15]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_14:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF16]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF17:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_15:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF17]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF18:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_16:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF18]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF19:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_17:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF19]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF20:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_18:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF20]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF21:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_19:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF21]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF22:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_20:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF22]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF23:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_21:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF23]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF24:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF24]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF25:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF26:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF27:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF28:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF29:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF30:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF31:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF25]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF29]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF30]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_26:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF31]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: undef [[V_CVT_I32_F32_e32_1:%[0-9]+]].sub0:vreg_64 = nofpexcept V_CVT_I32_F32_e32 [[DEF]].sub0, implicit $exec, implicit $mode
+ ; GFX908-NEXT: S_BRANCH %bb.1
+ ; GFX908-NEXT: {{ $}}
+ ; GFX908-NEXT: bb.1:
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF]].sub2, implicit $exec, implicit $mode
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_1]].sub0, implicit [[V_CVT_I32_F32_e32_27]], implicit [[DEF]].sub0, implicit [[DEF]].sub2
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF28]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF26]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF27]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_23]], implicit [[V_CVT_I32_F32_e32_28]], implicit [[DEF1]], implicit [[DEF28]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_23]], implicit [[V_CVT_I32_F32_e32_24]], implicit [[DEF25]], implicit [[DEF29]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_29]], implicit [[V_CVT_I32_F32_e32_25]], implicit [[DEF26]], implicit [[DEF30]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_30]], implicit [[V_CVT_I32_F32_e32_26]], implicit [[DEF27]], implicit [[DEF31]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_2]], implicit [[V_CVT_I32_F32_e32_7]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_3]], implicit [[V_CVT_I32_F32_e32_8]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_4]], implicit [[V_CVT_I32_F32_e32_9]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_5]], implicit [[V_CVT_I32_F32_e32_10]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_6]], implicit [[V_CVT_I32_F32_e32_11]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_12]], implicit [[V_CVT_I32_F32_e32_13]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_14]], implicit [[V_CVT_I32_F32_e32_15]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_14]], implicit [[V_CVT_I32_F32_e32_15]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_16]], implicit [[V_CVT_I32_F32_e32_17]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_18]], implicit [[V_CVT_I32_F32_e32_19]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_20]], implicit [[V_CVT_I32_F32_e32_21]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_22]]
+ ; GFX908-NEXT: S_ENDPGM 0
+ bb.0:
+ liveins: $vgpr0, $sgpr0_sgpr1
+
+ %1:sgpr_64(p4) = COPY $sgpr0_sgpr1
+ %2:vgpr_32(s32) = COPY $vgpr0
+ %3:sreg_64_xexec = S_LOAD_DWORDX2_IMM %1(p4), 52, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4)
+ %4:sreg_64 = V_CMP_GT_U32_e64 %3.sub0, %2(s32), implicit $exec
+ undef %5.sub1:sreg_64 = S_MOV_B32 0
+ %5.sub0:sreg_64 = COPY %3.sub1
+ undef %10.sub0:vreg_512 = IMPLICIT_DEF
+ %10.sub1:vreg_512 = IMPLICIT_DEF
+ %10.sub2:vreg_512 = IMPLICIT_DEF
+ %10.sub3:vreg_512 = IMPLICIT_DEF
+ %11:vgpr_32 = IMPLICIT_DEF
+ %12:vgpr_32 = IMPLICIT_DEF
+ %13:vgpr_32 = IMPLICIT_DEF
+ %14:vgpr_32 = IMPLICIT_DEF
+ %15:vgpr_32 = IMPLICIT_DEF
+ %16:vgpr_32 = IMPLICIT_DEF
+ %17:vgpr_32 = IMPLICIT_DEF
+ %18:vgpr_32 = IMPLICIT_DEF
+ %19:vgpr_32 = IMPLICIT_DEF
+ %20:vgpr_32 = IMPLICIT_DEF
+ %21:vgpr_32 = IMPLICIT_DEF
+ %22:vgpr_32 = IMPLICIT_DEF
+ %23:vgpr_32 = IMPLICIT_DEF
+ %24:vgpr_32 = IMPLICIT_DEF
+ %25:vgpr_32 = IMPLICIT_DEF
+ %26:vgpr_32 = IMPLICIT_DEF
+ %27:vgpr_32 = IMPLICIT_DEF
+ %28:vgpr_32 = IMPLICIT_DEF
+ %29:vgpr_32 = IMPLICIT_DEF
+ %30:vgpr_32 = IMPLICIT_DEF
+ %31:vgpr_32 = IMPLICIT_DEF
+ %32:vgpr_32 = IMPLICIT_DEF
+ %33:vgpr_32 = IMPLICIT_DEF
+ %34:vgpr_32 = IMPLICIT_DEF
+ %35:vgpr_32 = IMPLICIT_DEF
+ %36:vgpr_32 = IMPLICIT_DEF
+ %37:vgpr_32 = IMPLICIT_DEF
+ %38:vgpr_32 = IMPLICIT_DEF
+ %39:vgpr_32 = IMPLICIT_DEF
+ %40:vgpr_32 = IMPLICIT_DEF
+ %41:vgpr_32 = IMPLICIT_DEF
+ undef %50.sub0:vreg_64 = nofpexcept V_CVT_I32_F32_e32 %10.sub0, implicit $exec, implicit $mode
+ %50.sub1:vreg_64 = nofpexcept V_CVT_I32_F32_e32 %11, implicit $exec, implicit $mode
+ %52:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %12, implicit $exec, implicit $mode
+ %53:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %13, implicit $exec, implicit $mode
+ %54:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %14, implicit $exec, implicit $mode
+ %55:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %15, implicit $exec, implicit $mode
+ %56:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %16, implicit $exec, implicit $mode
+ %57:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %17, implicit $exec, implicit $mode
+ %58:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %18, implicit $exec, implicit $mode
+ %59:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %19, implicit $exec, implicit $mode
+ %60:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %10.sub2, implicit $exec, implicit $mode
+ %61:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %21, implicit $exec, implicit $mode
+ %62:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %22, implicit $exec, implicit $mode
+ %63:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %23, implicit $exec, implicit $mode
+ %64:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %24, implicit $exec, implicit $mode
+ %65:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %25, implicit $exec, implicit $mode
+ %66:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %26, implicit $exec, implicit $mode
+ %67:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %27, implicit $exec, implicit $mode
+ %68:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %28, implicit $exec, implicit $mode
+ %69:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %29, implicit $exec, implicit $mode
+ %70:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %30, implicit $exec, implicit $mode
+ %71:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %31, implicit $exec, implicit $mode
+ %72:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %32, implicit $exec, implicit $mode
+ %73:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %33, implicit $exec, implicit $mode
+ %74:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %34, implicit $exec, implicit $mode
+ %75:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %35, implicit $exec, implicit $mode
+ %76:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %36, implicit $exec, implicit $mode
+ %77:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %37, implicit $exec, implicit $mode
+ %78:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %38, implicit $exec, implicit $mode
+ %79:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %39, implicit $exec, implicit $mode
+ %80:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %40, implicit $exec, implicit $mode
+ %81:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %41, implicit $exec, implicit $mode
+ S_BRANCH %bb.4
+
+ bb.4:
+ S_NOP 0, implicit %50.sub0, implicit %60, implicit %10.sub0, implicit %10.sub2
+ S_NOP 0, implicit %52, implicit %61, implicit %11, implicit %21
+ S_NOP 0, implicit %52, implicit %62, implicit %12, implicit %22
+ S_NOP 0, implicit %53, implicit %63, implicit %13, implicit %23
+ S_NOP 0, implicit %54, implicit %64, implicit %14, implicit %24
+ S_NOP 0, implicit %55, implicit %65
+ S_NOP 0, implicit %56, implicit %66
+ S_NOP 0, implicit %57, implicit %67
+ S_NOP 0, implicit %58, implicit %68
+ S_NOP 0, implicit %59, implicit %69
+ S_NOP 0, implicit %70, implicit %71
+ S_NOP 0, implicit %72, implicit %73
+ S_NOP 0, implicit %72, implicit %73
+ S_NOP 0, implicit %74, implicit %75
+ S_NOP 0, implicit %76, implicit %77
+ S_NOP 0, implicit %78, implicit %79
+ S_NOP 0, implicit %80
+ S_ENDPGM 0
+...
+
+# The remat candidate (the instruction defining %3) uses a register (%2) that does not have a subrange for every subregister (sub1 is undefined).
+# Make sure that, when checking whether %3 can be rematerialized, the liveness of %2 is handled correctly despite the missing subrange.
+
+---
+name: omitted_subrange
+tracksRegLiveness: true
+body: |
+ ; GFX908-LABEL: name: omitted_subrange
+ ; GFX908: bb.0:
+ ; GFX908-NEXT: successors: %bb.1(0x80000000)
+ ; GFX908-NEXT: liveins: $sgpr3, $sgpr4
+ ; GFX908-NEXT: {{ $}}
+ ; GFX908-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF4:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF5:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF7:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF10:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF11:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF13:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF14:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF15:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF17:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF18:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF19:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: dead [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF19]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF20:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF1]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_3:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF2]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_4:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF3]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_5:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF4]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_6:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF5]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_7:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF6]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_8:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF7]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_9:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF8]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_10:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF9]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_11:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF10]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_12:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF11]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_13:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF12]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_14:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF13]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_15:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF14]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_16:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF15]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_17:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF16]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_18:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF17]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_19:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF18]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_20:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF20]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF21:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_21:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF21]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF22:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+ ; GFX908-NEXT: undef [[V_RCP_F32_e32_:%[0-9]+]].sub0:vreg_64_align2 = nnan ninf nsz arcp contract afn reassoc nofpexcept V_RCP_F32_e32 [[DEF22]].sub0, implicit $mode, implicit $exec
+ ; GFX908-NEXT: [[DEF23:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF24:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF25:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF26:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF27:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF28:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF29:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF30:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF31:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF32:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: dead [[DEF33:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF34:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+ ; GFX908-NEXT: S_BRANCH %bb.1
+ ; GFX908-NEXT: {{ $}}
+ ; GFX908-NEXT: bb.1:
+ ; GFX908-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000)
+ ; GFX908-NEXT: liveins: $sgpr3, $sgpr4
+ ; GFX908-NEXT: {{ $}}
+ ; GFX908-NEXT: %temp:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: S_CMP_LG_U32 $sgpr3, $sgpr4, implicit-def $scc
+ ; GFX908-NEXT: [[DEF34:%[0-9]+]].sub0:vreg_64_align2 = nnan ninf nsz arcp contract afn reassoc nofpexcept V_MUL_F32_e64 0, [[DEF22]].sub0, 1, %temp, 0, 0, implicit $mode, implicit $exec
+ ; GFX908-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc
+ ; GFX908-NEXT: S_BRANCH %bb.3
+ ; GFX908-NEXT: {{ $}}
+ ; GFX908-NEXT: bb.2:
+ ; GFX908-NEXT: successors: %bb.3(0x80000000)
+ ; GFX908-NEXT: liveins: $sgpr3, $sgpr4
+ ; GFX908-NEXT: {{ $}}
+ ; GFX908-NEXT: undef [[V_FMA_F32_e64_:%[0-9]+]].sub0:vreg_64_align2 = nnan ninf nsz arcp contract afn reassoc nofpexcept V_FMA_F32_e64 0, [[DEF34]].sub1, 0, [[V_RCP_F32_e32_]].sub0, 0, [[DEF34]].sub0, 0, 0, implicit $mode, implicit $exec
+ ; GFX908-NEXT: %temp2:vreg_64_align2 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_PK_MUL_F32_:%[0-9]+]]:vreg_64_align2 = nnan ninf nsz arcp contract afn reassoc nofpexcept V_PK_MUL_F32 0, [[V_RCP_F32_e32_]], 8, [[DEF22]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+ ; GFX908-NEXT: dead [[V_PK_FMA_F32_:%[0-9]+]]:vreg_64_align2 = nnan ninf nsz arcp contract afn reassoc nofpexcept V_PK_FMA_F32 0, [[V_FMA_F32_e64_]], 8, %temp2, 11, [[V_PK_MUL_F32_]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF23]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF28]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF24]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF29]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_22]], implicit [[V_CVT_I32_F32_e32_23]], implicit [[DEF23]], implicit [[DEF28]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_24]], implicit [[V_CVT_I32_F32_e32_25]], implicit [[DEF24]], implicit [[DEF29]]
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_26:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF25]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF30]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF26]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF31]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF27]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF32]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: dead [[DEF35:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_26]], implicit [[V_CVT_I32_F32_e32_27]], implicit [[DEF25]], implicit [[DEF30]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_28]], implicit [[V_CVT_I32_F32_e32_29]], implicit [[DEF26]], implicit [[DEF31]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_30]], implicit [[V_CVT_I32_F32_e32_31]], implicit [[DEF27]], implicit [[DEF32]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_1]], implicit [[V_CVT_I32_F32_e32_6]], implicit [[DEF22]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_2]], implicit [[V_CVT_I32_F32_e32_7]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_3]], implicit [[V_CVT_I32_F32_e32_8]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_4]], implicit [[V_CVT_I32_F32_e32_9]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_5]], implicit [[V_CVT_I32_F32_e32_10]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_11]], implicit [[V_CVT_I32_F32_e32_12]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_13]], implicit [[V_CVT_I32_F32_e32_14]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_13]], implicit [[V_CVT_I32_F32_e32_14]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_15]], implicit [[V_CVT_I32_F32_e32_16]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_17]], implicit [[V_CVT_I32_F32_e32_18]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_19]], implicit [[V_CVT_I32_F32_e32_20]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_21]]
+ ; GFX908-NEXT: S_BRANCH %bb.3
+ ; GFX908-NEXT: {{ $}}
+ ; GFX908-NEXT: bb.3:
+ ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.4(0x40000000)
+ ; GFX908-NEXT: liveins: $sgpr3, $sgpr4
+ ; GFX908-NEXT: {{ $}}
+ ; GFX908-NEXT: S_CMP_LG_U32 $sgpr4, 0, implicit-def $scc
+ ; GFX908-NEXT: S_CBRANCH_SCC1 %bb.1, implicit killed $scc
+ ; GFX908-NEXT: S_BRANCH %bb.4
+ ; GFX908-NEXT: {{ $}}
+ ; GFX908-NEXT: bb.4:
+ ; GFX908-NEXT: S_ENDPGM 0
+ bb.0:
+ liveins: $sgpr3, $sgpr4
+
+ %0:vreg_64 = IMPLICIT_DEF
+ %1:vreg_64_align2 = IMPLICIT_DEF
+ undef %2.sub0:vreg_64_align2 = nnan ninf nsz arcp contract afn reassoc nofpexcept V_RCP_F32_e32 %1.sub0:vreg_64_align2, implicit $mode, implicit $exec
+ %3:vreg_64_align2 = nnan ninf nsz arcp contract afn reassoc nofpexcept V_PK_MUL_F32 0, %2, 8, %1, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+ %5:vreg_64_align2 = IMPLICIT_DEF
+ %10:vgpr_32 = IMPLICIT_DEF
+ %11:vgpr_32 = IMPLICIT_DEF
+ %12:vgpr_32 = IMPLICIT_DEF
+ %13:vgpr_32 = IMPLICIT_DEF
+ %14:vgpr_32 = IMPLICIT_DEF
+ %15:vgpr_32 = IMPLICIT_DEF
+ %16:vgpr_32 = IMPLICIT_DEF
+ %17:vgpr_32 = IMPLICIT_DEF
+ %18:vgpr_32 = IMPLICIT_DEF
+ %19:vgpr_32 = IMPLICIT_DEF
+ %20:vgpr_32 = IMPLICIT_DEF
+ %21:vgpr_32 = IMPLICIT_DEF
+ %22:vgpr_32 = IMPLICIT_DEF
+ %23:vgpr_32 = IMPLICIT_DEF
+ %24:vgpr_32 = IMPLICIT_DEF
+ %25:vgpr_32 = IMPLICIT_DEF
+ %26:vgpr_32 = IMPLICIT_DEF
+ %27:vgpr_32 = IMPLICIT_DEF
+ %28:vgpr_32 = IMPLICIT_DEF
+ %29:vgpr_32 = IMPLICIT_DEF
+ %30:vgpr_32 = IMPLICIT_DEF
+ %31:vgpr_32 = IMPLICIT_DEF
+ %32:vgpr_32 = IMPLICIT_DEF
+ %33:vgpr_32 = IMPLICIT_DEF
+ %34:vgpr_32 = IMPLICIT_DEF
+ %35:vgpr_32 = IMPLICIT_DEF
+ %36:vgpr_32 = IMPLICIT_DEF
+ %37:vgpr_32 = IMPLICIT_DEF
+ %38:vgpr_32 = IMPLICIT_DEF
+ %39:vgpr_32 = IMPLICIT_DEF
+ %40:vgpr_32 = IMPLICIT_DEF
+ %41:vgpr_32 = IMPLICIT_DEF
+ %50:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %10, implicit $exec, implicit $mode
+ %51:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %11, implicit $exec, implicit $mode
+ %52:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %12, implicit $exec, implicit $mode
+ %53:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %13, implicit $exec, implicit $mode
+ %54:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %14, implicit $exec, implicit $mode
+ %55:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %15, implicit $exec, implicit $mode
+ %56:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %16, implicit $exec, implicit $mode
+ %57:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %17, implicit $exec, implicit $mode
+ %58:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %18, implicit $exec, implicit $mode
+ %59:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %19, implicit $exec, implicit $mode
+ %60:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %20, implicit $exec, implicit $mode
+ %61:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %21, implicit $exec, implicit $mode
+ %62:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %22, implicit $exec, implicit $mode
+ %63:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %23, implicit $exec, implicit $mode
+ %64:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %24, implicit $exec, implicit $mode
+ %65:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %25, implicit $exec, implicit $mode
+ %66:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %26, implicit $exec, implicit $mode
+ %67:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %27, implicit $exec, implicit $mode
+ %68:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %28, implicit $exec, implicit $mode
+ %69:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %29, implicit $exec, implicit $mode
+ %70:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %30, implicit $exec, implicit $mode
+ %71:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %31, implicit $exec, implicit $mode
+ %72:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %32, implicit $exec, implicit $mode
+ %73:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %33, implicit $exec, implicit $mode
+ %74:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %34, implicit $exec, implicit $mode
+ %75:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %35, implicit $exec, implicit $mode
+ %76:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %36, implicit $exec, implicit $mode
+ %77:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %37, implicit $exec, implicit $mode
+ %78:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %38, implicit $exec, implicit $mode
+ %79:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %39, implicit $exec, implicit $mode
+ %80:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %40, implicit $exec, implicit $mode
+ %81:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %41, implicit $exec, implicit $mode
+ S_BRANCH %bb.1
+
+ bb.1:
+ liveins: $sgpr3, $sgpr4
+
+ %temp:vgpr_32 = IMPLICIT_DEF
+ %5.sub0:vreg_64_align2 = nnan ninf nsz arcp contract afn reassoc nofpexcept V_MUL_F32_e64 0, %1.sub0:vreg_64_align2, 1, %temp, 0, 0, implicit $mode, implicit $exec
+
+ S_CMP_LG_U32 killed $sgpr3, $sgpr4, implicit-def $scc
+ S_CBRANCH_SCC1 %bb.3, implicit killed $scc
+ S_BRANCH %bb.4
+
+ bb.3:
+ liveins: $sgpr3, $sgpr4
+
+ %6:vreg_64_align2 = IMPLICIT_DEF
+ undef %7.sub0:vreg_64_align2 = nnan ninf nsz arcp contract afn reassoc nofpexcept V_FMA_F32_e64 0, %5.sub1, 0, %2.sub0:vreg_64_align2, 0, %5.sub0:vreg_64_align2, 0, 0, implicit $mode, implicit $exec
+ %temp2:vreg_64_align2 = IMPLICIT_DEF
+ %8:vreg_64_align2 = nnan ninf nsz arcp contract afn reassoc nofpexcept V_PK_FMA_F32 0, %7:vreg_64_align2, 8, %temp2, 11, %3:vreg_64_align2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+ S_NOP 0, implicit %50, implicit %60, implicit %10, implicit %20
+ S_NOP 0, implicit %51, implicit %61, implicit %11, implicit %21
+ S_NOP 0, implicit %52, implicit %62, implicit %12, implicit %22
+ S_NOP 0, implicit %53, implicit %63, implicit %13, implicit %23
+ S_NOP 0, implicit %54, implicit %64, implicit %14, implicit %24
+ S_NOP 0, implicit %55, implicit %65, implicit %1
+ S_NOP 0, implicit %56, implicit %66
+ S_NOP 0, implicit %57, implicit %67
+ S_NOP 0, implicit %58, implicit %68
+ S_NOP 0, implicit %59, implicit %69
+ S_NOP 0, implicit %70, implicit %71
+ S_NOP 0, implicit %72, implicit %73
+ S_NOP 0, implicit %72, implicit %73
+ S_NOP 0, implicit %74, implicit %75
+ S_NOP 0, implicit %76, implicit %77
+ S_NOP 0, implicit %78, implicit %79
+ S_NOP 0, implicit %80
+ S_BRANCH %bb.4
+
+ bb.4:
+ liveins: $sgpr3, $sgpr4
+
+ S_CMP_LG_U32 $sgpr4, 0, implicit-def $scc
+ S_CBRANCH_SCC1 %bb.1, implicit killed $scc
+ S_BRANCH %bb.2
+
+ bb.2:
+ S_ENDPGM 0
+...
>From 6e0e80e6f6570ae227fceffb4f983c346bf24e80 Mon Sep 17 00:00:00 2001
From: Lucas Ramirez <lucas.rami at proton.me>
Date: Mon, 17 Feb 2025 12:24:58 +0000
Subject: [PATCH 05/10] Address feedback
- Properly account for AGPRs in excess RP calculations.
- Account for SGPR spilling when deciding whether to try to increase
occupancy.
- Don't rollback when rescheduling managed to improve occupancy after
remats.
---
llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp | 157 +++--
llvm/lib/Target/AMDGPU/GCNSchedStrategy.h | 7 +
llvm/lib/Target/AMDGPU/GCNSubtarget.cpp | 20 +-
llvm/lib/Target/AMDGPU/GCNSubtarget.h | 28 +-
.../Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 40 +-
llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h | 15 -
.../atomic_optimizations_global_pointer.ll | 4 +-
.../atomic_optimizations_local_pointer.ll | 141 +++--
.../test/CodeGen/AMDGPU/dynamic_stackalloc.ll | 41 +-
.../AMDGPU/global_atomics_scan_fadd.ll | 50 +-
.../AMDGPU/global_atomics_scan_fmax.ll | 22 +-
.../AMDGPU/global_atomics_scan_fmin.ll | 22 +-
.../AMDGPU/global_atomics_scan_fsub.ll | 70 ++-
llvm/test/CodeGen/AMDGPU/idiv-licm.ll | 123 ++--
.../CodeGen/AMDGPU/llvm.amdgcn.reduce.umax.ll | 4 +-
.../CodeGen/AMDGPU/llvm.amdgcn.reduce.umin.ll | 4 +-
.../llvm.amdgcn.struct.atomic.buffer.load.ll | 7 +-
...vm.amdgcn.struct.ptr.atomic.buffer.load.ll | 7 +-
.../test/CodeGen/AMDGPU/loop-prefetch-data.ll | 8 +-
...ine-scheduler-sink-trivial-remats-attr.mir | 555 +++++++++++++++++-
.../CodeGen/AMDGPU/no-dup-inst-prefetch.ll | 2 +-
.../AMDGPU/pal-metadata-3.0-callable.ll | 2 +-
22 files changed, 944 insertions(+), 385 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index fbf1f9dae03a5..8f5b96fb788d8 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -1675,6 +1675,41 @@ bool PreRARematStage::allUsesAvailableAt(const MachineInstr *InstToRemat,
return true;
}
+bool PreRARematStage::hasExcessVGPRs(const GCNRegPressure &RP,
+ unsigned MaxVGPRs,
+ unsigned &ExcessArchVGPRs,
+ bool &AGPRLimited) {
+ unsigned NumAGPRs = RP.getAGPRNum();
+ if (!ST.hasGFX90AInsts() || !NumAGPRs) {
+ // Non-unified RF. We can only reduce ArchVGPR excess pressure at this
+ // point, but still want to identify when there is AGPR excess pressure.
+ bool HasSpill = false;
+ unsigned NumArchVGPRs = RP.getArchVGPRNum();
+ if (NumArchVGPRs > MaxVGPRs) {
+ ExcessArchVGPRs = NumArchVGPRs - MaxVGPRs;
+ HasSpill = true;
+ }
+ if (NumAGPRs > MaxVGPRs) {
+ AGPRLimited = true;
+ HasSpill = true;
+ }
+ return HasSpill;
+ }
+ if (RP.getVGPRNum(true) > MaxVGPRs) {
+ // Unified RF. We can only remat ArchVGPRs; AGPR pressure alone may prevent
+ // us from eliminating spilling.
+ unsigned NumArchVGPRs = RP.getArchVGPRNum();
+ if (NumAGPRs >= MaxVGPRs) {
+ AGPRLimited = true;
+ ExcessArchVGPRs = NumArchVGPRs;
+ } else {
+ ExcessArchVGPRs = NumArchVGPRs - alignDown(MaxVGPRs - NumAGPRs, 4);
+ }
+ return true;
+ }
+ return false;
+}
+
bool PreRARematStage::canIncreaseOccupancyOrReduceSpill() {
const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo *>(DAG.TRI);
@@ -1684,51 +1719,86 @@ bool PreRARematStage::canIncreaseOccupancyOrReduceSpill() {
// Maps optimizable regions (i.e., regions at minimum and VGPR-limited
// occupancy, or regions with VGPR spilling) to their excess RP.
DenseMap<unsigned, unsigned> OptRegions;
-
- // Note that the maximum number of VGPRs to use to eliminate spill may be
- // lower than the maximum number to increase occupancy when the function has
- // the "amdgpu-num-vgpr" attribute.
- const std::pair<unsigned, unsigned> OccBounds =
+ const Function &F = MF.getFunction();
+ const bool UnifiedRF = ST.hasGFX90AInsts();
+
+ // Adjust workgroup size induced occupancy bounds with the
+ // "amdgpu-waves-per-eu" attribute. This should be offloaded to a subtarget
+// method, but at this point it is unclear how other parts of the codebase
+ // interpret this attribute and the default behavior produces unexpected
+ // bounds. Here we want to allow users to ask for target occupancies lower
+ // than the default lower bound.
+ std::pair<unsigned, unsigned> OccBounds =
ST.getOccupancyWithWorkGroupSizes(MF);
- // FIXME: we should be able to just call ST.getMaxNumArchVGPRs() but that
- // would use the occupancy bounds as determined by
- // MF.getFunction().getWavesPerEU(), which look incorrect in some cases.
+ std::pair<unsigned, unsigned> WavesPerEU =
+ AMDGPU::getIntegerPairAttribute(F, "amdgpu-waves-per-eu", {0, 0}, true);
+ if (WavesPerEU.first <= WavesPerEU.second) {
+ if (WavesPerEU.first && WavesPerEU.first <= OccBounds.second)
+ OccBounds.first = WavesPerEU.first;
+ if (WavesPerEU.second)
+ OccBounds.second = std::min(OccBounds.second, WavesPerEU.second);
+ }
+
+ // We call the "base max functions" directly because otherwise it uses the
+ // subtarget's logic for combining "amdgpu-waves-per-eu" with the function's
+ // groupsize induced occupancy bounds, producing unexpected results.
+ const unsigned MaxSGPRsNoSpill = ST.getBaseMaxNumSGPRs(
+ F, OccBounds, ST.getMaxNumPreloadedSGPRs(), ST.getReservedNumSGPRs(F));
const unsigned MaxVGPRsNoSpill =
- ST.getBaseMaxNumVGPRs(MF.getFunction(),
- {ST.getMinNumArchVGPRs(OccBounds.second),
- ST.getMaxNumArchVGPRs(OccBounds.first)},
- false);
- const unsigned MaxVGPRsIncOcc = ST.getMaxNumArchVGPRs(DAG.MinOccupancy + 1);
+ ST.getBaseMaxNumVGPRs(F, {ST.getMinNumVGPRs(OccBounds.second),
+ ST.getMaxNumVGPRs(OccBounds.first)});
+ const unsigned MaxSGPRsMinOcc = ST.getMaxNumSGPRs(DAG.MinOccupancy, false);
+ const unsigned MaxVGPRsIncOcc = ST.getMaxNumVGPRs(DAG.MinOccupancy + 1);
IncreaseOccupancy = OccBounds.second > DAG.MinOccupancy;
+ auto ClearOptRegionsIf = [&](bool Cond) -> bool {
+ if (Cond) {
+ // We won't try to increase occupancy.
+ IncreaseOccupancy = false;
+ OptRegions.clear();
+ }
+ return Cond;
+ };
+
// Collect optimizable regions. If there is spilling in any region we will
- // just try to reduce it. Otherwise we will try to increase occupancy by one.
+ // just try to reduce ArchVGPR spilling. Otherwise we will try to increase
+ // occupancy by one in the whole function.
for (unsigned I = 0, E = DAG.Regions.size(); I != E; ++I) {
GCNRegPressure &RP = DAG.Pressure[I];
- unsigned NumVGPRs = RP.getArchVGPRNum();
unsigned ExcessRP = 0;
- if (NumVGPRs > MaxVGPRsNoSpill) {
- if (IncreaseOccupancy) {
- // We won't try to increase occupancy.
- IncreaseOccupancy = false;
- OptRegions.clear();
- }
- // Region has VGPR spilling, we will try to reduce spilling as much as
- // possible.
- ExcessRP = NumVGPRs - MaxVGPRsNoSpill;
- REMAT_DEBUG(dbgs() << "Region " << I << " is spilling VGPRs, save "
- << ExcessRP << " VGPR(s) to eliminate spilling\n");
+ unsigned NumSGPRs = RP.getSGPRNum();
+
+    // Check whether SGPR pressure prevents us from eliminating spilling.
+ if (NumSGPRs > MaxSGPRsNoSpill)
+ ClearOptRegionsIf(IncreaseOccupancy);
+
+ bool OccAGPRLimited = false;
+ if (hasExcessVGPRs(RP, MaxVGPRsNoSpill, ExcessRP, OccAGPRLimited)) {
+ ClearOptRegionsIf(IncreaseOccupancy);
+ REMAT_DEBUG({
+ if (ExcessRP) {
+ StringRef RegClass = UnifiedRF ? "VGPRs" : "ArchVGPRs";
+ dbgs() << "Region " << I << " is spilling " << RegClass << ", save "
+ << ExcessRP << " to eliminate " << RegClass << "-spilling\n";
+ }
+ });
} else if (IncreaseOccupancy) {
- if (ST.getOccupancyWithNumSGPRs(RP.getSGPRNum()) == DAG.MinOccupancy) {
- // Occupancy is SGPR-limited in the region, no point in trying to
- // increase it through VGPR usage.
- IncreaseOccupancy = false;
- OptRegions.clear();
- } else if (NumVGPRs > MaxVGPRsIncOcc) {
- // Occupancy is VGPR-limited.
- ExcessRP = NumVGPRs - MaxVGPRsIncOcc;
- REMAT_DEBUG(dbgs() << "Region " << I << " has min. occupancy: save "
- << ExcessRP << " VGPR(s) to improve occupancy\n");
+ // Check whether SGPR pressure prevents us from increasing occupancy.
+ if (ClearOptRegionsIf(NumSGPRs > MaxSGPRsMinOcc))
+ continue;
+
+ if (hasExcessVGPRs(RP, MaxVGPRsIncOcc, ExcessRP, OccAGPRLimited)) {
+ // Check whether AGPR pressure prevents us from increasing occupancy.
+ if (ClearOptRegionsIf(OccAGPRLimited))
+ continue;
+ // Occupancy could be increased by rematerializing ArchVGPRs.
+ REMAT_DEBUG({
+ if (ExcessRP) {
+ StringRef RegClass = UnifiedRF ? "VGPRs" : "ArchVGPRs";
+ dbgs() << "Region " << I << " has min. occupancy: save " << ExcessRP
+ << " " << RegClass << " to improve occupancy\n";
+ }
+ });
}
}
if (ExcessRP)
@@ -1738,7 +1808,7 @@ bool PreRARematStage::canIncreaseOccupancyOrReduceSpill() {
return false;
// When we are reducing spilling, the target is the minimum achievable
- // occupancy implied by workgroup sizes.
+ // occupancy implied by workgroup sizes / the "amdgpu-waves-per-eu" attribute.
TargetOcc = IncreaseOccupancy ? DAG.MinOccupancy + 1 : OccBounds.first;
// Accounts for a reduction in RP in an optimizable region. Returns whether we
@@ -2058,9 +2128,11 @@ static MachineBasicBlock *getRegionMBB(MachineFunction &MF,
void PreRARematStage::finalizeGCNSchedStage() {
// We consider that reducing spilling is always beneficial so we never
// rollback rematerializations in such cases. It's also possible that
- // rescheduling lowers occupancy over the one achived just through remats, in
- // which case we do not want to rollback either.
- if (!IncreaseOccupancy || AchievedOcc == TargetOcc)
+ // rescheduling lowers occupancy over the one achieved just through remats, in
+ // which case we do not want to rollback either (the rescheduling was already
+ // reverted in PreRARematStage::shouldRevertScheduling in such cases).
+ unsigned MaxOcc = std::max(AchievedOcc, DAG.MinOccupancy);
+ if (!IncreaseOccupancy || MaxOcc == TargetOcc)
return;
REMAT_DEBUG(dbgs() << "Rollbacking all rematerializations\n");
@@ -2077,10 +2149,7 @@ void PreRARematStage::finalizeGCNSchedStage() {
// Re-rematerialize MI at the end of its original region. Note that it may
// not be rematerialized exactly in the same position as originally within
- // the region, but it should not matter much. Since we are only
- // rematerializing instructions that do not have any virtual reg uses, we
- // do not need to call LiveRangeEdit::allUsesAvailableAt() and
- // LiveRangeEdit::canRematerializeAt().
+ // the region, but it should not matter much.
TII->reMaterialize(*MBB, InsertPos, Reg, SubReg, RematMI, *DAG.TRI);
MachineInstr *NewMI = &*std::prev(InsertPos);
NewMI->getOperand(0).setSubReg(SubReg);
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
index 2d6440b1b0730..9fd40d8e5e64f 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
@@ -485,6 +485,13 @@ class PreRARematStage : public GCNSchedStage {
/// spilling.
bool IncreaseOccupancy;
+ /// Determines whether there is excess VGPR pressure in \p RP w.r.t. \p
+ /// MaxVGPRs. If there is, \p ExcessArchVGPRs is set to the number of
+ /// ArchVGPRs one must save to eliminate the excess and \p AGPRLimited is set
+ /// to true if AGPR pressure alone causes an excess.
+ bool hasExcessVGPRs(const GCNRegPressure &RP, unsigned MaxVGPRs,
+ unsigned &ExcessArchVGPRs, bool &AGPRLimited);
+
/// Returns whether remat can reduce spilling or increase function occupancy
/// by 1 through rematerialization. If it can do one, collects instructions in
/// PreRARematStage::Rematerializations and sets the target occupancy in
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.cpp b/llvm/lib/Target/AMDGPU/GCNSubtarget.cpp
index 0507b64c46192..6fb7d03a71a85 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.cpp
@@ -466,7 +466,7 @@ unsigned GCNSubtarget::getMaxNumSGPRs(const MachineFunction &MF) const {
getReservedNumSGPRs(MF));
}
-static unsigned getMaxNumPreloadedSGPRs() {
+unsigned GCNSubtarget::getMaxNumPreloadedSGPRs() const {
using USI = GCNUserSGPRUsageInfo;
// Max number of user SGPRs
const unsigned MaxUserSGPRs =
@@ -496,10 +496,8 @@ unsigned GCNSubtarget::getMaxNumSGPRs(const Function &F) const {
getReservedNumSGPRs(F));
}
-unsigned
-GCNSubtarget::getBaseMaxNumVGPRs(const Function &F,
- std::pair<unsigned, unsigned> NumVGPRBounds,
- bool UnifiedRF) const {
+unsigned GCNSubtarget::getBaseMaxNumVGPRs(
+ const Function &F, std::pair<unsigned, unsigned> NumVGPRBounds) const {
const auto &[Min, Max] = NumVGPRBounds;
// Check if maximum number of VGPRs was explicitly requested using
@@ -510,7 +508,7 @@ GCNSubtarget::getBaseMaxNumVGPRs(const Function &F,
unsigned Requested = F.getFnAttributeAsParsedInteger("amdgpu-num-vgpr", Max);
if (!Requested)
return Max;
- if (UnifiedRF)
+ if (hasGFX90AInsts())
Requested *= 2;
// Make sure requested value is inside the range of possible VGPR usage.
@@ -520,15 +518,7 @@ GCNSubtarget::getBaseMaxNumVGPRs(const Function &F,
unsigned GCNSubtarget::getMaxNumVGPRs(const Function &F) const {
std::pair<unsigned, unsigned> Waves = getWavesPerEU(F);
return getBaseMaxNumVGPRs(
- F, {getMinNumVGPRs(Waves.second), getMaxNumVGPRs(Waves.first)},
- hasGFX90AInsts());
-}
-
-unsigned GCNSubtarget::getMaxNumArchVGPRs(const Function &F) const {
- std::pair<unsigned, unsigned> Waves = getWavesPerEU(F);
- return getBaseMaxNumVGPRs(
- F, {getMinNumArchVGPRs(Waves.second), getMaxNumArchVGPRs(Waves.first)},
- false);
+ F, {getMinNumVGPRs(Waves.second), getMaxNumVGPRs(Waves.first)});
}
unsigned GCNSubtarget::getMaxNumVGPRs(const MachineFunction &MF) const {
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index 8345a498045f5..92c6de8f3d7ad 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -1487,6 +1487,9 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
/// \returns Reserved number of SGPRs for given function \p F.
unsigned getReservedNumSGPRs(const Function &F) const;
+ /// \returns Maximum number of preloaded SGPRs for the subtarget.
+ unsigned getMaxNumPreloadedSGPRs() const;
+
/// \returns max num SGPRs. This is the common utility
/// function called by MachineFunction and Function
/// variants of getMaxNumSGPRs.
@@ -1547,29 +1550,16 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
return AMDGPU::IsaInfo::getMinNumVGPRs(this, WavesPerEU);
}
- /// \returns the minimum number of ArchVGPRs that will prevent achieving more
- /// than the specified number of waves \p WavesPerEU.
- unsigned getMinNumArchVGPRs(unsigned WavesPerEU) const {
- return AMDGPU::IsaInfo::getMinNumArchVGPRs(this, WavesPerEU);
- }
-
/// \returns the maximum number of VGPRs that can be used and still achieved
/// at least the specified number of waves \p WavesPerEU.
unsigned getMaxNumVGPRs(unsigned WavesPerEU) const {
return AMDGPU::IsaInfo::getMaxNumVGPRs(this, WavesPerEU);
}
- /// \returns the maximum number of ArchVGPRs that can be used and still
- /// achieve at least the specified number of waves \p WavesPerEU.
- unsigned getMaxNumArchVGPRs(unsigned WavesPerEU) const {
- return AMDGPU::IsaInfo::getMaxNumArchVGPRs(this, WavesPerEU);
- }
-
- /// \returns max num VGPRs. This is the common utility function called by
- /// MachineFunction and Function variants of getMaxNum[Arch]VGPRs.
+ /// \returns max num VGPRs. This is the common utility function
+ /// called by MachineFunction and Function variants of getMaxNumVGPRs.
unsigned getBaseMaxNumVGPRs(const Function &F,
- std::pair<unsigned, unsigned> NumVGPRBounds,
- bool UnifiedRF) const;
+ std::pair<unsigned, unsigned> NumVGPRBounds) const;
/// \returns Maximum number of VGPRs that meets number of waves per execution
/// unit requirement for function \p F, or number of VGPRs explicitly
@@ -1581,12 +1571,6 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
/// unit requirement.
unsigned getMaxNumVGPRs(const Function &F) const;
- /// Returns the maximum number of ArchVGPRs that meets number of waves per
- /// execution unit requirement for function \p F, or number of ArchVGPRs
- /// explicitly requested using "amdgpu-num-vgpr" attribute attached to
- /// function \p F.
- unsigned getMaxNumArchVGPRs(const Function &F) const;
-
unsigned getMaxNumAGPRs(const Function &F) const {
return getMaxNumVGPRs(F);
}
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 5f229b310f686..59afcbed35294 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -1240,36 +1240,28 @@ unsigned getOccupancyWithNumSGPRs(unsigned SGPRs, unsigned MaxWaves,
return 5;
}
-unsigned getBaseMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
- unsigned TotalNumVGPRs,
- unsigned NumAddressableVGPRs) {
+unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
assert(WavesPerEU != 0);
+
unsigned MaxWavesPerEU = getMaxWavesPerEU(STI);
if (WavesPerEU >= MaxWavesPerEU)
return 0;
- unsigned MinWavesPerEU =
- getNumWavesPerEUWithNumVGPRs(STI, NumAddressableVGPRs);
- if (WavesPerEU < MinWavesPerEU)
- return getMinNumVGPRs(STI, MinWavesPerEU);
+ unsigned TotNumVGPRs = getTotalNumVGPRs(STI);
+ unsigned AddrsableNumVGPRs = getAddressableNumVGPRs(STI);
unsigned Granule = getVGPRAllocGranule(STI);
- unsigned MaxNumVGPRs = alignDown(TotalNumVGPRs / WavesPerEU, Granule);
- if (MaxNumVGPRs == alignDown(TotalNumVGPRs / MaxWavesPerEU, Granule))
+ unsigned MaxNumVGPRs = alignDown(TotNumVGPRs / WavesPerEU, Granule);
+
+ if (MaxNumVGPRs == alignDown(TotNumVGPRs / MaxWavesPerEU, Granule))
return 0;
- unsigned MaxNumVGPRsNext =
- alignDown(TotalNumVGPRs / (WavesPerEU + 1), Granule);
- unsigned MinNumVGPRs = 1 + std::min(MaxNumVGPRs - Granule, MaxNumVGPRsNext);
- return std::min(MinNumVGPRs, NumAddressableVGPRs);
-}
-unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
- return getBaseMinNumVGPRs(STI, WavesPerEU, getTotalNumVGPRs(STI),
- getAddressableNumVGPRs(STI));
-}
+ unsigned MinWavesPerEU = getNumWavesPerEUWithNumVGPRs(STI, AddrsableNumVGPRs);
+ if (WavesPerEU < MinWavesPerEU)
+ return getMinNumVGPRs(STI, MinWavesPerEU);
-unsigned getMinNumArchVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
- unsigned TotNumArchVGPRs = getAddressableNumArchVGPRs(STI);
- return getBaseMinNumVGPRs(STI, WavesPerEU, TotNumArchVGPRs, TotNumArchVGPRs);
+ unsigned MaxNumVGPRsNext = alignDown(TotNumVGPRs / (WavesPerEU + 1), Granule);
+ unsigned MinNumVGPRs = 1 + std::min(MaxNumVGPRs - Granule, MaxNumVGPRsNext);
+ return std::min(MinNumVGPRs, AddrsableNumVGPRs);
}
unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
@@ -1281,12 +1273,6 @@ unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
return std::min(MaxNumVGPRs, AddressableNumVGPRs);
}
-unsigned getMaxNumArchVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
- assert(WavesPerEU != 0);
- return alignDown(getAddressableNumArchVGPRs(STI) / WavesPerEU,
- getVGPRAllocGranule(STI));
-}
-
unsigned getEncodedNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs,
std::optional<bool> EnableWavefrontSize32) {
return getGranulatedNumRegisterBlocks(
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index ebcbd0c07506b..67bebfb3418d5 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -318,29 +318,14 @@ unsigned getAddressableNumArchVGPRs(const MCSubtargetInfo *STI);
/// \returns Addressable number of VGPRs for given subtarget \p STI.
unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI);
-/// Returns the minimum number of VGPRs that meets given number of waves per
-/// execution unit requirement for given subtarget \p STI with \p TotNumVGPRs
-/// VGPRs available and \p NumAddressableVGPRs addressable VGPRs.
-unsigned getBaseMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
- unsigned TotalNumVGPRs,
- unsigned NumAddressableVGPRs);
-
/// \returns Minimum number of VGPRs that meets given number of waves per
/// execution unit requirement for given subtarget \p STI.
unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);
-/// \returns Minimum number of ArchVGPRs that meets given number of waves per
-/// execution unit requirement for given subtarget \p STI.
-unsigned getMinNumArchVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);
-
/// \returns Maximum number of VGPRs that meets given number of waves per
/// execution unit requirement for given subtarget \p STI.
unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);
-/// \returns Maximum number of ArchVGPRs that meets given number of waves per
-/// execution unit requirement for given subtarget \p STI.
-unsigned getMaxNumArchVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);
-
/// \returns Number of waves reachable for a given \p NumVGPRs usage for given
/// subtarget \p STI.
unsigned getNumWavesPerEUWithNumVGPRs(const MCSubtargetInfo *STI,
diff --git a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_global_pointer.ll b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_global_pointer.ll
index 169cc0c51f4a4..8bb8ecb079a34 100644
--- a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_global_pointer.ll
+++ b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_global_pointer.ll
@@ -2745,8 +2745,8 @@ define amdgpu_kernel void @add_i64_varying(ptr addrspace(1) %out, ptr addrspace(
; GFX1232_ITERATIVE-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1232_ITERATIVE-NEXT: s_ctz_i32_b32 s1, s0
; GFX1232_ITERATIVE-NEXT: s_wait_alu 0xfffe
-; GFX1232_ITERATIVE-NEXT: v_readlane_b32 s2, v2, s1
; GFX1232_ITERATIVE-NEXT: v_readlane_b32 s3, v3, s1
+; GFX1232_ITERATIVE-NEXT: v_readlane_b32 s2, v2, s1
; GFX1232_ITERATIVE-NEXT: s_lshl_b32 s8, 1, s1
; GFX1232_ITERATIVE-NEXT: v_writelane_b32 v1, s7, s1
; GFX1232_ITERATIVE-NEXT: v_writelane_b32 v0, s6, s1
@@ -6333,8 +6333,8 @@ define amdgpu_kernel void @sub_i64_varying(ptr addrspace(1) %out, ptr addrspace(
; GFX1232_ITERATIVE-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1232_ITERATIVE-NEXT: s_ctz_i32_b32 s1, s0
; GFX1232_ITERATIVE-NEXT: s_wait_alu 0xfffe
-; GFX1232_ITERATIVE-NEXT: v_readlane_b32 s2, v2, s1
; GFX1232_ITERATIVE-NEXT: v_readlane_b32 s3, v3, s1
+; GFX1232_ITERATIVE-NEXT: v_readlane_b32 s2, v2, s1
; GFX1232_ITERATIVE-NEXT: s_lshl_b32 s8, 1, s1
; GFX1232_ITERATIVE-NEXT: v_writelane_b32 v1, s7, s1
; GFX1232_ITERATIVE-NEXT: v_writelane_b32 v0, s6, s1
diff --git a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll
index 2edfe67fcd56f..3c0646c46efd0 100644
--- a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll
+++ b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll
@@ -1026,24 +1026,23 @@ define amdgpu_kernel void @add_i32_varying(ptr addrspace(1) %out) {
; GFX1132_DPP-NEXT: v_mov_b32_dpp v3, v1 row_shr:1 row_mask:0xf bank_mask:0xf
; GFX1132_DPP-NEXT: s_mov_b32 exec_lo, s0
; GFX1132_DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
+; GFX1132_DPP-NEXT: v_mov_b32_e32 v4, 0
; GFX1132_DPP-NEXT: s_or_saveexec_b32 s0, -1
; GFX1132_DPP-NEXT: v_writelane_b32 v3, s1, 16
; GFX1132_DPP-NEXT: s_mov_b32 exec_lo, s0
-; GFX1132_DPP-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX1132_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX1132_DPP-NEXT: s_mov_b32 s1, s2
+; GFX1132_DPP-NEXT: s_mov_b32 s0, s2
; GFX1132_DPP-NEXT: s_mov_b32 s2, -1
; GFX1132_DPP-NEXT: ; implicit-def: $vgpr0
-; GFX1132_DPP-NEXT: s_and_saveexec_b32 s0, vcc_lo
+; GFX1132_DPP-NEXT: s_and_saveexec_b32 s1, vcc_lo
; GFX1132_DPP-NEXT: s_cbranch_execz .LBB2_2
; GFX1132_DPP-NEXT: ; %bb.1:
-; GFX1132_DPP-NEXT: v_mov_b32_e32 v0, s1
-; GFX1132_DPP-NEXT: v_mov_b32_e32 v4, 0
+; GFX1132_DPP-NEXT: v_mov_b32_e32 v0, s0
; GFX1132_DPP-NEXT: ds_add_rtn_u32 v0, v4, v0
; GFX1132_DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132_DPP-NEXT: buffer_gl0_inv
; GFX1132_DPP-NEXT: .LBB2_2:
-; GFX1132_DPP-NEXT: s_or_b32 exec_lo, exec_lo, s0
+; GFX1132_DPP-NEXT: s_or_b32 exec_lo, exec_lo, s1
; GFX1132_DPP-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX1132_DPP-NEXT: v_readfirstlane_b32 s3, v0
; GFX1132_DPP-NEXT: v_mov_b32_e32 v0, v3
@@ -2807,19 +2806,19 @@ define amdgpu_kernel void @add_i64_varying(ptr addrspace(1) %out) {
; GFX1132_DPP-NEXT: v_mov_b32_dpp v7, v1 row_shr:1 row_mask:0xf bank_mask:0xf
; GFX1132_DPP-NEXT: v_readlane_b32 s6, v1, 15
; GFX1132_DPP-NEXT: s_mov_b32 exec_lo, s2
-; GFX1132_DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
+; GFX1132_DPP-NEXT: v_mbcnt_lo_u32_b32 v8, exec_lo, 0
+; GFX1132_DPP-NEXT: v_mov_b32_e32 v0, 0
; GFX1132_DPP-NEXT: s_or_saveexec_b32 s2, -1
; GFX1132_DPP-NEXT: v_writelane_b32 v6, s3, 16
; GFX1132_DPP-NEXT: v_writelane_b32 v7, s6, 16
; GFX1132_DPP-NEXT: s_mov_b32 exec_lo, s2
+; GFX1132_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v8
; GFX1132_DPP-NEXT: s_mov_b32 s2, -1
-; GFX1132_DPP-NEXT: s_mov_b32 s3, exec_lo
; GFX1132_DPP-NEXT: ; implicit-def: $vgpr8_vgpr9
-; GFX1132_DPP-NEXT: v_cmpx_eq_u32_e32 0, v0
+; GFX1132_DPP-NEXT: s_and_saveexec_b32 s3, vcc_lo
; GFX1132_DPP-NEXT: s_cbranch_execz .LBB6_2
; GFX1132_DPP-NEXT: ; %bb.1:
; GFX1132_DPP-NEXT: v_dual_mov_b32 v9, s1 :: v_dual_mov_b32 v8, s0
-; GFX1132_DPP-NEXT: v_mov_b32_e32 v0, 0
; GFX1132_DPP-NEXT: ds_add_rtn_u64 v[8:9], v0, v[8:9]
; GFX1132_DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132_DPP-NEXT: buffer_gl0_inv
@@ -4449,24 +4448,23 @@ define amdgpu_kernel void @sub_i32_varying(ptr addrspace(1) %out) {
; GFX1132_DPP-NEXT: v_mov_b32_dpp v3, v1 row_shr:1 row_mask:0xf bank_mask:0xf
; GFX1132_DPP-NEXT: s_mov_b32 exec_lo, s0
; GFX1132_DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
+; GFX1132_DPP-NEXT: v_mov_b32_e32 v4, 0
; GFX1132_DPP-NEXT: s_or_saveexec_b32 s0, -1
; GFX1132_DPP-NEXT: v_writelane_b32 v3, s1, 16
; GFX1132_DPP-NEXT: s_mov_b32 exec_lo, s0
-; GFX1132_DPP-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX1132_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX1132_DPP-NEXT: s_mov_b32 s1, s2
+; GFX1132_DPP-NEXT: s_mov_b32 s0, s2
; GFX1132_DPP-NEXT: s_mov_b32 s2, -1
; GFX1132_DPP-NEXT: ; implicit-def: $vgpr0
-; GFX1132_DPP-NEXT: s_and_saveexec_b32 s0, vcc_lo
+; GFX1132_DPP-NEXT: s_and_saveexec_b32 s1, vcc_lo
; GFX1132_DPP-NEXT: s_cbranch_execz .LBB10_2
; GFX1132_DPP-NEXT: ; %bb.1:
-; GFX1132_DPP-NEXT: v_mov_b32_e32 v0, s1
-; GFX1132_DPP-NEXT: v_mov_b32_e32 v4, 0
+; GFX1132_DPP-NEXT: v_mov_b32_e32 v0, s0
; GFX1132_DPP-NEXT: ds_sub_rtn_u32 v0, v4, v0
; GFX1132_DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132_DPP-NEXT: buffer_gl0_inv
; GFX1132_DPP-NEXT: .LBB10_2:
-; GFX1132_DPP-NEXT: s_or_b32 exec_lo, exec_lo, s0
+; GFX1132_DPP-NEXT: s_or_b32 exec_lo, exec_lo, s1
; GFX1132_DPP-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX1132_DPP-NEXT: v_readfirstlane_b32 s3, v0
; GFX1132_DPP-NEXT: v_mov_b32_e32 v0, v3
@@ -6253,19 +6251,19 @@ define amdgpu_kernel void @sub_i64_varying(ptr addrspace(1) %out) {
; GFX1132_DPP-NEXT: v_mov_b32_dpp v7, v1 row_shr:1 row_mask:0xf bank_mask:0xf
; GFX1132_DPP-NEXT: v_readlane_b32 s6, v1, 15
; GFX1132_DPP-NEXT: s_mov_b32 exec_lo, s2
-; GFX1132_DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
+; GFX1132_DPP-NEXT: v_mbcnt_lo_u32_b32 v8, exec_lo, 0
+; GFX1132_DPP-NEXT: v_mov_b32_e32 v0, 0
; GFX1132_DPP-NEXT: s_or_saveexec_b32 s2, -1
; GFX1132_DPP-NEXT: v_writelane_b32 v6, s3, 16
; GFX1132_DPP-NEXT: v_writelane_b32 v7, s6, 16
; GFX1132_DPP-NEXT: s_mov_b32 exec_lo, s2
+; GFX1132_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v8
; GFX1132_DPP-NEXT: s_mov_b32 s2, -1
-; GFX1132_DPP-NEXT: s_mov_b32 s3, exec_lo
; GFX1132_DPP-NEXT: ; implicit-def: $vgpr8_vgpr9
-; GFX1132_DPP-NEXT: v_cmpx_eq_u32_e32 0, v0
+; GFX1132_DPP-NEXT: s_and_saveexec_b32 s3, vcc_lo
; GFX1132_DPP-NEXT: s_cbranch_execz .LBB14_2
; GFX1132_DPP-NEXT: ; %bb.1:
; GFX1132_DPP-NEXT: v_dual_mov_b32 v9, s1 :: v_dual_mov_b32 v8, s0
-; GFX1132_DPP-NEXT: v_mov_b32_e32 v0, 0
; GFX1132_DPP-NEXT: ds_sub_rtn_u64 v[8:9], v0, v[8:9]
; GFX1132_DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132_DPP-NEXT: buffer_gl0_inv
@@ -7197,8 +7195,8 @@ define amdgpu_kernel void @and_i64_varying(ptr addrspace(1) %out) {
; GFX1132_ITERATIVE-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1132_ITERATIVE-NEXT: s_ctz_i32_b32 s3, s2
; GFX1132_ITERATIVE-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
-; GFX1132_ITERATIVE-NEXT: v_readlane_b32 s6, v2, s3
; GFX1132_ITERATIVE-NEXT: v_readlane_b32 s7, v3, s3
+; GFX1132_ITERATIVE-NEXT: v_readlane_b32 s6, v2, s3
; GFX1132_ITERATIVE-NEXT: s_lshl_b32 s8, 1, s3
; GFX1132_ITERATIVE-NEXT: v_writelane_b32 v1, s1, s3
; GFX1132_ITERATIVE-NEXT: v_writelane_b32 v0, s0, s3
@@ -7618,19 +7616,19 @@ define amdgpu_kernel void @and_i64_varying(ptr addrspace(1) %out) {
; GFX1132_DPP-NEXT: v_readlane_b32 s6, v2, 15
; GFX1132_DPP-NEXT: s_mov_b32 exec_lo, s2
; GFX1132_DPP-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
-; GFX1132_DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
+; GFX1132_DPP-NEXT: v_mbcnt_lo_u32_b32 v7, exec_lo, 0
+; GFX1132_DPP-NEXT: v_mov_b32_e32 v0, 0
; GFX1132_DPP-NEXT: s_or_saveexec_b32 s2, -1
; GFX1132_DPP-NEXT: v_writelane_b32 v6, s3, 16
; GFX1132_DPP-NEXT: v_writelane_b32 v5, s6, 16
; GFX1132_DPP-NEXT: s_mov_b32 exec_lo, s2
+; GFX1132_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v7
; GFX1132_DPP-NEXT: s_mov_b32 s2, -1
-; GFX1132_DPP-NEXT: s_mov_b32 s3, exec_lo
; GFX1132_DPP-NEXT: ; implicit-def: $vgpr7_vgpr8
-; GFX1132_DPP-NEXT: v_cmpx_eq_u32_e32 0, v0
+; GFX1132_DPP-NEXT: s_and_saveexec_b32 s3, vcc_lo
; GFX1132_DPP-NEXT: s_cbranch_execz .LBB16_2
; GFX1132_DPP-NEXT: ; %bb.1:
; GFX1132_DPP-NEXT: v_dual_mov_b32 v8, s1 :: v_dual_mov_b32 v7, s0
-; GFX1132_DPP-NEXT: v_mov_b32_e32 v0, 0
; GFX1132_DPP-NEXT: ds_and_rtn_b64 v[7:8], v0, v[7:8]
; GFX1132_DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132_DPP-NEXT: buffer_gl0_inv
@@ -8232,24 +8230,23 @@ define amdgpu_kernel void @or_i32_varying(ptr addrspace(1) %out) {
; GFX1132_DPP-NEXT: v_mov_b32_dpp v3, v1 row_shr:1 row_mask:0xf bank_mask:0xf
; GFX1132_DPP-NEXT: s_mov_b32 exec_lo, s0
; GFX1132_DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
+; GFX1132_DPP-NEXT: v_mov_b32_e32 v4, 0
; GFX1132_DPP-NEXT: s_or_saveexec_b32 s0, -1
; GFX1132_DPP-NEXT: v_writelane_b32 v3, s1, 16
; GFX1132_DPP-NEXT: s_mov_b32 exec_lo, s0
-; GFX1132_DPP-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX1132_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX1132_DPP-NEXT: s_mov_b32 s1, s2
+; GFX1132_DPP-NEXT: s_mov_b32 s0, s2
; GFX1132_DPP-NEXT: s_mov_b32 s2, -1
; GFX1132_DPP-NEXT: ; implicit-def: $vgpr0
-; GFX1132_DPP-NEXT: s_and_saveexec_b32 s0, vcc_lo
+; GFX1132_DPP-NEXT: s_and_saveexec_b32 s1, vcc_lo
; GFX1132_DPP-NEXT: s_cbranch_execz .LBB17_2
; GFX1132_DPP-NEXT: ; %bb.1:
-; GFX1132_DPP-NEXT: v_mov_b32_e32 v0, s1
-; GFX1132_DPP-NEXT: v_mov_b32_e32 v4, 0
+; GFX1132_DPP-NEXT: v_mov_b32_e32 v0, s0
; GFX1132_DPP-NEXT: ds_or_rtn_b32 v0, v4, v0
; GFX1132_DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132_DPP-NEXT: buffer_gl0_inv
; GFX1132_DPP-NEXT: .LBB17_2:
-; GFX1132_DPP-NEXT: s_or_b32 exec_lo, exec_lo, s0
+; GFX1132_DPP-NEXT: s_or_b32 exec_lo, exec_lo, s1
; GFX1132_DPP-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX1132_DPP-NEXT: v_readfirstlane_b32 s3, v0
; GFX1132_DPP-NEXT: v_mov_b32_e32 v0, v3
@@ -8562,8 +8559,8 @@ define amdgpu_kernel void @or_i64_varying(ptr addrspace(1) %out) {
; GFX1132_ITERATIVE-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1132_ITERATIVE-NEXT: s_ctz_i32_b32 s3, s2
; GFX1132_ITERATIVE-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
-; GFX1132_ITERATIVE-NEXT: v_readlane_b32 s6, v2, s3
; GFX1132_ITERATIVE-NEXT: v_readlane_b32 s7, v3, s3
+; GFX1132_ITERATIVE-NEXT: v_readlane_b32 s6, v2, s3
; GFX1132_ITERATIVE-NEXT: s_lshl_b32 s8, 1, s3
; GFX1132_ITERATIVE-NEXT: v_writelane_b32 v1, s1, s3
; GFX1132_ITERATIVE-NEXT: v_writelane_b32 v0, s0, s3
@@ -8983,19 +8980,19 @@ define amdgpu_kernel void @or_i64_varying(ptr addrspace(1) %out) {
; GFX1132_DPP-NEXT: v_readlane_b32 s6, v2, 15
; GFX1132_DPP-NEXT: s_mov_b32 exec_lo, s2
; GFX1132_DPP-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
-; GFX1132_DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
+; GFX1132_DPP-NEXT: v_mbcnt_lo_u32_b32 v7, exec_lo, 0
+; GFX1132_DPP-NEXT: v_mov_b32_e32 v0, 0
; GFX1132_DPP-NEXT: s_or_saveexec_b32 s2, -1
; GFX1132_DPP-NEXT: v_writelane_b32 v6, s3, 16
; GFX1132_DPP-NEXT: v_writelane_b32 v5, s6, 16
; GFX1132_DPP-NEXT: s_mov_b32 exec_lo, s2
+; GFX1132_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v7
; GFX1132_DPP-NEXT: s_mov_b32 s2, -1
-; GFX1132_DPP-NEXT: s_mov_b32 s3, exec_lo
; GFX1132_DPP-NEXT: ; implicit-def: $vgpr7_vgpr8
-; GFX1132_DPP-NEXT: v_cmpx_eq_u32_e32 0, v0
+; GFX1132_DPP-NEXT: s_and_saveexec_b32 s3, vcc_lo
; GFX1132_DPP-NEXT: s_cbranch_execz .LBB18_2
; GFX1132_DPP-NEXT: ; %bb.1:
; GFX1132_DPP-NEXT: v_dual_mov_b32 v8, s1 :: v_dual_mov_b32 v7, s0
-; GFX1132_DPP-NEXT: v_mov_b32_e32 v0, 0
; GFX1132_DPP-NEXT: ds_or_rtn_b64 v[7:8], v0, v[7:8]
; GFX1132_DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132_DPP-NEXT: buffer_gl0_inv
@@ -9597,24 +9594,23 @@ define amdgpu_kernel void @xor_i32_varying(ptr addrspace(1) %out) {
; GFX1132_DPP-NEXT: v_mov_b32_dpp v3, v1 row_shr:1 row_mask:0xf bank_mask:0xf
; GFX1132_DPP-NEXT: s_mov_b32 exec_lo, s0
; GFX1132_DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
+; GFX1132_DPP-NEXT: v_mov_b32_e32 v4, 0
; GFX1132_DPP-NEXT: s_or_saveexec_b32 s0, -1
; GFX1132_DPP-NEXT: v_writelane_b32 v3, s1, 16
; GFX1132_DPP-NEXT: s_mov_b32 exec_lo, s0
-; GFX1132_DPP-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX1132_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX1132_DPP-NEXT: s_mov_b32 s1, s2
+; GFX1132_DPP-NEXT: s_mov_b32 s0, s2
; GFX1132_DPP-NEXT: s_mov_b32 s2, -1
; GFX1132_DPP-NEXT: ; implicit-def: $vgpr0
-; GFX1132_DPP-NEXT: s_and_saveexec_b32 s0, vcc_lo
+; GFX1132_DPP-NEXT: s_and_saveexec_b32 s1, vcc_lo
; GFX1132_DPP-NEXT: s_cbranch_execz .LBB19_2
; GFX1132_DPP-NEXT: ; %bb.1:
-; GFX1132_DPP-NEXT: v_mov_b32_e32 v0, s1
-; GFX1132_DPP-NEXT: v_mov_b32_e32 v4, 0
+; GFX1132_DPP-NEXT: v_mov_b32_e32 v0, s0
; GFX1132_DPP-NEXT: ds_xor_rtn_b32 v0, v4, v0
; GFX1132_DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132_DPP-NEXT: buffer_gl0_inv
; GFX1132_DPP-NEXT: .LBB19_2:
-; GFX1132_DPP-NEXT: s_or_b32 exec_lo, exec_lo, s0
+; GFX1132_DPP-NEXT: s_or_b32 exec_lo, exec_lo, s1
; GFX1132_DPP-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX1132_DPP-NEXT: v_readfirstlane_b32 s3, v0
; GFX1132_DPP-NEXT: v_mov_b32_e32 v0, v3
@@ -9927,8 +9923,8 @@ define amdgpu_kernel void @xor_i64_varying(ptr addrspace(1) %out) {
; GFX1132_ITERATIVE-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1132_ITERATIVE-NEXT: s_ctz_i32_b32 s3, s2
; GFX1132_ITERATIVE-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
-; GFX1132_ITERATIVE-NEXT: v_readlane_b32 s6, v2, s3
; GFX1132_ITERATIVE-NEXT: v_readlane_b32 s7, v3, s3
+; GFX1132_ITERATIVE-NEXT: v_readlane_b32 s6, v2, s3
; GFX1132_ITERATIVE-NEXT: s_lshl_b32 s8, 1, s3
; GFX1132_ITERATIVE-NEXT: v_writelane_b32 v1, s1, s3
; GFX1132_ITERATIVE-NEXT: v_writelane_b32 v0, s0, s3
@@ -10348,19 +10344,19 @@ define amdgpu_kernel void @xor_i64_varying(ptr addrspace(1) %out) {
; GFX1132_DPP-NEXT: v_readlane_b32 s6, v2, 15
; GFX1132_DPP-NEXT: s_mov_b32 exec_lo, s2
; GFX1132_DPP-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
-; GFX1132_DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
+; GFX1132_DPP-NEXT: v_mbcnt_lo_u32_b32 v7, exec_lo, 0
+; GFX1132_DPP-NEXT: v_mov_b32_e32 v0, 0
; GFX1132_DPP-NEXT: s_or_saveexec_b32 s2, -1
; GFX1132_DPP-NEXT: v_writelane_b32 v6, s3, 16
; GFX1132_DPP-NEXT: v_writelane_b32 v5, s6, 16
; GFX1132_DPP-NEXT: s_mov_b32 exec_lo, s2
+; GFX1132_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v7
; GFX1132_DPP-NEXT: s_mov_b32 s2, -1
-; GFX1132_DPP-NEXT: s_mov_b32 s3, exec_lo
; GFX1132_DPP-NEXT: ; implicit-def: $vgpr7_vgpr8
-; GFX1132_DPP-NEXT: v_cmpx_eq_u32_e32 0, v0
+; GFX1132_DPP-NEXT: s_and_saveexec_b32 s3, vcc_lo
; GFX1132_DPP-NEXT: s_cbranch_execz .LBB20_2
; GFX1132_DPP-NEXT: ; %bb.1:
; GFX1132_DPP-NEXT: v_dual_mov_b32 v8, s1 :: v_dual_mov_b32 v7, s0
-; GFX1132_DPP-NEXT: v_mov_b32_e32 v0, 0
; GFX1132_DPP-NEXT: ds_xor_rtn_b64 v[7:8], v0, v[7:8]
; GFX1132_DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132_DPP-NEXT: buffer_gl0_inv
@@ -11573,8 +11569,8 @@ define amdgpu_kernel void @max_i64_varying(ptr addrspace(1) %out) {
; GFX1132_ITERATIVE-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1132_ITERATIVE-NEXT: s_ctz_i32_b32 s3, s2
; GFX1132_ITERATIVE-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(VALU_DEP_3)
-; GFX1132_ITERATIVE-NEXT: v_readlane_b32 s6, v2, s3
; GFX1132_ITERATIVE-NEXT: v_readlane_b32 s7, v3, s3
+; GFX1132_ITERATIVE-NEXT: v_readlane_b32 s6, v2, s3
; GFX1132_ITERATIVE-NEXT: v_writelane_b32 v1, s1, s3
; GFX1132_ITERATIVE-NEXT: v_writelane_b32 v0, s0, s3
; GFX1132_ITERATIVE-NEXT: v_cmp_gt_i64_e64 s8, s[0:1], s[6:7]
@@ -12181,19 +12177,19 @@ define amdgpu_kernel void @max_i64_varying(ptr addrspace(1) %out) {
; GFX1132_DPP-NEXT: v_mov_b32_dpp v4, v1 row_shr:1 row_mask:0xf bank_mask:0xf
; GFX1132_DPP-NEXT: v_readlane_b32 s6, v1, 15
; GFX1132_DPP-NEXT: s_mov_b32 exec_lo, s2
-; GFX1132_DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
+; GFX1132_DPP-NEXT: v_mbcnt_lo_u32_b32 v7, exec_lo, 0
+; GFX1132_DPP-NEXT: v_mov_b32_e32 v0, 0
; GFX1132_DPP-NEXT: s_or_saveexec_b32 s2, -1
; GFX1132_DPP-NEXT: v_writelane_b32 v5, s3, 16
; GFX1132_DPP-NEXT: v_writelane_b32 v4, s6, 16
; GFX1132_DPP-NEXT: s_mov_b32 exec_lo, s2
+; GFX1132_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v7
; GFX1132_DPP-NEXT: s_mov_b32 s2, -1
-; GFX1132_DPP-NEXT: s_mov_b32 s3, exec_lo
; GFX1132_DPP-NEXT: ; implicit-def: $vgpr7_vgpr8
-; GFX1132_DPP-NEXT: v_cmpx_eq_u32_e32 0, v0
+; GFX1132_DPP-NEXT: s_and_saveexec_b32 s3, vcc_lo
; GFX1132_DPP-NEXT: s_cbranch_execz .LBB23_2
; GFX1132_DPP-NEXT: ; %bb.1:
; GFX1132_DPP-NEXT: v_dual_mov_b32 v8, s1 :: v_dual_mov_b32 v7, s0
-; GFX1132_DPP-NEXT: v_mov_b32_e32 v0, 0
; GFX1132_DPP-NEXT: ds_max_rtn_i64 v[7:8], v0, v[7:8]
; GFX1132_DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132_DPP-NEXT: buffer_gl0_inv
@@ -13407,8 +13403,8 @@ define amdgpu_kernel void @min_i64_varying(ptr addrspace(1) %out) {
; GFX1132_ITERATIVE-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1132_ITERATIVE-NEXT: s_ctz_i32_b32 s3, s2
; GFX1132_ITERATIVE-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(VALU_DEP_3)
-; GFX1132_ITERATIVE-NEXT: v_readlane_b32 s6, v2, s3
; GFX1132_ITERATIVE-NEXT: v_readlane_b32 s7, v3, s3
+; GFX1132_ITERATIVE-NEXT: v_readlane_b32 s6, v2, s3
; GFX1132_ITERATIVE-NEXT: v_writelane_b32 v1, s1, s3
; GFX1132_ITERATIVE-NEXT: v_writelane_b32 v0, s0, s3
; GFX1132_ITERATIVE-NEXT: v_cmp_lt_i64_e64 s8, s[0:1], s[6:7]
@@ -14015,19 +14011,19 @@ define amdgpu_kernel void @min_i64_varying(ptr addrspace(1) %out) {
; GFX1132_DPP-NEXT: v_mov_b32_dpp v4, v1 row_shr:1 row_mask:0xf bank_mask:0xf
; GFX1132_DPP-NEXT: v_readlane_b32 s6, v1, 15
; GFX1132_DPP-NEXT: s_mov_b32 exec_lo, s2
-; GFX1132_DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
+; GFX1132_DPP-NEXT: v_mbcnt_lo_u32_b32 v7, exec_lo, 0
+; GFX1132_DPP-NEXT: v_mov_b32_e32 v0, 0
; GFX1132_DPP-NEXT: s_or_saveexec_b32 s2, -1
; GFX1132_DPP-NEXT: v_writelane_b32 v5, s3, 16
; GFX1132_DPP-NEXT: v_writelane_b32 v4, s6, 16
; GFX1132_DPP-NEXT: s_mov_b32 exec_lo, s2
+; GFX1132_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v7
; GFX1132_DPP-NEXT: s_mov_b32 s2, -1
-; GFX1132_DPP-NEXT: s_mov_b32 s3, exec_lo
; GFX1132_DPP-NEXT: ; implicit-def: $vgpr7_vgpr8
-; GFX1132_DPP-NEXT: v_cmpx_eq_u32_e32 0, v0
+; GFX1132_DPP-NEXT: s_and_saveexec_b32 s3, vcc_lo
; GFX1132_DPP-NEXT: s_cbranch_execz .LBB26_2
; GFX1132_DPP-NEXT: ; %bb.1:
; GFX1132_DPP-NEXT: v_dual_mov_b32 v8, s1 :: v_dual_mov_b32 v7, s0
-; GFX1132_DPP-NEXT: v_mov_b32_e32 v0, 0
; GFX1132_DPP-NEXT: ds_min_rtn_i64 v[7:8], v0, v[7:8]
; GFX1132_DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132_DPP-NEXT: buffer_gl0_inv
@@ -14630,24 +14626,23 @@ define amdgpu_kernel void @umax_i32_varying(ptr addrspace(1) %out) {
; GFX1132_DPP-NEXT: v_mov_b32_dpp v3, v1 row_shr:1 row_mask:0xf bank_mask:0xf
; GFX1132_DPP-NEXT: s_mov_b32 exec_lo, s0
; GFX1132_DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
+; GFX1132_DPP-NEXT: v_mov_b32_e32 v4, 0
; GFX1132_DPP-NEXT: s_or_saveexec_b32 s0, -1
; GFX1132_DPP-NEXT: v_writelane_b32 v3, s1, 16
; GFX1132_DPP-NEXT: s_mov_b32 exec_lo, s0
-; GFX1132_DPP-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX1132_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX1132_DPP-NEXT: s_mov_b32 s1, s2
+; GFX1132_DPP-NEXT: s_mov_b32 s0, s2
; GFX1132_DPP-NEXT: s_mov_b32 s2, -1
; GFX1132_DPP-NEXT: ; implicit-def: $vgpr0
-; GFX1132_DPP-NEXT: s_and_saveexec_b32 s0, vcc_lo
+; GFX1132_DPP-NEXT: s_and_saveexec_b32 s1, vcc_lo
; GFX1132_DPP-NEXT: s_cbranch_execz .LBB27_2
; GFX1132_DPP-NEXT: ; %bb.1:
-; GFX1132_DPP-NEXT: v_mov_b32_e32 v0, s1
-; GFX1132_DPP-NEXT: v_mov_b32_e32 v4, 0
+; GFX1132_DPP-NEXT: v_mov_b32_e32 v0, s0
; GFX1132_DPP-NEXT: ds_max_rtn_u32 v0, v4, v0
; GFX1132_DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132_DPP-NEXT: buffer_gl0_inv
; GFX1132_DPP-NEXT: .LBB27_2:
-; GFX1132_DPP-NEXT: s_or_b32 exec_lo, exec_lo, s0
+; GFX1132_DPP-NEXT: s_or_b32 exec_lo, exec_lo, s1
; GFX1132_DPP-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX1132_DPP-NEXT: v_readfirstlane_b32 s3, v0
; GFX1132_DPP-NEXT: v_mov_b32_e32 v0, v3
@@ -15231,8 +15226,8 @@ define amdgpu_kernel void @umax_i64_varying(ptr addrspace(1) %out) {
; GFX1132_ITERATIVE-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1132_ITERATIVE-NEXT: s_ctz_i32_b32 s3, s2
; GFX1132_ITERATIVE-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
-; GFX1132_ITERATIVE-NEXT: v_readlane_b32 s6, v2, s3
; GFX1132_ITERATIVE-NEXT: v_readlane_b32 s7, v3, s3
+; GFX1132_ITERATIVE-NEXT: v_readlane_b32 s6, v2, s3
; GFX1132_ITERATIVE-NEXT: v_writelane_b32 v1, s1, s3
; GFX1132_ITERATIVE-NEXT: v_writelane_b32 v0, s0, s3
; GFX1132_ITERATIVE-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
@@ -15835,19 +15830,19 @@ define amdgpu_kernel void @umax_i64_varying(ptr addrspace(1) %out) {
; GFX1132_DPP-NEXT: v_mov_b32_dpp v4, v1 row_shr:1 row_mask:0xf bank_mask:0xf
; GFX1132_DPP-NEXT: v_readlane_b32 s6, v1, 15
; GFX1132_DPP-NEXT: s_mov_b32 exec_lo, s2
-; GFX1132_DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
+; GFX1132_DPP-NEXT: v_mbcnt_lo_u32_b32 v7, exec_lo, 0
+; GFX1132_DPP-NEXT: v_mov_b32_e32 v0, 0
; GFX1132_DPP-NEXT: s_or_saveexec_b32 s2, -1
; GFX1132_DPP-NEXT: v_writelane_b32 v5, s3, 16
; GFX1132_DPP-NEXT: v_writelane_b32 v4, s6, 16
; GFX1132_DPP-NEXT: s_mov_b32 exec_lo, s2
+; GFX1132_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v7
; GFX1132_DPP-NEXT: s_mov_b32 s2, -1
-; GFX1132_DPP-NEXT: s_mov_b32 s3, exec_lo
; GFX1132_DPP-NEXT: ; implicit-def: $vgpr7_vgpr8
-; GFX1132_DPP-NEXT: v_cmpx_eq_u32_e32 0, v0
+; GFX1132_DPP-NEXT: s_and_saveexec_b32 s3, vcc_lo
; GFX1132_DPP-NEXT: s_cbranch_execz .LBB29_2
; GFX1132_DPP-NEXT: ; %bb.1:
; GFX1132_DPP-NEXT: v_dual_mov_b32 v8, s1 :: v_dual_mov_b32 v7, s0
-; GFX1132_DPP-NEXT: v_mov_b32_e32 v0, 0
; GFX1132_DPP-NEXT: ds_max_rtn_u64 v[7:8], v0, v[7:8]
; GFX1132_DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132_DPP-NEXT: buffer_gl0_inv
@@ -17051,8 +17046,8 @@ define amdgpu_kernel void @umin_i64_varying(ptr addrspace(1) %out) {
; GFX1132_ITERATIVE-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1132_ITERATIVE-NEXT: s_ctz_i32_b32 s3, s2
; GFX1132_ITERATIVE-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
-; GFX1132_ITERATIVE-NEXT: v_readlane_b32 s6, v2, s3
; GFX1132_ITERATIVE-NEXT: v_readlane_b32 s7, v3, s3
+; GFX1132_ITERATIVE-NEXT: v_readlane_b32 s6, v2, s3
; GFX1132_ITERATIVE-NEXT: v_writelane_b32 v1, s1, s3
; GFX1132_ITERATIVE-NEXT: v_writelane_b32 v0, s0, s3
; GFX1132_ITERATIVE-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
@@ -17653,19 +17648,19 @@ define amdgpu_kernel void @umin_i64_varying(ptr addrspace(1) %out) {
; GFX1132_DPP-NEXT: v_mov_b32_dpp v4, v1 row_shr:1 row_mask:0xf bank_mask:0xf
; GFX1132_DPP-NEXT: v_readlane_b32 s6, v1, 15
; GFX1132_DPP-NEXT: s_mov_b32 exec_lo, s2
-; GFX1132_DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
+; GFX1132_DPP-NEXT: v_mbcnt_lo_u32_b32 v7, exec_lo, 0
+; GFX1132_DPP-NEXT: v_mov_b32_e32 v0, 0
; GFX1132_DPP-NEXT: s_or_saveexec_b32 s2, -1
; GFX1132_DPP-NEXT: v_writelane_b32 v5, s3, 16
; GFX1132_DPP-NEXT: v_writelane_b32 v4, s6, 16
; GFX1132_DPP-NEXT: s_mov_b32 exec_lo, s2
+; GFX1132_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v7
; GFX1132_DPP-NEXT: s_mov_b32 s2, -1
-; GFX1132_DPP-NEXT: s_mov_b32 s3, exec_lo
; GFX1132_DPP-NEXT: ; implicit-def: $vgpr7_vgpr8
-; GFX1132_DPP-NEXT: v_cmpx_eq_u32_e32 0, v0
+; GFX1132_DPP-NEXT: s_and_saveexec_b32 s3, vcc_lo
; GFX1132_DPP-NEXT: s_cbranch_execz .LBB32_2
; GFX1132_DPP-NEXT: ; %bb.1:
; GFX1132_DPP-NEXT: v_dual_mov_b32 v8, s1 :: v_dual_mov_b32 v7, s0
-; GFX1132_DPP-NEXT: v_mov_b32_e32 v0, 0
; GFX1132_DPP-NEXT: ds_min_rtn_u64 v[7:8], v0, v[7:8]
; GFX1132_DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132_DPP-NEXT: buffer_gl0_inv
diff --git a/llvm/test/CodeGen/AMDGPU/dynamic_stackalloc.ll b/llvm/test/CodeGen/AMDGPU/dynamic_stackalloc.ll
index 7c0ae8e726ad4..d61c4b46596c0 100644
--- a/llvm/test/CodeGen/AMDGPU/dynamic_stackalloc.ll
+++ b/llvm/test/CodeGen/AMDGPU/dynamic_stackalloc.ll
@@ -2084,14 +2084,15 @@ define void @test_dynamic_stackalloc_device_multiple_allocas(i32 %n, i32 %m) {
; GFX11-SDAG-NEXT: v_readfirstlane_b32 s32, v1
; GFX11-SDAG-NEXT: .LBB14_6: ; %bb.1
; GFX11-SDAG-NEXT: s_or_b32 exec_lo, exec_lo, s1
-; GFX11-SDAG-NEXT: v_lshl_add_u32 v0, v0, 2, 15
+; GFX11-SDAG-NEXT: v_lshl_add_u32 v1, v0, 2, 15
+; GFX11-SDAG-NEXT: v_mov_b32_e32 v0, 2
; GFX11-SDAG-NEXT: s_mov_b32 s1, exec_lo
-; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SDAG-NEXT: v_and_b32_e32 v0, -16, v0
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX11-SDAG-NEXT: v_and_b32_e32 v1, -16, v1
; GFX11-SDAG-NEXT: .LBB14_7: ; =>This Inner Loop Header: Depth=1
; GFX11-SDAG-NEXT: s_ctz_i32_b32 s2, s1
; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
-; GFX11-SDAG-NEXT: v_readlane_b32 s3, v0, s2
+; GFX11-SDAG-NEXT: v_readlane_b32 s3, v1, s2
; GFX11-SDAG-NEXT: s_bitset0_b32 s1, s2
; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-SDAG-NEXT: s_max_u32 s0, s0, s3
@@ -2099,16 +2100,16 @@ define void @test_dynamic_stackalloc_device_multiple_allocas(i32 %n, i32 %m) {
; GFX11-SDAG-NEXT: s_cbranch_scc1 .LBB14_7
; GFX11-SDAG-NEXT: ; %bb.8:
; GFX11-SDAG-NEXT: s_mov_b32 s1, s32
-; GFX11-SDAG-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_mov_b32 v2, 2
-; GFX11-SDAG-NEXT: v_lshl_add_u32 v0, s0, 5, s1
-; GFX11-SDAG-NEXT: scratch_store_b32 off, v1, s33 dlc
+; GFX11-SDAG-NEXT: v_mov_b32_e32 v2, 1
+; GFX11-SDAG-NEXT: v_lshl_add_u32 v1, s0, 5, s1
+; GFX11-SDAG-NEXT: scratch_store_b32 off, v2, s33 dlc
; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX11-SDAG-NEXT: scratch_store_b32 off, v2, s1 dlc
+; GFX11-SDAG-NEXT: scratch_store_b32 off, v0, s1 dlc
; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX11-SDAG-NEXT: s_mov_b32 s33, s7
-; GFX11-SDAG-NEXT: v_readfirstlane_b32 s32, v0
+; GFX11-SDAG-NEXT: v_readfirstlane_b32 s32, v1
; GFX11-SDAG-NEXT: s_mov_b32 s32, s34
; GFX11-SDAG-NEXT: s_mov_b32 s34, s8
+; GFX11-SDAG-NEXT: s_mov_b32 s33, s7
; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-GISEL-LABEL: test_dynamic_stackalloc_device_multiple_allocas:
@@ -2363,14 +2364,15 @@ define void @test_dynamic_stackalloc_device_control_flow(i32 %n, i32 %m) {
; GFX11-SDAG-NEXT: s_xor_b32 s0, exec_lo, s0
; GFX11-SDAG-NEXT: s_cbranch_execz .LBB15_4
; GFX11-SDAG-NEXT: ; %bb.1: ; %bb.1
-; GFX11-SDAG-NEXT: v_lshl_add_u32 v0, v1, 2, 15
+; GFX11-SDAG-NEXT: v_lshl_add_u32 v1, v1, 2, 15
+; GFX11-SDAG-NEXT: v_mov_b32_e32 v0, 2
; GFX11-SDAG-NEXT: s_mov_b32 s2, exec_lo
-; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SDAG-NEXT: v_and_b32_e32 v0, -16, v0
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX11-SDAG-NEXT: v_and_b32_e32 v1, -16, v1
; GFX11-SDAG-NEXT: .LBB15_2: ; =>This Inner Loop Header: Depth=1
; GFX11-SDAG-NEXT: s_ctz_i32_b32 s3, s2
; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
-; GFX11-SDAG-NEXT: v_readlane_b32 s4, v0, s3
+; GFX11-SDAG-NEXT: v_readlane_b32 s4, v1, s3
; GFX11-SDAG-NEXT: s_bitset0_b32 s2, s3
; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-SDAG-NEXT: s_max_u32 s1, s1, s4
@@ -2378,14 +2380,13 @@ define void @test_dynamic_stackalloc_device_control_flow(i32 %n, i32 %m) {
; GFX11-SDAG-NEXT: s_cbranch_scc1 .LBB15_2
; GFX11-SDAG-NEXT: ; %bb.3:
; GFX11-SDAG-NEXT: s_add_i32 s2, s32, 0x7ff
-; GFX11-SDAG-NEXT: v_mov_b32_e32 v1, 2
-; GFX11-SDAG-NEXT: s_and_b32 s2, s2, 0xfffff800
; GFX11-SDAG-NEXT: ; implicit-def: $vgpr31
-; GFX11-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
-; GFX11-SDAG-NEXT: v_lshl_add_u32 v0, s1, 5, s2
-; GFX11-SDAG-NEXT: scratch_store_b32 off, v1, s2 dlc
+; GFX11-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX11-SDAG-NEXT: s_and_b32 s2, s2, 0xfffff800
+; GFX11-SDAG-NEXT: v_lshl_add_u32 v1, s1, 5, s2
+; GFX11-SDAG-NEXT: scratch_store_b32 off, v0, s2 dlc
; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX11-SDAG-NEXT: v_readfirstlane_b32 s32, v0
+; GFX11-SDAG-NEXT: v_readfirstlane_b32 s32, v1
; GFX11-SDAG-NEXT: .LBB15_4: ; %Flow
; GFX11-SDAG-NEXT: s_and_not1_saveexec_b32 s0, s0
; GFX11-SDAG-NEXT: s_cbranch_execz .LBB15_8
diff --git a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fadd.ll b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fadd.ll
index 2a17e5733f597..15be44a335a1d 100644
--- a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fadd.ll
+++ b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fadd.ll
@@ -2585,16 +2585,17 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_uni_value_agent_scope_
; GFX1132-NEXT: s_waitcnt vmcnt(0)
; GFX1132-NEXT: v_add_f64 v[0:1], 0xc3300000, v[0:1]
; GFX1132-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1132-NEXT: v_mov_b32_e32 v3, 0
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-NEXT: s_load_b32 s3, s[0:1], 0x0
-; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX1132-NEXT: v_cvt_f32_f64_e32 v0, v[0:1]
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-NEXT: v_dual_mov_b32 v1, s3 :: v_dual_mul_f32 v2, 4.0, v0
; GFX1132-NEXT: .LBB4_2: ; %atomicrmw.start
; GFX1132-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1132-NEXT: v_dual_add_f32 v0, v1, v2 :: v_dual_mov_b32 v3, 0
+; GFX1132-NEXT: v_add_f32_e32 v0, v1, v2
; GFX1132-NEXT: global_atomic_cmpswap_b32 v0, v3, v[0:1], s[0:1] glc
; GFX1132-NEXT: s_waitcnt vmcnt(0)
; GFX1132-NEXT: v_cmp_eq_u32_e32 vcc_lo, v0, v1
@@ -2834,16 +2835,17 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_uni_value_agent_scope_
; GFX1132-DPP-NEXT: s_waitcnt vmcnt(0)
; GFX1132-DPP-NEXT: v_add_f64 v[0:1], 0xc3300000, v[0:1]
; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1132-DPP-NEXT: v_mov_b32_e32 v3, 0
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-DPP-NEXT: s_load_b32 s3, s[0:1], 0x0
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX1132-DPP-NEXT: v_cvt_f32_f64_e32 v0, v[0:1]
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-DPP-NEXT: v_dual_mov_b32 v1, s3 :: v_dual_mul_f32 v2, 4.0, v0
; GFX1132-DPP-NEXT: .LBB4_2: ; %atomicrmw.start
; GFX1132-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1132-DPP-NEXT: v_dual_add_f32 v0, v1, v2 :: v_dual_mov_b32 v3, 0
+; GFX1132-DPP-NEXT: v_add_f32_e32 v0, v1, v2
; GFX1132-DPP-NEXT: global_atomic_cmpswap_b32 v0, v3, v[0:1], s[0:1] glc
; GFX1132-DPP-NEXT: s_waitcnt vmcnt(0)
; GFX1132-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, v0, v1
@@ -4639,16 +4641,17 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_uni_value_default_scop
; GFX1132-NEXT: s_waitcnt vmcnt(0)
; GFX1132-NEXT: v_add_f64 v[0:1], 0xc3300000, v[0:1]
; GFX1132-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1132-NEXT: v_mov_b32_e32 v3, 0
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-NEXT: s_load_b32 s3, s[0:1], 0x0
-; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX1132-NEXT: v_cvt_f32_f64_e32 v0, v[0:1]
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-NEXT: v_dual_mov_b32 v1, s3 :: v_dual_mul_f32 v2, 4.0, v0
; GFX1132-NEXT: .LBB7_2: ; %atomicrmw.start
; GFX1132-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1132-NEXT: v_dual_add_f32 v0, v1, v2 :: v_dual_mov_b32 v3, 0
+; GFX1132-NEXT: v_add_f32_e32 v0, v1, v2
; GFX1132-NEXT: global_atomic_cmpswap_b32 v0, v3, v[0:1], s[0:1] glc
; GFX1132-NEXT: s_waitcnt vmcnt(0)
; GFX1132-NEXT: v_cmp_eq_u32_e32 vcc_lo, v0, v1
@@ -4888,16 +4891,17 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_uni_value_default_scop
; GFX1132-DPP-NEXT: s_waitcnt vmcnt(0)
; GFX1132-DPP-NEXT: v_add_f64 v[0:1], 0xc3300000, v[0:1]
; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1132-DPP-NEXT: v_mov_b32_e32 v3, 0
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-DPP-NEXT: s_load_b32 s3, s[0:1], 0x0
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX1132-DPP-NEXT: v_cvt_f32_f64_e32 v0, v[0:1]
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-DPP-NEXT: v_dual_mov_b32 v1, s3 :: v_dual_mul_f32 v2, 4.0, v0
; GFX1132-DPP-NEXT: .LBB7_2: ; %atomicrmw.start
; GFX1132-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1132-DPP-NEXT: v_dual_add_f32 v0, v1, v2 :: v_dual_mov_b32 v3, 0
+; GFX1132-DPP-NEXT: v_add_f32_e32 v0, v1, v2
; GFX1132-DPP-NEXT: global_atomic_cmpswap_b32 v0, v3, v[0:1], s[0:1] glc
; GFX1132-DPP-NEXT: s_waitcnt vmcnt(0)
; GFX1132-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, v0, v1
@@ -7928,17 +7932,16 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX1132-DPP-NEXT: v_add_f64 v[8:9], v[8:9], v[10:11]
; GFX1132-DPP-NEXT: s_mov_b32 exec_lo, s0
-; GFX1132-DPP-NEXT: v_mov_b32_e32 v41, v8
-; GFX1132-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
+; GFX1132-DPP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v41, v8
+; GFX1132-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0
; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_2)
; GFX1132-DPP-NEXT: v_mov_b32_e32 v42, v9
; GFX1132-DPP-NEXT: s_mov_b32 s46, 0
; GFX1132-DPP-NEXT: s_mov_b32 s0, exec_lo
-; GFX1132-DPP-NEXT: v_cmpx_eq_u32_e32 0, v0
+; GFX1132-DPP-NEXT: v_cmpx_eq_u32_e32 0, v1
; GFX1132-DPP-NEXT: s_cbranch_execz .LBB10_3
; GFX1132-DPP-NEXT: ; %bb.1:
; GFX1132-DPP-NEXT: s_load_b64 s[44:45], s[34:35], 0x24
-; GFX1132-DPP-NEXT: v_mov_b32_e32 v0, 0
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-DPP-NEXT: global_load_b64 v[1:2], v0, s[44:45]
; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x1
@@ -8219,9 +8222,10 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_one_a
; GFX1132-NEXT: s_waitcnt vmcnt(0)
; GFX1132-NEXT: v_add_f64 v[0:1], 0xc3300000, v[0:1]
; GFX1132-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1132-NEXT: v_mov_b32_e32 v6, 0
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-NEXT: s_load_b64 s[4:5], s[0:1], 0x0
-; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX1132-NEXT: v_mul_f64 v[4:5], 4.0, v[0:1]
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-NEXT: v_dual_mov_b32 v2, s4 :: v_dual_mov_b32 v3, s5
@@ -8229,7 +8233,6 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_one_a
; GFX1132-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1132-NEXT: v_add_f64 v[0:1], v[2:3], v[4:5]
-; GFX1132-NEXT: v_mov_b32_e32 v6, 0
; GFX1132-NEXT: global_atomic_cmpswap_b64 v[0:1], v6, v[0:3], s[0:1] glc
; GFX1132-NEXT: s_waitcnt vmcnt(0)
; GFX1132-NEXT: v_cmp_eq_u64_e32 vcc_lo, v[0:1], v[2:3]
@@ -8476,9 +8479,10 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_one_a
; GFX1132-DPP-NEXT: s_waitcnt vmcnt(0)
; GFX1132-DPP-NEXT: v_add_f64 v[0:1], 0xc3300000, v[0:1]
; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1132-DPP-NEXT: v_mov_b32_e32 v6, 0
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-DPP-NEXT: s_load_b64 s[4:5], s[0:1], 0x0
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX1132-DPP-NEXT: v_mul_f64 v[4:5], 4.0, v[0:1]
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-DPP-NEXT: v_dual_mov_b32 v2, s4 :: v_dual_mov_b32 v3, s5
@@ -8486,7 +8490,6 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_one_a
; GFX1132-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1132-DPP-NEXT: v_add_f64 v[0:1], v[2:3], v[4:5]
-; GFX1132-DPP-NEXT: v_mov_b32_e32 v6, 0
; GFX1132-DPP-NEXT: global_atomic_cmpswap_b64 v[0:1], v6, v[0:3], s[0:1] glc
; GFX1132-DPP-NEXT: s_waitcnt vmcnt(0)
; GFX1132-DPP-NEXT: v_cmp_eq_u64_e32 vcc_lo, v[0:1], v[2:3]
@@ -9652,9 +9655,10 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent
; GFX1132-NEXT: s_waitcnt vmcnt(0)
; GFX1132-NEXT: v_add_f64 v[0:1], 0xc3300000, v[0:1]
; GFX1132-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1132-NEXT: v_mov_b32_e32 v6, 0
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-NEXT: s_load_b64 s[4:5], s[0:1], 0x0
-; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX1132-NEXT: v_mul_f64 v[4:5], 4.0, v[0:1]
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-NEXT: v_dual_mov_b32 v2, s4 :: v_dual_mov_b32 v3, s5
@@ -9662,7 +9666,6 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent
; GFX1132-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1132-NEXT: v_add_f64 v[0:1], v[2:3], v[4:5]
-; GFX1132-NEXT: v_mov_b32_e32 v6, 0
; GFX1132-NEXT: global_atomic_cmpswap_b64 v[0:1], v6, v[0:3], s[0:1] glc
; GFX1132-NEXT: s_waitcnt vmcnt(0)
; GFX1132-NEXT: v_cmp_eq_u64_e32 vcc_lo, v[0:1], v[2:3]
@@ -9909,9 +9912,10 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent
; GFX1132-DPP-NEXT: s_waitcnt vmcnt(0)
; GFX1132-DPP-NEXT: v_add_f64 v[0:1], 0xc3300000, v[0:1]
; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1132-DPP-NEXT: v_mov_b32_e32 v6, 0
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-DPP-NEXT: s_load_b64 s[4:5], s[0:1], 0x0
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX1132-DPP-NEXT: v_mul_f64 v[4:5], 4.0, v[0:1]
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-DPP-NEXT: v_dual_mov_b32 v2, s4 :: v_dual_mov_b32 v3, s5
@@ -9919,7 +9923,6 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent
; GFX1132-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1132-DPP-NEXT: v_add_f64 v[0:1], v[2:3], v[4:5]
-; GFX1132-DPP-NEXT: v_mov_b32_e32 v6, 0
; GFX1132-DPP-NEXT: global_atomic_cmpswap_b64 v[0:1], v6, v[0:3], s[0:1] glc
; GFX1132-DPP-NEXT: s_waitcnt vmcnt(0)
; GFX1132-DPP-NEXT: v_cmp_eq_u64_e32 vcc_lo, v[0:1], v[2:3]
@@ -13985,17 +13988,16 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau
; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX1132-DPP-NEXT: v_add_f64 v[8:9], v[8:9], v[10:11]
; GFX1132-DPP-NEXT: s_mov_b32 exec_lo, s0
-; GFX1132-DPP-NEXT: v_mov_b32_e32 v41, v8
-; GFX1132-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
+; GFX1132-DPP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v41, v8
+; GFX1132-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0
; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_2)
; GFX1132-DPP-NEXT: v_mov_b32_e32 v42, v9
; GFX1132-DPP-NEXT: s_mov_b32 s46, 0
; GFX1132-DPP-NEXT: s_mov_b32 s0, exec_lo
-; GFX1132-DPP-NEXT: v_cmpx_eq_u32_e32 0, v0
+; GFX1132-DPP-NEXT: v_cmpx_eq_u32_e32 0, v1
; GFX1132-DPP-NEXT: s_cbranch_execz .LBB17_3
; GFX1132-DPP-NEXT: ; %bb.1:
; GFX1132-DPP-NEXT: s_load_b64 s[44:45], s[34:35], 0x24
-; GFX1132-DPP-NEXT: v_mov_b32_e32 v0, 0
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-DPP-NEXT: global_load_b64 v[1:2], v0, s[44:45]
; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x1
diff --git a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmax.ll b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmax.ll
index 2892641017296..a4410bb9ed2d0 100644
--- a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmax.ll
+++ b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmax.ll
@@ -5353,17 +5353,16 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
; GFX1132-DPP-NEXT: v_max_f64 v[8:9], v[8:9], v[10:11]
; GFX1132-DPP-NEXT: s_mov_b32 exec_lo, s0
; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_3)
-; GFX1132-DPP-NEXT: v_mov_b32_e32 v3, v8
-; GFX1132-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
+; GFX1132-DPP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v3, v8
+; GFX1132-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0
; GFX1132-DPP-NEXT: v_mov_b32_e32 v4, v9
; GFX1132-DPP-NEXT: s_mov_b32 s46, 0
; GFX1132-DPP-NEXT: s_mov_b32 s0, exec_lo
; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX1132-DPP-NEXT: v_cmpx_eq_u32_e32 0, v0
+; GFX1132-DPP-NEXT: v_cmpx_eq_u32_e32 0, v1
; GFX1132-DPP-NEXT: s_cbranch_execz .LBB7_3
; GFX1132-DPP-NEXT: ; %bb.1:
; GFX1132-DPP-NEXT: s_load_b64 s[44:45], s[34:35], 0x24
-; GFX1132-DPP-NEXT: v_mov_b32_e32 v0, 0
; GFX1132-DPP-NEXT: v_max_f64 v[41:42], v[3:4], v[3:4]
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-DPP-NEXT: global_load_b64 v[1:2], v0, s[44:45]
@@ -5557,15 +5556,15 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_one_a
; GFX1132-NEXT: s_cbranch_execz .LBB8_3
; GFX1132-NEXT: ; %bb.1:
; GFX1132-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1132-NEXT: v_mov_b32_e32 v4, 0
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-NEXT: s_load_b64 s[4:5], s[0:1], 0x0
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-NEXT: v_dual_mov_b32 v2, s4 :: v_dual_mov_b32 v3, s5
; GFX1132-NEXT: .LBB8_2: ; %atomicrmw.start
; GFX1132-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1132-NEXT: v_max_f64 v[0:1], v[2:3], v[2:3]
-; GFX1132-NEXT: v_mov_b32_e32 v4, 0
; GFX1132-NEXT: v_max_f64 v[0:1], v[0:1], 4.0
; GFX1132-NEXT: global_atomic_cmpswap_b64 v[0:1], v4, v[0:3], s[0:1] glc
; GFX1132-NEXT: s_waitcnt vmcnt(0)
@@ -5723,15 +5722,15 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_one_a
; GFX1132-DPP-NEXT: s_cbranch_execz .LBB8_3
; GFX1132-DPP-NEXT: ; %bb.1:
; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1132-DPP-NEXT: v_mov_b32_e32 v4, 0
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-DPP-NEXT: s_load_b64 s[4:5], s[0:1], 0x0
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-DPP-NEXT: v_dual_mov_b32 v2, s4 :: v_dual_mov_b32 v3, s5
; GFX1132-DPP-NEXT: .LBB8_2: ; %atomicrmw.start
; GFX1132-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1132-DPP-NEXT: v_max_f64 v[0:1], v[2:3], v[2:3]
-; GFX1132-DPP-NEXT: v_mov_b32_e32 v4, 0
; GFX1132-DPP-NEXT: v_max_f64 v[0:1], v[0:1], 4.0
; GFX1132-DPP-NEXT: global_atomic_cmpswap_b64 v[0:1], v4, v[0:3], s[0:1] glc
; GFX1132-DPP-NEXT: s_waitcnt vmcnt(0)
@@ -8902,17 +8901,16 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
; GFX1132-DPP-NEXT: v_max_f64 v[8:9], v[8:9], v[10:11]
; GFX1132-DPP-NEXT: s_mov_b32 exec_lo, s0
; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_3)
-; GFX1132-DPP-NEXT: v_mov_b32_e32 v3, v8
-; GFX1132-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
+; GFX1132-DPP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v3, v8
+; GFX1132-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0
; GFX1132-DPP-NEXT: v_mov_b32_e32 v4, v9
; GFX1132-DPP-NEXT: s_mov_b32 s46, 0
; GFX1132-DPP-NEXT: s_mov_b32 s0, exec_lo
; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX1132-DPP-NEXT: v_cmpx_eq_u32_e32 0, v0
+; GFX1132-DPP-NEXT: v_cmpx_eq_u32_e32 0, v1
; GFX1132-DPP-NEXT: s_cbranch_execz .LBB11_3
; GFX1132-DPP-NEXT: ; %bb.1:
; GFX1132-DPP-NEXT: s_load_b64 s[44:45], s[34:35], 0x24
-; GFX1132-DPP-NEXT: v_mov_b32_e32 v0, 0
; GFX1132-DPP-NEXT: v_max_f64 v[41:42], v[3:4], v[3:4]
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-DPP-NEXT: global_load_b64 v[1:2], v0, s[44:45]
diff --git a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmin.ll b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmin.ll
index 40cca6bfc63cb..68d7dcc60506c 100644
--- a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmin.ll
+++ b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmin.ll
@@ -5353,17 +5353,16 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
; GFX1132-DPP-NEXT: v_min_f64 v[8:9], v[8:9], v[10:11]
; GFX1132-DPP-NEXT: s_mov_b32 exec_lo, s0
; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_3)
-; GFX1132-DPP-NEXT: v_mov_b32_e32 v3, v8
-; GFX1132-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
+; GFX1132-DPP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v3, v8
+; GFX1132-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0
; GFX1132-DPP-NEXT: v_mov_b32_e32 v4, v9
; GFX1132-DPP-NEXT: s_mov_b32 s46, 0
; GFX1132-DPP-NEXT: s_mov_b32 s0, exec_lo
; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX1132-DPP-NEXT: v_cmpx_eq_u32_e32 0, v0
+; GFX1132-DPP-NEXT: v_cmpx_eq_u32_e32 0, v1
; GFX1132-DPP-NEXT: s_cbranch_execz .LBB7_3
; GFX1132-DPP-NEXT: ; %bb.1:
; GFX1132-DPP-NEXT: s_load_b64 s[44:45], s[34:35], 0x24
-; GFX1132-DPP-NEXT: v_mov_b32_e32 v0, 0
; GFX1132-DPP-NEXT: v_max_f64 v[41:42], v[3:4], v[3:4]
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-DPP-NEXT: global_load_b64 v[1:2], v0, s[44:45]
@@ -5557,15 +5556,15 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_one_a
; GFX1132-NEXT: s_cbranch_execz .LBB8_3
; GFX1132-NEXT: ; %bb.1:
; GFX1132-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1132-NEXT: v_mov_b32_e32 v4, 0
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-NEXT: s_load_b64 s[4:5], s[0:1], 0x0
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-NEXT: v_dual_mov_b32 v2, s4 :: v_dual_mov_b32 v3, s5
; GFX1132-NEXT: .LBB8_2: ; %atomicrmw.start
; GFX1132-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1132-NEXT: v_max_f64 v[0:1], v[2:3], v[2:3]
-; GFX1132-NEXT: v_mov_b32_e32 v4, 0
; GFX1132-NEXT: v_min_f64 v[0:1], v[0:1], 4.0
; GFX1132-NEXT: global_atomic_cmpswap_b64 v[0:1], v4, v[0:3], s[0:1] glc
; GFX1132-NEXT: s_waitcnt vmcnt(0)
@@ -5723,15 +5722,15 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_one_a
; GFX1132-DPP-NEXT: s_cbranch_execz .LBB8_3
; GFX1132-DPP-NEXT: ; %bb.1:
; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1132-DPP-NEXT: v_mov_b32_e32 v4, 0
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-DPP-NEXT: s_load_b64 s[4:5], s[0:1], 0x0
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-DPP-NEXT: v_dual_mov_b32 v2, s4 :: v_dual_mov_b32 v3, s5
; GFX1132-DPP-NEXT: .LBB8_2: ; %atomicrmw.start
; GFX1132-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1132-DPP-NEXT: v_max_f64 v[0:1], v[2:3], v[2:3]
-; GFX1132-DPP-NEXT: v_mov_b32_e32 v4, 0
; GFX1132-DPP-NEXT: v_min_f64 v[0:1], v[0:1], 4.0
; GFX1132-DPP-NEXT: global_atomic_cmpswap_b64 v[0:1], v4, v[0:3], s[0:1] glc
; GFX1132-DPP-NEXT: s_waitcnt vmcnt(0)
@@ -8902,17 +8901,16 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
; GFX1132-DPP-NEXT: v_min_f64 v[8:9], v[8:9], v[10:11]
; GFX1132-DPP-NEXT: s_mov_b32 exec_lo, s0
; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_3)
-; GFX1132-DPP-NEXT: v_mov_b32_e32 v3, v8
-; GFX1132-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
+; GFX1132-DPP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v3, v8
+; GFX1132-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0
; GFX1132-DPP-NEXT: v_mov_b32_e32 v4, v9
; GFX1132-DPP-NEXT: s_mov_b32 s46, 0
; GFX1132-DPP-NEXT: s_mov_b32 s0, exec_lo
; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX1132-DPP-NEXT: v_cmpx_eq_u32_e32 0, v0
+; GFX1132-DPP-NEXT: v_cmpx_eq_u32_e32 0, v1
; GFX1132-DPP-NEXT: s_cbranch_execz .LBB11_3
; GFX1132-DPP-NEXT: ; %bb.1:
; GFX1132-DPP-NEXT: s_load_b64 s[44:45], s[34:35], 0x24
-; GFX1132-DPP-NEXT: v_mov_b32_e32 v0, 0
; GFX1132-DPP-NEXT: v_max_f64 v[41:42], v[3:4], v[3:4]
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-DPP-NEXT: global_load_b64 v[1:2], v0, s[44:45]
diff --git a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fsub.ll b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fsub.ll
index e2d58b80e01f7..7126680525b87 100644
--- a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fsub.ll
+++ b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fsub.ll
@@ -195,16 +195,17 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_uni_value_agent_scope_
; GFX1132-NEXT: ; %bb.1:
; GFX1132-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX1132-NEXT: s_bcnt1_i32_b32 s3, s3
-; GFX1132-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
+; GFX1132-NEXT: v_mov_b32_e32 v3, 0
; GFX1132-NEXT: v_cvt_f32_ubyte0_e32 v0, s3
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-NEXT: s_load_b32 s4, s[0:1], 0x0
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
+; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1132-NEXT: v_dual_mul_f32 v2, 4.0, v0 :: v_dual_mov_b32 v1, s4
; GFX1132-NEXT: .LBB0_2: ; %atomicrmw.start
; GFX1132-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1132-NEXT: v_dual_sub_f32 v0, v1, v2 :: v_dual_mov_b32 v3, 0
+; GFX1132-NEXT: v_sub_f32_e32 v0, v1, v2
; GFX1132-NEXT: global_atomic_cmpswap_b32 v0, v3, v[0:1], s[0:1] glc
; GFX1132-NEXT: s_waitcnt vmcnt(0)
; GFX1132-NEXT: v_cmp_eq_u32_e32 vcc_lo, v0, v1
@@ -395,16 +396,17 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_uni_value_agent_scope_
; GFX1132-DPP-NEXT: ; %bb.1:
; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX1132-DPP-NEXT: s_bcnt1_i32_b32 s3, s3
-; GFX1132-DPP-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
+; GFX1132-DPP-NEXT: v_mov_b32_e32 v3, 0
; GFX1132-DPP-NEXT: v_cvt_f32_ubyte0_e32 v0, s3
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-DPP-NEXT: s_load_b32 s4, s[0:1], 0x0
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
+; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1132-DPP-NEXT: v_dual_mul_f32 v2, 4.0, v0 :: v_dual_mov_b32 v1, s4
; GFX1132-DPP-NEXT: .LBB0_2: ; %atomicrmw.start
; GFX1132-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1132-DPP-NEXT: v_dual_sub_f32 v0, v1, v2 :: v_dual_mov_b32 v3, 0
+; GFX1132-DPP-NEXT: v_sub_f32_e32 v0, v1, v2
; GFX1132-DPP-NEXT: global_atomic_cmpswap_b32 v0, v3, v[0:1], s[0:1] glc
; GFX1132-DPP-NEXT: s_waitcnt vmcnt(0)
; GFX1132-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, v0, v1
@@ -1475,16 +1477,17 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_uni_value_one_as_scope
; GFX1132-NEXT: s_waitcnt vmcnt(0)
; GFX1132-NEXT: v_add_f64 v[0:1], 0xc3300000, v[0:1]
; GFX1132-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1132-NEXT: v_mov_b32_e32 v3, 0
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-NEXT: s_load_b32 s3, s[0:1], 0x0
-; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX1132-NEXT: v_cvt_f32_f64_e32 v0, v[0:1]
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-NEXT: v_dual_mov_b32 v1, s3 :: v_dual_mul_f32 v2, 4.0, v0
; GFX1132-NEXT: .LBB2_2: ; %atomicrmw.start
; GFX1132-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1132-NEXT: v_dual_sub_f32 v0, v1, v2 :: v_dual_mov_b32 v3, 0
+; GFX1132-NEXT: v_sub_f32_e32 v0, v1, v2
; GFX1132-NEXT: global_atomic_cmpswap_b32 v0, v3, v[0:1], s[0:1] glc
; GFX1132-NEXT: s_waitcnt vmcnt(0)
; GFX1132-NEXT: v_cmp_eq_u32_e32 vcc_lo, v0, v1
@@ -1724,16 +1727,17 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_uni_value_one_as_scope
; GFX1132-DPP-NEXT: s_waitcnt vmcnt(0)
; GFX1132-DPP-NEXT: v_add_f64 v[0:1], 0xc3300000, v[0:1]
; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1132-DPP-NEXT: v_mov_b32_e32 v3, 0
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-DPP-NEXT: s_load_b32 s3, s[0:1], 0x0
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX1132-DPP-NEXT: v_cvt_f32_f64_e32 v0, v[0:1]
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-DPP-NEXT: v_dual_mov_b32 v1, s3 :: v_dual_mul_f32 v2, 4.0, v0
; GFX1132-DPP-NEXT: .LBB2_2: ; %atomicrmw.start
; GFX1132-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1132-DPP-NEXT: v_dual_sub_f32 v0, v1, v2 :: v_dual_mov_b32 v3, 0
+; GFX1132-DPP-NEXT: v_sub_f32_e32 v0, v1, v2
; GFX1132-DPP-NEXT: global_atomic_cmpswap_b32 v0, v3, v[0:1], s[0:1] glc
; GFX1132-DPP-NEXT: s_waitcnt vmcnt(0)
; GFX1132-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, v0, v1
@@ -2805,16 +2809,17 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_uni_value_agent_scope_
; GFX1132-NEXT: s_waitcnt vmcnt(0)
; GFX1132-NEXT: v_add_f64 v[0:1], 0xc3300000, v[0:1]
; GFX1132-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1132-NEXT: v_mov_b32_e32 v3, 0
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-NEXT: s_load_b32 s3, s[0:1], 0x0
-; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX1132-NEXT: v_cvt_f32_f64_e32 v0, v[0:1]
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-NEXT: v_dual_mov_b32 v1, s3 :: v_dual_mul_f32 v2, 4.0, v0
; GFX1132-NEXT: .LBB4_2: ; %atomicrmw.start
; GFX1132-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1132-NEXT: v_dual_sub_f32 v0, v1, v2 :: v_dual_mov_b32 v3, 0
+; GFX1132-NEXT: v_sub_f32_e32 v0, v1, v2
; GFX1132-NEXT: global_atomic_cmpswap_b32 v0, v3, v[0:1], s[0:1] glc
; GFX1132-NEXT: s_waitcnt vmcnt(0)
; GFX1132-NEXT: v_cmp_eq_u32_e32 vcc_lo, v0, v1
@@ -3054,16 +3059,17 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_uni_value_agent_scope_
; GFX1132-DPP-NEXT: s_waitcnt vmcnt(0)
; GFX1132-DPP-NEXT: v_add_f64 v[0:1], 0xc3300000, v[0:1]
; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1132-DPP-NEXT: v_mov_b32_e32 v3, 0
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-DPP-NEXT: s_load_b32 s3, s[0:1], 0x0
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX1132-DPP-NEXT: v_cvt_f32_f64_e32 v0, v[0:1]
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-DPP-NEXT: v_dual_mov_b32 v1, s3 :: v_dual_mul_f32 v2, 4.0, v0
; GFX1132-DPP-NEXT: .LBB4_2: ; %atomicrmw.start
; GFX1132-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1132-DPP-NEXT: v_dual_sub_f32 v0, v1, v2 :: v_dual_mov_b32 v3, 0
+; GFX1132-DPP-NEXT: v_sub_f32_e32 v0, v1, v2
; GFX1132-DPP-NEXT: global_atomic_cmpswap_b32 v0, v3, v[0:1], s[0:1] glc
; GFX1132-DPP-NEXT: s_waitcnt vmcnt(0)
; GFX1132-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, v0, v1
@@ -4963,16 +4969,17 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_uni_value_default_scop
; GFX1132-NEXT: s_waitcnt vmcnt(0)
; GFX1132-NEXT: v_add_f64 v[0:1], 0xc3300000, v[0:1]
; GFX1132-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1132-NEXT: v_mov_b32_e32 v3, 0
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-NEXT: s_load_b32 s3, s[0:1], 0x0
-; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX1132-NEXT: v_cvt_f32_f64_e32 v0, v[0:1]
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-NEXT: v_dual_mov_b32 v1, s3 :: v_dual_mul_f32 v2, 4.0, v0
; GFX1132-NEXT: .LBB7_2: ; %atomicrmw.start
; GFX1132-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1132-NEXT: v_dual_sub_f32 v0, v1, v2 :: v_dual_mov_b32 v3, 0
+; GFX1132-NEXT: v_sub_f32_e32 v0, v1, v2
; GFX1132-NEXT: global_atomic_cmpswap_b32 v0, v3, v[0:1], s[0:1] glc
; GFX1132-NEXT: s_waitcnt vmcnt(0)
; GFX1132-NEXT: v_cmp_eq_u32_e32 vcc_lo, v0, v1
@@ -5212,16 +5219,17 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_uni_value_default_scop
; GFX1132-DPP-NEXT: s_waitcnt vmcnt(0)
; GFX1132-DPP-NEXT: v_add_f64 v[0:1], 0xc3300000, v[0:1]
; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1132-DPP-NEXT: v_mov_b32_e32 v3, 0
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-DPP-NEXT: s_load_b32 s3, s[0:1], 0x0
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX1132-DPP-NEXT: v_cvt_f32_f64_e32 v0, v[0:1]
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-DPP-NEXT: v_dual_mov_b32 v1, s3 :: v_dual_mul_f32 v2, 4.0, v0
; GFX1132-DPP-NEXT: .LBB7_2: ; %atomicrmw.start
; GFX1132-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1132-DPP-NEXT: v_dual_sub_f32 v0, v1, v2 :: v_dual_mov_b32 v3, 0
+; GFX1132-DPP-NEXT: v_sub_f32_e32 v0, v1, v2
; GFX1132-DPP-NEXT: global_atomic_cmpswap_b32 v0, v3, v[0:1], s[0:1] glc
; GFX1132-DPP-NEXT: s_waitcnt vmcnt(0)
; GFX1132-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, v0, v1
@@ -8252,17 +8260,16 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX1132-DPP-NEXT: v_add_f64 v[8:9], v[8:9], v[10:11]
; GFX1132-DPP-NEXT: s_mov_b32 exec_lo, s0
-; GFX1132-DPP-NEXT: v_mov_b32_e32 v41, v8
-; GFX1132-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
+; GFX1132-DPP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v41, v8
+; GFX1132-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0
; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_2)
; GFX1132-DPP-NEXT: v_mov_b32_e32 v42, v9
; GFX1132-DPP-NEXT: s_mov_b32 s46, 0
; GFX1132-DPP-NEXT: s_mov_b32 s0, exec_lo
-; GFX1132-DPP-NEXT: v_cmpx_eq_u32_e32 0, v0
+; GFX1132-DPP-NEXT: v_cmpx_eq_u32_e32 0, v1
; GFX1132-DPP-NEXT: s_cbranch_execz .LBB10_3
; GFX1132-DPP-NEXT: ; %bb.1:
; GFX1132-DPP-NEXT: s_load_b64 s[44:45], s[34:35], 0x24
-; GFX1132-DPP-NEXT: v_mov_b32_e32 v0, 0
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-DPP-NEXT: global_load_b64 v[1:2], v0, s[44:45]
; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x1
@@ -8543,9 +8550,10 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_one_a
; GFX1132-NEXT: s_waitcnt vmcnt(0)
; GFX1132-NEXT: v_add_f64 v[0:1], 0xc3300000, v[0:1]
; GFX1132-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1132-NEXT: v_mov_b32_e32 v6, 0
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-NEXT: s_load_b64 s[4:5], s[0:1], 0x0
-; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX1132-NEXT: v_mul_f64 v[4:5], 4.0, v[0:1]
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-NEXT: v_dual_mov_b32 v2, s4 :: v_dual_mov_b32 v3, s5
@@ -8553,7 +8561,6 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_one_a
; GFX1132-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1132-NEXT: v_add_f64 v[0:1], v[2:3], -v[4:5]
-; GFX1132-NEXT: v_mov_b32_e32 v6, 0
; GFX1132-NEXT: global_atomic_cmpswap_b64 v[0:1], v6, v[0:3], s[0:1] glc
; GFX1132-NEXT: s_waitcnt vmcnt(0)
; GFX1132-NEXT: v_cmp_eq_u64_e32 vcc_lo, v[0:1], v[2:3]
@@ -8800,9 +8807,10 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_one_a
; GFX1132-DPP-NEXT: s_waitcnt vmcnt(0)
; GFX1132-DPP-NEXT: v_add_f64 v[0:1], 0xc3300000, v[0:1]
; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1132-DPP-NEXT: v_mov_b32_e32 v6, 0
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-DPP-NEXT: s_load_b64 s[4:5], s[0:1], 0x0
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX1132-DPP-NEXT: v_mul_f64 v[4:5], 4.0, v[0:1]
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-DPP-NEXT: v_dual_mov_b32 v2, s4 :: v_dual_mov_b32 v3, s5
@@ -8810,7 +8818,6 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_one_a
; GFX1132-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1132-DPP-NEXT: v_add_f64 v[0:1], v[2:3], -v[4:5]
-; GFX1132-DPP-NEXT: v_mov_b32_e32 v6, 0
; GFX1132-DPP-NEXT: global_atomic_cmpswap_b64 v[0:1], v6, v[0:3], s[0:1] glc
; GFX1132-DPP-NEXT: s_waitcnt vmcnt(0)
; GFX1132-DPP-NEXT: v_cmp_eq_u64_e32 vcc_lo, v[0:1], v[2:3]
@@ -9975,9 +9982,10 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent
; GFX1132-NEXT: s_waitcnt vmcnt(0)
; GFX1132-NEXT: v_add_f64 v[0:1], 0xc3300000, v[0:1]
; GFX1132-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1132-NEXT: v_mov_b32_e32 v6, 0
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-NEXT: s_load_b64 s[4:5], s[0:1], 0x0
-; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX1132-NEXT: v_mul_f64 v[4:5], 4.0, v[0:1]
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-NEXT: v_dual_mov_b32 v2, s4 :: v_dual_mov_b32 v3, s5
@@ -9985,7 +9993,6 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent
; GFX1132-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1132-NEXT: v_add_f64 v[0:1], v[2:3], -v[4:5]
-; GFX1132-NEXT: v_mov_b32_e32 v6, 0
; GFX1132-NEXT: global_atomic_cmpswap_b64 v[0:1], v6, v[0:3], s[0:1] glc
; GFX1132-NEXT: s_waitcnt vmcnt(0)
; GFX1132-NEXT: v_cmp_eq_u64_e32 vcc_lo, v[0:1], v[2:3]
@@ -10232,9 +10239,10 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent
; GFX1132-DPP-NEXT: s_waitcnt vmcnt(0)
; GFX1132-DPP-NEXT: v_add_f64 v[0:1], 0xc3300000, v[0:1]
; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1132-DPP-NEXT: v_mov_b32_e32 v6, 0
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-DPP-NEXT: s_load_b64 s[4:5], s[0:1], 0x0
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX1132-DPP-NEXT: v_mul_f64 v[4:5], 4.0, v[0:1]
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-DPP-NEXT: v_dual_mov_b32 v2, s4 :: v_dual_mov_b32 v3, s5
@@ -10242,7 +10250,6 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent
; GFX1132-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1132-DPP-NEXT: v_add_f64 v[0:1], v[2:3], -v[4:5]
-; GFX1132-DPP-NEXT: v_mov_b32_e32 v6, 0
; GFX1132-DPP-NEXT: global_atomic_cmpswap_b64 v[0:1], v6, v[0:3], s[0:1] glc
; GFX1132-DPP-NEXT: s_waitcnt vmcnt(0)
; GFX1132-DPP-NEXT: v_cmp_eq_u64_e32 vcc_lo, v[0:1], v[2:3]
@@ -14307,17 +14314,16 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau
; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX1132-DPP-NEXT: v_add_f64 v[8:9], v[8:9], v[10:11]
; GFX1132-DPP-NEXT: s_mov_b32 exec_lo, s0
-; GFX1132-DPP-NEXT: v_mov_b32_e32 v41, v8
-; GFX1132-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
+; GFX1132-DPP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v41, v8
+; GFX1132-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0
; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_2)
; GFX1132-DPP-NEXT: v_mov_b32_e32 v42, v9
; GFX1132-DPP-NEXT: s_mov_b32 s46, 0
; GFX1132-DPP-NEXT: s_mov_b32 s0, exec_lo
-; GFX1132-DPP-NEXT: v_cmpx_eq_u32_e32 0, v0
+; GFX1132-DPP-NEXT: v_cmpx_eq_u32_e32 0, v1
; GFX1132-DPP-NEXT: s_cbranch_execz .LBB17_3
; GFX1132-DPP-NEXT: ; %bb.1:
; GFX1132-DPP-NEXT: s_load_b64 s[44:45], s[34:35], 0x24
-; GFX1132-DPP-NEXT: v_mov_b32_e32 v0, 0
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-DPP-NEXT: global_load_b64 v[1:2], v0, s[44:45]
; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x1
diff --git a/llvm/test/CodeGen/AMDGPU/idiv-licm.ll b/llvm/test/CodeGen/AMDGPU/idiv-licm.ll
index c71d080b066b0..5fab0c50bbe57 100644
--- a/llvm/test/CodeGen/AMDGPU/idiv-licm.ll
+++ b/llvm/test/CodeGen/AMDGPU/idiv-licm.ll
@@ -565,23 +565,22 @@ define amdgpu_kernel void @srem32_invariant_denom(ptr addrspace(1) nocapture %ar
;
; GFX11-LABEL: srem32_invariant_denom:
; GFX11: ; %bb.0: ; %bb
-; GFX11-NEXT: s_clause 0x1
-; GFX11-NEXT: s_load_b32 s2, s[4:5], 0x2c
-; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; GFX11-NEXT: s_load_b32 s0, s[4:5], 0x2c
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-NEXT: s_abs_i32 s2, s2
-; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-NEXT: s_abs_i32 s2, s0
+; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX11-NEXT: v_cvt_f32_u32_e32 v0, s2
; GFX11-NEXT: s_sub_i32 s3, 0, s2
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_rcp_iflag_f32_e32 v0, v0
; GFX11-NEXT: s_waitcnt_depctr 0xfff
; GFX11-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_cvt_u32_f32_e32 v0, v0
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX11-NEXT: v_readfirstlane_b32 s4, v0
; GFX11-NEXT: v_mov_b32_e32 v0, 0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX11-NEXT: s_mul_i32 s3, s3, s4
+; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: s_mul_hi_u32 s5, s4, s3
; GFX11-NEXT: s_mov_b32 s3, 0
; GFX11-NEXT: s_add_i32 s4, s4, s5
@@ -602,6 +601,7 @@ define amdgpu_kernel void @srem32_invariant_denom(ptr addrspace(1) nocapture %ar
; GFX11-NEXT: s_cselect_b32 s5, s6, s5
; GFX11-NEXT: s_add_i32 s3, s3, 1
; GFX11-NEXT: v_mov_b32_e32 v1, s5
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX11-NEXT: s_add_u32 s0, s0, 4
; GFX11-NEXT: s_addc_u32 s1, s1, 0
@@ -694,32 +694,31 @@ define amdgpu_kernel void @udiv16_invariant_denom(ptr addrspace(1) nocapture %ar
; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_and_b32 s2, s2, 0xffff
-; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_cvt_f32_u32_e32 v0, s2
; GFX11-NEXT: s_mov_b32 s2, 0
+; GFX11-NEXT: v_rcp_iflag_f32_e32 v1, v0
; GFX11-NEXT: .p2align 6
; GFX11-NEXT: .LBB4_1: ; %bb3
; GFX11-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(SALU_CYCLE_1)
-; GFX11-NEXT: v_rcp_iflag_f32_e32 v1, v0
; GFX11-NEXT: s_and_b32 s3, 0xffff, s2
; GFX11-NEXT: s_add_i32 s2, s2, 1
; GFX11-NEXT: v_cvt_f32_u32_e32 v2, s3
; GFX11-NEXT: s_lshl_b32 s3, s3, 1
-; GFX11-NEXT: v_mov_b32_e32 v3, s3
+; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_4) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_mov_b32_e32 v4, s3
; GFX11-NEXT: s_and_b32 s3, s2, 0xffff
-; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX11-NEXT: s_cmpk_eq_i32 s3, 0x400
; GFX11-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-NEXT: v_mul_f32_e32 v1, v2, v1
-; GFX11-NEXT: v_trunc_f32_e32 v1, v1
+; GFX11-NEXT: v_mul_f32_e32 v3, v2, v1
+; GFX11-NEXT: s_cmpk_eq_i32 s3, 0x400
+; GFX11-NEXT: v_trunc_f32_e32 v3, v3
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_fma_f32 v2, -v1, v0, v2
-; GFX11-NEXT: v_cvt_u32_f32_e32 v1, v1
+; GFX11-NEXT: v_fma_f32 v2, -v3, v0, v2
+; GFX11-NEXT: v_cvt_u32_f32_e32 v3, v3
; GFX11-NEXT: v_cmp_ge_f32_e64 vcc_lo, |v2|, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
-; GFX11-NEXT: global_store_b16 v3, v1, s[0:1]
+; GFX11-NEXT: v_add_co_ci_u32_e32 v2, vcc_lo, 0, v3, vcc_lo
+; GFX11-NEXT: global_store_b16 v4, v2, s[0:1]
; GFX11-NEXT: s_cbranch_scc0 .LBB4_1
; GFX11-NEXT: ; %bb.2: ; %bb2
; GFX11-NEXT: s_endpgm
@@ -813,34 +812,33 @@ define amdgpu_kernel void @urem16_invariant_denom(ptr addrspace(1) nocapture %ar
; GFX11-NEXT: s_mov_b32 s3, 0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_and_b32 s2, s2, 0xffff
-; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_cvt_f32_u32_e32 v0, s2
+; GFX11-NEXT: v_rcp_iflag_f32_e32 v1, v0
; GFX11-NEXT: .p2align 6
; GFX11-NEXT: .LBB5_1: ; %bb3
; GFX11-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT: v_rcp_iflag_f32_e32 v1, v0
; GFX11-NEXT: s_and_b32 s4, 0xffff, s3
; GFX11-NEXT: s_add_i32 s3, s3, 1
; GFX11-NEXT: v_cvt_f32_u32_e32 v2, s4
; GFX11-NEXT: s_lshl_b32 s5, s4, 1
; GFX11-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-NEXT: v_mul_f32_e32 v1, v2, v1
+; GFX11-NEXT: v_mul_f32_e32 v3, v2, v1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT: v_trunc_f32_e32 v1, v1
-; GFX11-NEXT: v_fma_f32 v2, -v1, v0, v2
-; GFX11-NEXT: v_cvt_u32_f32_e32 v1, v1
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3)
+; GFX11-NEXT: v_trunc_f32_e32 v3, v3
+; GFX11-NEXT: v_fma_f32 v2, -v3, v0, v2
+; GFX11-NEXT: v_cvt_u32_f32_e32 v3, v3
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-NEXT: v_cmp_ge_f32_e64 vcc_lo, |v2|, v0
-; GFX11-NEXT: v_mov_b32_e32 v2, s5
-; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT: v_mul_lo_u32 v1, v1, s2
-; GFX11-NEXT: v_sub_nc_u32_e32 v1, s4, v1
+; GFX11-NEXT: v_add_co_ci_u32_e32 v2, vcc_lo, 0, v3, vcc_lo
+; GFX11-NEXT: v_mov_b32_e32 v3, s5
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_mul_lo_u32 v2, v2, s2
+; GFX11-NEXT: v_sub_nc_u32_e32 v2, s4, v2
; GFX11-NEXT: s_and_b32 s4, s3, 0xffff
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: s_cmpk_eq_i32 s4, 0x400
-; GFX11-NEXT: global_store_b16 v2, v1, s[0:1]
+; GFX11-NEXT: global_store_b16 v3, v2, s[0:1]
; GFX11-NEXT: s_cbranch_scc0 .LBB5_1
; GFX11-NEXT: ; %bb.2: ; %bb2
; GFX11-NEXT: s_endpgm
@@ -942,37 +940,38 @@ define amdgpu_kernel void @sdiv16_invariant_denom(ptr addrspace(1) nocapture %ar
; GFX11-NEXT: s_mov_b32 s3, 0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_sext_i32_i16 s2, s2
-; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_cvt_f32_i32_e32 v0, s2
+; GFX11-NEXT: v_rcp_iflag_f32_e32 v1, v0
; GFX11-NEXT: .p2align 6
; GFX11-NEXT: .LBB6_1: ; %bb3
; GFX11-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
-; GFX11-NEXT: v_rcp_iflag_f32_e32 v1, v0
; GFX11-NEXT: s_sext_i32_i16 s4, s3
+; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX11-NEXT: v_cvt_f32_i32_e32 v2, s4
; GFX11-NEXT: s_xor_b32 s4, s4, s2
-; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX11-NEXT: s_ashr_i32 s4, s4, 30
+; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
; GFX11-NEXT: s_or_b32 s4, s4, 1
; GFX11-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-NEXT: v_mul_f32_e32 v1, v2, v1
+; GFX11-NEXT: v_mul_f32_e32 v3, v2, v1
+; GFX11-NEXT: v_trunc_f32_e32 v3, v3
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT: v_trunc_f32_e32 v1, v1
-; GFX11-NEXT: v_fma_f32 v2, -v1, v0, v2
-; GFX11-NEXT: v_cvt_i32_f32_e32 v1, v1
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_fma_f32 v2, -v3, v0, v2
; GFX11-NEXT: v_cmp_ge_f32_e64 s5, |v2|, |v0|
+; GFX11-NEXT: v_cvt_i32_f32_e32 v2, v3
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
; GFX11-NEXT: s_and_b32 s5, s5, exec_lo
; GFX11-NEXT: s_cselect_b32 s4, s4, 0
; GFX11-NEXT: s_and_b32 s5, 0xffff, s3
-; GFX11-NEXT: s_add_i32 s3, s3, 1
+; GFX11-NEXT: v_add_nc_u32_e32 v2, s4, v2
; GFX11-NEXT: s_lshl_b32 s5, s5, 1
-; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
-; GFX11-NEXT: v_dual_mov_b32 v2, s5 :: v_dual_add_nc_u32 v1, s4, v1
+; GFX11-NEXT: s_add_i32 s3, s3, 1
+; GFX11-NEXT: v_mov_b32_e32 v3, s5
; GFX11-NEXT: s_and_b32 s4, s3, 0xffff
+; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: s_cmpk_eq_i32 s4, 0x400
-; GFX11-NEXT: global_store_b16 v2, v1, s[0:1]
+; GFX11-NEXT: global_store_b16 v3, v2, s[0:1]
; GFX11-NEXT: s_cbranch_scc0 .LBB6_1
; GFX11-NEXT: ; %bb.2: ; %bb2
; GFX11-NEXT: s_endpgm
@@ -1078,42 +1077,42 @@ define amdgpu_kernel void @srem16_invariant_denom(ptr addrspace(1) nocapture %ar
; GFX11-NEXT: s_mov_b32 s3, 0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_sext_i32_i16 s2, s2
-; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_cvt_f32_i32_e32 v0, s2
+; GFX11-NEXT: v_rcp_iflag_f32_e32 v1, v0
; GFX11-NEXT: .p2align 6
; GFX11-NEXT: .LBB7_1: ; %bb3
; GFX11-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
-; GFX11-NEXT: v_rcp_iflag_f32_e32 v1, v0
; GFX11-NEXT: s_sext_i32_i16 s4, s3
+; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX11-NEXT: v_cvt_f32_i32_e32 v2, s4
; GFX11-NEXT: s_xor_b32 s5, s4, s2
-; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX11-NEXT: s_ashr_i32 s5, s5, 30
+; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
; GFX11-NEXT: s_or_b32 s5, s5, 1
; GFX11-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-NEXT: v_mul_f32_e32 v1, v2, v1
+; GFX11-NEXT: v_mul_f32_e32 v3, v2, v1
+; GFX11-NEXT: v_trunc_f32_e32 v3, v3
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT: v_trunc_f32_e32 v1, v1
-; GFX11-NEXT: v_fma_f32 v2, -v1, v0, v2
-; GFX11-NEXT: v_cvt_i32_f32_e32 v1, v1
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_fma_f32 v2, -v3, v0, v2
; GFX11-NEXT: v_cmp_ge_f32_e64 s6, |v2|, |v0|
+; GFX11-NEXT: v_cvt_i32_f32_e32 v2, v3
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-NEXT: s_and_b32 s6, s6, exec_lo
; GFX11-NEXT: s_cselect_b32 s5, s5, 0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instid1(SALU_CYCLE_1)
-; GFX11-NEXT: v_add_nc_u32_e32 v1, s5, v1
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
+; GFX11-NEXT: v_add_nc_u32_e32 v2, s5, v2
; GFX11-NEXT: s_and_b32 s5, 0xffff, s3
; GFX11-NEXT: s_add_i32 s3, s3, 1
; GFX11-NEXT: s_lshl_b32 s5, s5, 1
-; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX11-NEXT: v_mov_b32_e32 v2, s5
-; GFX11-NEXT: v_mul_lo_u32 v1, v1, s2
-; GFX11-NEXT: v_sub_nc_u32_e32 v1, s4, v1
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-NEXT: v_mul_lo_u32 v2, v2, s2
+; GFX11-NEXT: v_mov_b32_e32 v3, s5
+; GFX11-NEXT: v_sub_nc_u32_e32 v2, s4, v2
; GFX11-NEXT: s_and_b32 s4, s3, 0xffff
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: s_cmpk_eq_i32 s4, 0x400
-; GFX11-NEXT: global_store_b16 v2, v1, s[0:1]
+; GFX11-NEXT: global_store_b16 v3, v2, s[0:1]
; GFX11-NEXT: s_cbranch_scc0 .LBB7_1
; GFX11-NEXT: ; %bb.2: ; %bb2
; GFX11-NEXT: s_endpgm
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.umax.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.umax.ll
index 0317df6f6a044..f72f1e52d135f 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.umax.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.umax.ll
@@ -463,7 +463,7 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out, i32 %in) {
; GFX1132DAGISEL-LABEL: divergent_value:
; GFX1132DAGISEL: ; %bb.0: ; %entry
; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132DAGISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
+; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0x3ff, v0
; GFX1132DAGISEL-NEXT: s_mov_b32 s3, exec_lo
; GFX1132DAGISEL-NEXT: s_mov_b32 s2, 0
; GFX1132DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
@@ -476,7 +476,7 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out, i32 %in) {
; GFX1132DAGISEL-NEXT: s_cmp_lg_u32 s3, 0
; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1
; GFX1132DAGISEL-NEXT: ; %bb.2:
-; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 0
+; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, s2
; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132DAGISEL-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX1132DAGISEL-NEXT: s_endpgm
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.umin.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.umin.ll
index 5ec0224898bfd..4551c60770bdf 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.umin.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.umin.ll
@@ -464,7 +464,7 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1132DAGISEL-LABEL: divergent_value:
; GFX1132DAGISEL: ; %bb.0: ; %entry
; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132DAGISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
+; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0x3ff, v0
; GFX1132DAGISEL-NEXT: s_mov_b32 s3, exec_lo
; GFX1132DAGISEL-NEXT: s_mov_b32 s2, -1
; GFX1132DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
@@ -477,7 +477,7 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1132DAGISEL-NEXT: s_cmp_lg_u32 s3, 0
; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1
; GFX1132DAGISEL-NEXT: ; %bb.2:
-; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 0
+; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, s2
; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132DAGISEL-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX1132DAGISEL-NEXT: s_endpgm
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.atomic.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.atomic.buffer.load.ll
index e16ac07236e30..eb2d95e4db2d5 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.atomic.buffer.load.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.atomic.buffer.load.ll
@@ -38,15 +38,14 @@ define amdgpu_kernel void @struct_atomic_buffer_load_i32_const_idx(<4 x i32> %ad
; CHECK-LABEL: struct_atomic_buffer_load_i32_const_idx:
; CHECK: ; %bb.0: ; %bb
; CHECK-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
-; CHECK-NEXT: v_and_b32_e32 v0, 0x3ff, v0
+; CHECK-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_and_b32 v0, 0x3ff, v0
; CHECK-NEXT: s_mov_b32 s4, 0
; CHECK-NEXT: .LBB1_1: ; %bb1
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: v_mov_b32_e32 v1, 15
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
-; CHECK-NEXT: buffer_load_b32 v1, v1, s[0:3], 0 idxen glc
+; CHECK-NEXT: buffer_load_b32 v2, v1, s[0:3], 0 idxen glc
; CHECK-NEXT: s_waitcnt vmcnt(0)
-; CHECK-NEXT: v_cmp_ne_u32_e32 vcc_lo, v1, v0
+; CHECK-NEXT: v_cmp_ne_u32_e32 vcc_lo, v2, v0
; CHECK-NEXT: s_or_b32 s4, vcc_lo, s4
; CHECK-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; CHECK-NEXT: s_and_not1_b32 exec_lo, exec_lo, s4
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.atomic.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.atomic.buffer.load.ll
index cc34020e0a28a..bc50b12b59049 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.atomic.buffer.load.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.atomic.buffer.load.ll
@@ -38,15 +38,14 @@ define amdgpu_kernel void @struct_ptr_atomic_buffer_load_i32_const_idx(ptr addrs
; CHECK-LABEL: struct_ptr_atomic_buffer_load_i32_const_idx:
; CHECK: ; %bb.0: ; %bb
; CHECK-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
-; CHECK-NEXT: v_and_b32_e32 v0, 0x3ff, v0
+; CHECK-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_and_b32 v0, 0x3ff, v0
; CHECK-NEXT: s_mov_b32 s4, 0
; CHECK-NEXT: .LBB1_1: ; %bb1
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: v_mov_b32_e32 v1, 15
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
-; CHECK-NEXT: buffer_load_b32 v1, v1, s[0:3], 0 idxen glc
+; CHECK-NEXT: buffer_load_b32 v2, v1, s[0:3], 0 idxen glc
; CHECK-NEXT: s_waitcnt vmcnt(0)
-; CHECK-NEXT: v_cmp_ne_u32_e32 vcc_lo, v1, v0
+; CHECK-NEXT: v_cmp_ne_u32_e32 vcc_lo, v2, v0
; CHECK-NEXT: s_or_b32 s4, vcc_lo, s4
; CHECK-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; CHECK-NEXT: s_and_not1_b32 exec_lo, exec_lo, s4
diff --git a/llvm/test/CodeGen/AMDGPU/loop-prefetch-data.ll b/llvm/test/CodeGen/AMDGPU/loop-prefetch-data.ll
index 506753f8640e1..dd77e575e7505 100644
--- a/llvm/test/CodeGen/AMDGPU/loop-prefetch-data.ll
+++ b/llvm/test/CodeGen/AMDGPU/loop-prefetch-data.ll
@@ -100,7 +100,7 @@ define amdgpu_kernel void @copy_constant(ptr addrspace(1) nocapture %d, ptr addr
; GCN-NEXT: s_cbranch_scc1 .LBB2_3
; GCN-NEXT: ; %bb.1: ; %for.body.preheader
; GCN-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
-; GCN-NEXT: v_mov_b32_e32 v4, 0
+; GCN-NEXT: v_mov_b32_e32 v0, 0
; GCN-NEXT: .LBB2_2: ; %for.body
; GCN-NEXT: ; =>This Inner Loop Header: Depth=1
; GCN-NEXT: s_wait_kmcnt 0x0
@@ -110,9 +110,9 @@ define amdgpu_kernel void @copy_constant(ptr addrspace(1) nocapture %d, ptr addr
; GCN-NEXT: s_add_nc_u64 s[2:3], s[2:3], 16
; GCN-NEXT: s_cmp_lg_u32 s6, 0
; GCN-NEXT: s_wait_kmcnt 0x0
-; GCN-NEXT: v_dual_mov_b32 v0, s8 :: v_dual_mov_b32 v1, s9
-; GCN-NEXT: v_dual_mov_b32 v2, s10 :: v_dual_mov_b32 v3, s11
-; GCN-NEXT: global_store_b128 v4, v[0:3], s[0:1]
+; GCN-NEXT: v_dual_mov_b32 v1, s8 :: v_dual_mov_b32 v2, s9
+; GCN-NEXT: v_dual_mov_b32 v3, s10 :: v_dual_mov_b32 v4, s11
+; GCN-NEXT: global_store_b128 v0, v[1:4], s[0:1]
; GCN-NEXT: s_add_nc_u64 s[0:1], s[0:1], 16
; GCN-NEXT: s_cbranch_scc1 .LBB2_2
; GCN-NEXT: .LBB2_3: ; %for.end
diff --git a/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats-attr.mir b/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats-attr.mir
index 3c15201cd698e..557ca065f6129 100644
--- a/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats-attr.mir
+++ b/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats-attr.mir
@@ -1,21 +1,25 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn -mcpu=gfx908 -run-pass=machine-scheduler -amdgpu-disable-unclustered-high-rp-reschedule -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX908 %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx90a -run-pass=machine-scheduler -amdgpu-disable-unclustered-high-rp-reschedule -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX90A %s
--- |
- ; User-requested maximum number of VGPRs need to be taken into account by
- ; the scheduler's rematerialization stage. Register usage above that number
- ; is considered like spill; occupancy is "inadvertently" increased when
- ; eliminating spill.
define void @small_num_vgprs_as_spill() "amdgpu-num-vgpr"="28" {
ret void
}
-
- ; Min/Max occupancy is 8, the scheduler's rematerialization stage should not
- ; try to rematerialize instructions.
+ define void @dont_remat_waves_per_eu() "amdgpu-flat-work-group-size"="1024,1024" "amdgpu-waves-per-eu"="7,7" {
+ ret void
+ }
define void @dont_remat_at_max_occ() "amdgpu-flat-work-group-size"="1024,1024" {
ret void
}
+ define void @reduce_arch_and_acc_vgrp_spill() "amdgpu-flat-work-group-size"="1024,1024" {
+ ret void
+ }
---
+# User-requested maximum number of VGPRs need to be taken into account by
+# the scheduler's rematerialization stage. Register usage above that number
+# is considered like spill; occupancy is "inadvertently" increased when
+# eliminating spill.
name: small_num_vgprs_as_spill
tracksRegLiveness: true
machineFunctionInfo:
@@ -69,6 +73,55 @@ body: |
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_25]], implicit [[V_CVT_I32_F64_e32_26]], implicit [[V_CVT_I32_F64_e32_28]], implicit [[V_CVT_I32_F64_e32_29]], implicit [[V_CVT_I32_F64_e32_30]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_31]], implicit [[V_CVT_I32_F64_e32_32]], implicit [[V_CVT_I32_F64_e32_33]], implicit [[V_CVT_I32_F64_e32_27]]
; GFX908-NEXT: S_ENDPGM 0
+ ;
+ ; GFX90A-LABEL: name: small_num_vgprs_as_spill
+ ; GFX90A: bb.0:
+ ; GFX90A-NEXT: successors: %bb.1(0x80000000)
+ ; GFX90A-NEXT: {{ $}}
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 0, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_3:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_4:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 4, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_5:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 5, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_6:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 6, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_7:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 7, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_8:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 8, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_9:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 9, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_10:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 10, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_11:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 11, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_12:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 12, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_13:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 13, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_14:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 14, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_15:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 15, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_16:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 16, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_17:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 17, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_18:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 18, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_19:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 19, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_20:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 20, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_21:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 21, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 25, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_26:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 26, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 27, implicit $exec, implicit $mode
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 28, implicit $exec, implicit $mode
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 29, implicit $exec, implicit $mode
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 30, implicit $exec, implicit $mode
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 31, implicit $exec, implicit $mode
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_32:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 32, implicit $exec, implicit $mode
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_33:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 33, implicit $exec, implicit $mode
+ ; GFX90A-NEXT: {{ $}}
+ ; GFX90A-NEXT: bb.1:
+ ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_]], implicit [[V_CVT_I32_F64_e32_1]], implicit [[V_CVT_I32_F64_e32_2]], implicit [[V_CVT_I32_F64_e32_3]], implicit [[V_CVT_I32_F64_e32_4]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_5]], implicit [[V_CVT_I32_F64_e32_6]], implicit [[V_CVT_I32_F64_e32_7]], implicit [[V_CVT_I32_F64_e32_8]], implicit [[V_CVT_I32_F64_e32_9]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_10]], implicit [[V_CVT_I32_F64_e32_11]], implicit [[V_CVT_I32_F64_e32_12]], implicit [[V_CVT_I32_F64_e32_13]], implicit [[V_CVT_I32_F64_e32_14]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_15]], implicit [[V_CVT_I32_F64_e32_16]], implicit [[V_CVT_I32_F64_e32_17]], implicit [[V_CVT_I32_F64_e32_18]], implicit [[V_CVT_I32_F64_e32_19]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_20]], implicit [[V_CVT_I32_F64_e32_21]], implicit [[V_CVT_I32_F64_e32_22]], implicit [[V_CVT_I32_F64_e32_23]], implicit [[V_CVT_I32_F64_e32_24]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_25]], implicit [[V_CVT_I32_F64_e32_26]], implicit [[V_CVT_I32_F64_e32_27]], implicit [[V_CVT_I32_F64_e32_28]], implicit [[V_CVT_I32_F64_e32_29]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_30]], implicit [[V_CVT_I32_F64_e32_31]], implicit [[V_CVT_I32_F64_e32_32]], implicit [[V_CVT_I32_F64_e32_33]]
+ ; GFX90A-NEXT: S_ENDPGM 0
bb.0:
successors: %bb.1
@@ -119,6 +172,172 @@ body: |
S_ENDPGM 0
...
+# Min/Max occupancy is 8, but the user requests 7, so the scheduler's
+# rematerialization stage should not try to rematerialize instructions.
+---
+name: dont_remat_waves_per_eu
+tracksRegLiveness: true
+machineFunctionInfo:
+ isEntryFunction: true
+body: |
+ ; GFX908-LABEL: name: dont_remat_waves_per_eu
+ ; GFX908: bb.0:
+ ; GFX908-NEXT: successors: %bb.1(0x80000000)
+ ; GFX908-NEXT: {{ $}}
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 0, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_3:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_4:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 4, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_5:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 5, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_6:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 6, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_7:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 7, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_8:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 8, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_9:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 9, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_10:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 10, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_11:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 11, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_12:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 12, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_13:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 13, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_14:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 14, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_15:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 15, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_16:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 16, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_17:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 17, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_18:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 18, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_19:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 19, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_20:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 20, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_21:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 21, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 25, implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_26:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 26, implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 27, implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 28, implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 29, implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 30, implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 31, implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_32:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 32, implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_33:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 33, implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_34:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 34, implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_35:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 35, implicit $exec, implicit $mode
+ ; GFX908-NEXT: {{ $}}
+ ; GFX908-NEXT: bb.1:
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_]], implicit [[V_CVT_I32_F64_e32_1]], implicit [[V_CVT_I32_F64_e32_2]], implicit [[V_CVT_I32_F64_e32_3]], implicit [[V_CVT_I32_F64_e32_4]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_5]], implicit [[V_CVT_I32_F64_e32_6]], implicit [[V_CVT_I32_F64_e32_7]], implicit [[V_CVT_I32_F64_e32_8]], implicit [[V_CVT_I32_F64_e32_9]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_10]], implicit [[V_CVT_I32_F64_e32_11]], implicit [[V_CVT_I32_F64_e32_12]], implicit [[V_CVT_I32_F64_e32_13]], implicit [[V_CVT_I32_F64_e32_14]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_15]], implicit [[V_CVT_I32_F64_e32_16]], implicit [[V_CVT_I32_F64_e32_17]], implicit [[V_CVT_I32_F64_e32_18]], implicit [[V_CVT_I32_F64_e32_19]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_20]], implicit [[V_CVT_I32_F64_e32_21]], implicit [[V_CVT_I32_F64_e32_22]], implicit [[V_CVT_I32_F64_e32_23]], implicit [[V_CVT_I32_F64_e32_24]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_25]], implicit [[V_CVT_I32_F64_e32_26]], implicit [[V_CVT_I32_F64_e32_27]], implicit [[V_CVT_I32_F64_e32_28]], implicit [[V_CVT_I32_F64_e32_29]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_30]], implicit [[V_CVT_I32_F64_e32_31]], implicit [[V_CVT_I32_F64_e32_32]], implicit [[V_CVT_I32_F64_e32_33]], implicit [[V_CVT_I32_F64_e32_34]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_35]]
+ ; GFX908-NEXT: S_ENDPGM 0
+ ;
+ ; GFX90A-LABEL: name: dont_remat_waves_per_eu
+ ; GFX90A: bb.0:
+ ; GFX90A-NEXT: successors: %bb.1(0x80000000)
+ ; GFX90A-NEXT: {{ $}}
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 0, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_3:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_4:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 4, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_5:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 5, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_6:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 6, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_7:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 7, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_8:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 8, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_9:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 9, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_10:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 10, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_11:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 11, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_12:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 12, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_13:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 13, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_14:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 14, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_15:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 15, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_16:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 16, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_17:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 17, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_18:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 18, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_19:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 19, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_20:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 20, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_21:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 21, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 25, implicit $exec, implicit $mode
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_26:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 26, implicit $exec, implicit $mode
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 27, implicit $exec, implicit $mode
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 28, implicit $exec, implicit $mode
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 29, implicit $exec, implicit $mode
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 30, implicit $exec, implicit $mode
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 31, implicit $exec, implicit $mode
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_32:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 32, implicit $exec, implicit $mode
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_33:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 33, implicit $exec, implicit $mode
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_34:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 34, implicit $exec, implicit $mode
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_35:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 35, implicit $exec, implicit $mode
+ ; GFX90A-NEXT: {{ $}}
+ ; GFX90A-NEXT: bb.1:
+ ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_]], implicit [[V_CVT_I32_F64_e32_1]], implicit [[V_CVT_I32_F64_e32_2]], implicit [[V_CVT_I32_F64_e32_3]], implicit [[V_CVT_I32_F64_e32_4]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_5]], implicit [[V_CVT_I32_F64_e32_6]], implicit [[V_CVT_I32_F64_e32_7]], implicit [[V_CVT_I32_F64_e32_8]], implicit [[V_CVT_I32_F64_e32_9]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_10]], implicit [[V_CVT_I32_F64_e32_11]], implicit [[V_CVT_I32_F64_e32_12]], implicit [[V_CVT_I32_F64_e32_13]], implicit [[V_CVT_I32_F64_e32_14]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_15]], implicit [[V_CVT_I32_F64_e32_16]], implicit [[V_CVT_I32_F64_e32_17]], implicit [[V_CVT_I32_F64_e32_18]], implicit [[V_CVT_I32_F64_e32_19]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_20]], implicit [[V_CVT_I32_F64_e32_21]], implicit [[V_CVT_I32_F64_e32_22]], implicit [[V_CVT_I32_F64_e32_23]], implicit [[V_CVT_I32_F64_e32_24]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_25]], implicit [[V_CVT_I32_F64_e32_26]], implicit [[V_CVT_I32_F64_e32_27]], implicit [[V_CVT_I32_F64_e32_28]], implicit [[V_CVT_I32_F64_e32_29]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_30]], implicit [[V_CVT_I32_F64_e32_31]], implicit [[V_CVT_I32_F64_e32_32]], implicit [[V_CVT_I32_F64_e32_33]], implicit [[V_CVT_I32_F64_e32_34]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_35]]
+ ; GFX90A-NEXT: S_ENDPGM 0
+ bb.0:
+ successors: %bb.1
+
+ %0:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 0, implicit $exec, implicit $mode, implicit-def $m0
+ %1:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode, implicit-def $m0
+ %2:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode, implicit-def $m0
+ %3:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode, implicit-def $m0
+ %4:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 4, implicit $exec, implicit $mode, implicit-def $m0
+ %5:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 5, implicit $exec, implicit $mode, implicit-def $m0
+ %6:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 6, implicit $exec, implicit $mode, implicit-def $m0
+ %7:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 7, implicit $exec, implicit $mode, implicit-def $m0
+ %8:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 8, implicit $exec, implicit $mode, implicit-def $m0
+ %9:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 9, implicit $exec, implicit $mode, implicit-def $m0
+ %10:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 10, implicit $exec, implicit $mode, implicit-def $m0
+ %11:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 11, implicit $exec, implicit $mode, implicit-def $m0
+ %12:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 12, implicit $exec, implicit $mode, implicit-def $m0
+ %13:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 13, implicit $exec, implicit $mode, implicit-def $m0
+ %14:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 14, implicit $exec, implicit $mode, implicit-def $m0
+ %15:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 15, implicit $exec, implicit $mode, implicit-def $m0
+ %16:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 16, implicit $exec, implicit $mode, implicit-def $m0
+ %17:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 17, implicit $exec, implicit $mode, implicit-def $m0
+ %18:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 18, implicit $exec, implicit $mode, implicit-def $m0
+ %19:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 19, implicit $exec, implicit $mode, implicit-def $m0
+ %20:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 20, implicit $exec, implicit $mode, implicit-def $m0
+ %21:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 21, implicit $exec, implicit $mode, implicit-def $m0
+ %22:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode, implicit-def $m0
+ %23:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode, implicit-def $m0
+ %24:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode,
+ %25:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 25, implicit $exec, implicit $mode,
+ %26:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 26, implicit $exec, implicit $mode,
+ %27:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 27, implicit $exec, implicit $mode,
+ %28:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 28, implicit $exec, implicit $mode,
+ %29:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 29, implicit $exec, implicit $mode,
+ %30:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 30, implicit $exec, implicit $mode,
+ %31:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 31, implicit $exec, implicit $mode,
+ %32:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 32, implicit $exec, implicit $mode,
+ %33:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 33, implicit $exec, implicit $mode,
+ %34:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 34, implicit $exec, implicit $mode,
+ %35:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 35, implicit $exec, implicit $mode,
+
+ bb.1:
+
+ S_NOP 0, implicit %0, implicit %1, implicit %2, implicit %3, implicit %4
+ S_NOP 0, implicit %5, implicit %6, implicit %7, implicit %8, implicit %9
+ S_NOP 0, implicit %10, implicit %11, implicit %12, implicit %13, implicit %14
+ S_NOP 0, implicit %15, implicit %16, implicit %17, implicit %18, implicit %19
+ S_NOP 0, implicit %20, implicit %21, implicit %22, implicit %23, implicit %24
+ S_NOP 0, implicit %25, implicit %26, implicit %27, implicit %28, implicit %29
+ S_NOP 0, implicit %30, implicit %31, implicit %32, implicit %33, implicit %34
+ S_NOP 0, implicit %35
+
+ S_ENDPGM 0
+...
+# Min/Max occupancy is 8, so the scheduler's rematerialization stage should
+# not try to rematerialize instructions.
---
name: dont_remat_at_max_occ
tracksRegLiveness: true
@@ -171,6 +390,53 @@ body: |
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_25]], implicit [[V_CVT_I32_F64_e32_26]], implicit [[V_CVT_I32_F64_e32_27]], implicit [[V_CVT_I32_F64_e32_28]], implicit [[V_CVT_I32_F64_e32_29]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_30]], implicit [[V_CVT_I32_F64_e32_31]]
; GFX908-NEXT: S_ENDPGM 0
+ ;
+ ; GFX90A-LABEL: name: dont_remat_at_max_occ
+ ; GFX90A: bb.0:
+ ; GFX90A-NEXT: successors: %bb.1(0x80000000)
+ ; GFX90A-NEXT: {{ $}}
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 0, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_3:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_4:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 4, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_5:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 5, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_6:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 6, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_7:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 7, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_8:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 8, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_9:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 9, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_10:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 10, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_11:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 11, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_12:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 12, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_13:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 13, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_14:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 14, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_15:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 15, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_16:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 16, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_17:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 17, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_18:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 18, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_19:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 19, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_20:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 20, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_21:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 21, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 25, implicit $exec, implicit $mode
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_26:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 26, implicit $exec, implicit $mode
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 27, implicit $exec, implicit $mode
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 28, implicit $exec, implicit $mode
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 29, implicit $exec, implicit $mode
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 30, implicit $exec, implicit $mode
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 31, implicit $exec, implicit $mode
+ ; GFX90A-NEXT: {{ $}}
+ ; GFX90A-NEXT: bb.1:
+ ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_]], implicit [[V_CVT_I32_F64_e32_1]], implicit [[V_CVT_I32_F64_e32_2]], implicit [[V_CVT_I32_F64_e32_3]], implicit [[V_CVT_I32_F64_e32_4]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_5]], implicit [[V_CVT_I32_F64_e32_6]], implicit [[V_CVT_I32_F64_e32_7]], implicit [[V_CVT_I32_F64_e32_8]], implicit [[V_CVT_I32_F64_e32_9]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_10]], implicit [[V_CVT_I32_F64_e32_11]], implicit [[V_CVT_I32_F64_e32_12]], implicit [[V_CVT_I32_F64_e32_13]], implicit [[V_CVT_I32_F64_e32_14]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_15]], implicit [[V_CVT_I32_F64_e32_16]], implicit [[V_CVT_I32_F64_e32_17]], implicit [[V_CVT_I32_F64_e32_18]], implicit [[V_CVT_I32_F64_e32_19]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_20]], implicit [[V_CVT_I32_F64_e32_21]], implicit [[V_CVT_I32_F64_e32_22]], implicit [[V_CVT_I32_F64_e32_23]], implicit [[V_CVT_I32_F64_e32_24]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_25]], implicit [[V_CVT_I32_F64_e32_26]], implicit [[V_CVT_I32_F64_e32_27]], implicit [[V_CVT_I32_F64_e32_28]], implicit [[V_CVT_I32_F64_e32_29]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_30]], implicit [[V_CVT_I32_F64_e32_31]]
+ ; GFX90A-NEXT: S_ENDPGM 0
bb.0:
successors: %bb.1
@@ -219,3 +485,278 @@ body: |
S_ENDPGM 0
...
+# Min/Max occupancy is 8. For targets with unified RF we are able to eliminate
+# VGPR spilling w.r.t. minimum workgroup size induced occupancy. We can only
+# eliminate ArchVGPR spilling in the non-unified RF case, which requires less
+# remats than in the unified case.
+---
+name: reduce_arch_and_acc_vgrp_spill
+tracksRegLiveness: true
+machineFunctionInfo:
+ isEntryFunction: true
+body: |
+ ; GFX908-LABEL: name: reduce_arch_and_acc_vgrp_spill
+ ; GFX908: bb.0:
+ ; GFX908-NEXT: successors: %bb.1(0x80000000)
+ ; GFX908-NEXT: {{ $}}
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 0, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_3:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_4:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 4, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_5:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 5, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_6:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 6, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_7:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 7, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_8:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 8, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_9:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 9, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_10:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 10, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_11:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 11, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_12:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 12, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_13:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 13, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_14:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 14, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_15:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 15, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_16:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 16, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_17:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 17, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_18:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 18, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_19:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 19, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_20:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 20, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_21:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 21, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 25, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_26:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 26, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 28, implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 29, implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 30, implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 31, implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 64, implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF1:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF2:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF3:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF4:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF5:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF6:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF7:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF8:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF9:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF10:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF11:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF12:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF13:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF14:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF15:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF16:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF17:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF18:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF19:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF20:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF21:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF22:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF23:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF24:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF25:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF26:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF27:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF28:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF29:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF30:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF31:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF32:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: {{ $}}
+ ; GFX908-NEXT: bb.1:
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_]], implicit [[V_CVT_I32_F64_e32_1]], implicit [[V_CVT_I32_F64_e32_2]], implicit [[V_CVT_I32_F64_e32_3]], implicit [[V_CVT_I32_F64_e32_4]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_5]], implicit [[V_CVT_I32_F64_e32_6]], implicit [[V_CVT_I32_F64_e32_7]], implicit [[V_CVT_I32_F64_e32_8]], implicit [[V_CVT_I32_F64_e32_9]]
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_32:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 27, implicit $exec, implicit $mode
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_10]], implicit [[V_CVT_I32_F64_e32_11]], implicit [[V_CVT_I32_F64_e32_12]], implicit [[V_CVT_I32_F64_e32_13]], implicit [[V_CVT_I32_F64_e32_14]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_15]], implicit [[V_CVT_I32_F64_e32_16]], implicit [[V_CVT_I32_F64_e32_17]], implicit [[V_CVT_I32_F64_e32_18]], implicit [[V_CVT_I32_F64_e32_19]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_20]], implicit [[V_CVT_I32_F64_e32_21]], implicit [[V_CVT_I32_F64_e32_22]], implicit [[V_CVT_I32_F64_e32_23]], implicit [[V_CVT_I32_F64_e32_24]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_25]], implicit [[V_CVT_I32_F64_e32_26]], implicit [[V_CVT_I32_F64_e32_32]], implicit [[V_CVT_I32_F64_e32_27]], implicit [[V_CVT_I32_F64_e32_28]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_29]], implicit [[V_CVT_I32_F64_e32_30]], implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF3]], implicit [[DEF4]], implicit [[DEF5]], implicit [[DEF6]], implicit [[DEF7]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF8]], implicit [[DEF9]], implicit [[DEF10]], implicit [[DEF11]], implicit [[DEF12]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF13]], implicit [[DEF14]], implicit [[DEF15]], implicit [[DEF16]], implicit [[DEF17]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF18]], implicit [[DEF19]], implicit [[DEF20]], implicit [[DEF21]], implicit [[DEF22]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF23]], implicit [[DEF24]], implicit [[DEF25]], implicit [[DEF26]], implicit [[DEF27]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF28]], implicit [[DEF29]], implicit [[DEF30]], implicit [[DEF31]], implicit [[V_CVT_I32_F64_e32_31]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF32]]
+ ; GFX908-NEXT: S_ENDPGM 0
+ ;
+ ; GFX90A-LABEL: name: reduce_arch_and_acc_vgrp_spill
+ ; GFX90A: bb.0:
+ ; GFX90A-NEXT: successors: %bb.1(0x80000000)
+ ; GFX90A-NEXT: {{ $}}
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 0, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[DEF:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF1:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF2:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF3:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF4:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF5:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF6:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF7:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF8:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF9:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF10:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF11:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF12:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF13:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF14:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF15:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF16:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF17:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF18:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF19:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF20:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF21:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF22:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF23:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF24:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF25:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF26:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF27:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF28:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF29:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF30:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF31:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_3:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_4:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 4, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_5:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 5, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_6:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 6, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_7:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 7, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_8:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 8, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_9:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 9, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_10:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 10, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_11:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 11, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_12:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 12, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_13:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 13, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_14:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 14, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_15:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 15, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_16:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 16, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_17:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 17, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_18:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 18, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_19:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 19, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_20:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 20, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_21:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 21, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 25, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_26:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 26, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 64, implicit $exec, implicit $mode
+ ; GFX90A-NEXT: [[DEF32:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: {{ $}}
+ ; GFX90A-NEXT: bb.1:
+ ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_]], implicit [[V_CVT_I32_F64_e32_1]], implicit [[V_CVT_I32_F64_e32_2]], implicit [[V_CVT_I32_F64_e32_3]], implicit [[V_CVT_I32_F64_e32_4]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_5]], implicit [[V_CVT_I32_F64_e32_6]], implicit [[V_CVT_I32_F64_e32_7]], implicit [[V_CVT_I32_F64_e32_8]], implicit [[V_CVT_I32_F64_e32_9]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_10]], implicit [[V_CVT_I32_F64_e32_11]], implicit [[V_CVT_I32_F64_e32_12]], implicit [[V_CVT_I32_F64_e32_13]], implicit [[V_CVT_I32_F64_e32_14]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_15]], implicit [[V_CVT_I32_F64_e32_16]], implicit [[V_CVT_I32_F64_e32_17]], implicit [[V_CVT_I32_F64_e32_18]], implicit [[V_CVT_I32_F64_e32_19]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_20]], implicit [[V_CVT_I32_F64_e32_21]], implicit [[V_CVT_I32_F64_e32_22]], implicit [[V_CVT_I32_F64_e32_23]], implicit [[V_CVT_I32_F64_e32_24]]
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 27, implicit $exec, implicit $mode
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 28, implicit $exec, implicit $mode
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 29, implicit $exec, implicit $mode
+ ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_25]], implicit [[V_CVT_I32_F64_e32_26]], implicit [[V_CVT_I32_F64_e32_28]], implicit [[V_CVT_I32_F64_e32_29]], implicit [[V_CVT_I32_F64_e32_30]]
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 30, implicit $exec, implicit $mode
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_32:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 31, implicit $exec, implicit $mode
+ ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_31]], implicit [[V_CVT_I32_F64_e32_32]], implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF3]], implicit [[DEF4]], implicit [[DEF5]], implicit [[DEF6]], implicit [[DEF7]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF8]], implicit [[DEF9]], implicit [[DEF10]], implicit [[DEF11]], implicit [[DEF12]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF13]], implicit [[DEF14]], implicit [[DEF15]], implicit [[DEF16]], implicit [[DEF17]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF18]], implicit [[DEF19]], implicit [[DEF20]], implicit [[DEF21]], implicit [[DEF22]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF23]], implicit [[DEF24]], implicit [[DEF25]], implicit [[DEF26]], implicit [[DEF27]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF28]], implicit [[DEF29]], implicit [[DEF30]], implicit [[DEF31]], implicit [[V_CVT_I32_F64_e32_27]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF32]]
+ ; GFX90A-NEXT: S_ENDPGM 0
+ bb.0:
+ successors: %bb.1
+
+ %0:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 0, implicit $exec, implicit $mode, implicit-def $m0
+ %1:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode, implicit-def $m0
+ %2:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode, implicit-def $m0
+ %3:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode, implicit-def $m0
+ %4:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 4, implicit $exec, implicit $mode, implicit-def $m0
+ %5:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 5, implicit $exec, implicit $mode, implicit-def $m0
+ %6:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 6, implicit $exec, implicit $mode, implicit-def $m0
+ %7:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 7, implicit $exec, implicit $mode, implicit-def $m0
+ %8:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 8, implicit $exec, implicit $mode, implicit-def $m0
+ %9:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 9, implicit $exec, implicit $mode, implicit-def $m0
+ %10:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 10, implicit $exec, implicit $mode, implicit-def $m0
+ %11:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 11, implicit $exec, implicit $mode, implicit-def $m0
+ %12:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 12, implicit $exec, implicit $mode, implicit-def $m0
+ %13:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 13, implicit $exec, implicit $mode, implicit-def $m0
+ %14:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 14, implicit $exec, implicit $mode, implicit-def $m0
+ %15:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 15, implicit $exec, implicit $mode, implicit-def $m0
+ %16:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 16, implicit $exec, implicit $mode, implicit-def $m0
+ %17:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 17, implicit $exec, implicit $mode, implicit-def $m0
+ %18:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 18, implicit $exec, implicit $mode, implicit-def $m0
+ %19:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 19, implicit $exec, implicit $mode, implicit-def $m0
+ %20:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 20, implicit $exec, implicit $mode, implicit-def $m0
+ %21:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 21, implicit $exec, implicit $mode, implicit-def $m0
+ %22:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode, implicit-def $m0
+ %23:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode, implicit-def $m0
+ %24:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
+ %25:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 25, implicit $exec, implicit $mode, implicit-def $m0
+ %26:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 26, implicit $exec, implicit $mode, implicit-def $m0
+ %27:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 27, implicit $exec, implicit $mode
+ %28:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 28, implicit $exec, implicit $mode
+ %29:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 29, implicit $exec, implicit $mode
+ %30:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 30, implicit $exec, implicit $mode
+ %31:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 31, implicit $exec, implicit $mode
+ %32:agpr_32 = IMPLICIT_DEF
+ %33:agpr_32 = IMPLICIT_DEF
+ %34:agpr_32 = IMPLICIT_DEF
+ %35:agpr_32 = IMPLICIT_DEF
+ %36:agpr_32 = IMPLICIT_DEF
+ %37:agpr_32 = IMPLICIT_DEF
+ %38:agpr_32 = IMPLICIT_DEF
+ %39:agpr_32 = IMPLICIT_DEF
+ %40:agpr_32 = IMPLICIT_DEF
+ %41:agpr_32 = IMPLICIT_DEF
+ %42:agpr_32 = IMPLICIT_DEF
+ %43:agpr_32 = IMPLICIT_DEF
+ %44:agpr_32 = IMPLICIT_DEF
+ %45:agpr_32 = IMPLICIT_DEF
+ %46:agpr_32 = IMPLICIT_DEF
+ %47:agpr_32 = IMPLICIT_DEF
+ %48:agpr_32 = IMPLICIT_DEF
+ %49:agpr_32 = IMPLICIT_DEF
+ %50:agpr_32 = IMPLICIT_DEF
+ %51:agpr_32 = IMPLICIT_DEF
+ %52:agpr_32 = IMPLICIT_DEF
+ %53:agpr_32 = IMPLICIT_DEF
+ %54:agpr_32 = IMPLICIT_DEF
+ %55:agpr_32 = IMPLICIT_DEF
+ %56:agpr_32 = IMPLICIT_DEF
+ %57:agpr_32 = IMPLICIT_DEF
+ %58:agpr_32 = IMPLICIT_DEF
+ %59:agpr_32 = IMPLICIT_DEF
+ %60:agpr_32 = IMPLICIT_DEF
+ %61:agpr_32 = IMPLICIT_DEF
+ %62:agpr_32 = IMPLICIT_DEF
+ %63:agpr_32 = IMPLICIT_DEF
+
+ %64:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 64, implicit $exec, implicit $mode
+ %65:agpr_32 = IMPLICIT_DEF
+
+ bb.1:
+
+ S_NOP 0, implicit %0, implicit %1, implicit %2, implicit %3, implicit %4
+ S_NOP 0, implicit %5, implicit %6, implicit %7, implicit %8, implicit %9
+ S_NOP 0, implicit %10, implicit %11, implicit %12, implicit %13, implicit %14
+ S_NOP 0, implicit %15, implicit %16, implicit %17, implicit %18, implicit %19
+ S_NOP 0, implicit %20, implicit %21, implicit %22, implicit %23, implicit %24
+ S_NOP 0, implicit %25, implicit %26, implicit %27, implicit %28, implicit %29
+ S_NOP 0, implicit %30, implicit %31, implicit %32, implicit %33, implicit %34
+ S_NOP 0, implicit %35, implicit %36, implicit %37, implicit %38, implicit %39
+ S_NOP 0, implicit %40, implicit %41, implicit %42, implicit %43, implicit %44
+ S_NOP 0, implicit %45, implicit %46, implicit %47, implicit %48, implicit %49
+ S_NOP 0, implicit %50, implicit %51, implicit %52, implicit %53, implicit %54
+ S_NOP 0, implicit %55, implicit %56, implicit %57, implicit %58, implicit %59
+ S_NOP 0, implicit %60, implicit %61, implicit %62, implicit %63, implicit %64
+ S_NOP 0, implicit %65
+
+ S_ENDPGM 0
+...
diff --git a/llvm/test/CodeGen/AMDGPU/no-dup-inst-prefetch.ll b/llvm/test/CodeGen/AMDGPU/no-dup-inst-prefetch.ll
index bf1ab9a07d5f6..d62f045674ace 100644
--- a/llvm/test/CodeGen/AMDGPU/no-dup-inst-prefetch.ll
+++ b/llvm/test/CodeGen/AMDGPU/no-dup-inst-prefetch.ll
@@ -54,6 +54,7 @@ define amdgpu_cs void @_amdgpu_cs_main(float %0, i32 %1) {
;
; GFX12-LABEL: _amdgpu_cs_main:
; GFX12: ; %bb.0: ; %branch1_true
+; GFX12-NEXT: v_mov_b32_e32 v2, 0
; GFX12-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1
; GFX12-NEXT: v_mov_b32_e32 v1, 0
; GFX12-NEXT: s_mov_b32 s4, 0
@@ -76,7 +77,6 @@ define amdgpu_cs void @_amdgpu_cs_main(float %0, i32 %1) {
; GFX12-NEXT: s_cbranch_execz .LBB0_1
; GFX12-NEXT: ; %bb.3: ; %branch2_merge
; GFX12-NEXT: ; in Loop: Header=BB0_2 Depth=1
-; GFX12-NEXT: v_mov_b32_e32 v2, 0
; GFX12-NEXT: s_mov_b32 s5, s4
; GFX12-NEXT: s_mov_b32 s6, s4
; GFX12-NEXT: s_mov_b32 s7, s4
diff --git a/llvm/test/CodeGen/AMDGPU/pal-metadata-3.0-callable.ll b/llvm/test/CodeGen/AMDGPU/pal-metadata-3.0-callable.ll
index ffa0cf5b9b3e0..538ce15979de8 100644
--- a/llvm/test/CodeGen/AMDGPU/pal-metadata-3.0-callable.ll
+++ b/llvm/test/CodeGen/AMDGPU/pal-metadata-3.0-callable.ll
@@ -76,7 +76,7 @@
; CHECK-NEXT: .lds_size: 0
; CHECK-NEXT: .sgpr_count: 0x22
; CHECK-NEXT: .stack_frame_size_in_bytes: 0x10
-; CHECK-NEXT: .vgpr_count: 0x2
+; CHECK-NEXT: .vgpr_count: 0x3
; CHECK-NEXT: multiple_stack:
; CHECK-NEXT: .backend_stack_size: 0x24
; CHECK-NEXT: .lds_size: 0
>From cfc61472ddc6c365a1d2ad40c6c8cac6732e9025 Mon Sep 17 00:00:00 2001
From: Lucas Ramirez <lucas.rami at proton.me>
Date: Mon, 17 Feb 2025 12:45:11 +0000
Subject: [PATCH 06/10] == -> >= for achieved occupancy check at the end of the
stage
This prevents rolling back rematerializations in cases where rescheduling
improved occupancy even further than the target.
Also fix format.
---
llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp | 2 +-
llvm/lib/Target/AMDGPU/GCNSubtarget.h | 5 +++--
2 files changed, 4 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index 8f5b96fb788d8..6ae7840f11595 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -2132,7 +2132,7 @@ void PreRARematStage::finalizeGCNSchedStage() {
// which case we do not want to rollback either (the rescheduling was already
// reverted in PreRARematStage::shouldRevertScheduling in such cases).
unsigned MaxOcc = std::max(AchievedOcc, DAG.MinOccupancy);
- if (!IncreaseOccupancy || MaxOcc == TargetOcc)
+ if (!IncreaseOccupancy || MaxOcc >= TargetOcc)
return;
REMAT_DEBUG(dbgs() << "Rollbacking all rematerializations\n");
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index 92c6de8f3d7ad..f87a9038b95ca 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -1558,8 +1558,9 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
/// \returns max num VGPRs. This is the common utility function
/// called by MachineFunction and Function variants of getMaxNumVGPRs.
- unsigned getBaseMaxNumVGPRs(const Function &F,
- std::pair<unsigned, unsigned> NumVGPRBounds) const;
+ unsigned
+ getBaseMaxNumVGPRs(const Function &F,
+ std::pair<unsigned, unsigned> NumVGPRBounds) const;
/// \returns Maximum number of VGPRs that meets number of waves per execution
/// unit requirement for function \p F, or number of VGPRs explicitly
>From 1216b5d8328d08366d34cf6c457691421cc68812 Mon Sep 17 00:00:00 2001
From: Lucas Ramirez <lucas.rami at proton.me>
Date: Thu, 20 Feb 2025 00:20:22 +0000
Subject: [PATCH 07/10] Address feedback
---
llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp | 15 +++++--
...ine-scheduler-sink-trivial-remats-attr.mir | 24 +++++------
.../machine-scheduler-sink-trivial-remats.mir | 42 +++++++++++++------
3 files changed, 53 insertions(+), 28 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index 6ae7840f11595..0fb4c77fa8ad4 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -1690,6 +1690,7 @@ bool PreRARematStage::hasExcessVGPRs(const GCNRegPressure &RP,
HasSpill = true;
}
if (NumAGPRs > MaxVGPRs) {
+ ExcessArchVGPRs = NumArchVGPRs;
AGPRLimited = true;
HasSpill = true;
}
@@ -1747,7 +1748,8 @@ bool PreRARematStage::canIncreaseOccupancyOrReduceSpill() {
const unsigned MaxVGPRsNoSpill =
ST.getBaseMaxNumVGPRs(F, {ST.getMinNumVGPRs(OccBounds.second),
ST.getMaxNumVGPRs(OccBounds.first)});
- const unsigned MaxSGPRsMinOcc = ST.getMaxNumSGPRs(DAG.MinOccupancy, false);
+ const unsigned MaxSGPRsIncOcc =
+ ST.getMaxNumSGPRs(DAG.MinOccupancy + 1, false);
const unsigned MaxVGPRsIncOcc = ST.getMaxNumVGPRs(DAG.MinOccupancy + 1);
IncreaseOccupancy = OccBounds.second > DAG.MinOccupancy;
@@ -1784,13 +1786,20 @@ bool PreRARematStage::canIncreaseOccupancyOrReduceSpill() {
});
} else if (IncreaseOccupancy) {
// Check whether SGPR pressure prevents us from increasing occupancy.
- if (ClearOptRegionsIf(NumSGPRs > MaxSGPRsMinOcc))
+ if (ClearOptRegionsIf(NumSGPRs > MaxSGPRsIncOcc)) {
+ if (DAG.MinOccupancy >= OccBounds.first)
+ return false;
continue;
+ }
if (hasExcessVGPRs(RP, MaxVGPRsIncOcc, ExcessRP, OccAGPRLimited)) {
// Check whether AGPR pressure prevents us from increasing occupancy.
- if (ClearOptRegionsIf(OccAGPRLimited))
+ if (ClearOptRegionsIf(OccAGPRLimited)) {
+ if (DAG.MinOccupancy >= OccBounds.first)
+ return false;
continue;
+ }
+
// Occupancy could be increased by rematerializing ArchVGPRs.
REMAT_DEBUG({
if (ExcessRP) {
diff --git a/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats-attr.mir b/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats-attr.mir
index 557ca065f6129..dd190b70c25e5 100644
--- a/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats-attr.mir
+++ b/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats-attr.mir
@@ -486,9 +486,9 @@ body: |
S_ENDPGM 0
...
# Min/Max occupancy is 8. For targets with unified RF we are able to eliminate
-# VGPR spilling w.r.t. minimum workgroup size induced occupancy. We can only
-# eliminate ArchVGPR spilling in the non-unified RF case, which requires less
-# remats than in the unified case.
+# VGPR spilling w.r.t. minimum workgroup size induced occupancy using just
+# enough remats. In the non-unified RF case we remat all eligible ArchVGPRs to
+# try to eliminate both ArchVGPR and AGPR spilling.
---
name: reduce_arch_and_acc_vgrp_spill
tracksRegLiveness: true
@@ -526,11 +526,6 @@ body: |
; GFX908-NEXT: [[V_CVT_I32_F64_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 25, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_26:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 26, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 28, implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 29, implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 30, implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 31, implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 64, implicit $exec, implicit $mode
; GFX908-NEXT: [[DEF:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF1:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF2:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
@@ -568,18 +563,23 @@ body: |
; GFX908-NEXT: bb.1:
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_]], implicit [[V_CVT_I32_F64_e32_1]], implicit [[V_CVT_I32_F64_e32_2]], implicit [[V_CVT_I32_F64_e32_3]], implicit [[V_CVT_I32_F64_e32_4]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_5]], implicit [[V_CVT_I32_F64_e32_6]], implicit [[V_CVT_I32_F64_e32_7]], implicit [[V_CVT_I32_F64_e32_8]], implicit [[V_CVT_I32_F64_e32_9]]
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_32:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 27, implicit $exec, implicit $mode
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_10]], implicit [[V_CVT_I32_F64_e32_11]], implicit [[V_CVT_I32_F64_e32_12]], implicit [[V_CVT_I32_F64_e32_13]], implicit [[V_CVT_I32_F64_e32_14]]
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 27, implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 28, implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 29, implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 30, implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 31, implicit $exec, implicit $mode
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_15]], implicit [[V_CVT_I32_F64_e32_16]], implicit [[V_CVT_I32_F64_e32_17]], implicit [[V_CVT_I32_F64_e32_18]], implicit [[V_CVT_I32_F64_e32_19]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_20]], implicit [[V_CVT_I32_F64_e32_21]], implicit [[V_CVT_I32_F64_e32_22]], implicit [[V_CVT_I32_F64_e32_23]], implicit [[V_CVT_I32_F64_e32_24]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_25]], implicit [[V_CVT_I32_F64_e32_26]], implicit [[V_CVT_I32_F64_e32_32]], implicit [[V_CVT_I32_F64_e32_27]], implicit [[V_CVT_I32_F64_e32_28]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_29]], implicit [[V_CVT_I32_F64_e32_30]], implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_25]], implicit [[V_CVT_I32_F64_e32_26]], implicit [[V_CVT_I32_F64_e32_27]], implicit [[V_CVT_I32_F64_e32_28]], implicit [[V_CVT_I32_F64_e32_29]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_30]], implicit [[V_CVT_I32_F64_e32_31]], implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]]
; GFX908-NEXT: S_NOP 0, implicit [[DEF3]], implicit [[DEF4]], implicit [[DEF5]], implicit [[DEF6]], implicit [[DEF7]]
; GFX908-NEXT: S_NOP 0, implicit [[DEF8]], implicit [[DEF9]], implicit [[DEF10]], implicit [[DEF11]], implicit [[DEF12]]
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_32:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 64, implicit $exec, implicit $mode
; GFX908-NEXT: S_NOP 0, implicit [[DEF13]], implicit [[DEF14]], implicit [[DEF15]], implicit [[DEF16]], implicit [[DEF17]]
; GFX908-NEXT: S_NOP 0, implicit [[DEF18]], implicit [[DEF19]], implicit [[DEF20]], implicit [[DEF21]], implicit [[DEF22]]
; GFX908-NEXT: S_NOP 0, implicit [[DEF23]], implicit [[DEF24]], implicit [[DEF25]], implicit [[DEF26]], implicit [[DEF27]]
- ; GFX908-NEXT: S_NOP 0, implicit [[DEF28]], implicit [[DEF29]], implicit [[DEF30]], implicit [[DEF31]], implicit [[V_CVT_I32_F64_e32_31]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF28]], implicit [[DEF29]], implicit [[DEF30]], implicit [[DEF31]], implicit [[V_CVT_I32_F64_e32_32]]
; GFX908-NEXT: S_NOP 0, implicit [[DEF32]]
; GFX908-NEXT: S_ENDPGM 0
;
diff --git a/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats.mir b/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats.mir
index ab7f8d544ddd2..f9413eed7521c 100644
--- a/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats.mir
+++ b/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats.mir
@@ -1168,21 +1168,26 @@ body: |
; GFX908-NEXT: [[S_MOV_B32_71:%[0-9]+]]:sgpr_32 = S_MOV_B32 70
; GFX908-NEXT: [[S_MOV_B32_72:%[0-9]+]]:sgpr_32 = S_MOV_B32 71
; GFX908-NEXT: [[V_CVT_I32_F64_e32_20:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 20, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_21:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 21, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[S_MOV_B32_73:%[0-9]+]]:sgpr_32 = S_MOV_B32 72
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 25, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_21:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 21, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[S_MOV_B32_74:%[0-9]+]]:sgpr_32 = S_MOV_B32 73
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_26:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 26, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[S_MOV_B32_75:%[0-9]+]]:sgpr_32 = S_MOV_B32 74
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[S_MOV_B32_76:%[0-9]+]]:sgpr_32 = S_MOV_B32 75
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[S_MOV_B32_77:%[0-9]+]]:sgpr_32 = S_MOV_B32 76
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 25, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[S_MOV_B32_78:%[0-9]+]]:sgpr_32 = S_MOV_B32 77
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_26:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 26, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[S_MOV_B32_79:%[0-9]+]]:sgpr_32 = S_MOV_B32 78
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 27, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[S_MOV_B32_80:%[0-9]+]]:sgpr_32 = S_MOV_B32 79
; GFX908-NEXT: [[S_MOV_B32_81:%[0-9]+]]:sgpr_32 = S_MOV_B32 80
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 28, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 29, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 30, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 31, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.1:
; GFX908-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000)
@@ -1196,14 +1201,13 @@ body: |
; GFX908-NEXT: bb.2:
; GFX908-NEXT: successors: %bb.3(0x80000000)
; GFX908-NEXT: {{ $}}
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 28, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_27]]
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 28, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_28]]
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.3:
; GFX908-NEXT: successors: %bb.5(0x04000000), %bb.4(0x7c000000)
; GFX908-NEXT: {{ $}}
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 27, implicit $exec, implicit $mode
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_28]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_27]]
; GFX908-NEXT: $exec = S_OR_B64 $exec, [[COPY2]], implicit-def $scc
; GFX908-NEXT: undef [[S_MOV_B32_:%[0-9]+]].sub0:sreg_64 = S_ADD_I32 [[S_MOV_B32_]].sub0, -1, implicit-def dead $scc
; GFX908-NEXT: S_CMP_LG_U32 [[S_MOV_B32_]].sub0, 0, implicit-def $scc
@@ -1228,7 +1232,11 @@ body: |
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_20]], implicit [[V_CVT_I32_F64_e32_21]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_22]], implicit [[V_CVT_I32_F64_e32_23]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_24]], implicit [[V_CVT_I32_F64_e32_25]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_26]]
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_32:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 32, implicit $exec, implicit $mode
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_26]], implicit [[V_CVT_I32_F64_e32_27]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_28]], implicit [[V_CVT_I32_F64_e32_29]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_30]], implicit [[V_CVT_I32_F64_e32_31]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_32]]
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_1]], implicit [[S_MOV_B32_2]]
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_3]], implicit [[S_MOV_B32_4]]
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_5]], implicit [[S_MOV_B32_6]]
@@ -1307,7 +1315,12 @@ body: |
%34:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
%35:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 25, implicit $exec, implicit $mode, implicit-def $m0
%36:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 26, implicit $exec, implicit $mode, implicit-def $m0
- %37:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 27, implicit $exec, implicit $mode
+ %37:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 27, implicit $exec, implicit $mode, implicit-def $m0
+ %38:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 28, implicit $exec, implicit $mode, implicit-def $m0
+ %39:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 29, implicit $exec, implicit $mode, implicit-def $m0
+ %40:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 30, implicit $exec, implicit $mode, implicit-def $m0
+ %41:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 31, implicit $exec, implicit $mode, implicit-def $m0
+ %42:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 32, implicit $exec, implicit $mode
%100:sgpr_32 = S_MOV_B32 0
%101:sgpr_32 = S_MOV_B32 1
@@ -1430,7 +1443,10 @@ body: |
S_NOP 0, implicit %30, implicit %31
S_NOP 0, implicit %32, implicit %33
S_NOP 0, implicit %34, implicit %35
- S_NOP 0, implicit %36
+ S_NOP 0, implicit %36, implicit %37
+ S_NOP 0, implicit %38, implicit %39
+ S_NOP 0, implicit %40, implicit %41
+ S_NOP 0, implicit %42
S_NOP 0, implicit %100, implicit %101
S_NOP 0, implicit %102, implicit %103
>From a1f1e4de7be2a0a5a6b985be8c1f3ff18007113a Mon Sep 17 00:00:00 2001
From: Lucas Ramirez <lucas.rami at proton.me>
Date: Thu, 27 Feb 2025 12:09:43 +0000
Subject: [PATCH 08/10] Incorporate machine scheduler change from main
Now that single-MI regions are no longer filtered out by the machine
scheduler, we can greatly simplify the logic for updating region boundaries
during rematerialization/rollback, at the cost of a single walk
through the MF's instructions at the beginning of the stage.
---
llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp | 96 ++++++++++-----------
llvm/lib/Target/AMDGPU/GCNSchedStrategy.h | 17 ++--
2 files changed, 54 insertions(+), 59 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index 0fb4c77fa8ad4..7a4f08aef0453 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -1099,6 +1099,13 @@ bool PreRARematStage::initGCNSchedStage() {
DAG.Regions.size() == 1 || !canIncreaseOccupancyOrReduceSpill())
return false;
+ // Map all MIs to their parent region.
+ for (unsigned I = 0, E = DAG.Regions.size(); I < E; ++I) {
+ RegionBoundaries Region = DAG.Regions[I];
+ for (auto MI = Region.first; MI != Region.second; ++MI)
+ MIRegion.insert({&*MI, I});
+ }
+
// Rematerialize identified instructions and update scheduler's state.
rematerialize();
if (GCNTrackers)
@@ -1841,7 +1848,7 @@ bool PreRARematStage::canIncreaseOccupancyOrReduceSpill() {
// Cache set of registers that are going to be rematerialized.
DenseSet<unsigned> RematRegs;
- // identify rematerializable instructions in the function.
+ // Identify rematerializable instructions in the function.
for (unsigned I = 0, E = DAG.Regions.size(); I != E; ++I) {
auto Region = DAG.Regions[I];
for (auto MI = Region.first; MI != Region.second; ++MI) {
@@ -1885,7 +1892,7 @@ bool PreRARematStage::canIncreaseOccupancyOrReduceSpill() {
REMAT_DEBUG(dbgs() << "Region " << I << ": remat instruction " << DefMI);
RematInstruction &Remat =
- Rematerializations.try_emplace(&DefMI, I, UseMI).first->second;
+ Rematerializations.try_emplace(&DefMI, UseMI).first->second;
bool RematUseful = false;
if (auto It = OptRegions.find(I); It != OptRegions.end()) {
@@ -1957,6 +1964,7 @@ void PreRARematStage::rematerialize() {
MachineBasicBlock::iterator InsertPos(Remat.UseMI);
Register Reg = DefMI->getOperand(0).getReg();
unsigned SubReg = DefMI->getOperand(0).getSubReg();
+ unsigned DefRegion = MIRegion.at(DefMI);
// Rematerialize DefMI to its use block.
TII->reMaterialize(*InsertPos->getParent(), InsertPos, Reg, SubReg, *DefMI,
@@ -1968,8 +1976,12 @@ void PreRARematStage::rematerialize() {
// Update region boundaries in regions we sinked from (remove defining MI)
// and to (insert MI rematerialized in use block). Only then we can erase
// the original MI.
- DAG.updateRegionBoundaries(DAG.Regions, DefMI, nullptr);
- DAG.updateRegionBoundaries(DAG.Regions, InsertPos, Remat.RematMI);
+ DAG.updateRegionBoundaries(DAG.Regions[DefRegion], DefMI, nullptr);
+ auto UseRegion = MIRegion.find(Remat.UseMI);
+ if (UseRegion != MIRegion.end()) {
+ DAG.updateRegionBoundaries(DAG.Regions[UseRegion->second], InsertPos,
+ Remat.RematMI);
+ }
DefMI->eraseFromParent();
DAG.LIS->RemoveMachineInstrFromMaps(*DefMI);
@@ -2024,8 +2036,8 @@ void PreRARematStage::rematerialize() {
}
// RP in the region from which the instruction was rematerialized may or may
// not decrease.
- ImpactedRegions.insert({Remat.DefRegion, DAG.Pressure[Remat.DefRegion]});
- RecomputeRP.insert(Remat.DefRegion);
+ ImpactedRegions.insert({DefRegion, DAG.Pressure[DefRegion]});
+ RecomputeRP.insert(DefRegion);
// Update the register's live interval manually. This is aligned with the
// instruction collection phase in that it assumes a single def and use
@@ -2149,10 +2161,11 @@ void PreRARematStage::finalizeGCNSchedStage() {
static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo());
// Rollback the rematerializations.
- for (const auto &[_, Remat] : Rematerializations) {
+ for (const auto &[DefMI, Remat] : Rematerializations) {
MachineInstr &RematMI = *Remat.RematMI;
- MachineBasicBlock::iterator InsertPos(DAG.Regions[Remat.DefRegion].second);
- MachineBasicBlock *MBB = getRegionMBB(MF, DAG.Regions[Remat.DefRegion]);
+ unsigned DefRegion = MIRegion.at(DefMI);
+ MachineBasicBlock::iterator InsertPos(DAG.Regions[DefRegion].second);
+ MachineBasicBlock *MBB = getRegionMBB(MF, DAG.Regions[DefRegion]);
Register Reg = RematMI.getOperand(0).getReg();
unsigned SubReg = RematMI.getOperand(0).getSubReg();
@@ -2164,6 +2177,13 @@ void PreRARematStage::finalizeGCNSchedStage() {
NewMI->getOperand(0).setSubReg(SubReg);
DAG.LIS->InsertMachineInstrInMaps(*NewMI);
+ auto UseRegion = MIRegion.find(Remat.UseMI);
+ if (UseRegion != MIRegion.end()) {
+ DAG.updateRegionBoundaries(DAG.Regions[UseRegion->second], RematMI,
+ nullptr);
+ }
+ DAG.updateRegionBoundaries(DAG.Regions[DefRegion], InsertPos, NewMI);
+
// Erase rematerialized MI.
RematMI.eraseFromParent();
DAG.LIS->RemoveMachineInstrFromMaps(RematMI);
@@ -2185,49 +2205,25 @@ void PreRARematStage::finalizeGCNSchedStage() {
}
void GCNScheduleDAGMILive::updateRegionBoundaries(
- SmallVectorImpl<RegionBoundaries> &RegionBoundaries,
- MachineBasicBlock::iterator MI, MachineInstr *NewMI) {
- unsigned I = 0, E = RegionBoundaries.size();
- // Search for first region of the block where MI is located. We may encounter
- // an empty region if all instructions from an initially non-empty region were
- // removed.
- while (I != E && RegionBoundaries[I].first != RegionBoundaries[I].second &&
- MI->getParent() != RegionBoundaries[I].first->getParent())
- ++I;
-
- for (; I != E; ++I) {
- auto &Bounds = RegionBoundaries[I];
- assert(MI != Bounds.second && "cannot insert at region end");
- assert(!NewMI || NewMI != Bounds.second && "cannot remove at region end");
-
- // We may encounter an empty region if all of the region' instructions were
- // previously removed.
- if (Bounds.first == Bounds.second) {
- if (MI->getParent()->end() != Bounds.second)
- return;
- continue;
- }
- if (MI->getParent() != Bounds.first->getParent())
- return;
-
- // We only care for modifications at the beginning of the region since the
- // upper region boundary is exclusive.
- if (MI != Bounds.first)
- continue;
- if (!NewMI) {
- // This is an MI removal, which may leave the region empty; in such cases
- // set both boundaries to the removed instruction's MBB's end.
- MachineBasicBlock::iterator NextMI = std::next(MI);
- if (NextMI != Bounds.second)
- Bounds.first = NextMI;
- else
- Bounds.first = Bounds.second;
- } else {
- // This is an MI insertion at the beggining of the region.
- Bounds.first = NewMI;
- }
+ RegionBoundaries &RegionBounds, MachineBasicBlock::iterator MI,
+ MachineInstr *NewMI) {
+ assert(!NewMI ||
+ NewMI != RegionBounds.second && "cannot remove at region end");
+
+ if (RegionBounds.first == RegionBounds.second) {
+ assert(NewMI && "cannot remove from an empty region");
+ RegionBounds.first = NewMI;
return;
}
+
+ // We only care for modifications at the beginning of a non-empty region since
+ // the upper region boundary is exclusive.
+ if (MI != RegionBounds.first)
+ return;
+ if (!NewMI)
+ RegionBounds.first = std::next(MI); // Removal
+ else
+ RegionBounds.first = NewMI; // Insertion
}
static bool hasIGLPInstrs(ScheduleDAGInstrs *DAG) {
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
index 9fd40d8e5e64f..e8a169072dee4 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
@@ -295,11 +295,10 @@ class GCNScheduleDAGMILive final : public ScheduleDAGMILive {
/// If necessary, updates a region's boundaries following insertion ( \p NewMI
/// != nullptr) or removal ( \p NewMI == nullptr) of a \p MI in the region.
/// For an MI removal, this must be called before the MI is actually erased
- /// from its parent MBB. If a region is left empty by a removal, both
- /// boundaries are set to the last removed MI's MBB's end.
- void
- updateRegionBoundaries(SmallVectorImpl<RegionBoundaries> &RegionBoundaries,
- MachineBasicBlock::iterator MI, MachineInstr *NewMI);
+ /// from its parent MBB.
+ void updateRegionBoundaries(RegionBoundaries &RegionBounds,
+ MachineBasicBlock::iterator MI,
+ MachineInstr *NewMI);
void runSchedStages();
@@ -460,13 +459,13 @@ class PreRARematStage : public GCNSchedStage {
/// Set of regions in which the rematerializable instruction's defined
/// register is a live-in.
SmallDenseSet<unsigned, 4> LiveInRegions;
- /// Region containing the rematerializable instruction.
- unsigned DefRegion;
- RematInstruction(unsigned DefRegion, MachineInstr *UseMI)
- : UseMI(UseMI), DefRegion(DefRegion) {}
+ RematInstruction(MachineInstr *UseMI) : UseMI(UseMI) {}
};
+ /// Maps all MIs to their parent region. MI terminators are considered to be
+ /// outside the region they delimitate, and as such are not stored in the map.
+ DenseMap<MachineInstr *, unsigned> MIRegion;
/// Collects instructions to rematerialize.
MapVector<MachineInstr *, RematInstruction> Rematerializations;
/// Collect regions whose live-ins or register pressure will change due to
>From c6027f6497eae33eec72cd22e788997efdf17818 Mon Sep 17 00:00:00 2001
From: Lucas Ramirez <lucas.rami at proton.me>
Date: Tue, 18 Mar 2025 14:28:30 +0000
Subject: [PATCH 09/10] Address feedback
- Allow remat in the same block if the using MI is in a different region. Add
unit test.
- Take into account excess ArchVGPR/AGPR pressure above addressable
limits. Add unit tests to make sure this works.
- Fix unused register in existing unit test.
---
llvm/lib/Target/AMDGPU/GCNRegPressure.h | 10 +-
llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp | 242 ++-
llvm/lib/Target/AMDGPU/GCNSchedStrategy.h | 7 -
...ine-scheduler-sink-trivial-remats-attr.mir | 1862 ++++++++++++++++-
.../machine-scheduler-sink-trivial-remats.mir | 180 +-
5 files changed, 2138 insertions(+), 163 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.h b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
index 7554b9f578fcb..c23f3c00f89e9 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.h
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
@@ -53,11 +53,19 @@ struct GCNRegPressure {
/// UnifiedVGPRFile
unsigned getVGPRNum(bool UnifiedVGPRFile) const {
if (UnifiedVGPRFile) {
- return Value[AGPR32] ? alignTo(Value[VGPR32], 4) + Value[AGPR32]
+ return Value[AGPR32] ? getUnifiedVGPRNum(Value[VGPR32], Value[AGPR32])
: Value[VGPR32] + Value[AGPR32];
}
return std::max(Value[VGPR32], Value[AGPR32]);
}
+
+ /// Returns the aggregated VGPR pressure, assuming \p NumArchVGPRs ArchVGPRs
+ /// and \p NumAGPRs AGPRS, for a target with a unified VGPR file.
+ inline static unsigned getUnifiedVGPRNum(unsigned NumArchVGPRs,
+ unsigned NumAGPRs) {
+ return alignTo(NumArchVGPRs, 4) + NumAGPRs;
+ }
+
/// \returns the ArchVGPR32 pressure
unsigned getArchVGPRNum() const { return Value[VGPR32]; }
/// \returns the AccVGPR32 pressure
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index 7a4f08aef0453..5204bc9bfcf6d 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -1682,40 +1682,135 @@ bool PreRARematStage::allUsesAvailableAt(const MachineInstr *InstToRemat,
return true;
}
-bool PreRARematStage::hasExcessVGPRs(const GCNRegPressure &RP,
- unsigned MaxVGPRs,
- unsigned &ExcessArchVGPRs,
- bool &AGPRLimited) {
+namespace {
+/// Models excess register pressure in a region and tracks our progress as we
+/// identify rematerialization opportunities.
+struct ExcessRP {
+ /// Number of excess ArchVGPRs.
+ unsigned ArchVGPRs = 0;
+ /// Number of excess AGPRs.
+ unsigned AGPRs = 0;
+ /// For unified register files, number of excess VGPRs.
+ unsigned VGPRs = 0;
+ /// For unified register files with AGPR usage, number of excess ArchVGPRs to
+ /// save before we are able to save a whole allocation granule.
+ unsigned ArchVGPRsToAlignment = 0;
+ /// Whether the region uses AGPRs.
+ bool HasAGPRs = false;
+
+ /// Constructs the excess RP model; determines the excess pressure w.r.t. a
+ /// maximum number of allowed VGPRs.
+ ExcessRP(const GCNSubtarget &ST, const GCNRegPressure &RP, unsigned MaxVGPRs);
+
+ /// Accounts for \p NumRegs saved ArchVGPRs in the model. If \p
+ /// UseArchVGPRForAGPRSpill is true, saved ArchVGPRs are used to save excess
+ /// AGPRs once excess ArchVGPR pressure has been eliminated. Returns whether
+ /// saving these ArchVGPRs helped reduce excess pressure.
+ bool saveArchVGPRs(unsigned NumRegs, bool UseArchVGPRForAGPRSpill);
+
+ /// Accounts for \p NumRegs saved AGPRS in the model. Returns whether saving
+ /// these ArchVGPRs helped reduce excess pressure.
+ bool saveAGPRs(unsigned NumRegs);
+
+ /// Returns whether there is any excess register pressure.
+ operator bool() const { return ArchVGPRs != 0 || AGPRs != 0 || VGPRs != 0; }
+
+#ifndef NDEBUG
+ friend raw_ostream &operator<<(raw_ostream &OS, const ExcessRP &Excess) {
+ OS << Excess.ArchVGPRs << " ArchVGPRs, " << Excess.AGPRs << " AGPRs, and "
+ << Excess.VGPRs << " VGPRs (next ArchVGPR aligment in "
+ << Excess.ArchVGPRsToAlignment << " registers)\n";
+ return OS;
+ }
+#endif
+
+private:
+ static inline bool saveRegs(unsigned &LeftToSave, unsigned &NumRegs) {
+ unsigned NumSaved = std::min(LeftToSave, NumRegs);
+ NumRegs -= NumSaved;
+ LeftToSave -= NumSaved;
+ return NumSaved;
+ }
+};
+} // namespace
+
+ExcessRP::ExcessRP(const GCNSubtarget &ST, const GCNRegPressure &RP,
+ unsigned MaxVGPRs) {
+ unsigned NumArchVGPRs = RP.getArchVGPRNum();
unsigned NumAGPRs = RP.getAGPRNum();
- if (!ST.hasGFX90AInsts() || !NumAGPRs) {
- // Non-unified RF. We can only reduce ArchVGPR excess pressure at this
- // point, but still want to identify when there is AGPR excess pressure.
- bool HasSpill = false;
- unsigned NumArchVGPRs = RP.getArchVGPRNum();
- if (NumArchVGPRs > MaxVGPRs) {
- ExcessArchVGPRs = NumArchVGPRs - MaxVGPRs;
- HasSpill = true;
- }
- if (NumAGPRs > MaxVGPRs) {
- ExcessArchVGPRs = NumArchVGPRs;
- AGPRLimited = true;
- HasSpill = true;
+ HasAGPRs = NumAGPRs;
+
+ if (!ST.hasGFX90AInsts()) {
+ // Non-unified RF. Account for excess pressure for ArchVGPRs and AGPRs
+ // independently.
+ if (NumArchVGPRs > MaxVGPRs)
+ ArchVGPRs = NumArchVGPRs - MaxVGPRs;
+ if (NumAGPRs > MaxVGPRs)
+ AGPRs = NumAGPRs - MaxVGPRs;
+ return;
+ }
+
+ // Independently of whether overall VGPR pressure is under the limit, we still
+ // have to check whether ArchVGPR pressure or AGPR pressure alone exceeds the
+ // number of addressable registers in each category.
+ const unsigned MaxArchVGPRs = ST.getAddressableNumArchVGPRs();
+ if (NumArchVGPRs > MaxArchVGPRs) {
+ ArchVGPRs = NumArchVGPRs - MaxArchVGPRs;
+ NumArchVGPRs = MaxArchVGPRs;
+ }
+ if (NumAGPRs > MaxArchVGPRs) {
+ AGPRs = NumAGPRs - MaxArchVGPRs;
+ NumAGPRs = MaxArchVGPRs;
+ }
+
+ // Check overall VGPR usage against the limit; any excess above addressable
+ // register limits has already been accounted for.
+ unsigned NumVGPRs = GCNRegPressure::getUnifiedVGPRNum(NumArchVGPRs, NumAGPRs);
+ if (NumVGPRs > MaxVGPRs) {
+ VGPRs = NumVGPRs - MaxVGPRs;
+ ArchVGPRsToAlignment = NumArchVGPRs - alignDown(NumArchVGPRs, 4);
+ if (!ArchVGPRsToAlignment)
+ ArchVGPRsToAlignment = 4;
+ }
+}
+
+bool ExcessRP::saveArchVGPRs(unsigned NumRegs, bool UseArchVGPRForAGPRSpill) {
+ bool Progress = saveRegs(ArchVGPRs, NumRegs);
+
+ if (HasAGPRs) {
+ // ArchVGPRs can only be allocated as a multiple of a granule.
+ const unsigned Granule = 4;
+ unsigned NumSavedRegs = 0;
+
+ // Count the number of whole ArchVGPR allocation granules we can save.
+ if (unsigned NumGranules = NumRegs / Granule; NumGranules) {
+ NumSavedRegs = NumGranules * Granule;
+ NumRegs -= NumSavedRegs;
}
- return HasSpill;
- }
- if (RP.getVGPRNum(true) > MaxVGPRs) {
- // Unified RF. We can only remat ArchVGPRs; AGPR pressure alone may prevent
- // us from eliminating spilling.
- unsigned NumArchVGPRs = RP.getArchVGPRNum();
- if (NumAGPRs >= MaxVGPRs) {
- AGPRLimited = true;
- ExcessArchVGPRs = NumArchVGPRs;
+
+ // We may be able to save one more whole ArchVGPR allocation granule.
+ if (NumRegs >= ArchVGPRsToAlignment) {
+ NumSavedRegs += Granule;
+ ArchVGPRsToAlignment = Granule - (NumRegs - ArchVGPRsToAlignment);
} else {
- ExcessArchVGPRs = NumArchVGPRs - alignDown(MaxVGPRs - NumAGPRs, 4);
+ ArchVGPRsToAlignment -= NumRegs;
}
- return true;
+
+ // Prioritize saving generic VGPRs, then AGPRs if we allow AGPR-to-ArchVGPR
+ // spilling and have some free ArchVGPR slots.
+ Progress |= saveRegs(VGPRs, NumSavedRegs);
+ if (UseArchVGPRForAGPRSpill)
+ Progress |= saveRegs(AGPRs, NumSavedRegs);
+ } else {
+ // No AGPR usage in the region i.e., no allocation granule to worry about.
+ Progress |= saveRegs(VGPRs, NumRegs);
}
- return false;
+
+ return Progress;
+}
+
+bool ExcessRP::saveAGPRs(unsigned NumRegs) {
+ return saveRegs(AGPRs, NumRegs) || saveRegs(VGPRs, NumRegs);
}
bool PreRARematStage::canIncreaseOccupancyOrReduceSpill() {
@@ -1725,10 +1820,9 @@ bool PreRARematStage::canIncreaseOccupancyOrReduceSpill() {
<< MF.getFunction().getName() << '\n');
// Maps optimizable regions (i.e., regions at minimum and VGPR-limited
- // occupancy, or regions with VGPR spilling) to their excess RP.
- DenseMap<unsigned, unsigned> OptRegions;
+ // occupancy, or regions with VGPR spilling) to a model of their excess RP.
+ DenseMap<unsigned, ExcessRP> OptRegions;
const Function &F = MF.getFunction();
- const bool UnifiedRF = ST.hasGFX90AInsts();
// Adjust workgroup size induced occupancy bounds with the
// "amdgpu-waves-per-eu" attribute. This should be offloaded to a subtarget
@@ -1774,23 +1868,15 @@ bool PreRARematStage::canIncreaseOccupancyOrReduceSpill() {
// occupancy by one in the whole function.
for (unsigned I = 0, E = DAG.Regions.size(); I != E; ++I) {
GCNRegPressure &RP = DAG.Pressure[I];
- unsigned ExcessRP = 0;
- unsigned NumSGPRs = RP.getSGPRNum();
// Check whether SGPR pressures prevents us from eliminating spilling.
+ unsigned NumSGPRs = RP.getSGPRNum();
if (NumSGPRs > MaxSGPRsNoSpill)
ClearOptRegionsIf(IncreaseOccupancy);
- bool OccAGPRLimited = false;
- if (hasExcessVGPRs(RP, MaxVGPRsNoSpill, ExcessRP, OccAGPRLimited)) {
+ ExcessRP Excess(ST, RP, MaxVGPRsNoSpill);
+ if (Excess) {
ClearOptRegionsIf(IncreaseOccupancy);
- REMAT_DEBUG({
- if (ExcessRP) {
- StringRef RegClass = UnifiedRF ? "VGPRs" : "ArchVGPRs";
- dbgs() << "Region " << I << " is spilling " << RegClass << ", save "
- << ExcessRP << " to eliminate " << RegClass << "-spilling\n";
- }
- });
} else if (IncreaseOccupancy) {
// Check whether SGPR pressure prevents us from increasing occupancy.
if (ClearOptRegionsIf(NumSGPRs > MaxSGPRsIncOcc)) {
@@ -1798,46 +1884,54 @@ bool PreRARematStage::canIncreaseOccupancyOrReduceSpill() {
return false;
continue;
}
-
- if (hasExcessVGPRs(RP, MaxVGPRsIncOcc, ExcessRP, OccAGPRLimited)) {
- // Check whether AGPR pressure prevents us from increasing occupancy.
- if (ClearOptRegionsIf(OccAGPRLimited)) {
+ if ((Excess = ExcessRP(ST, RP, MaxVGPRsIncOcc))) {
+ // We can only rematerialize ArchVGPRs at this point.
+ unsigned NumArchVGPRsToRemat = Excess.ArchVGPRs + Excess.VGPRs;
+ bool NotEnoughArchVGPRs = NumArchVGPRsToRemat > RP.getArchVGPRNum();
+ if (ClearOptRegionsIf(Excess.AGPRs || NotEnoughArchVGPRs)) {
if (DAG.MinOccupancy >= OccBounds.first)
return false;
continue;
}
-
- // Occupancy could be increased by rematerializing ArchVGPRs.
- REMAT_DEBUG({
- if (ExcessRP) {
- StringRef RegClass = UnifiedRF ? "VGPRs" : "ArchVGPRs";
- dbgs() << "Region " << I << " has min. occupancy: save " << ExcessRP
- << " " << RegClass << " to improve occupancy\n";
- }
- });
}
}
- if (ExcessRP)
- OptRegions.insert({I, ExcessRP});
+ if (Excess)
+ OptRegions.insert({I, Excess});
}
if (OptRegions.empty())
return false;
+#ifndef DEBUG
+ if (IncreaseOccupancy)
+ REMAT_DEBUG(dbgs() << "Occupancy minimal in regions:\n");
+ else
+ REMAT_DEBUG(dbgs() << "Spilling in regions:\n");
+ for (unsigned I = 0, E = DAG.Regions.size(); I != E; ++I)
+ if (auto OptIt = OptRegions.find(I); OptIt != OptRegions.end())
+ REMAT_DEBUG(dbgs() << " " << I << ": " << OptIt->getSecond() << '\n');
+#endif
+
// When we are reducing spilling, the target is the minimum achievable
- // occupancy implied by workgroup sizes / the "amdgpu-waves-per-eu" attribute.
+ // occupancy implied by workgroup sizes / the "amdgpu-waves-per-eu"
+ // attribute.
TargetOcc = IncreaseOccupancy ? DAG.MinOccupancy + 1 : OccBounds.first;
// Accounts for a reduction in RP in an optimizable region. Returns whether we
// estimate that we have identified enough rematerialization opportunities to
- // achieve our goal.
- auto ReduceRPInRegion = [&](auto OptIt, LaneBitmask Mask) -> bool {
- auto NumRegs = SIRegisterInfo::getNumCoveredRegs(Mask);
- unsigned I = OptIt->getFirst();
- unsigned &Excess = OptIt->getSecond();
- if (NumRegs >= Excess)
- OptRegions.erase(I);
- else
- Excess -= NumRegs;
+ // achieve our goal, and sets Progress to true when this particular reduction
+ // in pressure was helpful toward that goal.
+ auto ReduceRPInRegion = [&](auto OptIt, LaneBitmask Mask,
+ bool &Progress) -> bool {
+ ExcessRP &Excess = OptIt->getSecond();
+ // We allow saved ArchVGPRs to be considered as free spill slots for AGPRs
+ // only when we are just trying to eliminate spilling to memory. At this
+ // point we err on the conservative side and do not increase
+ // register-to-register spilling for the sake of increasing occupancy.
+ Progress |=
+ Excess.saveArchVGPRs(SIRegisterInfo::getNumCoveredRegs(Mask),
+ /*UseArchVGPRForAGPRSpill=*/!IncreaseOccupancy);
+ if (!Excess)
+ OptRegions.erase(OptIt->getFirst());
return OptRegions.empty();
};
@@ -1865,9 +1959,11 @@ bool PreRARematStage::canIncreaseOccupancyOrReduceSpill() {
continue;
// We only care to rematerialize the instruction if it has a single
- // non-debug user in a different block.
+ // non-debug user in a different block. The using MI may not belong to a
+ // region if it is a lone region terminator.
MachineInstr *UseMI = DAG.MRI.getOneNonDBGUser(Reg);
- if (!UseMI || DefMI.getParent() == UseMI->getParent())
+ auto UseRegion = MIRegion.find(UseMI);
+ if (!UseMI || (UseRegion != MIRegion.end() && UseRegion->second == I))
continue;
// Do not rematerialize an instruction if it uses or is used by an
@@ -1901,9 +1997,8 @@ bool PreRARematStage::canIncreaseOccupancyOrReduceSpill() {
// maximum RP in the region is reached somewhere between the defining
// instruction and the end of the region.
REMAT_DEBUG(dbgs() << " Defining region is optimizable\n");
- RematUseful = true;
LaneBitmask Mask = DAG.RegionLiveOuts.getLiveRegsForRegionIdx(I)[Reg];
- if (ReduceRPInRegion(It, Mask))
+ if (ReduceRPInRegion(It, Mask, RematUseful))
return true;
}
@@ -1923,8 +2018,7 @@ bool PreRARematStage::canIncreaseOccupancyOrReduceSpill() {
// instruction's use.
if (auto It = OptRegions.find(LIRegion); It != OptRegions.end()) {
REMAT_DEBUG(dbgs() << " Live-in in region " << LIRegion << '\n');
- RematUseful = true;
- if (ReduceRPInRegion(It, DAG.LiveIns[LIRegion][Reg]))
+ if (ReduceRPInRegion(It, DAG.LiveIns[LIRegion][Reg], RematUseful))
return true;
}
}
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
index e8a169072dee4..7eca36f070c78 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
@@ -484,13 +484,6 @@ class PreRARematStage : public GCNSchedStage {
/// spilling.
bool IncreaseOccupancy;
- /// Determines whether there is excess VGPR pressure in \p RP w.r.t. \p
- /// MaxVGPRs. If there is, \p ExcessArchVGPRs is set to the number of
- /// ArchVGPRs one must save to eliminate the excess and \p AGPRLimited is set
- /// to true if AGPR pressure alone causes an excess.
- bool hasExcessVGPRs(const GCNRegPressure &RP, unsigned MaxVGPRs,
- unsigned &ExcessArchVGPRs, bool &AGPRLimited);
-
/// Returns whether remat can reduce spilling or increase function occupancy
/// by 1 through rematerialization. If it can do one, collects instructions in
/// PreRARematStage::Rematerializations and sets the target occupancy in
diff --git a/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats-attr.mir b/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats-attr.mir
index dd190b70c25e5..b72718239de0d 100644
--- a/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats-attr.mir
+++ b/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats-attr.mir
@@ -15,6 +15,12 @@
define void @reduce_arch_and_acc_vgrp_spill() "amdgpu-flat-work-group-size"="1024,1024" {
ret void
}
+ define void @reduce_spill_archvgpr_above_addressable_limit() "amdgpu-waves-per-eu"="1,10" {
+ ret void
+ }
+ define void @reduce_spill_agpr_above_addressable_limit() "amdgpu-waves-per-eu"="1,10" {
+ ret void
+ }
---
# User-requested maximum number of VGPRs need to be taken into account by
# the scheduler's rematerialization stage. Register usage above that number
@@ -526,6 +532,11 @@ body: |
; GFX908-NEXT: [[V_CVT_I32_F64_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 25, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_26:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 26, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 28, implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 29, implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 30, implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 31, implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 64, implicit $exec, implicit $mode
; GFX908-NEXT: [[DEF:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF1:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF2:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
@@ -563,23 +574,18 @@ body: |
; GFX908-NEXT: bb.1:
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_]], implicit [[V_CVT_I32_F64_e32_1]], implicit [[V_CVT_I32_F64_e32_2]], implicit [[V_CVT_I32_F64_e32_3]], implicit [[V_CVT_I32_F64_e32_4]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_5]], implicit [[V_CVT_I32_F64_e32_6]], implicit [[V_CVT_I32_F64_e32_7]], implicit [[V_CVT_I32_F64_e32_8]], implicit [[V_CVT_I32_F64_e32_9]]
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_32:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 27, implicit $exec, implicit $mode
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_10]], implicit [[V_CVT_I32_F64_e32_11]], implicit [[V_CVT_I32_F64_e32_12]], implicit [[V_CVT_I32_F64_e32_13]], implicit [[V_CVT_I32_F64_e32_14]]
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 27, implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 28, implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 29, implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 30, implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 31, implicit $exec, implicit $mode
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_15]], implicit [[V_CVT_I32_F64_e32_16]], implicit [[V_CVT_I32_F64_e32_17]], implicit [[V_CVT_I32_F64_e32_18]], implicit [[V_CVT_I32_F64_e32_19]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_20]], implicit [[V_CVT_I32_F64_e32_21]], implicit [[V_CVT_I32_F64_e32_22]], implicit [[V_CVT_I32_F64_e32_23]], implicit [[V_CVT_I32_F64_e32_24]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_25]], implicit [[V_CVT_I32_F64_e32_26]], implicit [[V_CVT_I32_F64_e32_27]], implicit [[V_CVT_I32_F64_e32_28]], implicit [[V_CVT_I32_F64_e32_29]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_30]], implicit [[V_CVT_I32_F64_e32_31]], implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_25]], implicit [[V_CVT_I32_F64_e32_26]], implicit [[V_CVT_I32_F64_e32_32]], implicit [[V_CVT_I32_F64_e32_27]], implicit [[V_CVT_I32_F64_e32_28]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_29]], implicit [[V_CVT_I32_F64_e32_30]], implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]]
; GFX908-NEXT: S_NOP 0, implicit [[DEF3]], implicit [[DEF4]], implicit [[DEF5]], implicit [[DEF6]], implicit [[DEF7]]
; GFX908-NEXT: S_NOP 0, implicit [[DEF8]], implicit [[DEF9]], implicit [[DEF10]], implicit [[DEF11]], implicit [[DEF12]]
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_32:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 64, implicit $exec, implicit $mode
; GFX908-NEXT: S_NOP 0, implicit [[DEF13]], implicit [[DEF14]], implicit [[DEF15]], implicit [[DEF16]], implicit [[DEF17]]
; GFX908-NEXT: S_NOP 0, implicit [[DEF18]], implicit [[DEF19]], implicit [[DEF20]], implicit [[DEF21]], implicit [[DEF22]]
; GFX908-NEXT: S_NOP 0, implicit [[DEF23]], implicit [[DEF24]], implicit [[DEF25]], implicit [[DEF26]], implicit [[DEF27]]
- ; GFX908-NEXT: S_NOP 0, implicit [[DEF28]], implicit [[DEF29]], implicit [[DEF30]], implicit [[DEF31]], implicit [[V_CVT_I32_F64_e32_32]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF28]], implicit [[DEF29]], implicit [[DEF30]], implicit [[DEF31]], implicit [[V_CVT_I32_F64_e32_31]]
; GFX908-NEXT: S_NOP 0, implicit [[DEF32]]
; GFX908-NEXT: S_ENDPGM 0
;
@@ -588,6 +594,35 @@ body: |
; GFX90A-NEXT: successors: %bb.1(0x80000000)
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: [[V_CVT_I32_F64_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 0, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_3:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_4:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 4, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_5:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 5, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_6:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 6, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_7:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 7, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_8:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 8, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_9:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 9, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_10:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 10, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_11:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 11, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_12:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 12, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_13:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 13, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_14:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 14, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_15:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 15, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_16:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 16, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_17:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 17, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_18:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 18, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_19:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 19, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_20:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 20, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_21:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 21, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 25, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_26:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 26, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 28, implicit $exec, implicit $mode
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 29, implicit $exec, implicit $mode
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 30, implicit $exec, implicit $mode
; GFX90A-NEXT: [[DEF:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF1:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF2:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
@@ -620,33 +655,7 @@ body: |
; GFX90A-NEXT: [[DEF29:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF30:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF31:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_3:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_4:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 4, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_5:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 5, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_6:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 6, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_7:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 7, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_8:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 8, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_9:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 9, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_10:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 10, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_11:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 11, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_12:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 12, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_13:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 13, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_14:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 14, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_15:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 15, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_16:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 16, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_17:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 17, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_18:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 18, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_19:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 19, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_20:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 20, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_21:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 21, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 25, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_26:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 26, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 64, implicit $exec, implicit $mode
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 64, implicit $exec, implicit $mode
; GFX90A-NEXT: [[DEF32:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.1:
@@ -655,19 +664,16 @@ body: |
; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_10]], implicit [[V_CVT_I32_F64_e32_11]], implicit [[V_CVT_I32_F64_e32_12]], implicit [[V_CVT_I32_F64_e32_13]], implicit [[V_CVT_I32_F64_e32_14]]
; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_15]], implicit [[V_CVT_I32_F64_e32_16]], implicit [[V_CVT_I32_F64_e32_17]], implicit [[V_CVT_I32_F64_e32_18]], implicit [[V_CVT_I32_F64_e32_19]]
; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_20]], implicit [[V_CVT_I32_F64_e32_21]], implicit [[V_CVT_I32_F64_e32_22]], implicit [[V_CVT_I32_F64_e32_23]], implicit [[V_CVT_I32_F64_e32_24]]
- ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 27, implicit $exec, implicit $mode
- ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 28, implicit $exec, implicit $mode
- ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 29, implicit $exec, implicit $mode
- ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_25]], implicit [[V_CVT_I32_F64_e32_26]], implicit [[V_CVT_I32_F64_e32_28]], implicit [[V_CVT_I32_F64_e32_29]], implicit [[V_CVT_I32_F64_e32_30]]
- ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 30, implicit $exec, implicit $mode
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 27, implicit $exec, implicit $mode
+ ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_25]], implicit [[V_CVT_I32_F64_e32_26]], implicit [[V_CVT_I32_F64_e32_31]], implicit [[V_CVT_I32_F64_e32_27]], implicit [[V_CVT_I32_F64_e32_28]]
; GFX90A-NEXT: [[V_CVT_I32_F64_e32_32:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 31, implicit $exec, implicit $mode
- ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_31]], implicit [[V_CVT_I32_F64_e32_32]], implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_29]], implicit [[V_CVT_I32_F64_e32_32]], implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]]
; GFX90A-NEXT: S_NOP 0, implicit [[DEF3]], implicit [[DEF4]], implicit [[DEF5]], implicit [[DEF6]], implicit [[DEF7]]
; GFX90A-NEXT: S_NOP 0, implicit [[DEF8]], implicit [[DEF9]], implicit [[DEF10]], implicit [[DEF11]], implicit [[DEF12]]
; GFX90A-NEXT: S_NOP 0, implicit [[DEF13]], implicit [[DEF14]], implicit [[DEF15]], implicit [[DEF16]], implicit [[DEF17]]
; GFX90A-NEXT: S_NOP 0, implicit [[DEF18]], implicit [[DEF19]], implicit [[DEF20]], implicit [[DEF21]], implicit [[DEF22]]
; GFX90A-NEXT: S_NOP 0, implicit [[DEF23]], implicit [[DEF24]], implicit [[DEF25]], implicit [[DEF26]], implicit [[DEF27]]
- ; GFX90A-NEXT: S_NOP 0, implicit [[DEF28]], implicit [[DEF29]], implicit [[DEF30]], implicit [[DEF31]], implicit [[V_CVT_I32_F64_e32_27]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF28]], implicit [[DEF29]], implicit [[DEF30]], implicit [[DEF31]], implicit [[V_CVT_I32_F64_e32_30]]
; GFX90A-NEXT: S_NOP 0, implicit [[DEF32]]
; GFX90A-NEXT: S_ENDPGM 0
bb.0:
@@ -760,3 +766,1773 @@ body: |
S_ENDPGM 0
...
+---
+name: reduce_spill_archvgpr_above_addressable_limit
+tracksRegLiveness: true
+machineFunctionInfo:
+ isEntryFunction: true
+body: |
+ ; GFX908-LABEL: name: reduce_spill_archvgpr_above_addressable_limit
+ ; GFX908: bb.0:
+ ; GFX908-NEXT: successors: %bb.1(0x80000000)
+ ; GFX908-NEXT: {{ $}}
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 0, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[DEF:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_3:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_4:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 4, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_5:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 5, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_6:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 6, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_7:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 7, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_8:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 8, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_9:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 9, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_10:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 10, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_11:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 11, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_12:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 12, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_13:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 13, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_14:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 14, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_15:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 15, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_16:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 16, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_17:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 17, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_18:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 18, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_19:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 19, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_20:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 20, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_21:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 21, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 25, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_26:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 26, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 27, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 28, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 29, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 30, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 31, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_32:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 32, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_33:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 33, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_34:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 34, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_35:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 35, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_36:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 36, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_37:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 37, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_38:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 38, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_39:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 39, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_40:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 40, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_41:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 41, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_42:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 42, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_43:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 43, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_44:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 44, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_45:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 45, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_46:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 46, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_47:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 47, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_48:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 48, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_49:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 49, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_50:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 50, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_51:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 51, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_52:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 52, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_53:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 53, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_54:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 54, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_55:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 55, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_56:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 56, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_57:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 57, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_58:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 58, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_59:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 59, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_60:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 60, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_61:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 61, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_62:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 62, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_63:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 63, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_64:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 64, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_65:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 65, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_66:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 66, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_67:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 67, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_68:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 68, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_69:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 69, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_70:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 70, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_71:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 71, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_72:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 72, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_73:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 73, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_74:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 74, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_75:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 75, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_76:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 76, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_77:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 77, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_78:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 78, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_79:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 79, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_80:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 80, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_81:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 81, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_82:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 82, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_83:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 83, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_84:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 84, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_85:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 85, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_86:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 86, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_87:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 87, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_88:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 88, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_89:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 89, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_90:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 90, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_91:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 91, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_92:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 92, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_93:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 93, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_94:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 94, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_95:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 95, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_96:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 96, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_97:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 97, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_98:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 98, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_99:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 99, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_100:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 100, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_101:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 101, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_102:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 102, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_103:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 103, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_104:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 104, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_105:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 105, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_106:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 106, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_107:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 107, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_108:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 108, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_109:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 109, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_110:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 110, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_111:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 111, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_112:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 112, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_113:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 113, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_114:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 114, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_115:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 115, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_116:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 116, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_117:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 117, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_118:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 118, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_119:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 119, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_120:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 120, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_121:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 121, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_122:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 122, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_123:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 123, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_124:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 124, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_125:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 125, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_126:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 126, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_127:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 127, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_128:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 128, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_129:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 129, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_130:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 130, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_131:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 131, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_132:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 132, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_133:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 133, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_134:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 134, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_135:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 135, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_136:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 136, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_137:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 137, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_138:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 138, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_139:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 139, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_140:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 140, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_141:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 141, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_142:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 142, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_143:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 143, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_144:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 144, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_145:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 145, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_146:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 146, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_147:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 147, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_148:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 148, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_149:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 149, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_150:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 150, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_151:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 151, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_152:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 152, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_153:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 153, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_154:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 154, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_155:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 155, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_156:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 156, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_157:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 157, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_158:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 158, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_159:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 159, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_160:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 160, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_161:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 161, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_162:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 162, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_163:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 163, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_164:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 164, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_165:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 165, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_166:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 166, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_167:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 167, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_168:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 168, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_169:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 169, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_170:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 170, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_171:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 171, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_172:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 172, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_173:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 173, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_174:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 174, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_175:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 175, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_176:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 176, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_177:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 177, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_178:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 178, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_179:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 179, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_180:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 180, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_181:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 181, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_182:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 182, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_183:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 183, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_184:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 184, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_185:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 185, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_186:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 186, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_187:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 187, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_188:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 188, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_189:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 189, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_190:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 190, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_191:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 191, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_192:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 192, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_193:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 193, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_194:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 194, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_195:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 195, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_196:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 196, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_197:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 197, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_198:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 198, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_199:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 199, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_200:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 200, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_201:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 201, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_202:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 202, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_203:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 203, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_204:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 204, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_205:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 205, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_206:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 206, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_207:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 207, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_208:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 208, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_209:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 209, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_210:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 210, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_211:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 211, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_212:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 212, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_213:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 213, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_214:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 214, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_215:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 215, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_216:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 216, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_217:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 217, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_218:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 218, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_219:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 219, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_220:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 220, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_221:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 221, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_222:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 222, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_223:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 223, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_224:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 224, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_225:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 225, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_226:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 226, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_227:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 227, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_228:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 228, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_229:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 229, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_230:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 230, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_231:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 231, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_232:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 232, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_233:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 233, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_234:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 234, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_235:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 235, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_236:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 236, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_237:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 237, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_238:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 238, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_239:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 239, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_240:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 240, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_241:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 241, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_242:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 242, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_243:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 243, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_244:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 244, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_245:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 245, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_246:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 246, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_247:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 247, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_248:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 248, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_249:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 249, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_250:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 250, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_251:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 251, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_252:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 252, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_253:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 253, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_254:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 254, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_255:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 256, implicit $exec, implicit $mode
+ ; GFX908-NEXT: {{ $}}
+ ; GFX908-NEXT: bb.1:
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_]], implicit [[V_CVT_I32_F64_e32_1]], implicit [[V_CVT_I32_F64_e32_2]], implicit [[V_CVT_I32_F64_e32_3]], implicit [[V_CVT_I32_F64_e32_4]], implicit [[V_CVT_I32_F64_e32_5]], implicit [[V_CVT_I32_F64_e32_6]], implicit [[V_CVT_I32_F64_e32_7]], implicit [[V_CVT_I32_F64_e32_8]], implicit [[V_CVT_I32_F64_e32_9]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_10]], implicit [[V_CVT_I32_F64_e32_11]], implicit [[V_CVT_I32_F64_e32_12]], implicit [[V_CVT_I32_F64_e32_13]], implicit [[V_CVT_I32_F64_e32_14]], implicit [[V_CVT_I32_F64_e32_15]], implicit [[V_CVT_I32_F64_e32_16]], implicit [[V_CVT_I32_F64_e32_17]], implicit [[V_CVT_I32_F64_e32_18]], implicit [[V_CVT_I32_F64_e32_19]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_20]], implicit [[V_CVT_I32_F64_e32_21]], implicit [[V_CVT_I32_F64_e32_22]], implicit [[V_CVT_I32_F64_e32_23]], implicit [[V_CVT_I32_F64_e32_24]], implicit [[V_CVT_I32_F64_e32_25]], implicit [[V_CVT_I32_F64_e32_26]], implicit [[V_CVT_I32_F64_e32_27]], implicit [[V_CVT_I32_F64_e32_28]], implicit [[V_CVT_I32_F64_e32_29]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_30]], implicit [[V_CVT_I32_F64_e32_31]], implicit [[V_CVT_I32_F64_e32_32]], implicit [[V_CVT_I32_F64_e32_33]], implicit [[V_CVT_I32_F64_e32_34]], implicit [[V_CVT_I32_F64_e32_35]], implicit [[V_CVT_I32_F64_e32_36]], implicit [[V_CVT_I32_F64_e32_37]], implicit [[V_CVT_I32_F64_e32_38]], implicit [[V_CVT_I32_F64_e32_39]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_40]], implicit [[V_CVT_I32_F64_e32_41]], implicit [[V_CVT_I32_F64_e32_42]], implicit [[V_CVT_I32_F64_e32_43]], implicit [[V_CVT_I32_F64_e32_44]], implicit [[V_CVT_I32_F64_e32_45]], implicit [[V_CVT_I32_F64_e32_46]], implicit [[V_CVT_I32_F64_e32_47]], implicit [[V_CVT_I32_F64_e32_48]], implicit [[V_CVT_I32_F64_e32_49]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_50]], implicit [[V_CVT_I32_F64_e32_51]], implicit [[V_CVT_I32_F64_e32_52]], implicit [[V_CVT_I32_F64_e32_53]], implicit [[V_CVT_I32_F64_e32_54]], implicit [[V_CVT_I32_F64_e32_55]], implicit [[V_CVT_I32_F64_e32_56]], implicit [[V_CVT_I32_F64_e32_57]], implicit [[V_CVT_I32_F64_e32_58]], implicit [[V_CVT_I32_F64_e32_59]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_60]], implicit [[V_CVT_I32_F64_e32_61]], implicit [[V_CVT_I32_F64_e32_62]], implicit [[V_CVT_I32_F64_e32_63]], implicit [[V_CVT_I32_F64_e32_64]], implicit [[V_CVT_I32_F64_e32_65]], implicit [[V_CVT_I32_F64_e32_66]], implicit [[V_CVT_I32_F64_e32_67]], implicit [[V_CVT_I32_F64_e32_68]], implicit [[V_CVT_I32_F64_e32_69]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_70]], implicit [[V_CVT_I32_F64_e32_71]], implicit [[V_CVT_I32_F64_e32_72]], implicit [[V_CVT_I32_F64_e32_73]], implicit [[V_CVT_I32_F64_e32_74]], implicit [[V_CVT_I32_F64_e32_75]], implicit [[V_CVT_I32_F64_e32_76]], implicit [[V_CVT_I32_F64_e32_77]], implicit [[V_CVT_I32_F64_e32_78]], implicit [[V_CVT_I32_F64_e32_79]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_80]], implicit [[V_CVT_I32_F64_e32_81]], implicit [[V_CVT_I32_F64_e32_82]], implicit [[V_CVT_I32_F64_e32_83]], implicit [[V_CVT_I32_F64_e32_84]], implicit [[V_CVT_I32_F64_e32_85]], implicit [[V_CVT_I32_F64_e32_86]], implicit [[V_CVT_I32_F64_e32_87]], implicit [[V_CVT_I32_F64_e32_88]], implicit [[V_CVT_I32_F64_e32_89]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_90]], implicit [[V_CVT_I32_F64_e32_91]], implicit [[V_CVT_I32_F64_e32_92]], implicit [[V_CVT_I32_F64_e32_93]], implicit [[V_CVT_I32_F64_e32_94]], implicit [[V_CVT_I32_F64_e32_95]], implicit [[V_CVT_I32_F64_e32_96]], implicit [[V_CVT_I32_F64_e32_97]], implicit [[V_CVT_I32_F64_e32_98]], implicit [[V_CVT_I32_F64_e32_99]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_100]], implicit [[V_CVT_I32_F64_e32_101]], implicit [[V_CVT_I32_F64_e32_102]], implicit [[V_CVT_I32_F64_e32_103]], implicit [[V_CVT_I32_F64_e32_104]], implicit [[V_CVT_I32_F64_e32_105]], implicit [[V_CVT_I32_F64_e32_106]], implicit [[V_CVT_I32_F64_e32_107]], implicit [[V_CVT_I32_F64_e32_108]], implicit [[V_CVT_I32_F64_e32_109]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_110]], implicit [[V_CVT_I32_F64_e32_111]], implicit [[V_CVT_I32_F64_e32_112]], implicit [[V_CVT_I32_F64_e32_113]], implicit [[V_CVT_I32_F64_e32_114]], implicit [[V_CVT_I32_F64_e32_115]], implicit [[V_CVT_I32_F64_e32_116]], implicit [[V_CVT_I32_F64_e32_117]], implicit [[V_CVT_I32_F64_e32_118]], implicit [[V_CVT_I32_F64_e32_119]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_120]], implicit [[V_CVT_I32_F64_e32_121]], implicit [[V_CVT_I32_F64_e32_122]], implicit [[V_CVT_I32_F64_e32_123]], implicit [[V_CVT_I32_F64_e32_124]], implicit [[V_CVT_I32_F64_e32_125]], implicit [[V_CVT_I32_F64_e32_126]], implicit [[V_CVT_I32_F64_e32_127]], implicit [[V_CVT_I32_F64_e32_128]], implicit [[V_CVT_I32_F64_e32_129]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_130]], implicit [[V_CVT_I32_F64_e32_131]], implicit [[V_CVT_I32_F64_e32_132]], implicit [[V_CVT_I32_F64_e32_133]], implicit [[V_CVT_I32_F64_e32_134]], implicit [[V_CVT_I32_F64_e32_135]], implicit [[V_CVT_I32_F64_e32_136]], implicit [[V_CVT_I32_F64_e32_137]], implicit [[V_CVT_I32_F64_e32_138]], implicit [[V_CVT_I32_F64_e32_139]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_140]], implicit [[V_CVT_I32_F64_e32_141]], implicit [[V_CVT_I32_F64_e32_142]], implicit [[V_CVT_I32_F64_e32_143]], implicit [[V_CVT_I32_F64_e32_144]], implicit [[V_CVT_I32_F64_e32_145]], implicit [[V_CVT_I32_F64_e32_146]], implicit [[V_CVT_I32_F64_e32_147]], implicit [[V_CVT_I32_F64_e32_148]], implicit [[V_CVT_I32_F64_e32_149]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_150]], implicit [[V_CVT_I32_F64_e32_151]], implicit [[V_CVT_I32_F64_e32_152]], implicit [[V_CVT_I32_F64_e32_153]], implicit [[V_CVT_I32_F64_e32_154]], implicit [[V_CVT_I32_F64_e32_155]], implicit [[V_CVT_I32_F64_e32_156]], implicit [[V_CVT_I32_F64_e32_157]], implicit [[V_CVT_I32_F64_e32_158]], implicit [[V_CVT_I32_F64_e32_159]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_160]], implicit [[V_CVT_I32_F64_e32_161]], implicit [[V_CVT_I32_F64_e32_162]], implicit [[V_CVT_I32_F64_e32_163]], implicit [[V_CVT_I32_F64_e32_164]], implicit [[V_CVT_I32_F64_e32_165]], implicit [[V_CVT_I32_F64_e32_166]], implicit [[V_CVT_I32_F64_e32_167]], implicit [[V_CVT_I32_F64_e32_168]], implicit [[V_CVT_I32_F64_e32_169]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_170]], implicit [[V_CVT_I32_F64_e32_171]], implicit [[V_CVT_I32_F64_e32_172]], implicit [[V_CVT_I32_F64_e32_173]], implicit [[V_CVT_I32_F64_e32_174]], implicit [[V_CVT_I32_F64_e32_175]], implicit [[V_CVT_I32_F64_e32_176]], implicit [[V_CVT_I32_F64_e32_177]], implicit [[V_CVT_I32_F64_e32_178]], implicit [[V_CVT_I32_F64_e32_179]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_180]], implicit [[V_CVT_I32_F64_e32_181]], implicit [[V_CVT_I32_F64_e32_182]], implicit [[V_CVT_I32_F64_e32_183]], implicit [[V_CVT_I32_F64_e32_184]], implicit [[V_CVT_I32_F64_e32_185]], implicit [[V_CVT_I32_F64_e32_186]], implicit [[V_CVT_I32_F64_e32_187]], implicit [[V_CVT_I32_F64_e32_188]], implicit [[V_CVT_I32_F64_e32_189]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_190]], implicit [[V_CVT_I32_F64_e32_191]], implicit [[V_CVT_I32_F64_e32_192]], implicit [[V_CVT_I32_F64_e32_193]], implicit [[V_CVT_I32_F64_e32_194]], implicit [[V_CVT_I32_F64_e32_195]], implicit [[V_CVT_I32_F64_e32_196]], implicit [[V_CVT_I32_F64_e32_197]], implicit [[V_CVT_I32_F64_e32_198]], implicit [[V_CVT_I32_F64_e32_199]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_200]], implicit [[V_CVT_I32_F64_e32_201]], implicit [[V_CVT_I32_F64_e32_202]], implicit [[V_CVT_I32_F64_e32_203]], implicit [[V_CVT_I32_F64_e32_204]], implicit [[V_CVT_I32_F64_e32_205]], implicit [[V_CVT_I32_F64_e32_206]], implicit [[V_CVT_I32_F64_e32_207]], implicit [[V_CVT_I32_F64_e32_208]], implicit [[V_CVT_I32_F64_e32_209]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_210]], implicit [[V_CVT_I32_F64_e32_211]], implicit [[V_CVT_I32_F64_e32_212]], implicit [[V_CVT_I32_F64_e32_213]], implicit [[V_CVT_I32_F64_e32_214]], implicit [[V_CVT_I32_F64_e32_215]], implicit [[V_CVT_I32_F64_e32_216]], implicit [[V_CVT_I32_F64_e32_217]], implicit [[V_CVT_I32_F64_e32_218]], implicit [[V_CVT_I32_F64_e32_219]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_220]], implicit [[V_CVT_I32_F64_e32_221]], implicit [[V_CVT_I32_F64_e32_222]], implicit [[V_CVT_I32_F64_e32_223]], implicit [[V_CVT_I32_F64_e32_224]], implicit [[V_CVT_I32_F64_e32_225]], implicit [[V_CVT_I32_F64_e32_226]], implicit [[V_CVT_I32_F64_e32_227]], implicit [[V_CVT_I32_F64_e32_228]], implicit [[V_CVT_I32_F64_e32_229]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_230]], implicit [[V_CVT_I32_F64_e32_231]], implicit [[V_CVT_I32_F64_e32_232]], implicit [[V_CVT_I32_F64_e32_233]], implicit [[V_CVT_I32_F64_e32_234]], implicit [[V_CVT_I32_F64_e32_235]], implicit [[V_CVT_I32_F64_e32_236]], implicit [[V_CVT_I32_F64_e32_237]], implicit [[V_CVT_I32_F64_e32_238]], implicit [[V_CVT_I32_F64_e32_239]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_240]], implicit [[V_CVT_I32_F64_e32_241]], implicit [[V_CVT_I32_F64_e32_242]], implicit [[V_CVT_I32_F64_e32_243]], implicit [[V_CVT_I32_F64_e32_244]], implicit [[V_CVT_I32_F64_e32_245]], implicit [[V_CVT_I32_F64_e32_246]], implicit [[V_CVT_I32_F64_e32_247]], implicit [[V_CVT_I32_F64_e32_248]], implicit [[V_CVT_I32_F64_e32_249]]
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_256:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 255, implicit $exec, implicit $mode
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_250]], implicit [[V_CVT_I32_F64_e32_251]], implicit [[V_CVT_I32_F64_e32_252]], implicit [[V_CVT_I32_F64_e32_253]], implicit [[V_CVT_I32_F64_e32_254]], implicit [[V_CVT_I32_F64_e32_256]], implicit [[V_CVT_I32_F64_e32_255]], implicit [[DEF]]
+ ; GFX908-NEXT: S_ENDPGM 0
+ ;
+ ; GFX90A-LABEL: name: reduce_spill_archvgpr_above_addressable_limit
+ ; GFX90A: bb.0:
+ ; GFX90A-NEXT: successors: %bb.1(0x80000000)
+ ; GFX90A-NEXT: {{ $}}
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 0, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[DEF:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_3:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_4:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 4, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_5:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 5, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_6:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 6, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_7:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 7, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_8:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 8, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_9:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 9, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_10:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 10, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_11:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 11, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_12:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 12, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_13:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 13, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_14:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 14, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_15:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 15, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_16:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 16, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_17:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 17, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_18:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 18, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_19:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 19, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_20:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 20, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_21:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 21, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 25, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_26:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 26, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 27, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 28, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 29, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 30, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 31, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_32:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 32, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_33:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 33, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_34:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 34, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_35:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 35, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_36:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 36, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_37:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 37, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_38:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 38, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_39:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 39, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_40:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 40, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_41:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 41, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_42:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 42, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_43:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 43, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_44:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 44, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_45:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 45, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_46:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 46, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_47:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 47, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_48:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 48, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_49:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 49, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_50:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 50, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_51:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 51, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_52:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 52, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_53:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 53, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_54:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 54, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_55:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 55, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_56:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 56, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_57:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 57, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_58:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 58, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_59:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 59, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_60:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 60, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_61:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 61, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_62:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 62, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_63:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 63, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_64:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 64, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_65:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 65, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_66:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 66, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_67:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 67, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_68:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 68, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_69:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 69, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_70:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 70, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_71:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 71, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_72:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 72, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_73:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 73, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_74:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 74, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_75:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 75, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_76:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 76, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_77:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 77, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_78:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 78, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_79:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 79, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_80:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 80, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_81:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 81, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_82:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 82, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_83:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 83, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_84:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 84, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_85:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 85, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_86:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 86, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_87:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 87, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_88:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 88, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_89:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 89, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_90:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 90, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_91:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 91, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_92:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 92, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_93:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 93, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_94:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 94, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_95:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 95, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_96:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 96, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_97:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 97, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_98:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 98, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_99:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 99, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_100:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 100, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_101:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 101, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_102:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 102, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_103:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 103, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_104:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 104, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_105:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 105, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_106:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 106, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_107:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 107, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_108:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 108, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_109:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 109, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_110:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 110, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_111:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 111, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_112:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 112, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_113:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 113, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_114:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 114, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_115:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 115, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_116:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 116, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_117:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 117, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_118:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 118, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_119:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 119, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_120:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 120, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_121:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 121, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_122:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 122, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_123:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 123, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_124:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 124, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_125:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 125, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_126:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 126, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_127:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 127, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_128:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 128, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_129:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 129, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_130:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 130, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_131:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 131, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_132:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 132, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_133:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 133, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_134:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 134, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_135:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 135, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_136:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 136, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_137:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 137, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_138:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 138, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_139:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 139, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_140:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 140, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_141:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 141, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_142:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 142, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_143:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 143, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_144:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 144, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_145:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 145, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_146:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 146, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_147:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 147, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_148:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 148, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_149:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 149, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_150:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 150, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_151:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 151, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_152:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 152, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_153:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 153, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_154:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 154, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_155:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 155, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_156:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 156, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_157:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 157, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_158:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 158, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_159:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 159, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_160:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 160, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_161:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 161, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_162:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 162, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_163:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 163, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_164:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 164, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_165:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 165, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_166:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 166, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_167:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 167, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_168:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 168, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_169:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 169, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_170:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 170, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_171:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 171, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_172:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 172, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_173:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 173, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_174:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 174, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_175:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 175, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_176:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 176, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_177:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 177, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_178:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 178, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_179:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 179, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_180:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 180, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_181:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 181, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_182:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 182, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_183:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 183, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_184:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 184, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_185:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 185, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_186:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 186, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_187:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 187, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_188:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 188, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_189:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 189, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_190:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 190, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_191:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 191, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_192:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 192, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_193:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 193, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_194:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 194, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_195:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 195, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_196:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 196, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_197:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 197, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_198:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 198, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_199:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 199, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_200:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 200, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_201:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 201, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_202:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 202, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_203:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 203, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_204:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 204, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_205:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 205, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_206:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 206, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_207:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 207, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_208:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 208, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_209:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 209, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_210:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 210, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_211:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 211, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_212:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 212, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_213:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 213, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_214:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 214, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_215:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 215, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_216:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 216, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_217:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 217, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_218:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 218, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_219:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 219, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_220:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 220, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_221:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 221, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_222:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 222, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_223:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 223, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_224:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 224, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_225:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 225, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_226:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 226, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_227:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 227, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_228:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 228, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_229:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 229, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_230:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 230, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_231:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 231, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_232:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 232, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_233:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 233, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_234:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 234, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_235:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 235, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_236:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 236, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_237:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 237, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_238:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 238, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_239:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 239, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_240:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 240, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_241:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 241, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_242:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 242, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_243:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 243, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_244:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 244, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_245:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 245, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_246:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 246, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_247:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 247, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_248:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 248, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_249:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 249, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_250:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 250, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_251:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 251, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_252:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 252, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_253:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 253, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_254:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 254, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_255:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 256, implicit $exec, implicit $mode
+ ; GFX90A-NEXT: {{ $}}
+ ; GFX90A-NEXT: bb.1:
+ ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_]], implicit [[V_CVT_I32_F64_e32_1]], implicit [[V_CVT_I32_F64_e32_2]], implicit [[V_CVT_I32_F64_e32_3]], implicit [[V_CVT_I32_F64_e32_4]], implicit [[V_CVT_I32_F64_e32_5]], implicit [[V_CVT_I32_F64_e32_6]], implicit [[V_CVT_I32_F64_e32_7]], implicit [[V_CVT_I32_F64_e32_8]], implicit [[V_CVT_I32_F64_e32_9]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_10]], implicit [[V_CVT_I32_F64_e32_11]], implicit [[V_CVT_I32_F64_e32_12]], implicit [[V_CVT_I32_F64_e32_13]], implicit [[V_CVT_I32_F64_e32_14]], implicit [[V_CVT_I32_F64_e32_15]], implicit [[V_CVT_I32_F64_e32_16]], implicit [[V_CVT_I32_F64_e32_17]], implicit [[V_CVT_I32_F64_e32_18]], implicit [[V_CVT_I32_F64_e32_19]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_20]], implicit [[V_CVT_I32_F64_e32_21]], implicit [[V_CVT_I32_F64_e32_22]], implicit [[V_CVT_I32_F64_e32_23]], implicit [[V_CVT_I32_F64_e32_24]], implicit [[V_CVT_I32_F64_e32_25]], implicit [[V_CVT_I32_F64_e32_26]], implicit [[V_CVT_I32_F64_e32_27]], implicit [[V_CVT_I32_F64_e32_28]], implicit [[V_CVT_I32_F64_e32_29]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_30]], implicit [[V_CVT_I32_F64_e32_31]], implicit [[V_CVT_I32_F64_e32_32]], implicit [[V_CVT_I32_F64_e32_33]], implicit [[V_CVT_I32_F64_e32_34]], implicit [[V_CVT_I32_F64_e32_35]], implicit [[V_CVT_I32_F64_e32_36]], implicit [[V_CVT_I32_F64_e32_37]], implicit [[V_CVT_I32_F64_e32_38]], implicit [[V_CVT_I32_F64_e32_39]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_40]], implicit [[V_CVT_I32_F64_e32_41]], implicit [[V_CVT_I32_F64_e32_42]], implicit [[V_CVT_I32_F64_e32_43]], implicit [[V_CVT_I32_F64_e32_44]], implicit [[V_CVT_I32_F64_e32_45]], implicit [[V_CVT_I32_F64_e32_46]], implicit [[V_CVT_I32_F64_e32_47]], implicit [[V_CVT_I32_F64_e32_48]], implicit [[V_CVT_I32_F64_e32_49]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_50]], implicit [[V_CVT_I32_F64_e32_51]], implicit [[V_CVT_I32_F64_e32_52]], implicit [[V_CVT_I32_F64_e32_53]], implicit [[V_CVT_I32_F64_e32_54]], implicit [[V_CVT_I32_F64_e32_55]], implicit [[V_CVT_I32_F64_e32_56]], implicit [[V_CVT_I32_F64_e32_57]], implicit [[V_CVT_I32_F64_e32_58]], implicit [[V_CVT_I32_F64_e32_59]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_60]], implicit [[V_CVT_I32_F64_e32_61]], implicit [[V_CVT_I32_F64_e32_62]], implicit [[V_CVT_I32_F64_e32_63]], implicit [[V_CVT_I32_F64_e32_64]], implicit [[V_CVT_I32_F64_e32_65]], implicit [[V_CVT_I32_F64_e32_66]], implicit [[V_CVT_I32_F64_e32_67]], implicit [[V_CVT_I32_F64_e32_68]], implicit [[V_CVT_I32_F64_e32_69]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_70]], implicit [[V_CVT_I32_F64_e32_71]], implicit [[V_CVT_I32_F64_e32_72]], implicit [[V_CVT_I32_F64_e32_73]], implicit [[V_CVT_I32_F64_e32_74]], implicit [[V_CVT_I32_F64_e32_75]], implicit [[V_CVT_I32_F64_e32_76]], implicit [[V_CVT_I32_F64_e32_77]], implicit [[V_CVT_I32_F64_e32_78]], implicit [[V_CVT_I32_F64_e32_79]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_80]], implicit [[V_CVT_I32_F64_e32_81]], implicit [[V_CVT_I32_F64_e32_82]], implicit [[V_CVT_I32_F64_e32_83]], implicit [[V_CVT_I32_F64_e32_84]], implicit [[V_CVT_I32_F64_e32_85]], implicit [[V_CVT_I32_F64_e32_86]], implicit [[V_CVT_I32_F64_e32_87]], implicit [[V_CVT_I32_F64_e32_88]], implicit [[V_CVT_I32_F64_e32_89]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_90]], implicit [[V_CVT_I32_F64_e32_91]], implicit [[V_CVT_I32_F64_e32_92]], implicit [[V_CVT_I32_F64_e32_93]], implicit [[V_CVT_I32_F64_e32_94]], implicit [[V_CVT_I32_F64_e32_95]], implicit [[V_CVT_I32_F64_e32_96]], implicit [[V_CVT_I32_F64_e32_97]], implicit [[V_CVT_I32_F64_e32_98]], implicit [[V_CVT_I32_F64_e32_99]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_100]], implicit [[V_CVT_I32_F64_e32_101]], implicit [[V_CVT_I32_F64_e32_102]], implicit [[V_CVT_I32_F64_e32_103]], implicit [[V_CVT_I32_F64_e32_104]], implicit [[V_CVT_I32_F64_e32_105]], implicit [[V_CVT_I32_F64_e32_106]], implicit [[V_CVT_I32_F64_e32_107]], implicit [[V_CVT_I32_F64_e32_108]], implicit [[V_CVT_I32_F64_e32_109]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_110]], implicit [[V_CVT_I32_F64_e32_111]], implicit [[V_CVT_I32_F64_e32_112]], implicit [[V_CVT_I32_F64_e32_113]], implicit [[V_CVT_I32_F64_e32_114]], implicit [[V_CVT_I32_F64_e32_115]], implicit [[V_CVT_I32_F64_e32_116]], implicit [[V_CVT_I32_F64_e32_117]], implicit [[V_CVT_I32_F64_e32_118]], implicit [[V_CVT_I32_F64_e32_119]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_120]], implicit [[V_CVT_I32_F64_e32_121]], implicit [[V_CVT_I32_F64_e32_122]], implicit [[V_CVT_I32_F64_e32_123]], implicit [[V_CVT_I32_F64_e32_124]], implicit [[V_CVT_I32_F64_e32_125]], implicit [[V_CVT_I32_F64_e32_126]], implicit [[V_CVT_I32_F64_e32_127]], implicit [[V_CVT_I32_F64_e32_128]], implicit [[V_CVT_I32_F64_e32_129]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_130]], implicit [[V_CVT_I32_F64_e32_131]], implicit [[V_CVT_I32_F64_e32_132]], implicit [[V_CVT_I32_F64_e32_133]], implicit [[V_CVT_I32_F64_e32_134]], implicit [[V_CVT_I32_F64_e32_135]], implicit [[V_CVT_I32_F64_e32_136]], implicit [[V_CVT_I32_F64_e32_137]], implicit [[V_CVT_I32_F64_e32_138]], implicit [[V_CVT_I32_F64_e32_139]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_140]], implicit [[V_CVT_I32_F64_e32_141]], implicit [[V_CVT_I32_F64_e32_142]], implicit [[V_CVT_I32_F64_e32_143]], implicit [[V_CVT_I32_F64_e32_144]], implicit [[V_CVT_I32_F64_e32_145]], implicit [[V_CVT_I32_F64_e32_146]], implicit [[V_CVT_I32_F64_e32_147]], implicit [[V_CVT_I32_F64_e32_148]], implicit [[V_CVT_I32_F64_e32_149]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_150]], implicit [[V_CVT_I32_F64_e32_151]], implicit [[V_CVT_I32_F64_e32_152]], implicit [[V_CVT_I32_F64_e32_153]], implicit [[V_CVT_I32_F64_e32_154]], implicit [[V_CVT_I32_F64_e32_155]], implicit [[V_CVT_I32_F64_e32_156]], implicit [[V_CVT_I32_F64_e32_157]], implicit [[V_CVT_I32_F64_e32_158]], implicit [[V_CVT_I32_F64_e32_159]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_160]], implicit [[V_CVT_I32_F64_e32_161]], implicit [[V_CVT_I32_F64_e32_162]], implicit [[V_CVT_I32_F64_e32_163]], implicit [[V_CVT_I32_F64_e32_164]], implicit [[V_CVT_I32_F64_e32_165]], implicit [[V_CVT_I32_F64_e32_166]], implicit [[V_CVT_I32_F64_e32_167]], implicit [[V_CVT_I32_F64_e32_168]], implicit [[V_CVT_I32_F64_e32_169]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_170]], implicit [[V_CVT_I32_F64_e32_171]], implicit [[V_CVT_I32_F64_e32_172]], implicit [[V_CVT_I32_F64_e32_173]], implicit [[V_CVT_I32_F64_e32_174]], implicit [[V_CVT_I32_F64_e32_175]], implicit [[V_CVT_I32_F64_e32_176]], implicit [[V_CVT_I32_F64_e32_177]], implicit [[V_CVT_I32_F64_e32_178]], implicit [[V_CVT_I32_F64_e32_179]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_180]], implicit [[V_CVT_I32_F64_e32_181]], implicit [[V_CVT_I32_F64_e32_182]], implicit [[V_CVT_I32_F64_e32_183]], implicit [[V_CVT_I32_F64_e32_184]], implicit [[V_CVT_I32_F64_e32_185]], implicit [[V_CVT_I32_F64_e32_186]], implicit [[V_CVT_I32_F64_e32_187]], implicit [[V_CVT_I32_F64_e32_188]], implicit [[V_CVT_I32_F64_e32_189]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_190]], implicit [[V_CVT_I32_F64_e32_191]], implicit [[V_CVT_I32_F64_e32_192]], implicit [[V_CVT_I32_F64_e32_193]], implicit [[V_CVT_I32_F64_e32_194]], implicit [[V_CVT_I32_F64_e32_195]], implicit [[V_CVT_I32_F64_e32_196]], implicit [[V_CVT_I32_F64_e32_197]], implicit [[V_CVT_I32_F64_e32_198]], implicit [[V_CVT_I32_F64_e32_199]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_200]], implicit [[V_CVT_I32_F64_e32_201]], implicit [[V_CVT_I32_F64_e32_202]], implicit [[V_CVT_I32_F64_e32_203]], implicit [[V_CVT_I32_F64_e32_204]], implicit [[V_CVT_I32_F64_e32_205]], implicit [[V_CVT_I32_F64_e32_206]], implicit [[V_CVT_I32_F64_e32_207]], implicit [[V_CVT_I32_F64_e32_208]], implicit [[V_CVT_I32_F64_e32_209]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_210]], implicit [[V_CVT_I32_F64_e32_211]], implicit [[V_CVT_I32_F64_e32_212]], implicit [[V_CVT_I32_F64_e32_213]], implicit [[V_CVT_I32_F64_e32_214]], implicit [[V_CVT_I32_F64_e32_215]], implicit [[V_CVT_I32_F64_e32_216]], implicit [[V_CVT_I32_F64_e32_217]], implicit [[V_CVT_I32_F64_e32_218]], implicit [[V_CVT_I32_F64_e32_219]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_220]], implicit [[V_CVT_I32_F64_e32_221]], implicit [[V_CVT_I32_F64_e32_222]], implicit [[V_CVT_I32_F64_e32_223]], implicit [[V_CVT_I32_F64_e32_224]], implicit [[V_CVT_I32_F64_e32_225]], implicit [[V_CVT_I32_F64_e32_226]], implicit [[V_CVT_I32_F64_e32_227]], implicit [[V_CVT_I32_F64_e32_228]], implicit [[V_CVT_I32_F64_e32_229]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_230]], implicit [[V_CVT_I32_F64_e32_231]], implicit [[V_CVT_I32_F64_e32_232]], implicit [[V_CVT_I32_F64_e32_233]], implicit [[V_CVT_I32_F64_e32_234]], implicit [[V_CVT_I32_F64_e32_235]], implicit [[V_CVT_I32_F64_e32_236]], implicit [[V_CVT_I32_F64_e32_237]], implicit [[V_CVT_I32_F64_e32_238]], implicit [[V_CVT_I32_F64_e32_239]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_240]], implicit [[V_CVT_I32_F64_e32_241]], implicit [[V_CVT_I32_F64_e32_242]], implicit [[V_CVT_I32_F64_e32_243]], implicit [[V_CVT_I32_F64_e32_244]], implicit [[V_CVT_I32_F64_e32_245]], implicit [[V_CVT_I32_F64_e32_246]], implicit [[V_CVT_I32_F64_e32_247]], implicit [[V_CVT_I32_F64_e32_248]], implicit [[V_CVT_I32_F64_e32_249]]
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_256:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 255, implicit $exec, implicit $mode
+ ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_250]], implicit [[V_CVT_I32_F64_e32_251]], implicit [[V_CVT_I32_F64_e32_252]], implicit [[V_CVT_I32_F64_e32_253]], implicit [[V_CVT_I32_F64_e32_254]], implicit [[V_CVT_I32_F64_e32_256]], implicit [[V_CVT_I32_F64_e32_255]], implicit [[DEF]]
+ ; GFX90A-NEXT: S_ENDPGM 0
+ bb.0:
+ successors: %bb.1
+
+ %0:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 0, implicit $exec, implicit $mode, implicit-def $m0
+ %1:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode, implicit-def $m0
+ %2:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode, implicit-def $m0
+ %3:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode, implicit-def $m0
+ %4:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 4, implicit $exec, implicit $mode, implicit-def $m0
+ %5:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 5, implicit $exec, implicit $mode, implicit-def $m0
+ %6:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 6, implicit $exec, implicit $mode, implicit-def $m0
+ %7:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 7, implicit $exec, implicit $mode, implicit-def $m0
+ %8:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 8, implicit $exec, implicit $mode, implicit-def $m0
+ %9:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 9, implicit $exec, implicit $mode, implicit-def $m0
+ %10:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 10, implicit $exec, implicit $mode, implicit-def $m0
+ %11:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 11, implicit $exec, implicit $mode, implicit-def $m0
+ %12:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 12, implicit $exec, implicit $mode, implicit-def $m0
+ %13:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 13, implicit $exec, implicit $mode, implicit-def $m0
+ %14:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 14, implicit $exec, implicit $mode, implicit-def $m0
+ %15:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 15, implicit $exec, implicit $mode, implicit-def $m0
+ %16:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 16, implicit $exec, implicit $mode, implicit-def $m0
+ %17:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 17, implicit $exec, implicit $mode, implicit-def $m0
+ %18:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 18, implicit $exec, implicit $mode, implicit-def $m0
+ %19:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 19, implicit $exec, implicit $mode, implicit-def $m0
+ %20:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 20, implicit $exec, implicit $mode, implicit-def $m0
+ %21:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 21, implicit $exec, implicit $mode, implicit-def $m0
+ %22:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode, implicit-def $m0
+ %23:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode, implicit-def $m0
+ %24:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
+ %25:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 25, implicit $exec, implicit $mode, implicit-def $m0
+ %26:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 26, implicit $exec, implicit $mode, implicit-def $m0
+ %27:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 27, implicit $exec, implicit $mode, implicit-def $m0
+ %28:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 28, implicit $exec, implicit $mode, implicit-def $m0
+ %29:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 29, implicit $exec, implicit $mode, implicit-def $m0
+ %30:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 30, implicit $exec, implicit $mode, implicit-def $m0
+ %31:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 31, implicit $exec, implicit $mode, implicit-def $m0
+ %32:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 32, implicit $exec, implicit $mode, implicit-def $m0
+ %33:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 33, implicit $exec, implicit $mode, implicit-def $m0
+ %34:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 34, implicit $exec, implicit $mode, implicit-def $m0
+ %35:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 35, implicit $exec, implicit $mode, implicit-def $m0
+ %36:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 36, implicit $exec, implicit $mode, implicit-def $m0
+ %37:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 37, implicit $exec, implicit $mode, implicit-def $m0
+ %38:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 38, implicit $exec, implicit $mode, implicit-def $m0
+ %39:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 39, implicit $exec, implicit $mode, implicit-def $m0
+ %40:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 40, implicit $exec, implicit $mode, implicit-def $m0
+ %41:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 41, implicit $exec, implicit $mode, implicit-def $m0
+ %42:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 42, implicit $exec, implicit $mode, implicit-def $m0
+ %43:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 43, implicit $exec, implicit $mode, implicit-def $m0
+ %44:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 44, implicit $exec, implicit $mode, implicit-def $m0
+ %45:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 45, implicit $exec, implicit $mode, implicit-def $m0
+ %46:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 46, implicit $exec, implicit $mode, implicit-def $m0
+ %47:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 47, implicit $exec, implicit $mode, implicit-def $m0
+ %48:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 48, implicit $exec, implicit $mode, implicit-def $m0
+ %49:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 49, implicit $exec, implicit $mode, implicit-def $m0
+ %50:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 50, implicit $exec, implicit $mode, implicit-def $m0
+ %51:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 51, implicit $exec, implicit $mode, implicit-def $m0
+ %52:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 52, implicit $exec, implicit $mode, implicit-def $m0
+ %53:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 53, implicit $exec, implicit $mode, implicit-def $m0
+ %54:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 54, implicit $exec, implicit $mode, implicit-def $m0
+ %55:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 55, implicit $exec, implicit $mode, implicit-def $m0
+ %56:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 56, implicit $exec, implicit $mode, implicit-def $m0
+ %57:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 57, implicit $exec, implicit $mode, implicit-def $m0
+ %58:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 58, implicit $exec, implicit $mode, implicit-def $m0
+ %59:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 59, implicit $exec, implicit $mode, implicit-def $m0
+ %60:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 60, implicit $exec, implicit $mode, implicit-def $m0
+ %61:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 61, implicit $exec, implicit $mode, implicit-def $m0
+ %62:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 62, implicit $exec, implicit $mode, implicit-def $m0
+ %63:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 63, implicit $exec, implicit $mode, implicit-def $m0
+ %64:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 64, implicit $exec, implicit $mode, implicit-def $m0
+ %65:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 65, implicit $exec, implicit $mode, implicit-def $m0
+ %66:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 66, implicit $exec, implicit $mode, implicit-def $m0
+ %67:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 67, implicit $exec, implicit $mode, implicit-def $m0
+ %68:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 68, implicit $exec, implicit $mode, implicit-def $m0
+ %69:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 69, implicit $exec, implicit $mode, implicit-def $m0
+ %70:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 70, implicit $exec, implicit $mode, implicit-def $m0
+ %71:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 71, implicit $exec, implicit $mode, implicit-def $m0
+ %72:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 72, implicit $exec, implicit $mode, implicit-def $m0
+ %73:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 73, implicit $exec, implicit $mode, implicit-def $m0
+ %74:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 74, implicit $exec, implicit $mode, implicit-def $m0
+ %75:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 75, implicit $exec, implicit $mode, implicit-def $m0
+ %76:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 76, implicit $exec, implicit $mode, implicit-def $m0
+ %77:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 77, implicit $exec, implicit $mode, implicit-def $m0
+ %78:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 78, implicit $exec, implicit $mode, implicit-def $m0
+ %79:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 79, implicit $exec, implicit $mode, implicit-def $m0
+ %80:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 80, implicit $exec, implicit $mode, implicit-def $m0
+ %81:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 81, implicit $exec, implicit $mode, implicit-def $m0
+ %82:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 82, implicit $exec, implicit $mode, implicit-def $m0
+ %83:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 83, implicit $exec, implicit $mode, implicit-def $m0
+ %84:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 84, implicit $exec, implicit $mode, implicit-def $m0
+ %85:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 85, implicit $exec, implicit $mode, implicit-def $m0
+ %86:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 86, implicit $exec, implicit $mode, implicit-def $m0
+ %87:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 87, implicit $exec, implicit $mode, implicit-def $m0
+ %88:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 88, implicit $exec, implicit $mode, implicit-def $m0
+ %89:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 89, implicit $exec, implicit $mode, implicit-def $m0
+ %90:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 90, implicit $exec, implicit $mode, implicit-def $m0
+ %91:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 91, implicit $exec, implicit $mode, implicit-def $m0
+ %92:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 92, implicit $exec, implicit $mode, implicit-def $m0
+ %93:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 93, implicit $exec, implicit $mode, implicit-def $m0
+ %94:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 94, implicit $exec, implicit $mode, implicit-def $m0
+ %95:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 95, implicit $exec, implicit $mode, implicit-def $m0
+ %96:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 96, implicit $exec, implicit $mode, implicit-def $m0
+ %97:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 97, implicit $exec, implicit $mode, implicit-def $m0
+ %98:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 98, implicit $exec, implicit $mode, implicit-def $m0
+ %99:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 99, implicit $exec, implicit $mode, implicit-def $m0
+ %100:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 100, implicit $exec, implicit $mode, implicit-def $m0
+ %101:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 101, implicit $exec, implicit $mode, implicit-def $m0
+ %102:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 102, implicit $exec, implicit $mode, implicit-def $m0
+ %103:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 103, implicit $exec, implicit $mode, implicit-def $m0
+ %104:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 104, implicit $exec, implicit $mode, implicit-def $m0
+ %105:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 105, implicit $exec, implicit $mode, implicit-def $m0
+ %106:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 106, implicit $exec, implicit $mode, implicit-def $m0
+ %107:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 107, implicit $exec, implicit $mode, implicit-def $m0
+ %108:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 108, implicit $exec, implicit $mode, implicit-def $m0
+ %109:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 109, implicit $exec, implicit $mode, implicit-def $m0
+ %110:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 110, implicit $exec, implicit $mode, implicit-def $m0
+ %111:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 111, implicit $exec, implicit $mode, implicit-def $m0
+ %112:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 112, implicit $exec, implicit $mode, implicit-def $m0
+ %113:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 113, implicit $exec, implicit $mode, implicit-def $m0
+ %114:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 114, implicit $exec, implicit $mode, implicit-def $m0
+ %115:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 115, implicit $exec, implicit $mode, implicit-def $m0
+ %116:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 116, implicit $exec, implicit $mode, implicit-def $m0
+ %117:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 117, implicit $exec, implicit $mode, implicit-def $m0
+ %118:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 118, implicit $exec, implicit $mode, implicit-def $m0
+ %119:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 119, implicit $exec, implicit $mode, implicit-def $m0
+ %120:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 120, implicit $exec, implicit $mode, implicit-def $m0
+ %121:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 121, implicit $exec, implicit $mode, implicit-def $m0
+ %122:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 122, implicit $exec, implicit $mode, implicit-def $m0
+ %123:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 123, implicit $exec, implicit $mode, implicit-def $m0
+ %124:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 124, implicit $exec, implicit $mode, implicit-def $m0
+ %125:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 125, implicit $exec, implicit $mode, implicit-def $m0
+ %126:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 126, implicit $exec, implicit $mode, implicit-def $m0
+ %127:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 127, implicit $exec, implicit $mode, implicit-def $m0
+ %128:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 128, implicit $exec, implicit $mode, implicit-def $m0
+ %129:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 129, implicit $exec, implicit $mode, implicit-def $m0
+ %130:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 130, implicit $exec, implicit $mode, implicit-def $m0
+ %131:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 131, implicit $exec, implicit $mode, implicit-def $m0
+ %132:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 132, implicit $exec, implicit $mode, implicit-def $m0
+ %133:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 133, implicit $exec, implicit $mode, implicit-def $m0
+ %134:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 134, implicit $exec, implicit $mode, implicit-def $m0
+ %135:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 135, implicit $exec, implicit $mode, implicit-def $m0
+ %136:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 136, implicit $exec, implicit $mode, implicit-def $m0
+ %137:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 137, implicit $exec, implicit $mode, implicit-def $m0
+ %138:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 138, implicit $exec, implicit $mode, implicit-def $m0
+ %139:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 139, implicit $exec, implicit $mode, implicit-def $m0
+ %140:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 140, implicit $exec, implicit $mode, implicit-def $m0
+ %141:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 141, implicit $exec, implicit $mode, implicit-def $m0
+ %142:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 142, implicit $exec, implicit $mode, implicit-def $m0
+ %143:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 143, implicit $exec, implicit $mode, implicit-def $m0
+ %144:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 144, implicit $exec, implicit $mode, implicit-def $m0
+ %145:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 145, implicit $exec, implicit $mode, implicit-def $m0
+ %146:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 146, implicit $exec, implicit $mode, implicit-def $m0
+ %147:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 147, implicit $exec, implicit $mode, implicit-def $m0
+ %148:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 148, implicit $exec, implicit $mode, implicit-def $m0
+ %149:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 149, implicit $exec, implicit $mode, implicit-def $m0
+ %150:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 150, implicit $exec, implicit $mode, implicit-def $m0
+ %151:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 151, implicit $exec, implicit $mode, implicit-def $m0
+ %152:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 152, implicit $exec, implicit $mode, implicit-def $m0
+ %153:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 153, implicit $exec, implicit $mode, implicit-def $m0
+ %154:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 154, implicit $exec, implicit $mode, implicit-def $m0
+ %155:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 155, implicit $exec, implicit $mode, implicit-def $m0
+ %156:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 156, implicit $exec, implicit $mode, implicit-def $m0
+ %157:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 157, implicit $exec, implicit $mode, implicit-def $m0
+ %158:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 158, implicit $exec, implicit $mode, implicit-def $m0
+ %159:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 159, implicit $exec, implicit $mode, implicit-def $m0
+ %160:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 160, implicit $exec, implicit $mode, implicit-def $m0
+ %161:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 161, implicit $exec, implicit $mode, implicit-def $m0
+ %162:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 162, implicit $exec, implicit $mode, implicit-def $m0
+ %163:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 163, implicit $exec, implicit $mode, implicit-def $m0
+ %164:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 164, implicit $exec, implicit $mode, implicit-def $m0
+ %165:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 165, implicit $exec, implicit $mode, implicit-def $m0
+ %166:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 166, implicit $exec, implicit $mode, implicit-def $m0
+ %167:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 167, implicit $exec, implicit $mode, implicit-def $m0
+ %168:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 168, implicit $exec, implicit $mode, implicit-def $m0
+ %169:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 169, implicit $exec, implicit $mode, implicit-def $m0
+ %170:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 170, implicit $exec, implicit $mode, implicit-def $m0
+ %171:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 171, implicit $exec, implicit $mode, implicit-def $m0
+ %172:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 172, implicit $exec, implicit $mode, implicit-def $m0
+ %173:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 173, implicit $exec, implicit $mode, implicit-def $m0
+ %174:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 174, implicit $exec, implicit $mode, implicit-def $m0
+ %175:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 175, implicit $exec, implicit $mode, implicit-def $m0
+ %176:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 176, implicit $exec, implicit $mode, implicit-def $m0
+ %177:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 177, implicit $exec, implicit $mode, implicit-def $m0
+ %178:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 178, implicit $exec, implicit $mode, implicit-def $m0
+ %179:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 179, implicit $exec, implicit $mode, implicit-def $m0
+ %180:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 180, implicit $exec, implicit $mode, implicit-def $m0
+ %181:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 181, implicit $exec, implicit $mode, implicit-def $m0
+ %182:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 182, implicit $exec, implicit $mode, implicit-def $m0
+ %183:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 183, implicit $exec, implicit $mode, implicit-def $m0
+ %184:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 184, implicit $exec, implicit $mode, implicit-def $m0
+ %185:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 185, implicit $exec, implicit $mode, implicit-def $m0
+ %186:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 186, implicit $exec, implicit $mode, implicit-def $m0
+ %187:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 187, implicit $exec, implicit $mode, implicit-def $m0
+ %188:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 188, implicit $exec, implicit $mode, implicit-def $m0
+ %189:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 189, implicit $exec, implicit $mode, implicit-def $m0
+ %190:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 190, implicit $exec, implicit $mode, implicit-def $m0
+ %191:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 191, implicit $exec, implicit $mode, implicit-def $m0
+ %192:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 192, implicit $exec, implicit $mode, implicit-def $m0
+ %193:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 193, implicit $exec, implicit $mode, implicit-def $m0
+ %194:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 194, implicit $exec, implicit $mode, implicit-def $m0
+ %195:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 195, implicit $exec, implicit $mode, implicit-def $m0
+ %196:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 196, implicit $exec, implicit $mode, implicit-def $m0
+ %197:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 197, implicit $exec, implicit $mode, implicit-def $m0
+ %198:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 198, implicit $exec, implicit $mode, implicit-def $m0
+ %199:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 199, implicit $exec, implicit $mode, implicit-def $m0
+ %200:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 200, implicit $exec, implicit $mode, implicit-def $m0
+ %201:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 201, implicit $exec, implicit $mode, implicit-def $m0
+ %202:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 202, implicit $exec, implicit $mode, implicit-def $m0
+ %203:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 203, implicit $exec, implicit $mode, implicit-def $m0
+ %204:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 204, implicit $exec, implicit $mode, implicit-def $m0
+ %205:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 205, implicit $exec, implicit $mode, implicit-def $m0
+ %206:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 206, implicit $exec, implicit $mode, implicit-def $m0
+ %207:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 207, implicit $exec, implicit $mode, implicit-def $m0
+ %208:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 208, implicit $exec, implicit $mode, implicit-def $m0
+ %209:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 209, implicit $exec, implicit $mode, implicit-def $m0
+ %210:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 210, implicit $exec, implicit $mode, implicit-def $m0
+ %211:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 211, implicit $exec, implicit $mode, implicit-def $m0
+ %212:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 212, implicit $exec, implicit $mode, implicit-def $m0
+ %213:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 213, implicit $exec, implicit $mode, implicit-def $m0
+ %214:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 214, implicit $exec, implicit $mode, implicit-def $m0
+ %215:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 215, implicit $exec, implicit $mode, implicit-def $m0
+ %216:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 216, implicit $exec, implicit $mode, implicit-def $m0
+ %217:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 217, implicit $exec, implicit $mode, implicit-def $m0
+ %218:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 218, implicit $exec, implicit $mode, implicit-def $m0
+ %219:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 219, implicit $exec, implicit $mode, implicit-def $m0
+ %220:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 220, implicit $exec, implicit $mode, implicit-def $m0
+ %221:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 221, implicit $exec, implicit $mode, implicit-def $m0
+ %222:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 222, implicit $exec, implicit $mode, implicit-def $m0
+ %223:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 223, implicit $exec, implicit $mode, implicit-def $m0
+ %224:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 224, implicit $exec, implicit $mode, implicit-def $m0
+ %225:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 225, implicit $exec, implicit $mode, implicit-def $m0
+ %226:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 226, implicit $exec, implicit $mode, implicit-def $m0
+ %227:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 227, implicit $exec, implicit $mode, implicit-def $m0
+ %228:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 228, implicit $exec, implicit $mode, implicit-def $m0
+ %229:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 229, implicit $exec, implicit $mode, implicit-def $m0
+ %230:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 230, implicit $exec, implicit $mode, implicit-def $m0
+ %231:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 231, implicit $exec, implicit $mode, implicit-def $m0
+ %232:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 232, implicit $exec, implicit $mode, implicit-def $m0
+ %233:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 233, implicit $exec, implicit $mode, implicit-def $m0
+ %234:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 234, implicit $exec, implicit $mode, implicit-def $m0
+ %235:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 235, implicit $exec, implicit $mode, implicit-def $m0
+ %236:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 236, implicit $exec, implicit $mode, implicit-def $m0
+ %237:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 237, implicit $exec, implicit $mode, implicit-def $m0
+ %238:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 238, implicit $exec, implicit $mode, implicit-def $m0
+ %239:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 239, implicit $exec, implicit $mode, implicit-def $m0
+ %240:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 240, implicit $exec, implicit $mode, implicit-def $m0
+ %241:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 241, implicit $exec, implicit $mode, implicit-def $m0
+ %242:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 242, implicit $exec, implicit $mode, implicit-def $m0
+ %243:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 243, implicit $exec, implicit $mode, implicit-def $m0
+ %244:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 244, implicit $exec, implicit $mode, implicit-def $m0
+ %245:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 245, implicit $exec, implicit $mode, implicit-def $m0
+ %246:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 246, implicit $exec, implicit $mode, implicit-def $m0
+ %247:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 247, implicit $exec, implicit $mode, implicit-def $m0
+ %248:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 248, implicit $exec, implicit $mode, implicit-def $m0
+ %249:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 249, implicit $exec, implicit $mode, implicit-def $m0
+ %250:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 250, implicit $exec, implicit $mode, implicit-def $m0
+ %251:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 251, implicit $exec, implicit $mode, implicit-def $m0
+ %252:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 252, implicit $exec, implicit $mode, implicit-def $m0
+ %253:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 253, implicit $exec, implicit $mode, implicit-def $m0
+ %254:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 254, implicit $exec, implicit $mode, implicit-def $m0
+ %255:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 255, implicit $exec, implicit $mode
+ %256:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 256, implicit $exec, implicit $mode
+
+ %257:agpr_32 = IMPLICIT_DEF
+
+ bb.1:
+
+ S_NOP 0, implicit %0, implicit %1, implicit %2, implicit %3, implicit %4, implicit %5, implicit %6, implicit %7, implicit %8, implicit %9
+ S_NOP 0, implicit %10, implicit %11, implicit %12, implicit %13, implicit %14, implicit %15, implicit %16, implicit %17, implicit %18, implicit %19
+ S_NOP 0, implicit %20, implicit %21, implicit %22, implicit %23, implicit %24, implicit %25, implicit %26, implicit %27, implicit %28, implicit %29
+ S_NOP 0, implicit %30, implicit %31, implicit %32, implicit %33, implicit %34, implicit %35, implicit %36, implicit %37, implicit %38, implicit %39
+ S_NOP 0, implicit %40, implicit %41, implicit %42, implicit %43, implicit %44, implicit %45, implicit %46, implicit %47, implicit %48, implicit %49
+ S_NOP 0, implicit %50, implicit %51, implicit %52, implicit %53, implicit %54, implicit %55, implicit %56, implicit %57, implicit %58, implicit %59
+ S_NOP 0, implicit %60, implicit %61, implicit %62, implicit %63, implicit %64, implicit %65, implicit %66, implicit %67, implicit %68, implicit %69
+ S_NOP 0, implicit %70, implicit %71, implicit %72, implicit %73, implicit %74, implicit %75, implicit %76, implicit %77, implicit %78, implicit %79
+ S_NOP 0, implicit %80, implicit %81, implicit %82, implicit %83, implicit %84, implicit %85, implicit %86, implicit %87, implicit %88, implicit %89
+ S_NOP 0, implicit %90, implicit %91, implicit %92, implicit %93, implicit %94, implicit %95, implicit %96, implicit %97, implicit %98, implicit %99
+ S_NOP 0, implicit %100, implicit %101, implicit %102, implicit %103, implicit %104, implicit %105, implicit %106, implicit %107, implicit %108, implicit %109
+ S_NOP 0, implicit %110, implicit %111, implicit %112, implicit %113, implicit %114, implicit %115, implicit %116, implicit %117, implicit %118, implicit %119
+ S_NOP 0, implicit %120, implicit %121, implicit %122, implicit %123, implicit %124, implicit %125, implicit %126, implicit %127, implicit %128, implicit %129
+ S_NOP 0, implicit %130, implicit %131, implicit %132, implicit %133, implicit %134, implicit %135, implicit %136, implicit %137, implicit %138, implicit %139
+ S_NOP 0, implicit %140, implicit %141, implicit %142, implicit %143, implicit %144, implicit %145, implicit %146, implicit %147, implicit %148, implicit %149
+ S_NOP 0, implicit %150, implicit %151, implicit %152, implicit %153, implicit %154, implicit %155, implicit %156, implicit %157, implicit %158, implicit %159
+ S_NOP 0, implicit %160, implicit %161, implicit %162, implicit %163, implicit %164, implicit %165, implicit %166, implicit %167, implicit %168, implicit %169
+ S_NOP 0, implicit %170, implicit %171, implicit %172, implicit %173, implicit %174, implicit %175, implicit %176, implicit %177, implicit %178, implicit %179
+ S_NOP 0, implicit %180, implicit %181, implicit %182, implicit %183, implicit %184, implicit %185, implicit %186, implicit %187, implicit %188, implicit %189
+ S_NOP 0, implicit %190, implicit %191, implicit %192, implicit %193, implicit %194, implicit %195, implicit %196, implicit %197, implicit %198, implicit %199
+ S_NOP 0, implicit %200, implicit %201, implicit %202, implicit %203, implicit %204, implicit %205, implicit %206, implicit %207, implicit %208, implicit %209
+ S_NOP 0, implicit %210, implicit %211, implicit %212, implicit %213, implicit %214, implicit %215, implicit %216, implicit %217, implicit %218, implicit %219
+ S_NOP 0, implicit %220, implicit %221, implicit %222, implicit %223, implicit %224, implicit %225, implicit %226, implicit %227, implicit %228, implicit %229
+ S_NOP 0, implicit %230, implicit %231, implicit %232, implicit %233, implicit %234, implicit %235, implicit %236, implicit %237, implicit %238, implicit %239
+ S_NOP 0, implicit %240, implicit %241, implicit %242, implicit %243, implicit %244, implicit %245, implicit %246, implicit %247, implicit %248, implicit %249
+ S_NOP 0, implicit %250, implicit %251, implicit %252, implicit %253, implicit %254, implicit %255, implicit %256, implicit %257
+
+ S_ENDPGM 0
+...
+---
+name: reduce_spill_agpr_above_addressable_limit
+tracksRegLiveness: true
+machineFunctionInfo:
+ isEntryFunction: true
+body: |
+ ; GFX908-LABEL: name: reduce_spill_agpr_above_addressable_limit
+ ; GFX908: bb.0:
+ ; GFX908-NEXT: successors: %bb.1(0x80000000)
+ ; GFX908-NEXT: {{ $}}
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 258, implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF1:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF2:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF3:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF4:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF5:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF6:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF7:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF8:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF9:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF10:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF11:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF12:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF13:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF14:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF15:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF16:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF17:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF18:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF19:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF20:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF21:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF22:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF23:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF24:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF25:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF26:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF27:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF28:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF29:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF30:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF31:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF32:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF33:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF34:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF35:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF36:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF37:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF38:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF39:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF40:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF41:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF42:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF43:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF44:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF45:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF46:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF47:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF48:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF49:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF50:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF51:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF52:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF53:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF54:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF55:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF56:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF57:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF58:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF59:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF60:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF61:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF62:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF63:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF64:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF65:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF66:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF67:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF68:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF69:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF70:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF71:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF72:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF73:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF74:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF75:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF76:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF77:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF78:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF79:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF80:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF81:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF82:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF83:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF84:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF85:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF86:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF87:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF88:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF89:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF90:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF91:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF92:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF93:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF94:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF95:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF96:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF97:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF98:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF99:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF100:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF101:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF102:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF103:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF104:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF105:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF106:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF107:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF108:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF109:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF110:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF111:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF112:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF113:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF114:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF115:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF116:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF117:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF118:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF119:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF120:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF121:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF122:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF123:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF124:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF125:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF126:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF127:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF128:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF129:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF130:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF131:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF132:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF133:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF134:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF135:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF136:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF137:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF138:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF139:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF140:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF141:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF142:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF143:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF144:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF145:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF146:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF147:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF148:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF149:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF150:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF151:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF152:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF153:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF154:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF155:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF156:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF157:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF158:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF159:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF160:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF161:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF162:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF163:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF164:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF165:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF166:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF167:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF168:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF169:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF170:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF171:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF172:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF173:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF174:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF175:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF176:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF177:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF178:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF179:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF180:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF181:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF182:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF183:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF184:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF185:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF186:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF187:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF188:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF189:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF190:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF191:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF192:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF193:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF194:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF195:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF196:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF197:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF198:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF199:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF200:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF201:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF202:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF203:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF204:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF205:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF206:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF207:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF208:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF209:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF210:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF211:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF212:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF213:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF214:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF215:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF216:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF217:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF218:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF219:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF220:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF221:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF222:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF223:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF224:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF225:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF226:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF227:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF228:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF229:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF230:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF231:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF232:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF233:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF234:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF235:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF236:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF237:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF238:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF239:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF240:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF241:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF242:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF243:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF244:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF245:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF246:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF247:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF248:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF249:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF250:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF251:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF252:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF253:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF254:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF255:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF256:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: {{ $}}
+ ; GFX908-NEXT: bb.1:
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]], implicit [[DEF5]], implicit [[DEF6]], implicit [[DEF7]], implicit [[DEF8]], implicit [[DEF9]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF10]], implicit [[DEF11]], implicit [[DEF12]], implicit [[DEF13]], implicit [[DEF14]], implicit [[DEF15]], implicit [[DEF16]], implicit [[DEF17]], implicit [[DEF18]], implicit [[DEF19]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF20]], implicit [[DEF21]], implicit [[DEF22]], implicit [[DEF23]], implicit [[DEF24]], implicit [[DEF25]], implicit [[DEF26]], implicit [[DEF27]], implicit [[DEF28]], implicit [[DEF29]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF30]], implicit [[DEF31]], implicit [[DEF32]], implicit [[DEF33]], implicit [[DEF34]], implicit [[DEF35]], implicit [[DEF36]], implicit [[DEF37]], implicit [[DEF38]], implicit [[DEF39]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF40]], implicit [[DEF41]], implicit [[DEF42]], implicit [[DEF43]], implicit [[DEF44]], implicit [[DEF45]], implicit [[DEF46]], implicit [[DEF47]], implicit [[DEF48]], implicit [[DEF49]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF50]], implicit [[DEF51]], implicit [[DEF52]], implicit [[DEF53]], implicit [[DEF54]], implicit [[DEF55]], implicit [[DEF56]], implicit [[DEF57]], implicit [[DEF58]], implicit [[DEF59]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF60]], implicit [[DEF61]], implicit [[DEF62]], implicit [[DEF63]], implicit [[DEF64]], implicit [[DEF65]], implicit [[DEF66]], implicit [[DEF67]], implicit [[DEF68]], implicit [[DEF69]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF70]], implicit [[DEF71]], implicit [[DEF72]], implicit [[DEF73]], implicit [[DEF74]], implicit [[DEF75]], implicit [[DEF76]], implicit [[DEF77]], implicit [[DEF78]], implicit [[DEF79]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF80]], implicit [[DEF81]], implicit [[DEF82]], implicit [[DEF83]], implicit [[DEF84]], implicit [[DEF85]], implicit [[DEF86]], implicit [[DEF87]], implicit [[DEF88]], implicit [[DEF89]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF90]], implicit [[DEF91]], implicit [[DEF92]], implicit [[DEF93]], implicit [[DEF94]], implicit [[DEF95]], implicit [[DEF96]], implicit [[DEF97]], implicit [[DEF98]], implicit [[DEF99]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF100]], implicit [[DEF101]], implicit [[DEF102]], implicit [[DEF103]], implicit [[DEF104]], implicit [[DEF105]], implicit [[DEF106]], implicit [[DEF107]], implicit [[DEF108]], implicit [[DEF109]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF110]], implicit [[DEF111]], implicit [[DEF112]], implicit [[DEF113]], implicit [[DEF114]], implicit [[DEF115]], implicit [[DEF116]], implicit [[DEF117]], implicit [[DEF118]], implicit [[DEF119]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF120]], implicit [[DEF121]], implicit [[DEF122]], implicit [[DEF123]], implicit [[DEF124]], implicit [[DEF125]], implicit [[DEF126]], implicit [[DEF127]], implicit [[DEF128]], implicit [[DEF129]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF130]], implicit [[DEF131]], implicit [[DEF132]], implicit [[DEF133]], implicit [[DEF134]], implicit [[DEF135]], implicit [[DEF136]], implicit [[DEF137]], implicit [[DEF138]], implicit [[DEF139]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF140]], implicit [[DEF141]], implicit [[DEF142]], implicit [[DEF143]], implicit [[DEF144]], implicit [[DEF145]], implicit [[DEF146]], implicit [[DEF147]], implicit [[DEF148]], implicit [[DEF149]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF150]], implicit [[DEF151]], implicit [[DEF152]], implicit [[DEF153]], implicit [[DEF154]], implicit [[DEF155]], implicit [[DEF156]], implicit [[DEF157]], implicit [[DEF158]], implicit [[DEF159]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF160]], implicit [[DEF161]], implicit [[DEF162]], implicit [[DEF163]], implicit [[DEF164]], implicit [[DEF165]], implicit [[DEF166]], implicit [[DEF167]], implicit [[DEF168]], implicit [[DEF169]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF170]], implicit [[DEF171]], implicit [[DEF172]], implicit [[DEF173]], implicit [[DEF174]], implicit [[DEF175]], implicit [[DEF176]], implicit [[DEF177]], implicit [[DEF178]], implicit [[DEF179]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF180]], implicit [[DEF181]], implicit [[DEF182]], implicit [[DEF183]], implicit [[DEF184]], implicit [[DEF185]], implicit [[DEF186]], implicit [[DEF187]], implicit [[DEF188]], implicit [[DEF189]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF190]], implicit [[DEF191]], implicit [[DEF192]], implicit [[DEF193]], implicit [[DEF194]], implicit [[DEF195]], implicit [[DEF196]], implicit [[DEF197]], implicit [[DEF198]], implicit [[DEF199]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF200]], implicit [[DEF201]], implicit [[DEF202]], implicit [[DEF203]], implicit [[DEF204]], implicit [[DEF205]], implicit [[DEF206]], implicit [[DEF207]], implicit [[DEF208]], implicit [[DEF209]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF210]], implicit [[DEF211]], implicit [[DEF212]], implicit [[DEF213]], implicit [[DEF214]], implicit [[DEF215]], implicit [[DEF216]], implicit [[DEF217]], implicit [[DEF218]], implicit [[DEF219]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF220]], implicit [[DEF221]], implicit [[DEF222]], implicit [[DEF223]], implicit [[DEF224]], implicit [[DEF225]], implicit [[DEF226]], implicit [[DEF227]], implicit [[DEF228]], implicit [[DEF229]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF230]], implicit [[DEF231]], implicit [[DEF232]], implicit [[DEF233]], implicit [[DEF234]], implicit [[DEF235]], implicit [[DEF236]], implicit [[DEF237]], implicit [[DEF238]], implicit [[DEF239]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF240]], implicit [[DEF241]], implicit [[DEF242]], implicit [[DEF243]], implicit [[DEF244]], implicit [[DEF245]], implicit [[DEF246]], implicit [[DEF247]], implicit [[DEF248]], implicit [[DEF249]]
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 257, implicit $exec, implicit $mode
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF250]], implicit [[DEF251]], implicit [[DEF252]], implicit [[DEF253]], implicit [[DEF254]], implicit [[DEF255]], implicit [[DEF256]], implicit [[V_CVT_I32_F64_e32_1]], implicit [[V_CVT_I32_F64_e32_]]
+ ; GFX908-NEXT: S_ENDPGM 0
+ ;
+ ; GFX90A-LABEL: name: reduce_spill_agpr_above_addressable_limit
+ ; GFX90A: bb.0:
+ ; GFX90A-NEXT: successors: %bb.1(0x80000000)
+ ; GFX90A-NEXT: {{ $}}
+ ; GFX90A-NEXT: [[DEF:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF1:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF2:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF3:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF4:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF5:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF6:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF7:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF8:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF9:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF10:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF11:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF12:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF13:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF14:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF15:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF16:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF17:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF18:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF19:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF20:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF21:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF22:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF23:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF24:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF25:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF26:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF27:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF28:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF29:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF30:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF31:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF32:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF33:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF34:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF35:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF36:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF37:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF38:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF39:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF40:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF41:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF42:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF43:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF44:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF45:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF46:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF47:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF48:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF49:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF50:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF51:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF52:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF53:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF54:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF55:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF56:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF57:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF58:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF59:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF60:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF61:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF62:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF63:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF64:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF65:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF66:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF67:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF68:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF69:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF70:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF71:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF72:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF73:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF74:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF75:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF76:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF77:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF78:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF79:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF80:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF81:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF82:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF83:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF84:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF85:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF86:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF87:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF88:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF89:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF90:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF91:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF92:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF93:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF94:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF95:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF96:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF97:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF98:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF99:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF100:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF101:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF102:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF103:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF104:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF105:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF106:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF107:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF108:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF109:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF110:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF111:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF112:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF113:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF114:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF115:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF116:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF117:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF118:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF119:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF120:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF121:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF122:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF123:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF124:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF125:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF126:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF127:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF128:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF129:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF130:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF131:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF132:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF133:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF134:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF135:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF136:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF137:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF138:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF139:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF140:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF141:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF142:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF143:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF144:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF145:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF146:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF147:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF148:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF149:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF150:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF151:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF152:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF153:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF154:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF155:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF156:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF157:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF158:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF159:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF160:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF161:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF162:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF163:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF164:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF165:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF166:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF167:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF168:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF169:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF170:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF171:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF172:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF173:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF174:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF175:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF176:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF177:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF178:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF179:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF180:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF181:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF182:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF183:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF184:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF185:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF186:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF187:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF188:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF189:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF190:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF191:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF192:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF193:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF194:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF195:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF196:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF197:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF198:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF199:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF200:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF201:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF202:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF203:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF204:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF205:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF206:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF207:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF208:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF209:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF210:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF211:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF212:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF213:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF214:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF215:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF216:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF217:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF218:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF219:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF220:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF221:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF222:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF223:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF224:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF225:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF226:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF227:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF228:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF229:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF230:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF231:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF232:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF233:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF234:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF235:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF236:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF237:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF238:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF239:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF240:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF241:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF242:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF243:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF244:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF245:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF246:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF247:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF248:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF249:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF250:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF251:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF252:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF253:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF254:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF255:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF256:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 258, implicit $exec, implicit $mode
+ ; GFX90A-NEXT: {{ $}}
+ ; GFX90A-NEXT: bb.1:
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]], implicit [[DEF5]], implicit [[DEF6]], implicit [[DEF7]], implicit [[DEF8]], implicit [[DEF9]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF10]], implicit [[DEF11]], implicit [[DEF12]], implicit [[DEF13]], implicit [[DEF14]], implicit [[DEF15]], implicit [[DEF16]], implicit [[DEF17]], implicit [[DEF18]], implicit [[DEF19]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF20]], implicit [[DEF21]], implicit [[DEF22]], implicit [[DEF23]], implicit [[DEF24]], implicit [[DEF25]], implicit [[DEF26]], implicit [[DEF27]], implicit [[DEF28]], implicit [[DEF29]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF30]], implicit [[DEF31]], implicit [[DEF32]], implicit [[DEF33]], implicit [[DEF34]], implicit [[DEF35]], implicit [[DEF36]], implicit [[DEF37]], implicit [[DEF38]], implicit [[DEF39]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF40]], implicit [[DEF41]], implicit [[DEF42]], implicit [[DEF43]], implicit [[DEF44]], implicit [[DEF45]], implicit [[DEF46]], implicit [[DEF47]], implicit [[DEF48]], implicit [[DEF49]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF50]], implicit [[DEF51]], implicit [[DEF52]], implicit [[DEF53]], implicit [[DEF54]], implicit [[DEF55]], implicit [[DEF56]], implicit [[DEF57]], implicit [[DEF58]], implicit [[DEF59]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF60]], implicit [[DEF61]], implicit [[DEF62]], implicit [[DEF63]], implicit [[DEF64]], implicit [[DEF65]], implicit [[DEF66]], implicit [[DEF67]], implicit [[DEF68]], implicit [[DEF69]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF70]], implicit [[DEF71]], implicit [[DEF72]], implicit [[DEF73]], implicit [[DEF74]], implicit [[DEF75]], implicit [[DEF76]], implicit [[DEF77]], implicit [[DEF78]], implicit [[DEF79]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF80]], implicit [[DEF81]], implicit [[DEF82]], implicit [[DEF83]], implicit [[DEF84]], implicit [[DEF85]], implicit [[DEF86]], implicit [[DEF87]], implicit [[DEF88]], implicit [[DEF89]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF90]], implicit [[DEF91]], implicit [[DEF92]], implicit [[DEF93]], implicit [[DEF94]], implicit [[DEF95]], implicit [[DEF96]], implicit [[DEF97]], implicit [[DEF98]], implicit [[DEF99]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF100]], implicit [[DEF101]], implicit [[DEF102]], implicit [[DEF103]], implicit [[DEF104]], implicit [[DEF105]], implicit [[DEF106]], implicit [[DEF107]], implicit [[DEF108]], implicit [[DEF109]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF110]], implicit [[DEF111]], implicit [[DEF112]], implicit [[DEF113]], implicit [[DEF114]], implicit [[DEF115]], implicit [[DEF116]], implicit [[DEF117]], implicit [[DEF118]], implicit [[DEF119]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF120]], implicit [[DEF121]], implicit [[DEF122]], implicit [[DEF123]], implicit [[DEF124]], implicit [[DEF125]], implicit [[DEF126]], implicit [[DEF127]], implicit [[DEF128]], implicit [[DEF129]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF130]], implicit [[DEF131]], implicit [[DEF132]], implicit [[DEF133]], implicit [[DEF134]], implicit [[DEF135]], implicit [[DEF136]], implicit [[DEF137]], implicit [[DEF138]], implicit [[DEF139]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF140]], implicit [[DEF141]], implicit [[DEF142]], implicit [[DEF143]], implicit [[DEF144]], implicit [[DEF145]], implicit [[DEF146]], implicit [[DEF147]], implicit [[DEF148]], implicit [[DEF149]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF150]], implicit [[DEF151]], implicit [[DEF152]], implicit [[DEF153]], implicit [[DEF154]], implicit [[DEF155]], implicit [[DEF156]], implicit [[DEF157]], implicit [[DEF158]], implicit [[DEF159]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF160]], implicit [[DEF161]], implicit [[DEF162]], implicit [[DEF163]], implicit [[DEF164]], implicit [[DEF165]], implicit [[DEF166]], implicit [[DEF167]], implicit [[DEF168]], implicit [[DEF169]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF170]], implicit [[DEF171]], implicit [[DEF172]], implicit [[DEF173]], implicit [[DEF174]], implicit [[DEF175]], implicit [[DEF176]], implicit [[DEF177]], implicit [[DEF178]], implicit [[DEF179]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF180]], implicit [[DEF181]], implicit [[DEF182]], implicit [[DEF183]], implicit [[DEF184]], implicit [[DEF185]], implicit [[DEF186]], implicit [[DEF187]], implicit [[DEF188]], implicit [[DEF189]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF190]], implicit [[DEF191]], implicit [[DEF192]], implicit [[DEF193]], implicit [[DEF194]], implicit [[DEF195]], implicit [[DEF196]], implicit [[DEF197]], implicit [[DEF198]], implicit [[DEF199]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF200]], implicit [[DEF201]], implicit [[DEF202]], implicit [[DEF203]], implicit [[DEF204]], implicit [[DEF205]], implicit [[DEF206]], implicit [[DEF207]], implicit [[DEF208]], implicit [[DEF209]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF210]], implicit [[DEF211]], implicit [[DEF212]], implicit [[DEF213]], implicit [[DEF214]], implicit [[DEF215]], implicit [[DEF216]], implicit [[DEF217]], implicit [[DEF218]], implicit [[DEF219]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF220]], implicit [[DEF221]], implicit [[DEF222]], implicit [[DEF223]], implicit [[DEF224]], implicit [[DEF225]], implicit [[DEF226]], implicit [[DEF227]], implicit [[DEF228]], implicit [[DEF229]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF230]], implicit [[DEF231]], implicit [[DEF232]], implicit [[DEF233]], implicit [[DEF234]], implicit [[DEF235]], implicit [[DEF236]], implicit [[DEF237]], implicit [[DEF238]], implicit [[DEF239]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF240]], implicit [[DEF241]], implicit [[DEF242]], implicit [[DEF243]], implicit [[DEF244]], implicit [[DEF245]], implicit [[DEF246]], implicit [[DEF247]], implicit [[DEF248]], implicit [[DEF249]]
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 257, implicit $exec, implicit $mode
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF250]], implicit [[DEF251]], implicit [[DEF252]], implicit [[DEF253]], implicit [[DEF254]], implicit [[DEF255]], implicit [[DEF256]], implicit [[V_CVT_I32_F64_e32_1]], implicit [[V_CVT_I32_F64_e32_]]
+ ; GFX90A-NEXT: S_ENDPGM 0
+
+ bb.0:
+ successors: %bb.1
+
+ %0:agpr_32 = IMPLICIT_DEF
+ %1:agpr_32 = IMPLICIT_DEF
+ %2:agpr_32 = IMPLICIT_DEF
+ %3:agpr_32 = IMPLICIT_DEF
+ %4:agpr_32 = IMPLICIT_DEF
+ %5:agpr_32 = IMPLICIT_DEF
+ %6:agpr_32 = IMPLICIT_DEF
+ %7:agpr_32 = IMPLICIT_DEF
+ %8:agpr_32 = IMPLICIT_DEF
+ %9:agpr_32 = IMPLICIT_DEF
+ %10:agpr_32 = IMPLICIT_DEF
+ %11:agpr_32 = IMPLICIT_DEF
+ %12:agpr_32 = IMPLICIT_DEF
+ %13:agpr_32 = IMPLICIT_DEF
+ %14:agpr_32 = IMPLICIT_DEF
+ %15:agpr_32 = IMPLICIT_DEF
+ %16:agpr_32 = IMPLICIT_DEF
+ %17:agpr_32 = IMPLICIT_DEF
+ %18:agpr_32 = IMPLICIT_DEF
+ %19:agpr_32 = IMPLICIT_DEF
+ %20:agpr_32 = IMPLICIT_DEF
+ %21:agpr_32 = IMPLICIT_DEF
+ %22:agpr_32 = IMPLICIT_DEF
+ %23:agpr_32 = IMPLICIT_DEF
+ %24:agpr_32 = IMPLICIT_DEF
+ %25:agpr_32 = IMPLICIT_DEF
+ %26:agpr_32 = IMPLICIT_DEF
+ %27:agpr_32 = IMPLICIT_DEF
+ %28:agpr_32 = IMPLICIT_DEF
+ %29:agpr_32 = IMPLICIT_DEF
+ %30:agpr_32 = IMPLICIT_DEF
+ %31:agpr_32 = IMPLICIT_DEF
+ %32:agpr_32 = IMPLICIT_DEF
+ %33:agpr_32 = IMPLICIT_DEF
+ %34:agpr_32 = IMPLICIT_DEF
+ %35:agpr_32 = IMPLICIT_DEF
+ %36:agpr_32 = IMPLICIT_DEF
+ %37:agpr_32 = IMPLICIT_DEF
+ %38:agpr_32 = IMPLICIT_DEF
+ %39:agpr_32 = IMPLICIT_DEF
+ %40:agpr_32 = IMPLICIT_DEF
+ %41:agpr_32 = IMPLICIT_DEF
+ %42:agpr_32 = IMPLICIT_DEF
+ %43:agpr_32 = IMPLICIT_DEF
+ %44:agpr_32 = IMPLICIT_DEF
+ %45:agpr_32 = IMPLICIT_DEF
+ %46:agpr_32 = IMPLICIT_DEF
+ %47:agpr_32 = IMPLICIT_DEF
+ %48:agpr_32 = IMPLICIT_DEF
+ %49:agpr_32 = IMPLICIT_DEF
+ %50:agpr_32 = IMPLICIT_DEF
+ %51:agpr_32 = IMPLICIT_DEF
+ %52:agpr_32 = IMPLICIT_DEF
+ %53:agpr_32 = IMPLICIT_DEF
+ %54:agpr_32 = IMPLICIT_DEF
+ %55:agpr_32 = IMPLICIT_DEF
+ %56:agpr_32 = IMPLICIT_DEF
+ %57:agpr_32 = IMPLICIT_DEF
+ %58:agpr_32 = IMPLICIT_DEF
+ %59:agpr_32 = IMPLICIT_DEF
+ %60:agpr_32 = IMPLICIT_DEF
+ %61:agpr_32 = IMPLICIT_DEF
+ %62:agpr_32 = IMPLICIT_DEF
+ %63:agpr_32 = IMPLICIT_DEF
+ %64:agpr_32 = IMPLICIT_DEF
+ %65:agpr_32 = IMPLICIT_DEF
+ %66:agpr_32 = IMPLICIT_DEF
+ %67:agpr_32 = IMPLICIT_DEF
+ %68:agpr_32 = IMPLICIT_DEF
+ %69:agpr_32 = IMPLICIT_DEF
+ %70:agpr_32 = IMPLICIT_DEF
+ %71:agpr_32 = IMPLICIT_DEF
+ %72:agpr_32 = IMPLICIT_DEF
+ %73:agpr_32 = IMPLICIT_DEF
+ %74:agpr_32 = IMPLICIT_DEF
+ %75:agpr_32 = IMPLICIT_DEF
+ %76:agpr_32 = IMPLICIT_DEF
+ %77:agpr_32 = IMPLICIT_DEF
+ %78:agpr_32 = IMPLICIT_DEF
+ %79:agpr_32 = IMPLICIT_DEF
+ %80:agpr_32 = IMPLICIT_DEF
+ %81:agpr_32 = IMPLICIT_DEF
+ %82:agpr_32 = IMPLICIT_DEF
+ %83:agpr_32 = IMPLICIT_DEF
+ %84:agpr_32 = IMPLICIT_DEF
+ %85:agpr_32 = IMPLICIT_DEF
+ %86:agpr_32 = IMPLICIT_DEF
+ %87:agpr_32 = IMPLICIT_DEF
+ %88:agpr_32 = IMPLICIT_DEF
+ %89:agpr_32 = IMPLICIT_DEF
+ %90:agpr_32 = IMPLICIT_DEF
+ %91:agpr_32 = IMPLICIT_DEF
+ %92:agpr_32 = IMPLICIT_DEF
+ %93:agpr_32 = IMPLICIT_DEF
+ %94:agpr_32 = IMPLICIT_DEF
+ %95:agpr_32 = IMPLICIT_DEF
+ %96:agpr_32 = IMPLICIT_DEF
+ %97:agpr_32 = IMPLICIT_DEF
+ %98:agpr_32 = IMPLICIT_DEF
+ %99:agpr_32 = IMPLICIT_DEF
+ %100:agpr_32 = IMPLICIT_DEF
+ %101:agpr_32 = IMPLICIT_DEF
+ %102:agpr_32 = IMPLICIT_DEF
+ %103:agpr_32 = IMPLICIT_DEF
+ %104:agpr_32 = IMPLICIT_DEF
+ %105:agpr_32 = IMPLICIT_DEF
+ %106:agpr_32 = IMPLICIT_DEF
+ %107:agpr_32 = IMPLICIT_DEF
+ %108:agpr_32 = IMPLICIT_DEF
+ %109:agpr_32 = IMPLICIT_DEF
+ %110:agpr_32 = IMPLICIT_DEF
+ %111:agpr_32 = IMPLICIT_DEF
+ %112:agpr_32 = IMPLICIT_DEF
+ %113:agpr_32 = IMPLICIT_DEF
+ %114:agpr_32 = IMPLICIT_DEF
+ %115:agpr_32 = IMPLICIT_DEF
+ %116:agpr_32 = IMPLICIT_DEF
+ %117:agpr_32 = IMPLICIT_DEF
+ %118:agpr_32 = IMPLICIT_DEF
+ %119:agpr_32 = IMPLICIT_DEF
+ %120:agpr_32 = IMPLICIT_DEF
+ %121:agpr_32 = IMPLICIT_DEF
+ %122:agpr_32 = IMPLICIT_DEF
+ %123:agpr_32 = IMPLICIT_DEF
+ %124:agpr_32 = IMPLICIT_DEF
+ %125:agpr_32 = IMPLICIT_DEF
+ %126:agpr_32 = IMPLICIT_DEF
+ %127:agpr_32 = IMPLICIT_DEF
+ %128:agpr_32 = IMPLICIT_DEF
+ %129:agpr_32 = IMPLICIT_DEF
+ %130:agpr_32 = IMPLICIT_DEF
+ %131:agpr_32 = IMPLICIT_DEF
+ %132:agpr_32 = IMPLICIT_DEF
+ %133:agpr_32 = IMPLICIT_DEF
+ %134:agpr_32 = IMPLICIT_DEF
+ %135:agpr_32 = IMPLICIT_DEF
+ %136:agpr_32 = IMPLICIT_DEF
+ %137:agpr_32 = IMPLICIT_DEF
+ %138:agpr_32 = IMPLICIT_DEF
+ %139:agpr_32 = IMPLICIT_DEF
+ %140:agpr_32 = IMPLICIT_DEF
+ %141:agpr_32 = IMPLICIT_DEF
+ %142:agpr_32 = IMPLICIT_DEF
+ %143:agpr_32 = IMPLICIT_DEF
+ %144:agpr_32 = IMPLICIT_DEF
+ %145:agpr_32 = IMPLICIT_DEF
+ %146:agpr_32 = IMPLICIT_DEF
+ %147:agpr_32 = IMPLICIT_DEF
+ %148:agpr_32 = IMPLICIT_DEF
+ %149:agpr_32 = IMPLICIT_DEF
+ %150:agpr_32 = IMPLICIT_DEF
+ %151:agpr_32 = IMPLICIT_DEF
+ %152:agpr_32 = IMPLICIT_DEF
+ %153:agpr_32 = IMPLICIT_DEF
+ %154:agpr_32 = IMPLICIT_DEF
+ %155:agpr_32 = IMPLICIT_DEF
+ %156:agpr_32 = IMPLICIT_DEF
+ %157:agpr_32 = IMPLICIT_DEF
+ %158:agpr_32 = IMPLICIT_DEF
+ %159:agpr_32 = IMPLICIT_DEF
+ %160:agpr_32 = IMPLICIT_DEF
+ %161:agpr_32 = IMPLICIT_DEF
+ %162:agpr_32 = IMPLICIT_DEF
+ %163:agpr_32 = IMPLICIT_DEF
+ %164:agpr_32 = IMPLICIT_DEF
+ %165:agpr_32 = IMPLICIT_DEF
+ %166:agpr_32 = IMPLICIT_DEF
+ %167:agpr_32 = IMPLICIT_DEF
+ %168:agpr_32 = IMPLICIT_DEF
+ %169:agpr_32 = IMPLICIT_DEF
+ %170:agpr_32 = IMPLICIT_DEF
+ %171:agpr_32 = IMPLICIT_DEF
+ %172:agpr_32 = IMPLICIT_DEF
+ %173:agpr_32 = IMPLICIT_DEF
+ %174:agpr_32 = IMPLICIT_DEF
+ %175:agpr_32 = IMPLICIT_DEF
+ %176:agpr_32 = IMPLICIT_DEF
+ %177:agpr_32 = IMPLICIT_DEF
+ %178:agpr_32 = IMPLICIT_DEF
+ %179:agpr_32 = IMPLICIT_DEF
+ %180:agpr_32 = IMPLICIT_DEF
+ %181:agpr_32 = IMPLICIT_DEF
+ %182:agpr_32 = IMPLICIT_DEF
+ %183:agpr_32 = IMPLICIT_DEF
+ %184:agpr_32 = IMPLICIT_DEF
+ %185:agpr_32 = IMPLICIT_DEF
+ %186:agpr_32 = IMPLICIT_DEF
+ %187:agpr_32 = IMPLICIT_DEF
+ %188:agpr_32 = IMPLICIT_DEF
+ %189:agpr_32 = IMPLICIT_DEF
+ %190:agpr_32 = IMPLICIT_DEF
+ %191:agpr_32 = IMPLICIT_DEF
+ %192:agpr_32 = IMPLICIT_DEF
+ %193:agpr_32 = IMPLICIT_DEF
+ %194:agpr_32 = IMPLICIT_DEF
+ %195:agpr_32 = IMPLICIT_DEF
+ %196:agpr_32 = IMPLICIT_DEF
+ %197:agpr_32 = IMPLICIT_DEF
+ %198:agpr_32 = IMPLICIT_DEF
+ %199:agpr_32 = IMPLICIT_DEF
+ %200:agpr_32 = IMPLICIT_DEF
+ %201:agpr_32 = IMPLICIT_DEF
+ %202:agpr_32 = IMPLICIT_DEF
+ %203:agpr_32 = IMPLICIT_DEF
+ %204:agpr_32 = IMPLICIT_DEF
+ %205:agpr_32 = IMPLICIT_DEF
+ %206:agpr_32 = IMPLICIT_DEF
+ %207:agpr_32 = IMPLICIT_DEF
+ %208:agpr_32 = IMPLICIT_DEF
+ %209:agpr_32 = IMPLICIT_DEF
+ %210:agpr_32 = IMPLICIT_DEF
+ %211:agpr_32 = IMPLICIT_DEF
+ %212:agpr_32 = IMPLICIT_DEF
+ %213:agpr_32 = IMPLICIT_DEF
+ %214:agpr_32 = IMPLICIT_DEF
+ %215:agpr_32 = IMPLICIT_DEF
+ %216:agpr_32 = IMPLICIT_DEF
+ %217:agpr_32 = IMPLICIT_DEF
+ %218:agpr_32 = IMPLICIT_DEF
+ %219:agpr_32 = IMPLICIT_DEF
+ %220:agpr_32 = IMPLICIT_DEF
+ %221:agpr_32 = IMPLICIT_DEF
+ %222:agpr_32 = IMPLICIT_DEF
+ %223:agpr_32 = IMPLICIT_DEF
+ %224:agpr_32 = IMPLICIT_DEF
+ %225:agpr_32 = IMPLICIT_DEF
+ %226:agpr_32 = IMPLICIT_DEF
+ %227:agpr_32 = IMPLICIT_DEF
+ %228:agpr_32 = IMPLICIT_DEF
+ %229:agpr_32 = IMPLICIT_DEF
+ %230:agpr_32 = IMPLICIT_DEF
+ %231:agpr_32 = IMPLICIT_DEF
+ %232:agpr_32 = IMPLICIT_DEF
+ %233:agpr_32 = IMPLICIT_DEF
+ %234:agpr_32 = IMPLICIT_DEF
+ %235:agpr_32 = IMPLICIT_DEF
+ %236:agpr_32 = IMPLICIT_DEF
+ %237:agpr_32 = IMPLICIT_DEF
+ %238:agpr_32 = IMPLICIT_DEF
+ %239:agpr_32 = IMPLICIT_DEF
+ %240:agpr_32 = IMPLICIT_DEF
+ %241:agpr_32 = IMPLICIT_DEF
+ %242:agpr_32 = IMPLICIT_DEF
+ %243:agpr_32 = IMPLICIT_DEF
+ %244:agpr_32 = IMPLICIT_DEF
+ %245:agpr_32 = IMPLICIT_DEF
+ %246:agpr_32 = IMPLICIT_DEF
+ %247:agpr_32 = IMPLICIT_DEF
+ %248:agpr_32 = IMPLICIT_DEF
+ %249:agpr_32 = IMPLICIT_DEF
+ %250:agpr_32 = IMPLICIT_DEF
+ %251:agpr_32 = IMPLICIT_DEF
+ %252:agpr_32 = IMPLICIT_DEF
+ %253:agpr_32 = IMPLICIT_DEF
+ %254:agpr_32 = IMPLICIT_DEF
+ %255:agpr_32 = IMPLICIT_DEF
+ %256:agpr_32 = IMPLICIT_DEF
+
+ %257:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 257, implicit $exec, implicit $mode
+ %258:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 258, implicit $exec, implicit $mode
+
+ bb.1:
+
+ S_NOP 0, implicit %0, implicit %1, implicit %2, implicit %3, implicit %4, implicit %5, implicit %6, implicit %7, implicit %8, implicit %9
+ S_NOP 0, implicit %10, implicit %11, implicit %12, implicit %13, implicit %14, implicit %15, implicit %16, implicit %17, implicit %18, implicit %19
+ S_NOP 0, implicit %20, implicit %21, implicit %22, implicit %23, implicit %24, implicit %25, implicit %26, implicit %27, implicit %28, implicit %29
+ S_NOP 0, implicit %30, implicit %31, implicit %32, implicit %33, implicit %34, implicit %35, implicit %36, implicit %37, implicit %38, implicit %39
+ S_NOP 0, implicit %40, implicit %41, implicit %42, implicit %43, implicit %44, implicit %45, implicit %46, implicit %47, implicit %48, implicit %49
+ S_NOP 0, implicit %50, implicit %51, implicit %52, implicit %53, implicit %54, implicit %55, implicit %56, implicit %57, implicit %58, implicit %59
+ S_NOP 0, implicit %60, implicit %61, implicit %62, implicit %63, implicit %64, implicit %65, implicit %66, implicit %67, implicit %68, implicit %69
+ S_NOP 0, implicit %70, implicit %71, implicit %72, implicit %73, implicit %74, implicit %75, implicit %76, implicit %77, implicit %78, implicit %79
+ S_NOP 0, implicit %80, implicit %81, implicit %82, implicit %83, implicit %84, implicit %85, implicit %86, implicit %87, implicit %88, implicit %89
+ S_NOP 0, implicit %90, implicit %91, implicit %92, implicit %93, implicit %94, implicit %95, implicit %96, implicit %97, implicit %98, implicit %99
+ S_NOP 0, implicit %100, implicit %101, implicit %102, implicit %103, implicit %104, implicit %105, implicit %106, implicit %107, implicit %108, implicit %109
+ S_NOP 0, implicit %110, implicit %111, implicit %112, implicit %113, implicit %114, implicit %115, implicit %116, implicit %117, implicit %118, implicit %119
+ S_NOP 0, implicit %120, implicit %121, implicit %122, implicit %123, implicit %124, implicit %125, implicit %126, implicit %127, implicit %128, implicit %129
+ S_NOP 0, implicit %130, implicit %131, implicit %132, implicit %133, implicit %134, implicit %135, implicit %136, implicit %137, implicit %138, implicit %139
+ S_NOP 0, implicit %140, implicit %141, implicit %142, implicit %143, implicit %144, implicit %145, implicit %146, implicit %147, implicit %148, implicit %149
+ S_NOP 0, implicit %150, implicit %151, implicit %152, implicit %153, implicit %154, implicit %155, implicit %156, implicit %157, implicit %158, implicit %159
+ S_NOP 0, implicit %160, implicit %161, implicit %162, implicit %163, implicit %164, implicit %165, implicit %166, implicit %167, implicit %168, implicit %169
+ S_NOP 0, implicit %170, implicit %171, implicit %172, implicit %173, implicit %174, implicit %175, implicit %176, implicit %177, implicit %178, implicit %179
+ S_NOP 0, implicit %180, implicit %181, implicit %182, implicit %183, implicit %184, implicit %185, implicit %186, implicit %187, implicit %188, implicit %189
+ S_NOP 0, implicit %190, implicit %191, implicit %192, implicit %193, implicit %194, implicit %195, implicit %196, implicit %197, implicit %198, implicit %199
+ S_NOP 0, implicit %200, implicit %201, implicit %202, implicit %203, implicit %204, implicit %205, implicit %206, implicit %207, implicit %208, implicit %209
+ S_NOP 0, implicit %210, implicit %211, implicit %212, implicit %213, implicit %214, implicit %215, implicit %216, implicit %217, implicit %218, implicit %219
+ S_NOP 0, implicit %220, implicit %221, implicit %222, implicit %223, implicit %224, implicit %225, implicit %226, implicit %227, implicit %228, implicit %229
+ S_NOP 0, implicit %230, implicit %231, implicit %232, implicit %233, implicit %234, implicit %235, implicit %236, implicit %237, implicit %238, implicit %239
+ S_NOP 0, implicit %240, implicit %241, implicit %242, implicit %243, implicit %244, implicit %245, implicit %246, implicit %247, implicit %248, implicit %249
+ S_NOP 0, implicit %250, implicit %251, implicit %252, implicit %253, implicit %254, implicit %255, implicit %256, implicit %257, implicit %258
+
+ S_ENDPGM 0
+...
diff --git a/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats.mir b/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats.mir
index f9413eed7521c..d8cca9e84ebee 100644
--- a/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats.mir
+++ b/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats.mir
@@ -6534,7 +6534,7 @@ body: |
; GFX908-NEXT: [[V_CVT_I32_F64_e32_13:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 14, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_14:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 15, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_15:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 16, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: dead [[V_CVT_I32_F64_e32_16:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 17, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_16:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 17, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_17:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 18, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_18:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 19, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_19:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 20, implicit $exec, implicit $mode, implicit-def $m0
@@ -6549,32 +6549,21 @@ body: |
; GFX908-NEXT: [[V_CVT_I32_F64_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 29, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 30, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 31, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 32, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_32:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 33, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: dead [[V_CVT_I32_F64_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 32, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.2:
; GFX908-NEXT: successors: %bb.3(0x80000000)
; GFX908-NEXT: {{ $}}
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_1]], implicit [[V_CVT_I32_F64_e32_17]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_2]], implicit [[V_CVT_I32_F64_e32_18]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_3]], implicit [[V_CVT_I32_F64_e32_19]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_4]], implicit [[V_CVT_I32_F64_e32_20]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_5]], implicit [[V_CVT_I32_F64_e32_21]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_6]], implicit [[V_CVT_I32_F64_e32_22]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_7]], implicit [[V_CVT_I32_F64_e32_23]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_8]], implicit [[V_CVT_I32_F64_e32_24]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_9]], implicit [[V_CVT_I32_F64_e32_25]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_10]], implicit [[V_CVT_I32_F64_e32_26]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_11]], implicit [[V_CVT_I32_F64_e32_27]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_12]], implicit [[V_CVT_I32_F64_e32_28]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_13]], implicit [[V_CVT_I32_F64_e32_29]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_14]], implicit [[V_CVT_I32_F64_e32_30]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_15]], implicit [[V_CVT_I32_F64_e32_31]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_32]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_1]], implicit [[V_CVT_I32_F64_e32_2]], implicit [[V_CVT_I32_F64_e32_3]], implicit [[V_CVT_I32_F64_e32_4]], implicit [[V_CVT_I32_F64_e32_5]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_6]], implicit [[V_CVT_I32_F64_e32_7]], implicit [[V_CVT_I32_F64_e32_8]], implicit [[V_CVT_I32_F64_e32_9]], implicit [[V_CVT_I32_F64_e32_10]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_11]], implicit [[V_CVT_I32_F64_e32_12]], implicit [[V_CVT_I32_F64_e32_13]], implicit [[V_CVT_I32_F64_e32_14]], implicit [[V_CVT_I32_F64_e32_15]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_16]], implicit [[V_CVT_I32_F64_e32_17]], implicit [[V_CVT_I32_F64_e32_18]], implicit [[V_CVT_I32_F64_e32_19]], implicit [[V_CVT_I32_F64_e32_20]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_21]], implicit [[V_CVT_I32_F64_e32_22]], implicit [[V_CVT_I32_F64_e32_23]], implicit [[V_CVT_I32_F64_e32_24]], implicit [[V_CVT_I32_F64_e32_25]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_26]], implicit [[V_CVT_I32_F64_e32_27]], implicit [[V_CVT_I32_F64_e32_28]], implicit [[V_CVT_I32_F64_e32_29]], implicit [[V_CVT_I32_F64_e32_30]]
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.3:
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_33:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 0, implicit $exec, implicit $mode
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_33]], implicit [[V_CVT_I32_F64_e32_]]
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_32:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 0, implicit $exec, implicit $mode
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_32]], implicit [[V_CVT_I32_F64_e32_]]
; GFX908-NEXT: S_ENDPGM 0
bb.0:
successors: %bb.1
@@ -6616,27 +6605,16 @@ body: |
%30:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 30, implicit $exec, implicit $mode, implicit-def $m0
%31:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 31, implicit $exec, implicit $mode, implicit-def $m0
%32:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 32, implicit $exec, implicit $mode, implicit-def $m0
- %33:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 33, implicit $exec, implicit $mode, implicit-def $m0
bb.2:
successors: %bb.3
- S_NOP 0, implicit %2, implicit %18
- S_NOP 0, implicit %3, implicit %19
- S_NOP 0, implicit %4, implicit %20
- S_NOP 0, implicit %5, implicit %21
- S_NOP 0, implicit %6, implicit %22
- S_NOP 0, implicit %7, implicit %23
- S_NOP 0, implicit %8, implicit %24
- S_NOP 0, implicit %9, implicit %25
- S_NOP 0, implicit %10, implicit %26
- S_NOP 0, implicit %11, implicit %27
- S_NOP 0, implicit %12, implicit %28
- S_NOP 0, implicit %13, implicit %29
- S_NOP 0, implicit %14, implicit %30
- S_NOP 0, implicit %15, implicit %31
- S_NOP 0, implicit %16, implicit %32
- S_NOP 0, implicit %33
+ S_NOP 0, implicit %2, implicit %3, implicit %4, implicit %5, implicit %6
+ S_NOP 0, implicit %7, implicit %8, implicit %9, implicit %10, implicit %11
+ S_NOP 0, implicit %12, implicit %13, implicit %14, implicit %15, implicit %16
+ S_NOP 0, implicit %17, implicit %18, implicit %19, implicit %20, implicit %21
+ S_NOP 0, implicit %22, implicit %23, implicit %24, implicit %25, implicit %26
+ S_NOP 0, implicit %27, implicit %28, implicit %29, implicit %30, implicit %31
bb.3:
S_NOP 0, implicit %0, implicit %1
@@ -6775,6 +6753,132 @@ body: |
S_ENDPGM 0
...
---
+name: test_remat_same_block
+tracksRegLiveness: true
+machineFunctionInfo:
+ isEntryFunction: true
+body: |
+ bb.0:
+ liveins: $sgpr2_sgpr3
+
+ ; GFX908-LABEL: name: test_remat_same_block
+ ; GFX908: liveins: $sgpr2_sgpr3
+ ; GFX908-NEXT: {{ $}}
+ ; GFX908-NEXT: %new_exec:sgpr_64 = COPY $sgpr2_sgpr3
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 0, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_3:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_4:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 4, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_5:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 5, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_6:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 6, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_7:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 7, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_8:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 8, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_9:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 9, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_10:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 10, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_11:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 11, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_12:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 12, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_13:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 13, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_14:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 14, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_15:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 15, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_16:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 16, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_17:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 17, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_18:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 18, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_19:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 19, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_20:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 20, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_21:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 21, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 25, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_26:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 26, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 27, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: %save_exec:sreg_64 = S_MOV_B64 $exec
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 28, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 29, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 30, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 31, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: $exec = S_MOV_B64 %new_exec
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_]], implicit [[V_CVT_I32_F64_e32_16]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_1]], implicit [[V_CVT_I32_F64_e32_17]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_2]], implicit [[V_CVT_I32_F64_e32_18]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_3]], implicit [[V_CVT_I32_F64_e32_19]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_4]], implicit [[V_CVT_I32_F64_e32_20]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_5]], implicit [[V_CVT_I32_F64_e32_21]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_6]], implicit [[V_CVT_I32_F64_e32_22]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_7]], implicit [[V_CVT_I32_F64_e32_23]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_8]], implicit [[V_CVT_I32_F64_e32_24]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_9]], implicit [[V_CVT_I32_F64_e32_25]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_10]], implicit [[V_CVT_I32_F64_e32_26]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_11]], implicit [[V_CVT_I32_F64_e32_27]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_12]], implicit [[V_CVT_I32_F64_e32_28]]
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_32:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 32, implicit $exec, implicit $mode
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_13]], implicit [[V_CVT_I32_F64_e32_29]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_14]], implicit [[V_CVT_I32_F64_e32_30]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_15]], implicit [[V_CVT_I32_F64_e32_31]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_32]]
+ ; GFX908-NEXT: $exec = S_MOV_B64 %save_exec
+ ; GFX908-NEXT: S_ENDPGM 0
+ %0:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 0, implicit $exec, implicit $mode, implicit-def $m0
+ %1:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode, implicit-def $m0
+ %2:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode, implicit-def $m0
+ %3:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode, implicit-def $m0
+ %4:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 4, implicit $exec, implicit $mode, implicit-def $m0
+ %5:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 5, implicit $exec, implicit $mode, implicit-def $m0
+ %6:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 6, implicit $exec, implicit $mode, implicit-def $m0
+ %7:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 7, implicit $exec, implicit $mode, implicit-def $m0
+ %8:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 8, implicit $exec, implicit $mode, implicit-def $m0
+ %9:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 9, implicit $exec, implicit $mode, implicit-def $m0
+ %10:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 10, implicit $exec, implicit $mode, implicit-def $m0
+ %11:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 11, implicit $exec, implicit $mode, implicit-def $m0
+ %12:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 12, implicit $exec, implicit $mode, implicit-def $m0
+ %13:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 13, implicit $exec, implicit $mode, implicit-def $m0
+ %14:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 14, implicit $exec, implicit $mode, implicit-def $m0
+ %15:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 15, implicit $exec, implicit $mode, implicit-def $m0
+ %16:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 16, implicit $exec, implicit $mode, implicit-def $m0
+ %17:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 17, implicit $exec, implicit $mode, implicit-def $m0
+ %18:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 18, implicit $exec, implicit $mode, implicit-def $m0
+ %19:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 19, implicit $exec, implicit $mode, implicit-def $m0
+ %20:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 20, implicit $exec, implicit $mode, implicit-def $m0
+ %21:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 21, implicit $exec, implicit $mode, implicit-def $m0
+ %22:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode, implicit-def $m0
+ %23:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode, implicit-def $m0
+ %24:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
+ %25:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 25, implicit $exec, implicit $mode, implicit-def $m0
+ %26:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 26, implicit $exec, implicit $mode, implicit-def $m0
+ %27:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 27, implicit $exec, implicit $mode, implicit-def $m0
+ %28:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 28, implicit $exec, implicit $mode, implicit-def $m0
+ %29:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 29, implicit $exec, implicit $mode, implicit-def $m0
+ %30:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 30, implicit $exec, implicit $mode, implicit-def $m0
+ %31:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 31, implicit $exec, implicit $mode, implicit-def $m0
+ %32:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 32, implicit $exec, implicit $mode
+
+ %save_exec:sreg_64 = S_MOV_B64 $exec
+ %new_exec:sgpr_64 = COPY $sgpr2_sgpr3
+ $exec = S_MOV_B64 %new_exec
+
+ S_NOP 0, implicit %0, implicit %16
+ S_NOP 0, implicit %1, implicit %17
+ S_NOP 0, implicit %2, implicit %18
+ S_NOP 0, implicit %3, implicit %19
+ S_NOP 0, implicit %4, implicit %20
+ S_NOP 0, implicit %5, implicit %21
+ S_NOP 0, implicit %6, implicit %22
+ S_NOP 0, implicit %7, implicit %23
+ S_NOP 0, implicit %8, implicit %24
+ S_NOP 0, implicit %9, implicit %25
+ S_NOP 0, implicit %10, implicit %26
+ S_NOP 0, implicit %11, implicit %27
+ S_NOP 0, implicit %12, implicit %28
+ S_NOP 0, implicit %13, implicit %29
+ S_NOP 0, implicit %14, implicit %30
+ S_NOP 0, implicit %15, implicit %31
+ S_NOP 0, implicit %32
+
+ $exec = S_MOV_B64 %save_exec
+ S_ENDPGM 0
+...
+---
name: test_rollback_remat_defregion_above_target
tracksRegLiveness: true
machineFunctionInfo:
>From 799cfed234712e6b7112a5ba87c9763d0da44f12 Mon Sep 17 00:00:00 2001
From: Lucas Ramirez <lucas.rami at proton.me>
Date: Mon, 24 Mar 2025 14:34:05 +0000
Subject: [PATCH 10/10] Remove manual recomputation of live intervals, use the
LIS methods
This fails in some complex use cases, and it is not clear whether it is
meaningfully faster than just letting the LIS object recompute it from
scratch.
Also removed stale comment in isTriviallyReMaterializable: we can now
remat MIs with virtual register uses.
---
llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp | 43 ++-------------------
1 file changed, 3 insertions(+), 40 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index bb017be76bab9..69d4bb0190dde 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -2065,7 +2065,6 @@ void PreRARematStage::rematerialize() {
// Collect regions whose RP changes in unpredictable way; we will have to
// fully recompute their RP after all rematerailizations.
DenseSet<unsigned> RecomputeRP;
- SlotIndexes *Slots = DAG.LIS->getSlotIndexes();
// Rematerialize all instructions.
for (auto &[DefMI, Remat] : Rematerializations) {
@@ -2147,41 +2146,10 @@ void PreRARematStage::rematerialize() {
ImpactedRegions.insert({DefRegion, DAG.Pressure[DefRegion]});
RecomputeRP.insert(DefRegion);
- // Update the register's live interval manually. This is aligned with the
- // instruction collection phase in that it assumes a single def and use
- // (possibly subreg).
- SlotIndex NewDef = Slots->getInstructionIndex(*Remat.RematMI).getRegSlot();
- SlotIndex NewKill = Slots->getInstructionIndex(*Remat.UseMI).getRegSlot();
-
- // Update the live range to reflect the register's rematerialization. We
- // will have a single segment (we only remat regs with one use) and at most
- // a single value number (we only remat virtual regs with a single def).
- auto UpdateLiveRange = [&](LiveRange &LR) -> void {
- assert(LR.segments.size() && "expected at least one segment");
- assert(LR.valnos.size() < 2 && "expected at most one value number");
-
- // Fix up the first segment and remove the others which have become
- // unnecessary by construction.
- LR.segments.truncate(1);
- LiveRange::Segment &Seg = LR.segments.front();
- Seg.start = NewDef;
- Seg.end = NewKill;
- LR.valnos.clear();
- if (Seg.valno) {
- // If present, update the segment's value number as well.
- Seg.valno->def = NewDef;
- LR.valnos.push_back(Seg.valno);
- }
- };
-
+ // Recompute live interval to reflect the register's rematerialization.
Register RematReg = Remat.RematMI->getOperand(0).getReg();
- LiveInterval &RegLI = DAG.LIS->getInterval(RematReg);
- UpdateLiveRange(RegLI);
- if (RegLI.hasSubRanges()) {
- LiveInterval::SubRange &SubRange = *RegLI.subrange_begin();
- assert(!SubRange.Next && "expected at most one subrange");
- UpdateLiveRange(SubRange);
- }
+ DAG.LIS->removeInterval(RematReg);
+ DAG.LIS->createAndComputeVirtRegInterval(RematReg);
}
// All regions impacted by at least one rematerialization must be rescheduled.
@@ -2221,11 +2189,6 @@ bool PreRARematStage::isTriviallyReMaterializable(const MachineInstr &MI) {
if (!DAG.TII->isTriviallyReMaterializable(MI))
return false;
- // Even though TargetInstrInfo::isReallyTriviallyReMaterializable already
- // ensures that the instruction has no virtual register uses,
- // SIInstrInfo::isReallyTriviallyReMaterializable may consider an instruction
- // rematerializable and return before calling its parent's method, so we need
- // to double-check here.
for (const MachineOperand &MO : MI.all_uses()) {
// We can't remat physreg uses, unless it is a constant or an ignorable
// use (e.g. implicit exec use on VALU instructions)
More information about the llvm-commits
mailing list