[llvm] [AMDGPU][Scheduler] Scoring system for rematerialization candidates (PR #153092)
Lucas Ramirez via llvm-commits
llvm-commits at lists.llvm.org
Wed Oct 8 05:04:17 PDT 2025
https://github.com/lucas-rami updated https://github.com/llvm/llvm-project/pull/153092
From 0c794e2432090963547c6ffe4a8b69fa35842122 Mon Sep 17 00:00:00 2001
From: Lucas Ramirez <lucas.rami at proton.me>
Date: Mon, 11 Aug 2025 16:09:27 +0000
Subject: [PATCH 01/10] Scoring system for rematerializations
---
llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp | 740 ++++++++-----
llvm/lib/Target/AMDGPU/GCNSchedStrategy.h | 233 ++++-
.../GlobalISel/llvm.amdgcn.wqm.demote.ll | 20 +-
.../AMDGPU/GlobalISel/vni8-across-blocks.ll | 32 +-
.../AMDGPU/buffer-fat-pointers-memcpy.ll | 12 +-
llvm/test/CodeGen/AMDGPU/call-waitcnt.ll | 8 +-
.../AMDGPU/dbg-value-ends-sched-region.mir | 34 +-
.../AMDGPU/dbg-value-starts-sched-region.mir | 4 +-
.../AMDGPU/debug-value-scheduler-crash.mir | 58 +-
llvm/test/CodeGen/AMDGPU/ds_read2-gfx1250.ll | 6 +-
llvm/test/CodeGen/AMDGPU/ds_read2.ll | 12 +-
.../test/CodeGen/AMDGPU/dynamic_stackalloc.ll | 25 +-
.../CodeGen/AMDGPU/fix-sgpr-copies-wwm.ll | 15 +-
.../CodeGen/AMDGPU/llvm.amdgcn.reduce.add.ll | 16 +-
.../CodeGen/AMDGPU/llvm.amdgcn.reduce.and.ll | 12 +-
.../CodeGen/AMDGPU/llvm.amdgcn.reduce.max.ll | 12 +-
.../CodeGen/AMDGPU/llvm.amdgcn.reduce.min.ll | 12 +-
.../CodeGen/AMDGPU/llvm.amdgcn.reduce.or.ll | 12 +-
.../CodeGen/AMDGPU/llvm.amdgcn.reduce.sub.ll | 16 +-
.../CodeGen/AMDGPU/llvm.amdgcn.reduce.umax.ll | 12 +-
.../CodeGen/AMDGPU/llvm.amdgcn.reduce.umin.ll | 12 +-
.../CodeGen/AMDGPU/llvm.amdgcn.reduce.xor.ll | 12 +-
...ne-scheduler-rematerialization-scoring.mir | 535 ++++++++++
...ine-scheduler-sink-trivial-remats-attr.mir | 728 +++++++------
.../machine-scheduler-sink-trivial-remats.mir | 990 +++++++++---------
...ne-sink-temporal-divergence-swdev407790.ll | 17 +-
llvm/test/CodeGen/AMDGPU/mfma-loop.ll | 162 +--
...ssert-dead-def-subreg-use-other-subreg.mir | 6 +-
...ched-assert-onlydbg-value-empty-region.mir | 2 +-
llvm/test/CodeGen/AMDGPU/schedule-ilp.mir | 16 +-
.../AMDGPU/spill-empty-live-interval.mir | 2 +-
.../test/CodeGen/AMDGPU/vni8-across-blocks.ll | 84 +-
llvm/test/CodeGen/AMDGPU/wave32.ll | 4 +-
33 files changed, 2371 insertions(+), 1490 deletions(-)
create mode 100644 llvm/test/CodeGen/AMDGPU/machine-scheduler-rematerialization-scoring.mir
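
The core of the patch is a greedy, round-based selection loop: every rematerialization candidate is scored against the current register-pressure (RP) targets, candidates are sorted, and the highest-scoring ones are rematerialized until all targets are met, a score turns non-positive, or the scores look stale and a re-scoring round is needed. The standalone C++ sketch below only illustrates that loop shape as implemented in PreRARematStage::initGCNSchedStage() below; Candidate, Saving, Pressure, and Target are made-up names and numbers, not identifiers from the patch.

  // Minimal, self-contained sketch of the round-based selection loop the
  // patch implements. Names and values are illustrative only.
  #include <algorithm>
  #include <vector>

  struct Candidate {
    int Saving; // estimated RP reduction if this candidate is rematerialized
    int Score;  // recomputed every round; <= 0 means "not useful anymore"
  };

  int main() {
    int Pressure = 40;     // current maximum pressure in a target region
    const int Target = 32; // pressure target the stage wants to reach
    std::vector<Candidate> Cands = {{2, 0}, {5, 0}, {1, 0}, {4, 0}};

    // Successive rounds: re-score, sort in increasing score order, then walk
    // the tail (best candidates first) until the target is met.
    while (Pressure > Target && !Cands.empty()) {
      for (Candidate &C : Cands)
        C.Score = C.Saving; // the patch derives this from RP targets, region
                            // frequencies, and latency estimates
      std::stable_sort(Cands.begin(), Cands.end(),
                       [](const Candidate &A, const Candidate &B) {
                         return A.Score < B.Score;
                       });
      int I = static_cast<int>(Cands.size()) - 1;
      for (; I >= 0 && Pressure > Target; --I) {
        if (Cands[I].Score <= 0) { // nothing useful left in this round
          I = -1;
          break;
        }
        Pressure -= Cands[I].Saving; // "rematerialize" the candidate
      }
      Cands.resize(I + 1); // drop candidates already rematerialized
    }
    return Pressure > Target ? 1 : 0; // 0 when the RP target was reached
  }
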
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index bdc08101c7119..fca3eda298f7a 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -28,11 +28,22 @@
#include "GCNRegPressure.h"
#include "SIMachineFunctionInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
+#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/CalcSpillWeights.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/MC/LaneBitmask.h"
+#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/MC/MCSchedule.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include <deque>
+#include <limits>
+#include <string>
#define DEBUG_TYPE "machine-scheduler"
@@ -809,6 +820,8 @@ void GCNScheduleDAGMILive::schedule() {
GCNRegPressure
GCNScheduleDAGMILive::getRealRegPressure(unsigned RegionIdx) const {
+ if (Regions[RegionIdx].first == Regions[RegionIdx].second)
+ return llvm::getRegPressure(MRI, LiveIns[RegionIdx]);
GCNDownwardRPTracker RPTracker(*LIS);
RPTracker.advance(Regions[RegionIdx].first, Regions[RegionIdx].second,
&LiveIns[RegionIdx]);
@@ -1090,33 +1103,224 @@ bool ClusteredLowOccStage::initGCNSchedStage() {
#define REMAT_PREFIX "[PreRARemat] "
#define REMAT_DEBUG(X) LLVM_DEBUG(dbgs() << REMAT_PREFIX; X;)
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void PreRARematStage::printTargetRegions(bool PrintAll) const {
+ if (PrintAll) {
+ for (auto [I, Target] : enumerate(RPTargets))
+ REMAT_DEBUG(dbgs() << " [" << I << "] " << Target << '\n');
+ return;
+ }
+ if (TargetRegions.none()) {
+ REMAT_DEBUG(dbgs() << "No target regions\n");
+ return;
+ }
+ REMAT_DEBUG(dbgs() << "Target regions:\n");
+ for (unsigned I : TargetRegions.set_bits())
+ REMAT_DEBUG(dbgs() << " [" << I << "] " << RPTargets[I] << '\n');
+}
+
+void PreRARematStage::RematReg::print(
+ const DenseMap<MachineInstr *, unsigned> &MIRegion) const {
+ REMAT_DEBUG(dbgs() << " [" << MIRegion.at(DefMI) << "] " << *DefMI);
+ REMAT_DEBUG(dbgs() << " -> used in [" << UseRegion << "] " << *UseMI);
+ const unsigned NumRegions = Live.size();
+ REMAT_DEBUG(dbgs() << " Guaranteed RP reduction in:");
+ for (unsigned I = 0; I < NumRegions; ++I) {
+ if (isBeneficialRegion(I))
+ dbgs() << " [" << I << "]";
+ }
+ dbgs() << '\n';
+ REMAT_DEBUG(dbgs() << " Possible RP reduction in:");
+ for (unsigned I = 0; I < NumRegions; ++I) {
+ if (isMaybeBeneficialRegion(I))
+ dbgs() << " [" << I << "]";
+ }
+ dbgs() << '\n';
+}
+
+#endif
+
bool PreRARematStage::initGCNSchedStage() {
// FIXME: This pass will invalidate cached BBLiveInMap and MBBLiveIns for
// regions inbetween the defs and region we sinked the def to. Will need to be
// fixed if there is another pass after this pass.
assert(!S.hasNextStage());
- if (!GCNSchedStage::initGCNSchedStage() || DAG.Regions.size() == 1)
+ if (!GCNSchedStage::initGCNSchedStage() || DAG.Regions.size() <= 1)
return false;
// Before performing any IR modification record the parent region of each MI
// and the parent MBB of each region.
const unsigned NumRegions = DAG.Regions.size();
- RegionBB.reserve(NumRegions);
for (unsigned I = 0; I < NumRegions; ++I) {
RegionBoundaries Region = DAG.Regions[I];
for (auto MI = Region.first; MI != Region.second; ++MI)
MIRegion.insert({&*MI, I});
- RegionBB.push_back(Region.first->getParent());
+ MachineBasicBlock *ParentMBB = Region.first->getParent();
+ if (Region.second != ParentMBB->end())
+ MIRegion.insert({&*Region.second, I});
+ RegionBB.push_back(ParentMBB);
+ }
+
+ setObjective();
+ REMAT_DEBUG({
+ dbgs() << "Analyzing ";
+ MF.getFunction().printAsOperand(dbgs(), false);
+ dbgs() << ": ";
+ if (TargetRegions.none()) {
+ dbgs() << "no objective to achieve, occupancy is maximal at "
+ << MFI.getMaxWavesPerEU() << '\n';
+ } else if (TargetOcc) {
+ dbgs() << "increase occupancy from " << *TargetOcc - 1 << '\n';
+ } else {
+ dbgs() << "reduce spilling (minimum target occupancy is "
+ << MFI.getMinWavesPerEU() << ")\n";
+ }
+ printTargetRegions(/*PrintAll=*/TargetRegions.none());
+ });
+
+ // Compute region frequencies. 0 encodes an unknown region frequency.
+ SmallVector<uint64_t> RegionFreq;
+ RegionFreq.reserve(NumRegions);
+ assert(DAG.MLI && "MLI not defined in DAG");
+ MachineBranchProbabilityInfo MBPI;
+ MachineBlockFrequencyInfo MBFI(MF, MBPI, *DAG.MLI);
+ uint64_t EntryFreq = MBFI.getEntryFreq().getFrequency();
+ if (EntryFreq) {
+ for (const MachineBasicBlock *MBB : RegionBB)
+ RegionFreq.push_back(MBFI.getBlockFreq(MBB).getFrequency() / EntryFreq);
+ } else {
+ RegionFreq.insert(RegionFreq.end(), RegionBB.size(), 0);
+ }
+ REMAT_DEBUG({
+ dbgs() << "Region frequencies:\n";
+ for (auto [I, Freq] : enumerate(RegionFreq)) {
+ dbgs() << REMAT_PREFIX << " [" << I << "] ";
+ if (Freq)
+ dbgs() << Freq;
+ else
+ dbgs() << "unknown ";
+ dbgs() << " | " << *DAG.Regions[I].first;
+ }
+ });
+
+ if (!collectRematRegs(RegionFreq)) {
+ REMAT_DEBUG(dbgs() << "No rematerializable registers\n");
+ return false;
+ }
+
+ REMAT_DEBUG({
+ dbgs() << "Rematerializable registers:\n";
+ for (const RematReg &Remat : RematRegs)
+ Remat.print(MIRegion);
+ });
+
+  // Start by rematerializing always-beneficial registers. These should never
+  // be rolled back. All other rematerialization candidates are added to the
+  // list of rematerializations that will be scored.
+ REMAT_DEBUG(dbgs() << "==== ALWAYS BENEFICIAL ====\n");
+ SmallVector<ScoredRemat> ScoredRemats;
+ BitVector RecomputeRP(NumRegions);
+ for (const RematReg &Remat : RematRegs) {
+ if (Remat.isAlwaysBeneficial()) {
+ REMAT_DEBUG(dbgs() << "[" << MIRegion[Remat.DefMI]
+ << "] REMAT (always) | " << *Remat.DefMI);
+ rematerialize(Remat, RecomputeRP);
+ } else {
+ ScoredRemats.emplace_back(&Remat, DAG.ST, *DAG.TII);
+ }
}
+ unsetSatisifedRPTargets(RescheduleRegions);
+
+#ifndef NDEBUG
+ printTargetRegions();
+ unsigned RoundNum = 0;
+#endif
+
+ // Rematerialize registers in successive rounds until all RP targets are
+  // satisfied or until we run out of rematerialization candidates.
+ while ((updateAndVerifyRPTargets(RecomputeRP) || TargetRegions.any()) &&
+ !ScoredRemats.empty()) {
+ // (Re-)Score and (re-)sort all remats in increasing score order.
+ for (ScoredRemat &Remat : ScoredRemats)
+ Remat.update(TargetRegions, RPTargets, RegionFreq, !TargetOcc);
+ stable_sort(ScoredRemats);
+
+ REMAT_DEBUG({
+ dbgs() << "==== ROUND " << RoundNum << " ====\n";
+ for (const ScoredRemat &SRemat : ScoredRemats) {
+ dbgs() << REMAT_PREFIX << "*" << SRemat.getScore() << "* | "
+ << *SRemat.Remat->DefMI;
+ }
+ });
+
+ RecomputeRP.reset();
+ int RematIdx = ScoredRemats.size() - 1;
+
+ // Rematerialize registers in decreasing score order until we estimate that
+ // all RP targets are satisfied or until rematerialization candidates are no
+ // longer useful to decrease RP.
+ for (; RematIdx >= 0 && TargetRegions.any(); --RematIdx) {
+ const RematReg &Remat = *ScoredRemats[RematIdx].Remat;
+ int Score = ScoredRemats[RematIdx].getScore();
+
+      // Stop when scores become non-positive. Since scores monotonically decrease
+ // as remats are performed, we know there is nothing useful left to do in
+ // such cases.
+ if (Score <= 0) {
+ REMAT_DEBUG(dbgs() << "Stop remats on non-positive score | "
+ << *Remat.DefMI);
+ RematIdx = -1;
+ break;
+ }
- if (!canIncreaseOccupancyOrReduceSpill())
+ // When previous rematerializations in this round have already satisfied
+ // RP targets in all regions this rematerialization can impact, we have a
+ // good indication that our scores have diverged significantly from
+ // reality, in which case we interrupt this round and re-score. This also
+ // ensures that every rematerialization we perform is possibly impactful
+ // in at least one target region.
+ if (!Remat.intersectWithTarget(TargetRegions)) {
+ REMAT_DEBUG(dbgs() << "Stop round on stale score | " << *Remat.DefMI);
+ break;
+ }
+
+ REMAT_DEBUG(dbgs() << "[" << MIRegion[Remat.DefMI] << "] REMAT *" << Score
+ << "* | " << *Remat.DefMI);
+ MachineInstr *RematMI = rematerialize(Remat, RecomputeRP);
+ // Every rematerialization done with the objective of increasing occupancy
+ // increases latency. If we don't manage to increase occupancy, we want to
+ // roll them back.
+ if (TargetOcc)
+ Rollbackable.push_back({RematMI, &Remat});
+ unsetSatisifedRPTargets(Remat.Live);
+ }
+
+#ifndef NDEBUG
+ printTargetRegions();
+ ++RoundNum;
+#endif
+
+ // Peel off registers we already rematerialized from the vector's tail.
+ ScoredRemats.truncate(RematIdx + 1);
+ }
+ if (RescheduleRegions.none())
return false;
- // Rematerialize identified instructions and update scheduler's state.
- rematerialize();
- if (GCNTrackers)
- DAG.RegionLiveOuts.buildLiveRegMap();
+ // Commit all pressure changes to the DAG and compute minimum achieved
+ // occupancy in impacted regions.
+ REMAT_DEBUG(dbgs() << "==== REMAT RESULTS ====\n");
+ unsigned DynamicVGPRBlockSize = MFI.getDynamicVGPRBlockSize();
+ AchievedOcc = MFI.getMaxWavesPerEU();
+ for (unsigned I : RescheduleRegions.set_bits()) {
+    const GCNRegPressure &RP = RPTargets[I].getCurrentRP();
+ DAG.Pressure[I] = RP;
+ unsigned NewRegionOcc = RP.getOccupancy(ST, DynamicVGPRBlockSize);
+ AchievedOcc = std::min(AchievedOcc, NewRegionOcc);
+ REMAT_DEBUG(dbgs() << "[" << I << "] Achieved occupancy " << NewRegionOcc
+ << " (" << RPTargets[I] << ")\n");
+ }
+
REMAT_DEBUG({
dbgs() << "Retrying function scheduling with new min. occupancy of "
<< AchievedOcc << " from rematerializing (original was "
@@ -1125,7 +1329,6 @@ bool PreRARematStage::initGCNSchedStage() {
dbgs() << ", target was " << *TargetOcc;
dbgs() << ")\n";
});
-
if (AchievedOcc > DAG.MinOccupancy) {
DAG.MinOccupancy = AchievedOcc;
SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
@@ -1152,6 +1355,10 @@ void UnclusteredHighRPStage::finalizeGCNSchedStage() {
}
bool GCNSchedStage::initGCNRegion() {
+ // Skip empty scheduling region.
+ if (DAG.begin() == DAG.end())
+ return false;
+
// Check whether this new region is also a new block.
if (DAG.RegionBegin->getParent() != CurrentMBB)
setupNewBlock();
@@ -1159,8 +1366,8 @@ bool GCNSchedStage::initGCNRegion() {
unsigned NumRegionInstrs = std::distance(DAG.begin(), DAG.end());
DAG.enterRegion(CurrentMBB, DAG.begin(), DAG.end(), NumRegionInstrs);
- // Skip empty scheduling regions (0 or 1 schedulable instructions).
- if (DAG.begin() == DAG.end() || DAG.begin() == std::prev(DAG.end()))
+ // Skip regions with 1 schedulable instruction.
+ if (DAG.begin() == std::prev(DAG.end()))
return false;
LLVM_DEBUG(dbgs() << "********** MI Scheduling **********\n");
@@ -1634,27 +1841,20 @@ void GCNSchedStage::revertScheduling() {
DAG.Regions[RegionIdx] = std::pair(DAG.RegionBegin, DAG.RegionEnd);
}
-bool PreRARematStage::canIncreaseOccupancyOrReduceSpill() {
+void PreRARematStage::setObjective() {
const Function &F = MF.getFunction();
- // Maps optimizable regions (i.e., regions at minimum and register-limited
- // occupancy, or regions with spilling) to the target RP we would like to
- // reach.
- DenseMap<unsigned, GCNRPTarget> OptRegions;
+ // Set up "spilling targets" for all regions.
unsigned MaxSGPRs = ST.getMaxNumSGPRs(F);
unsigned MaxVGPRs = ST.getMaxNumVGPRs(F);
- auto ResetTargetRegions = [&]() {
- OptRegions.clear();
- for (unsigned I = 0, E = DAG.Regions.size(); I != E; ++I) {
- const GCNRegPressure &RP = DAG.Pressure[I];
- GCNRPTarget Target(MaxSGPRs, MaxVGPRs, MF, RP);
- if (!Target.satisfied())
- OptRegions.insert({I, Target});
- }
- };
+ for (unsigned I = 0, E = DAG.Regions.size(); I != E; ++I) {
+ const GCNRegPressure &RP = DAG.Pressure[I];
+ GCNRPTarget &Target = RPTargets.emplace_back(MaxSGPRs, MaxVGPRs, MF, RP);
+ if (!Target.satisfied())
+ TargetRegions.set(I);
+ }
- ResetTargetRegions();
- if (!OptRegions.empty() || DAG.MinOccupancy >= MFI.getMaxWavesPerEU()) {
+ if (TargetRegions.any() || DAG.MinOccupancy >= MFI.getMaxWavesPerEU()) {
// In addition to register usage being above addressable limits, occupancy
// below the minimum is considered like "spilling" as well.
TargetOcc = std::nullopt;
@@ -1662,59 +1862,27 @@ bool PreRARematStage::canIncreaseOccupancyOrReduceSpill() {
// There is no spilling and room to improve occupancy; set up "increased
// occupancy targets" for all regions.
TargetOcc = DAG.MinOccupancy + 1;
- unsigned VGPRBlockSize =
- MF.getInfo<SIMachineFunctionInfo>()->getDynamicVGPRBlockSize();
+ const unsigned VGPRBlockSize = MFI.getDynamicVGPRBlockSize();
MaxSGPRs = ST.getMaxNumSGPRs(*TargetOcc, false);
MaxVGPRs = ST.getMaxNumVGPRs(*TargetOcc, VGPRBlockSize);
- ResetTargetRegions();
- }
- REMAT_DEBUG({
- dbgs() << "Analyzing ";
- MF.getFunction().printAsOperand(dbgs(), false);
- dbgs() << ": ";
- if (OptRegions.empty()) {
- dbgs() << "no objective to achieve, occupancy is maximal at "
- << MFI.getMaxWavesPerEU();
- } else if (!TargetOcc) {
- dbgs() << "reduce spilling (minimum target occupancy is "
- << MFI.getMinWavesPerEU() << ')';
- } else {
- dbgs() << "increase occupancy from " << DAG.MinOccupancy << " to "
- << TargetOcc;
- }
- dbgs() << '\n';
- for (unsigned I = 0, E = DAG.Regions.size(); I != E; ++I) {
- if (auto OptIt = OptRegions.find(I); OptIt != OptRegions.end()) {
- dbgs() << REMAT_PREFIX << " [" << I << "] " << OptIt->getSecond()
- << '\n';
- }
+ for (auto [I, Target] : enumerate(RPTargets)) {
+ Target.setTarget(MaxSGPRs, MaxVGPRs);
+ if (!Target.satisfied())
+ TargetRegions.set(I);
}
- });
- if (OptRegions.empty())
- return false;
+ }
+}
- // Accounts for a reduction in RP in an optimizable region. Returns whether we
- // estimate that we have identified enough rematerialization opportunities to
- // achieve our goal, and sets Progress to true when this particular reduction
- // in pressure was helpful toward that goal.
- auto ReduceRPInRegion = [&](auto OptIt, Register Reg, LaneBitmask Mask,
- bool &Progress) -> bool {
- GCNRPTarget &Target = OptIt->getSecond();
- if (!Target.isSaveBeneficial(Reg))
- return false;
- Progress = true;
- Target.saveReg(Reg, Mask, DAG.MRI);
- if (Target.satisfied())
- OptRegions.erase(OptIt->getFirst());
- return OptRegions.empty();
- };
+bool PreRARematStage::collectRematRegs(ArrayRef<uint64_t> RegionFreq) {
+ assert(RegionFreq.size() == DAG.Regions.size());
// We need up-to-date live-out info. to query live-out register masks in
// regions containing rematerializable instructions.
DAG.RegionLiveOuts.buildLiveRegMap();
- // Cache set of registers that are going to be rematerialized.
- DenseSet<unsigned> RematRegs;
+  // Set of registers already marked for potential rematerialization; used for
+  // remat chain checks.
+ DenseSet<Register> RematRegSet;
// Identify rematerializable instructions in the function.
for (unsigned I = 0, E = DAG.Regions.size(); I != E; ++I) {
@@ -1725,30 +1893,34 @@ bool PreRARematStage::canIncreaseOccupancyOrReduceSpill() {
if (!isReMaterializable(DefMI))
continue;
- // We only support rematerializing virtual registers with one definition.
+ // We only support rematerializing virtual registers with one
+ // definition.
Register Reg = DefMI.getOperand(0).getReg();
if (!Reg.isVirtual() || !DAG.MRI.hasOneDef(Reg))
continue;
// We only care to rematerialize the instruction if it has a single
- // non-debug user in a different region. The using MI may not belong to a
- // region if it is a lone region terminator.
+ // non-debug user in a different region.
+ // FIXME: Allow rematerializations with multiple uses. This should be
+ // relatively easy to support using the current cost model.
MachineInstr *UseMI = DAG.MRI.getOneNonDBGUser(Reg);
if (!UseMI)
continue;
auto UseRegion = MIRegion.find(UseMI);
- if (UseRegion != MIRegion.end() && UseRegion->second == I)
+ if (UseRegion == MIRegion.end() || UseRegion->second == I)
continue;
// Do not rematerialize an instruction if it uses or is used by an
// instruction that we have designated for rematerialization.
// FIXME: Allow for rematerialization chains: this requires 1. updating
- // remat points to account for uses that are rematerialized, and 2. either
- // rematerializing the candidates in careful ordering, or deferring the
- // MBB RP walk until the entire chain has been rematerialized.
- if (Rematerializations.contains(UseMI) ||
- llvm::any_of(DefMI.operands(), [&RematRegs](MachineOperand &MO) {
- return MO.isReg() && RematRegs.contains(MO.getReg());
+ // remat points to account for uses that are rematerialized, and 2.
+ // either rematerializing the candidates in careful ordering, or
+ // deferring the MBB RP walk until the entire chain has been
+ // rematerialized.
+ MachineOperand &UseFirstMO = UseMI->getOperand(0);
+ if ((UseFirstMO.isReg() && RematRegSet.contains(UseFirstMO.getReg())) ||
+ llvm::any_of(DefMI.operands(), [&RematRegSet](MachineOperand &MO) {
+ return MO.isReg() && RematRegSet.contains(MO.getReg());
}))
continue;
@@ -1760,106 +1932,146 @@ bool PreRARematStage::canIncreaseOccupancyOrReduceSpill() {
*DAG.TII))
continue;
- REMAT_DEBUG(dbgs() << "Region " << I << ": remat instruction " << DefMI);
- RematInstruction &Remat =
- Rematerializations.try_emplace(&DefMI, UseMI).first->second;
-
- bool RematUseful = false;
- if (auto It = OptRegions.find(I); It != OptRegions.end()) {
- // Optimistically consider that moving the instruction out of its
- // defining region will reduce RP in the latter; this assumes that
- // maximum RP in the region is reached somewhere between the defining
- // instruction and the end of the region.
- REMAT_DEBUG(dbgs() << " Defining region is optimizable\n");
- LaneBitmask Mask = DAG.RegionLiveOuts.getLiveRegsForRegionIdx(I)[Reg];
- if (ReduceRPInRegion(It, Reg, Mask, RematUseful))
- return true;
- }
-
- for (unsigned LIRegion = 0; LIRegion != E; ++LIRegion) {
- // We are only collecting regions in which the register is a live-in
- // (and may be live-through).
- auto It = DAG.LiveIns[LIRegion].find(Reg);
- if (It == DAG.LiveIns[LIRegion].end() || It->second.none())
- continue;
- Remat.LiveInRegions.insert(LIRegion);
-
- // Account for the reduction in RP due to the rematerialization in an
- // optimizable region in which the defined register is a live-in. This
- // is exact for live-through region but optimistic in the using region,
- // where RP is actually reduced only if maximum RP is reached somewhere
- // between the beginning of the region and the rematerializable
- // instruction's use.
- if (auto It = OptRegions.find(LIRegion); It != OptRegions.end()) {
- REMAT_DEBUG(dbgs() << " Live-in in region " << LIRegion << '\n');
- if (ReduceRPInRegion(It, Reg, DAG.LiveIns[LIRegion][Reg],
- RematUseful))
- return true;
- }
- }
-
- // If the instruction is not a live-in or live-out in any optimizable
- // region then there is no point in rematerializing it.
- if (!RematUseful) {
- Rematerializations.pop_back();
- REMAT_DEBUG(dbgs() << " No impact, not rematerializing instruction\n");
- } else {
- RematRegs.insert(Reg);
- }
+ // Add the instruction to the rematerializable list.
+ RematRegSet.insert(Reg);
+ RematRegs.emplace_back(&DefMI, UseMI, DAG, MIRegion, RegionFreq);
}
}
- if (TargetOcc) {
- // We were trying to increase occupancy but failed, abort the stage.
- REMAT_DEBUG(dbgs() << "Cannot increase occupancy\n");
- Rematerializations.clear();
- return false;
- }
- REMAT_DEBUG(dbgs() << "Can reduce but not eliminate spilling\n");
- return !Rematerializations.empty();
+ return !RematRegs.empty();
}
-void PreRARematStage::rematerialize() {
- const SIInstrInfo *TII = MF.getSubtarget<GCNSubtarget>().getInstrInfo();
+PreRARematStage::RematReg::RematReg(
+ MachineInstr *DefMI, MachineInstr *UseMI, GCNScheduleDAGMILive &DAG,
+ const DenseMap<MachineInstr *, unsigned> &MIRegion,
+ ArrayRef<uint64_t> RegionFreq)
+ : DefMI(DefMI), UseMI(UseMI), UseRegion(MIRegion.at(UseMI)),
+ LiveIn(DAG.Regions.size()), LiveOut(DAG.Regions.size()),
+ Live(DAG.Regions.size()), DefFrequency(RegionFreq[MIRegion.at(DefMI)]),
+ UseFrequency(RegionFreq[MIRegion.at(UseMI)]) {
- // Collect regions whose RP changes in unpredictable way; we will have to
- // fully recompute their RP after all rematerailizations.
- DenseSet<unsigned> RecomputeRP;
-
- // Rematerialize all instructions.
- for (auto &[DefMI, Remat] : Rematerializations) {
- MachineBasicBlock::iterator InsertPos(Remat.UseMI);
- Register Reg = DefMI->getOperand(0).getReg();
- unsigned DefRegion = MIRegion.at(DefMI);
-
- // Rematerialize DefMI to its use block.
- TII->reMaterialize(*InsertPos->getParent(), InsertPos, Reg,
- AMDGPU::NoSubRegister, *DefMI, *DAG.TRI);
- Remat.RematMI = &*std::prev(InsertPos);
- DAG.LIS->InsertMachineInstrInMaps(*Remat.RematMI);
-
- // Update region boundaries in regions we sinked from (remove defining MI)
- // and to (insert MI rematerialized in use block). Only then we can erase
- // the original MI.
- DAG.updateRegionBoundaries(DAG.Regions[DefRegion], DefMI, nullptr);
- auto UseRegion = MIRegion.find(Remat.UseMI);
- if (UseRegion != MIRegion.end()) {
- DAG.updateRegionBoundaries(DAG.Regions[UseRegion->second], InsertPos,
- Remat.RematMI);
+ // Mark regions in which the rematerializable register is live.
+ Register Reg = DefMI->getOperand(0).getReg();
+ for (unsigned I = 0, E = DAG.Regions.size(); I != E; ++I) {
+ auto LiveInIt = DAG.LiveIns[I].find(Reg);
+ if (LiveInIt != DAG.LiveIns[I].end() && LiveInIt->second.any())
+ LiveIn.set(I);
+ auto LiveOutIt = DAG.RegionLiveOuts.getLiveRegsForRegionIdx(I).find(Reg);
+ auto LiveOutEnd = DAG.RegionLiveOuts.getLiveRegsForRegionIdx(I).end();
+ if (LiveOutIt != LiveOutEnd && LiveOutIt->second.any())
+ LiveOut.set(I);
+ }
+ Live |= LiveIn;
+ Live |= LiveOut;
+
+ // Store the register's lane bitmask.
+ unsigned SubReg = DefMI->getOperand(0).getSubReg();
+ Mask = SubReg ? DAG.TRI->getSubRegIndexLaneMask(SubReg)
+ : DAG.MRI.getMaxLaneMaskForVReg(Reg);
+}
+
+MachineInstr *
+PreRARematStage::RematReg::insertMI(unsigned RegionIdx,
+ MachineBasicBlock::iterator InsertPos,
+ GCNScheduleDAGMILive &DAG) const {
+ MachineInstr *NewMI = &*std::prev(InsertPos);
+ DAG.updateRegionBoundaries(DAG.Regions[RegionIdx], InsertPos, NewMI);
+ DAG.LIS->InsertMachineInstrInMaps(*NewMI);
+ DAG.LIS->createAndComputeVirtRegInterval(NewMI->getOperand(0).getReg());
+ return NewMI;
+}
+
+PreRARematStage::ScoredRemat::ScoredRemat(const RematReg *Remat,
+ const GCNSubtarget &ST,
+ const TargetInstrInfo &TII)
+ : Remat(Remat) {
+ const InstrItineraryData *Itin = ST.getInstrItineraryData();
+ if (Remat->DefFrequency && Remat->UseFrequency) {
+ InstrLatencyGain = Remat->DefFrequency - Remat->UseFrequency;
+ *InstrLatencyGain *= TII.getInstrLatency(Itin, *Remat->DefMI);
+ }
+ resetScore();
+}
+
+void PreRARematStage::ScoredRemat::update(const BitVector &TargetRegions,
+ ArrayRef<GCNRPTarget> RPTargets,
+ ArrayRef<uint64_t> RegionFreq,
+ bool ReduceSpill) {
+  // Exit early if no target region intersects with the register's live
+ // regions.
+ if (!Remat->intersectWithTarget(TargetRegions))
+ return setUselessRemat();
+ resetScore();
+
+ // When the stage is trying to reduce spilling, we want to pick
+ // rematerialization candidates that will be beneficial to latency. When it is
+ // trying to increase occupancy, we are fine increasing latency to try to
+ // reduce RP.
+ // FIXME: In the increasing occupancy case, we should be able to incorporate
+ // the latency loss induced by rematerializations into the final score. It
+ // seems possible to very roughly estimate the overall kernel latency upside
+ // we get by increasing occupancy and compare it to the latency hit each wave
+ // will be subjected to.
+ if (ReduceSpill) {
+ // It may be better to let the register spill if it is defined by a very
+ // high latency instruction. Try to estimate the latency gain induced by
+ // rematerializing the register.
+ //
+    // If we don't know the rematerialization's latency gain, we don't know
+ // what to compare the spill latency against. We still consider the
+ // rematerialization potentially beneficial in such cases because we don't
+ // want to miss rematerialization opportunities and rematerializing is in
+ // most cases cheaper than spilling. We still give a bonus to remats for
+ // which we are able to do the calculation.
+ if (InstrLatencyGain && *InstrLatencyGain < 0) {
+ int SpillLatencyGain = SaveCost * Remat->DefFrequency;
+ SpillLatencyGain += RestoreCost * Remat->UseFrequency;
+ if (*InstrLatencyGain + SpillLatencyGain < 0)
+ return setUselessRemat();
+ setKnownLatencyGain();
}
- DAG.LIS->RemoveMachineInstrFromMaps(*DefMI);
- DefMI->eraseFromParent();
+ }
+
+ // The estimated RP reduction is proportional to the total frequency in target
+ // regions where the register is live.
+ Register Reg = Remat->DefMI->getOperand(0).getReg();
+ unsigned RPScore = 0;
+ for (unsigned I : TargetRegions.set_bits()) {
+ unsigned Freq = std::max(RegionFreq[I], static_cast<uint64_t>(1));
+    if (Remat->isBeneficialRegion(I))
+      RPScore += WeightRP * RPTargets[I].isSaveBeneficial(Reg) * Freq;
+    else if (Remat->isMaybeBeneficialRegion(I))
+      RPScore += WeightRPMaybe * RPTargets[I].isSaveBeneficial(Reg) * Freq;
+ }
- // Collect all regions impacted by the rematerialization and update their
- // live-in/RP information.
- for (unsigned I : Remat.LiveInRegions) {
- ImpactedRegions.insert({I, DAG.Pressure[I]});
- GCNRPTracker::LiveRegSet &RegionLiveIns = DAG.LiveIns[I];
+ // The estimated RP reduction is directly proportional to the size of the
+ // rematerializable register.
+ setRPScore(RPScore * SIRegisterInfo::getNumCoveredRegs(Remat->Mask));
+}
+MachineInstr *PreRARematStage::rematerialize(const RematReg &Remat,
+ BitVector &RecomputeRP) {
+ const SIInstrInfo *TII = MF.getSubtarget<GCNSubtarget>().getInstrInfo();
+ MachineInstr &DefMI = *Remat.DefMI;
+ Register Reg = DefMI.getOperand(0).getReg();
+ const TargetRegisterClass *RC = DAG.MRI.getRegClass(Reg);
+ Register NewReg = DAG.MRI.createVirtualRegister(RC);
+
+ // Rematerialize the register in the region where it is used.
+ MachineBasicBlock::iterator InsertPos = Remat.UseMI;
+ TII->reMaterialize(*InsertPos->getParent(), InsertPos, NewReg, 0, DefMI,
+ *DAG.TRI);
+ Remat.UseMI->substituteRegister(Reg, NewReg, 0, *DAG.TRI);
+ MachineInstr *NewMI = Remat.insertMI(Remat.UseRegion, InsertPos, DAG);
+
+ // Remove the register from all regions where it is a live-in or live-out
+ // and adjust RP targets.
+ for (unsigned I : Remat.Live.set_bits()) {
#ifdef EXPENSIVE_CHECKS
- // All uses are known to be available / live at the remat point. Thus, the
- // uses should already be live in to the region.
- for (MachineOperand &MO : DefMI->operands()) {
+ if (!Remat.LiveIn[I] && Remat.LiveOut[I]) {
+ // All uses are known to be available / live at the remat point. Thus,
+ // the uses should already be live in to the region.
+ for (MachineOperand &MO : DefMI.operands()) {
if (!MO.isReg() || !MO.getReg() || !MO.readsReg())
continue;
@@ -1872,7 +2084,7 @@ void PreRARematStage::rematerialize() {
if (LI.hasSubRanges() && MO.getSubReg())
LM = DAG.TRI->getSubRegIndexLaneMask(MO.getSubReg());
- LaneBitmask LiveInMask = RegionLiveIns.at(UseReg);
+ LaneBitmask LiveInMask = DAG.LiveIns[I].at(UseReg);
LaneBitmask UncoveredLanes = LM & ~(LiveInMask & LM);
// If this register has lanes not covered by the LiveIns, be sure they
// do not map to any subrange. ref:
@@ -1883,65 +2095,80 @@ void PreRARematStage::rematerialize() {
assert((SR.LaneMask & UncoveredLanes).none());
}
}
+ }
#endif
- // The register is no longer a live-in in all regions but the one that
- // contains the single use. In live-through regions, maximum register
- // pressure decreases predictably so we can directly update it. In the
- // using region, maximum RP may or may not decrease, so we will mark it
- // for re-computation after all materializations have taken place.
- LaneBitmask PrevMask = RegionLiveIns[Reg];
- RegionLiveIns.erase(Reg);
- RegMasks.insert({{I, Remat.RematMI->getOperand(0).getReg()}, PrevMask});
- if (Remat.UseMI->getParent() != DAG.Regions[I].first->getParent())
- DAG.Pressure[I].inc(Reg, PrevMask, LaneBitmask::getNone(), DAG.MRI);
- else
- RecomputeRP.insert(I);
+ // This save is exact in beneficial regions but optimistic in all other
+ // regions where the register is live.
+ RPTargets[I].saveReg(Reg, Remat.Mask, DAG.MRI);
+ DAG.LiveIns[I].erase(Reg);
+ DAG.RegionLiveOuts.getLiveRegsForRegionIdx(I).erase(Reg);
+ if (!Remat.isBeneficialRegion(I))
+ RecomputeRP.set(I);
+ }
+
+ DAG.deleteMI(MIRegion.at(&DefMI), &DefMI);
+ RescheduleRegions |= Remat.Live;
+ return NewMI;
+}
+
+void PreRARematStage::rollback(const RollbackReg &Rollback) const {
+ const SIInstrInfo *TII = MF.getSubtarget<GCNSubtarget>().getInstrInfo();
+ auto &[RematMI, Remat] = Rollback;
+
+  // Recreate the original MI from the first rematerialization. Any
+  // rematerialization would do; this is just a simple way to do it.
+ unsigned DefRegion = MIRegion.at(Remat->DefMI);
+ MachineBasicBlock *MBB = RegionBB[DefRegion];
+ Register Reg = RematMI->getOperand(0).getReg();
+ const TargetRegisterClass *RC = DAG.MRI.getRegClass(Reg);
+ Register NewReg = DAG.MRI.createVirtualRegister(RC);
+
+ // Re-rematerialize MI in its original region. Note that it may not be
+ // rematerialized exactly in the same position as originally within the
+ // region, but it should not matter much.
+ MachineBasicBlock::iterator InsertPos(DAG.Regions[DefRegion].second);
+ TII->reMaterialize(*MBB, InsertPos, NewReg, 0, *RematMI, *DAG.TRI);
+ REMAT_DEBUG(dbgs() << "[" << DefRegion << "] Re-rematerialized as "
+ << *std::prev(InsertPos));
+ Remat->UseMI->substituteRegister(Reg, NewReg, 0, *DAG.TRI);
+ DAG.deleteMI(Remat->UseRegion, RematMI);
+ Remat->insertMI(DefRegion, InsertPos, DAG);
+
+ // Re-add the register as a live-in/live-out in all regions it used to be
+ // one in.
+ std::pair<Register, LaneBitmask> LiveReg(NewReg, Remat->Mask);
+ for (unsigned I : Remat->LiveIn.set_bits())
+ DAG.LiveIns[I].insert(LiveReg);
+ for (unsigned I : Remat->LiveOut.set_bits())
+ DAG.RegionLiveOuts.getLiveRegsForRegionIdx(I).insert(LiveReg);
+}
+
+void PreRARematStage::unsetSatisifedRPTargets(const BitVector &Regions) {
+ for (unsigned I : Regions.set_bits()) {
+ if (TargetRegions[I] && RPTargets[I].satisfied()) {
+ REMAT_DEBUG(dbgs() << " [" << I << "] Target reached!\n");
+ TargetRegions.reset(I);
}
- // RP in the region from which the instruction was rematerialized may or may
- // not decrease.
- ImpactedRegions.insert({DefRegion, DAG.Pressure[DefRegion]});
- RecomputeRP.insert(DefRegion);
-
- // Recompute live interval to reflect the register's rematerialization.
- Register RematReg = Remat.RematMI->getOperand(0).getReg();
- DAG.LIS->removeInterval(RematReg);
- DAG.LIS->createAndComputeVirtRegInterval(RematReg);
- }
-
- // All regions impacted by at least one rematerialization must be rescheduled.
- // Maximum pressure must also be recomputed for all regions where it changed
- // non-predictably and checked against the target occupancy.
- unsigned DynamicVGPRBlockSize =
- MF.getInfo<SIMachineFunctionInfo>()->getDynamicVGPRBlockSize();
- AchievedOcc = MFI.getMaxWavesPerEU();
- for (auto &[I, OriginalRP] : ImpactedRegions) {
- bool IsEmptyRegion = DAG.Regions[I].first == DAG.Regions[I].second;
- RescheduleRegions[I] = !IsEmptyRegion;
- if (!RecomputeRP.contains(I))
- continue;
+ }
+}
- GCNRegPressure RP;
- if (IsEmptyRegion) {
- RP = getRegPressure(DAG.MRI, DAG.LiveIns[I]);
- } else {
- GCNDownwardRPTracker RPT(*DAG.LIS);
- auto *NonDbgMI = &*skipDebugInstructionsForward(DAG.Regions[I].first,
- DAG.Regions[I].second);
- if (NonDbgMI == DAG.Regions[I].second) {
- // Region is non-empty but contains only debug instructions.
- RP = getRegPressure(DAG.MRI, DAG.LiveIns[I]);
- } else {
- RPT.reset(*NonDbgMI, &DAG.LiveIns[I]);
- RPT.advance(DAG.Regions[I].second);
- RP = RPT.moveMaxPressure();
- }
+bool PreRARematStage::updateAndVerifyRPTargets(const BitVector &Regions) {
+ bool TooOptimistic = false;
+ for (unsigned I : Regions.set_bits()) {
+ GCNRPTarget &Target = RPTargets[I];
+ Target.setRP(DAG.getRealRegPressure(I));
+
+ // Since we were optimistic in assessing RP decreases in these regions, we
+    // may need to re-mark the region as a target region if RP didn't decrease
+ // as expected.
+ if (!TargetRegions[I] && !Target.satisfied()) {
+ REMAT_DEBUG(dbgs() << " [" << I << "] Incorrect RP estimation\n");
+ TooOptimistic = true;
+ TargetRegions.set(I);
}
- DAG.Pressure[I] = RP;
- AchievedOcc =
- std::min(AchievedOcc, RP.getOccupancy(ST, DynamicVGPRBlockSize));
}
- REMAT_DEBUG(dbgs() << "Achieved occupancy " << AchievedOcc << "\n");
+ return TooOptimistic;
}
// Copied from MachineLICM
@@ -1965,55 +2192,23 @@ bool PreRARematStage::isReMaterializable(const MachineInstr &MI) {
void PreRARematStage::finalizeGCNSchedStage() {
// We consider that reducing spilling is always beneficial so we never
// rollback rematerializations in such cases. It's also possible that
- // rescheduling lowers occupancy over the one achieved just through remats, in
- // which case we do not want to rollback either (the rescheduling was already
- // reverted in PreRARematStage::shouldRevertScheduling in such cases).
+  // rescheduling lowers occupancy below the one achieved just through remats,
+  // in which case we do not want to roll back either (the rescheduling was
+  // already reverted in PreRARematStage::shouldRevertScheduling in such
+  // cases).
unsigned MaxOcc = std::max(AchievedOcc, DAG.MinOccupancy);
if (!TargetOcc || MaxOcc >= *TargetOcc)
return;
- REMAT_DEBUG(dbgs() << "Rolling back all rematerializations\n");
- const SIInstrInfo *TII = MF.getSubtarget<GCNSubtarget>().getInstrInfo();
-
- // Rollback the rematerializations.
- for (const auto &[DefMI, Remat] : Rematerializations) {
- MachineInstr &RematMI = *Remat.RematMI;
- unsigned DefRegion = MIRegion.at(DefMI);
- MachineBasicBlock::iterator InsertPos(DAG.Regions[DefRegion].second);
- MachineBasicBlock *MBB = RegionBB[DefRegion];
- Register Reg = RematMI.getOperand(0).getReg();
-
- // Re-rematerialize MI at the end of its original region. Note that it may
- // not be rematerialized exactly in the same position as originally within
- // the region, but it should not matter much.
- TII->reMaterialize(*MBB, InsertPos, Reg, AMDGPU::NoSubRegister, RematMI,
- *DAG.TRI);
- MachineInstr *NewMI = &*std::prev(InsertPos);
- DAG.LIS->InsertMachineInstrInMaps(*NewMI);
-
- auto UseRegion = MIRegion.find(Remat.UseMI);
- if (UseRegion != MIRegion.end()) {
- DAG.updateRegionBoundaries(DAG.Regions[UseRegion->second], RematMI,
- nullptr);
- }
- DAG.updateRegionBoundaries(DAG.Regions[DefRegion], InsertPos, NewMI);
-
- // Erase rematerialized MI.
- DAG.LIS->RemoveMachineInstrFromMaps(RematMI);
- RematMI.eraseFromParent();
-
- // Recompute live interval for the re-rematerialized register
- DAG.LIS->removeInterval(Reg);
- DAG.LIS->createAndComputeVirtRegInterval(Reg);
-
- // Re-add the register as a live-in in all regions it used to be one in.
- for (unsigned LIRegion : Remat.LiveInRegions)
- DAG.LiveIns[LIRegion].insert({Reg, RegMasks.at({LIRegion, Reg})});
+  // Roll back, then recompute pressure in all affected regions.
+ REMAT_DEBUG(dbgs() << "==== ROLLBACK ====\n");
+ BitVector ImpactedRegions(DAG.Regions.size());
+ for (const RollbackReg &Rollback : Rollbackable) {
+ rollback(Rollback);
+ ImpactedRegions |= Rollback.second->Live;
}
-
- // Reset RP in all impacted regions.
- for (auto &[I, OriginalRP] : ImpactedRegions)
- DAG.Pressure[I] = OriginalRP;
+ for (unsigned I : ImpactedRegions.set_bits())
+ DAG.Pressure[I] = DAG.getRealRegPressure(I);
GCNSchedStage::finalizeGCNSchedStage();
}
@@ -2040,6 +2235,13 @@ void GCNScheduleDAGMILive::updateRegionBoundaries(
RegionBounds.first = NewMI; // Insertion
}
+void GCNScheduleDAGMILive::deleteMI(unsigned RegionIdx, MachineInstr *MI) {
+ updateRegionBoundaries(Regions[RegionIdx], MI, nullptr);
+ LIS->removeInterval(MI->getOperand(0).getReg());
+ LIS->RemoveMachineInstrFromMaps(*MI);
+ MI->eraseFromParent();
+}
+
static bool hasIGLPInstrs(ScheduleDAGInstrs *DAG) {
const SIInstrInfo *SII = static_cast<const SIInstrInfo *>(DAG->TII);
return any_of(*DAG, [SII](MachineBasicBlock::iterator MI) {
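
It is worth spelling out the latency half of the scoring above: rematerializing moves the defining instruction from a region executed DefFrequency times to one executed UseFrequency times, so the cycle delta is (DefFrequency - UseFrequency) * latency(DefMI); when that is negative it is weighed against the estimated cost of spilling instead, SaveCost * DefFrequency + RestoreCost * UseFrequency. A small worked example follows; the frequencies and the 4-cycle instruction latency are assumed for illustration, while SaveCost/RestoreCost = 100 come from the patch.

  // Worked example of the latency check in ScoredRemat::update(). DefFreq,
  // UseFreq, and DefLatency are assumed values for illustration only.
  #include <cstdio>

  int main() {
    const int SaveCost = 100, RestoreCost = 100; // constants from the patch
    int DefFreq = 1;    // defining region runs once per kernel launch (assumed)
    int UseFreq = 10;   // use region sits inside a loop (assumed)
    int DefLatency = 4; // assumed latency of the rematerialized instruction

    // Cycles gained (negative means lost) by recomputing the value at its use.
    int InstrLatencyGain = (DefFreq - UseFreq) * DefLatency; // -36

    // Cycles a spill/reload of the register would cost instead.
    int SpillLatencyGain = SaveCost * DefFreq + RestoreCost * UseFreq; // 1100

    // The candidate is only discarded when rematerializing loses more cycles
    // than the spill it avoids, i.e. when the sum stays negative.
    bool Useless =
        InstrLatencyGain < 0 && InstrLatencyGain + SpillLatencyGain < 0;
    std::printf("gain=%d spill=%d useless=%d\n", InstrLatencyGain,
                SpillLatencyGain, (int)Useless);
    return 0;
  }
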
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
index 8ea42677454e4..8134a768a2543 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
@@ -18,6 +18,8 @@
#include "llvm/ADT/MapVector.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineScheduler.h"
+#include <cstdint>
+#include <limits>
namespace llvm {
@@ -297,6 +299,8 @@ class GCNScheduleDAGMILive final : public ScheduleDAGMILive {
std::unique_ptr<GCNSchedStage> createSchedStage(GCNSchedStageID SchedStageID);
+ void deleteMI(unsigned RegionIdx, MachineInstr *MI);
+
public:
GCNScheduleDAGMILive(MachineSchedContext *C,
std::unique_ptr<MachineSchedStrategy> S);
@@ -432,70 +436,209 @@ class ClusteredLowOccStage : public GCNSchedStage {
};
/// Attempts to reduce function spilling or, if there is no spilling, to
-/// increase function occupancy by one with respect to ArchVGPR usage by sinking
-/// rematerializable instructions to their use. When the stage
-/// estimates reducing spilling or increasing occupancy is possible, as few
-/// instructions as possible are rematerialized to reduce potential negative
+/// increase function occupancy by one with respect to register usage by sinking
+/// rematerializable instructions to their use. When the stage estimates that
+/// reducing spilling or increasing occupancy is possible, it tries to
+/// rematerialize as few registers as possible to reduce potential negative
/// effects on function latency.
class PreRARematStage : public GCNSchedStage {
private:
- /// Useful information about a rematerializable instruction.
- struct RematInstruction {
- /// Single use of the rematerializable instruction's defined register,
- /// located in a different block.
+ /// Groups information about a rematerializable register.
+ struct RematReg {
+ /// Single MI defining the rematerializable register.
+ MachineInstr *DefMI;
+ /// Single user of the rematerializable register.
MachineInstr *UseMI;
- /// Rematerialized version of \p DefMI, set in
- /// PreRARematStage::rematerialize. Used for reverting rematerializations.
- MachineInstr *RematMI;
- /// Set of regions in which the rematerializable instruction's defined
- /// register is a live-in.
- SmallDenseSet<unsigned, 4> LiveInRegions;
-
- RematInstruction(MachineInstr *UseMI) : UseMI(UseMI) {}
+ /// Using region.
+ unsigned UseRegion;
+ /// Regions in which the register is live-in/live-out/live anywhere.
+ BitVector LiveIn, LiveOut, Live;
+ /// The rematerializable register's lane bitmask.
+ LaneBitmask Mask;
+ /// Frequency of region defining/using the register. 0 when unknown.
+ unsigned DefFrequency, UseFrequency;
+
+ RematReg(MachineInstr *DefMI, MachineInstr *UseMI,
+ GCNScheduleDAGMILive &DAG,
+ const DenseMap<MachineInstr *, unsigned> &MIRegion,
+ ArrayRef<uint64_t> RegionFreq);
+
+    /// Returns whether the set of regions in which the register is live
+    /// intersects with the \p Target regions.
+ bool intersectWithTarget(BitVector Target) const {
+ Target &= Live;
+ return Target.any();
+ }
+
+    /// Returns whether it is always beneficial to rematerialize this register.
+ bool isAlwaysBeneficial() const {
+ // When the using region is executed a single time, we know
+ // rematerializing will be beneficial whatever the defining region's
+ // frequency.
+ if (UseFrequency == 1)
+ return true;
+ // When there is uncertainty on the defining or using frequency, we err on
+ // the conservative side and do not consider the rematerialization always
+ // beneficial.
+ if (!DefFrequency || !UseFrequency)
+ return false;
+ return UseFrequency <= DefFrequency;
+ }
+
+ /// Determines whether rematerializing the register is guaranteed to reduce
+ /// pressure in the region.
+ bool isBeneficialRegion(unsigned I) const {
+ assert(I < Live.size() && "region index out of range");
+ return LiveIn[I] && LiveOut[I] && I != UseRegion;
+ }
+
+ /// Determines whether rematerializing the register can but is not
+ /// guaranteed to reduce pressure in the region.
+ bool isMaybeBeneficialRegion(unsigned I) const {
+ assert(I < Live.size() && "region index out of range");
+ return Live[I] && !isBeneficialRegion(I);
+ }
+
+    /// Updates internal structures following an MI rematerialization. Part of
+ /// the stage instead of the DAG because it makes assumptions that are
+ /// specific to the rematerialization process.
+ MachineInstr *insertMI(unsigned RegionIdx,
+ MachineBasicBlock::iterator InsertPos,
+ GCNScheduleDAGMILive &DAG) const;
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ void print(const DenseMap<MachineInstr *, unsigned> &MIRegion) const;
+#endif
+ };
+
+ /// A scored rematerializable register. Higher scores indicate more beneficial
+ /// rematerializations. Non-positive scores indicate the rematerialization is
+ /// not helpful to reduce RP in target regions.
+ struct ScoredRemat {
+ /// The rematerializable register under consideration.
+ const RematReg *Remat;
+
+ /// This only initializes state-independent characteristics of \p Remat, not
+ /// the actual score.
+ ScoredRemat(const RematReg *Remat, const GCNSubtarget &ST,
+ const TargetInstrInfo &TII);
+
+ /// Updates the rematerialization's score w.r.t. the current \p RPTargets.
+    /// \p RegionFreq indicates the frequency of each region.
+ void update(const BitVector &TargetRegions, ArrayRef<GCNRPTarget> RPTargets,
+ ArrayRef<uint64_t> RegionFreq, bool ReduceSpill);
+
+ int getScore() const { return Score; }
+
+ bool operator<(const ScoredRemat &O) const { return Score < O.Score; }
+ bool operator==(const ScoredRemat &O) const { return Score == O.Score; }
+
+ private:
+ /// Estimated save/restore latency costs for spilling a register to stack.
+    /// FIXME: These numbers are very arbitrary. They need a sounder rationale,
+    /// but it is unclear where to derive one from.
+ static constexpr int SaveCost = 100, RestoreCost = 100;
+ /// Per-region contribution weights to RP score depending on whether RP is
+ /// guaranteed or only likely to be reduced in the region. Only their
+    /// relative values w.r.t. one another matter.
+ static constexpr int WeightRP = 10, WeightRPMaybe = 5;
+
+ /// Latency gain induced by rematerializing the instruction. Does not
+ /// include estimated spilling cost of *not* rematerializing (save/restore
+ /// to/from stack).
+ std::optional<int> InstrLatencyGain = std::nullopt;
+
+ using ScoreTy = int32_t;
+ /// Overall rematerialization score. Scoring components are mapped to bit
+ /// ranges in the overall score.
+ ///
+ /// [31:1] : estimated RP reduction score
+ /// [0] : known latency gain
+ ScoreTy Score;
+
+ void resetScore() { Score = 0; }
+
+ void setUselessRemat() { Score = std::numeric_limits<ScoreTy>::min(); }
+
+ void setKnownLatencyGain() { Score |= 1; }
+
+ void setRPScore(unsigned RPScore) {
+ Score |= static_cast<ScoreTy>(RPScore) << 1;
+ }
};
- /// Maps all MIs to their parent region. MI terminators are considered to be
- /// outside the region they delimitate, and as such are not stored in the map.
+ /// Maps all MIs (except lone terminators, which are not part of any region)
+ /// to their parent region. Non-lone terminators are considered part of the
+  /// region they delimit.
DenseMap<MachineInstr *, unsigned> MIRegion;
/// Parent MBB to each region, in region order.
SmallVector<MachineBasicBlock *> RegionBB;
- /// Collects instructions to rematerialize.
- MapVector<MachineInstr *, RematInstruction> Rematerializations;
- /// Collects regions whose live-ins or register pressure will change due to
- /// rematerializations.
- DenseMap<unsigned, GCNRegPressure> ImpactedRegions;
- /// In case we need to rollback rematerializations, save lane masks for all
- /// rematerialized registers in all regions in which they are live-ins.
- DenseMap<std::pair<unsigned, Register>, LaneBitmask> RegMasks;
- /// After successful stage initialization, indicates which regions should be
- /// rescheduled.
- BitVector RescheduleRegions;
- /// The target occupancy the stage is trying to achieve. Empty when the
+
+ /// Register pressure targets for all regions.
+ SmallVector<GCNRPTarget> RPTargets;
+ /// Regions which are above the stage's RP target.
+ BitVector TargetRegions;
+  /// The target occupancy the stage is trying to achieve. Empty when the
/// objective is spilling reduction.
std::optional<unsigned> TargetOcc;
/// Achieved occupancy *only* through rematerializations (pre-rescheduling).
- /// Smaller than or equal to the target occupancy.
+ /// Smaller than or equal to the target occupancy, when it is defined.
unsigned AchievedOcc;
- /// Returns whether remat can reduce spilling or increase function occupancy
- /// by 1 through rematerialization. If it can do one, collects instructions in
- /// PreRARematStage::Rematerializations and sets the target occupancy in
- /// PreRARematStage::TargetOccupancy.
- bool canIncreaseOccupancyOrReduceSpill();
+ /// List of rematerializable registers.
+ SmallVector<RematReg, 16> RematRegs;
+
+ using RollbackReg = std::pair<MachineInstr *, const RematReg *>;
+  /// List of rematerializations to roll back if rematerialization does not end
+  /// up being beneficial. Each element pairs the MI created during
+  /// rematerialization with the original rematerializable register.
+ SmallVector<RollbackReg> Rollbackable;
+
+ /// After successful stage initialization, indicates which regions should be
+ /// rescheduled.
+ BitVector RescheduleRegions;
+
+ /// Determines the stage's objective (increasing occupancy or reducing
+ /// spilling, set in \ref TargetOcc). Defines \ref RPTargets in all regions to
+  /// achieve that objective and marks those that don't achieve it in \ref
+ /// TargetRegions.
+ void setObjective();
+
+ /// Unsets target regions in \p Regions whose RP target has been reached.
+ void unsetSatisifedRPTargets(const BitVector &Regions);
+
+ /// Fully recomputes RP from the DAG in \p Regions. Among those regions, sets
+ /// again all \ref TargetRegions that were optimistically marked as satisfied
+ /// but are actually not, and returns whether there were any such regions.
+ bool updateAndVerifyRPTargets(const BitVector &Regions);
+
+ /// Collects all rematerializable registers and appends them to \ref
+ /// RematRegs. \p RegionFreq contains the frequency of each region, 0
+ /// indicating an unknown frequency. Returns whether any rematerializable
+ /// register was found.
+ bool collectRematRegs(ArrayRef<uint64_t> RegionFreq);
+
+ /// Rematerializes \p Remat. This removes the rematerialized register from
+ /// live-in/out lists in the DAG and updates RP targets in all affected
+ /// regions, which are also marked in \ref RescheduleRegions. Regions in which
+ /// RP savings are not guaranteed are set in \p RecomputeRP. Returns the newly
+ /// created MI.
+ MachineInstr *rematerialize(const RematReg &Remat, BitVector &RecomputeRP);
+
+  /// Rolls back rematerialization \p Rollback.
+ void rollback(const RollbackReg &Rollback) const;
/// Whether the MI is rematerializable
bool isReMaterializable(const MachineInstr &MI);
- /// Rematerializes all instructions in PreRARematStage::Rematerializations
- /// and stores the achieved occupancy after remat in
- /// PreRARematStage::AchievedOcc.
- void rematerialize();
-
/// If remat alone did not increase occupancy to the target one, rollbacks all
/// rematerializations and resets live-ins/RP in all regions impacted by the
/// stage to their pre-stage values.
void finalizeGCNSchedStage() override;
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ void printTargetRegions(bool PrintAll = false) const;
+#endif
public:
bool initGCNSchedStage() override;
@@ -504,7 +647,13 @@ class PreRARematStage : public GCNSchedStage {
bool shouldRevertScheduling(unsigned WavesAfter) override;
PreRARematStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
- : GCNSchedStage(StageID, DAG), RescheduleRegions(DAG.Regions.size()) {}
+ : GCNSchedStage(StageID, DAG), TargetRegions(DAG.Regions.size()),
+ RescheduleRegions(DAG.Regions.size()) {
+ const unsigned NumRegions = DAG.Regions.size();
+ RPTargets.reserve(NumRegions);
+ RegionBB.reserve(NumRegions);
+ MIRegion.reserve(MF.getInstructionCount());
+ }
};
class ILPInitialScheduleStage : public GCNSchedStage {
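
One detail of the Score field above that is easy to miss: the RP-reduction estimate occupies bits [31:1] while the known-latency-gain bonus occupies bit [0], so the latency bit can only break ties between candidates whose RP scores are identical. A self-contained sketch of that packing, with illustrative values:

  // Sketch of the ScoredRemat score packing described in the header above:
  // RP reduction in bits [31:1], known-latency-gain bonus in bit [0]. The
  // values below are illustrative.
  #include <cassert>
  #include <cstdint>

  using ScoreTy = int32_t;

  static ScoreTy makeScore(unsigned RPScore, bool KnownLatencyGain) {
    ScoreTy S = static_cast<ScoreTy>(RPScore) << 1; // bits [31:1]
    if (KnownLatencyGain)
      S |= 1;                                       // bit [0]
    return S;
  }

  int main() {
    // A larger RP reduction always wins, regardless of the latency bit...
    assert(makeScore(/*RPScore=*/3, false) > makeScore(2, true));
    // ...and the latency bonus only decides between equal RP reductions.
    assert(makeScore(2, true) > makeScore(2, false));
    return 0;
  }
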
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.wqm.demote.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.wqm.demote.ll
index 8a53c862371cf..5e079c94a6381 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.wqm.demote.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.wqm.demote.ll
@@ -885,7 +885,6 @@ define amdgpu_ps void @wqm_deriv_loop(<2 x float> %input, float %arg, i32 %index
; SI-NEXT: s_mov_b64 s[0:1], exec
; SI-NEXT: s_wqm_b64 exec, exec
; SI-NEXT: v_cvt_i32_f32_e32 v0, v0
-; SI-NEXT: s_mov_b32 s4, 0
; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
; SI-NEXT: s_and_saveexec_b64 s[2:3], vcc
; SI-NEXT: s_xor_b64 s[2:3], exec, s[2:3]
@@ -894,10 +893,11 @@ define amdgpu_ps void @wqm_deriv_loop(<2 x float> %input, float %arg, i32 %index
; SI-NEXT: s_andn2_b64 s[0:1], s[0:1], exec
; SI-NEXT: s_cbranch_scc0 .LBB7_9
; SI-NEXT: ; %bb.2: ; %.demote0
-; SI-NEXT: s_wqm_b64 s[6:7], s[0:1]
-; SI-NEXT: s_and_b64 exec, exec, s[6:7]
+; SI-NEXT: s_wqm_b64 s[4:5], s[0:1]
+; SI-NEXT: s_and_b64 exec, exec, s[4:5]
; SI-NEXT: .LBB7_3: ; %.continue0.preheader
; SI-NEXT: s_or_b64 exec, exec, s[2:3]
+; SI-NEXT: s_mov_b32 s4, 0
; SI-NEXT: s_mov_b64 s[2:3], 0
; SI-NEXT: v_mov_b32_e32 v0, s4
; SI-NEXT: s_branch .LBB7_5
@@ -951,7 +951,6 @@ define amdgpu_ps void @wqm_deriv_loop(<2 x float> %input, float %arg, i32 %index
; GFX9-NEXT: s_mov_b64 s[0:1], exec
; GFX9-NEXT: s_wqm_b64 exec, exec
; GFX9-NEXT: v_cvt_i32_f32_e32 v0, v0
-; GFX9-NEXT: s_mov_b32 s4, 0
; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
; GFX9-NEXT: s_and_saveexec_b64 s[2:3], vcc
; GFX9-NEXT: s_xor_b64 s[2:3], exec, s[2:3]
@@ -960,10 +959,11 @@ define amdgpu_ps void @wqm_deriv_loop(<2 x float> %input, float %arg, i32 %index
; GFX9-NEXT: s_andn2_b64 s[0:1], s[0:1], exec
; GFX9-NEXT: s_cbranch_scc0 .LBB7_9
; GFX9-NEXT: ; %bb.2: ; %.demote0
-; GFX9-NEXT: s_wqm_b64 s[6:7], s[0:1]
-; GFX9-NEXT: s_and_b64 exec, exec, s[6:7]
+; GFX9-NEXT: s_wqm_b64 s[4:5], s[0:1]
+; GFX9-NEXT: s_and_b64 exec, exec, s[4:5]
; GFX9-NEXT: .LBB7_3: ; %.continue0.preheader
; GFX9-NEXT: s_or_b64 exec, exec, s[2:3]
+; GFX9-NEXT: s_mov_b32 s4, 0
; GFX9-NEXT: s_mov_b64 s[2:3], 0
; GFX9-NEXT: v_mov_b32_e32 v0, s4
; GFX9-NEXT: s_branch .LBB7_5
@@ -1080,7 +1080,6 @@ define amdgpu_ps void @wqm_deriv_loop(<2 x float> %input, float %arg, i32 %index
; GFX10-64-NEXT: s_mov_b64 s[0:1], exec
; GFX10-64-NEXT: s_wqm_b64 exec, exec
; GFX10-64-NEXT: v_cvt_i32_f32_e32 v0, v0
-; GFX10-64-NEXT: s_mov_b32 s4, 0
; GFX10-64-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
; GFX10-64-NEXT: s_and_saveexec_b64 s[2:3], vcc
; GFX10-64-NEXT: s_xor_b64 s[2:3], exec, s[2:3]
@@ -1089,11 +1088,12 @@ define amdgpu_ps void @wqm_deriv_loop(<2 x float> %input, float %arg, i32 %index
; GFX10-64-NEXT: s_andn2_b64 s[0:1], s[0:1], exec
; GFX10-64-NEXT: s_cbranch_scc0 .LBB7_9
; GFX10-64-NEXT: ; %bb.2: ; %.demote0
-; GFX10-64-NEXT: s_wqm_b64 s[6:7], s[0:1]
-; GFX10-64-NEXT: s_and_b64 exec, exec, s[6:7]
+; GFX10-64-NEXT: s_wqm_b64 s[4:5], s[0:1]
+; GFX10-64-NEXT: s_and_b64 exec, exec, s[4:5]
; GFX10-64-NEXT: .LBB7_3: ; %.continue0.preheader
; GFX10-64-NEXT: s_or_b64 exec, exec, s[2:3]
-; GFX10-64-NEXT: v_mov_b32_e32 v0, s4
+; GFX10-64-NEXT: s_mov_b32 s2, 0
+; GFX10-64-NEXT: v_mov_b32_e32 v0, s2
; GFX10-64-NEXT: s_mov_b64 s[2:3], 0
; GFX10-64-NEXT: s_branch .LBB7_5
; GFX10-64-NEXT: .LBB7_4: ; %.continue1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/vni8-across-blocks.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/vni8-across-blocks.ll
index b33b8a7d8cd72..f1942130fc972 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/vni8-across-blocks.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/vni8-across-blocks.ll
@@ -6,34 +6,34 @@ define amdgpu_kernel void @v3i8_liveout(ptr addrspace(1) %src1, ptr addrspace(1)
; GFX906: ; %bb.0: ; %entry
; GFX906-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX906-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
-; GFX906-NEXT: v_lshlrev_b32_e32 v2, 2, v0
+; GFX906-NEXT: v_lshlrev_b32_e32 v1, 2, v0
; GFX906-NEXT: v_mov_b32_e32 v3, 8
-; GFX906-NEXT: v_mov_b32_e32 v5, 16
+; GFX906-NEXT: v_mov_b32_e32 v4, 16
; GFX906-NEXT: s_waitcnt lgkmcnt(0)
-; GFX906-NEXT: global_load_dword v4, v2, s[0:1]
-; GFX906-NEXT: v_mov_b32_e32 v1, 0xff
+; GFX906-NEXT: global_load_dword v2, v1, s[0:1]
; GFX906-NEXT: v_cmp_gt_u32_e32 vcc, 15, v0
; GFX906-NEXT: s_waitcnt vmcnt(0)
-; GFX906-NEXT: v_and_b32_e32 v6, 0xff, v4
-; GFX906-NEXT: v_lshlrev_b32_sdwa v7, v3, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
-; GFX906-NEXT: v_lshlrev_b32_sdwa v4, v5, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
-; GFX906-NEXT: v_or3_b32 v4, v6, v7, v4
+; GFX906-NEXT: v_and_b32_e32 v5, 0xff, v2
+; GFX906-NEXT: v_lshlrev_b32_sdwa v6, v3, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
+; GFX906-NEXT: v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
+; GFX906-NEXT: v_or3_b32 v2, v5, v6, v2
; GFX906-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX906-NEXT: s_cbranch_execz .LBB0_2
; GFX906-NEXT: ; %bb.1: ; %bb.1
-; GFX906-NEXT: global_load_dword v0, v2, s[2:3]
+; GFX906-NEXT: global_load_dword v0, v1, s[2:3]
; GFX906-NEXT: s_waitcnt vmcnt(0)
-; GFX906-NEXT: v_and_b32_e32 v2, 0xff, v0
-; GFX906-NEXT: v_lshlrev_b32_sdwa v3, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
-; GFX906-NEXT: v_lshlrev_b32_sdwa v0, v5, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
-; GFX906-NEXT: v_or3_b32 v4, v2, v3, v0
+; GFX906-NEXT: v_and_b32_e32 v1, 0xff, v0
+; GFX906-NEXT: v_lshlrev_b32_sdwa v2, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
+; GFX906-NEXT: v_lshlrev_b32_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
+; GFX906-NEXT: v_or3_b32 v2, v1, v2, v0
; GFX906-NEXT: .LBB0_2: ; %bb.2
; GFX906-NEXT: s_or_b64 exec, exec, s[0:1]
-; GFX906-NEXT: v_lshrrev_b32_e32 v0, 8, v4
+; GFX906-NEXT: v_lshrrev_b32_e32 v0, 8, v2
; GFX906-NEXT: v_and_b32_e32 v0, 0xff, v0
; GFX906-NEXT: v_lshlrev_b16_e32 v0, 8, v0
-; GFX906-NEXT: v_or_b32_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
-; GFX906-NEXT: v_and_b32_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX906-NEXT: v_mov_b32_e32 v1, 0xff
+; GFX906-NEXT: v_or_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+; GFX906-NEXT: v_and_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX906-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX906-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX906-NEXT: v_lshl_or_b32 v0, v1, 16, v0
diff --git a/llvm/test/CodeGen/AMDGPU/buffer-fat-pointers-memcpy.ll b/llvm/test/CodeGen/AMDGPU/buffer-fat-pointers-memcpy.ll
index afd0f01580538..6780ca55423e9 100644
--- a/llvm/test/CodeGen/AMDGPU/buffer-fat-pointers-memcpy.ll
+++ b/llvm/test/CodeGen/AMDGPU/buffer-fat-pointers-memcpy.ll
@@ -436,11 +436,11 @@ define amdgpu_kernel void @memcpy_known(ptr addrspace(7) %src, ptr addrspace(7)
; GISEL-GFX942-NEXT: s_mov_b32 s2, s7
; GISEL-GFX942-NEXT: s_waitcnt lgkmcnt(0)
; GISEL-GFX942-NEXT: s_or_b64 s[6:7], s[6:7], s[2:3]
-; GISEL-GFX942-NEXT: v_mov_b32_e32 v0, 0x2000
-; GISEL-GFX942-NEXT: v_mov_b32_e32 v1, s16
+; GISEL-GFX942-NEXT: v_mov_b32_e32 v0, s16
+; GISEL-GFX942-NEXT: v_mov_b32_e32 v1, 0x2000
; GISEL-GFX942-NEXT: .LBB0_1: ; %load-store-loop
; GISEL-GFX942-NEXT: ; =>This Inner Loop Header: Depth=1
-; GISEL-GFX942-NEXT: v_add_u32_e32 v62, s0, v1
+; GISEL-GFX942-NEXT: v_add_u32_e32 v62, s0, v0
; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[2:5], v62, s[8:11], 0 offen
; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[6:9], v62, s[8:11], 0 offen offset:16
; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[10:13], v62, s[8:11], 0 offen offset:32
@@ -457,9 +457,9 @@ define amdgpu_kernel void @memcpy_known(ptr addrspace(7) %src, ptr addrspace(7)
; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[54:57], v62, s[8:11], 0 offen offset:208
; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[58:61], v62, s[8:11], 0 offen offset:224
; GISEL-GFX942-NEXT: buffer_load_dwordx4 a[0:3], v62, s[8:11], 0 offen offset:240
-; GISEL-GFX942-NEXT: v_add_u32_e32 v63, s12, v1
-; GISEL-GFX942-NEXT: v_add_u32_e32 v1, 0x100, v1
-; GISEL-GFX942-NEXT: v_cmp_lt_u32_e32 vcc, v1, v0
+; GISEL-GFX942-NEXT: v_add_u32_e32 v63, s12, v0
+; GISEL-GFX942-NEXT: v_add_u32_e32 v0, 0x100, v0
+; GISEL-GFX942-NEXT: v_cmp_lt_u32_e32 vcc, v0, v1
; GISEL-GFX942-NEXT: s_waitcnt vmcnt(0)
; GISEL-GFX942-NEXT: scratch_store_dwordx4 off, a[0:3], off ; 16-byte Folded Spill
; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[2:5], v63, s[4:7], 0 offen
diff --git a/llvm/test/CodeGen/AMDGPU/call-waitcnt.ll b/llvm/test/CodeGen/AMDGPU/call-waitcnt.ll
index 675acd0eedfc5..ed06594dad361 100644
--- a/llvm/test/CodeGen/AMDGPU/call-waitcnt.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-waitcnt.ll
@@ -65,9 +65,9 @@ define amdgpu_kernel void @call_no_wait_after_call(ptr addrspace(1) %ptr, i32) #
; GCN-NEXT: s_mov_b64 s[6:7], s[4:5]
; GCN-NEXT: v_mov_b32_e32 v0, 0
; GCN-NEXT: s_mov_b32 s32, 0
-; GCN-NEXT: v_mov_b32_e32 v40, 0
; GCN-NEXT: s_swappc_b64 s[30:31], s[8:9]
-; GCN-NEXT: global_store_dword v40, v40, s[34:35]
+; GCN-NEXT: v_mov_b32_e32 v0, 0
+; GCN-NEXT: global_store_dword v0, v0, s[34:35]
; GCN-NEXT: s_endpgm
call void @func(i32 0)
store i32 0, ptr addrspace(1) %ptr
@@ -88,9 +88,9 @@ define amdgpu_kernel void @call_no_wait_after_call_return_val(ptr addrspace(1) %
; GCN-NEXT: s_mov_b64 s[6:7], s[4:5]
; GCN-NEXT: v_mov_b32_e32 v0, 0
; GCN-NEXT: s_mov_b32 s32, 0
-; GCN-NEXT: v_mov_b32_e32 v40, 0
; GCN-NEXT: s_swappc_b64 s[30:31], s[8:9]
-; GCN-NEXT: global_store_dword v40, v0, s[34:35]
+; GCN-NEXT: v_mov_b32_e32 v1, 0
+; GCN-NEXT: global_store_dword v1, v0, s[34:35]
; GCN-NEXT: s_endpgm
%rv = call i32 @func.return(i32 0)
store i32 %rv, ptr addrspace(1) %ptr
diff --git a/llvm/test/CodeGen/AMDGPU/dbg-value-ends-sched-region.mir b/llvm/test/CodeGen/AMDGPU/dbg-value-ends-sched-region.mir
index b38dc4d21c10c..b734370de69a3 100644
--- a/llvm/test/CodeGen/AMDGPU/dbg-value-ends-sched-region.mir
+++ b/llvm/test/CodeGen/AMDGPU/dbg-value-ends-sched-region.mir
@@ -50,39 +50,39 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
- ; CHECK-NEXT: undef [[V_MOV_B32_e32_:%[0-9]+]].sub0:vreg_64 = V_MOV_B32_e32 0, implicit $exec
+ ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.2(0x80000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: undef [[V_ADD_CO_U32_e64_:%[0-9]+]].sub0:vreg_64, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[DEF3]].sub0, [[DEF5]].sub0, 0, implicit $exec
- ; CHECK-NEXT: dead undef [[V_ADD_CO_U32_e64_:%[0-9]+]].sub1:vreg_64, dead [[V_ADDC_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[DEF3]].sub1, [[DEF5]].sub1, [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
- ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[DEF]], 0, 0, implicit $exec :: (load (s64), addrspace 1)
- ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
- ; CHECK-NEXT: dead [[COPY2:%[0-9]+]]:vreg_64 = COPY [[DEF8]]
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[DEF2]]
- ; CHECK-NEXT: dead [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[DEF1]]
- ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[DEF4]].sub1
+ ; CHECK-NEXT: undef [[V_ADD_CO_U32_e64_:%[0-9]+]].sub0:vreg_64, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[DEF4]].sub0, [[DEF6]].sub0, 0, implicit $exec
+ ; CHECK-NEXT: dead undef [[V_ADD_CO_U32_e64_:%[0-9]+]].sub1:vreg_64, dead [[V_ADDC_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[DEF4]].sub1, [[DEF6]].sub1, [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
+ ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[DEF1]], 0, 0, implicit $exec :: (load (s64), addrspace 1)
+ ; CHECK-NEXT: dead [[COPY2:%[0-9]+]]:vreg_64 = COPY [[DEF]]
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[DEF3]]
+ ; CHECK-NEXT: dead [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[DEF2]]
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[DEF5]].sub1
+ ; CHECK-NEXT: undef [[V_MOV_B32_e32_:%[0-9]+]].sub0:vreg_64 = V_MOV_B32_e32 0, implicit $exec
; CHECK-NEXT: dead [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B32_e32_]].sub0
- ; CHECK-NEXT: dead [[V_CMP_GT_I32_e64_:%[0-9]+]]:sreg_64 = V_CMP_GT_I32_e64 4, [[DEF6]], implicit $exec
- ; CHECK-NEXT: GLOBAL_STORE_DWORDX2 [[COPY]], [[DEF7]], 288, 0, implicit $exec :: (store (s64), addrspace 1)
+ ; CHECK-NEXT: dead [[V_CMP_GT_I32_e64_:%[0-9]+]]:sreg_64 = V_CMP_GT_I32_e64 4, [[DEF7]], implicit $exec
+ ; CHECK-NEXT: GLOBAL_STORE_DWORDX2 [[COPY]], [[DEF8]], 288, 0, implicit $exec :: (store (s64), addrspace 1)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: successors: %bb.3(0x80000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = COPY [[COPY3]]
+ ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = COPY [[COPY3]]
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.4(0x40000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: undef [[DEF4:%[0-9]+]].sub1:vreg_64 = COPY [[COPY5]]
+ ; CHECK-NEXT: undef [[DEF5:%[0-9]+]].sub1:vreg_64 = COPY [[COPY5]]
; CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.4:
diff --git a/llvm/test/CodeGen/AMDGPU/dbg-value-starts-sched-region.mir b/llvm/test/CodeGen/AMDGPU/dbg-value-starts-sched-region.mir
index 0785fe31d63b4..4a03416f56cc9 100644
--- a/llvm/test/CodeGen/AMDGPU/dbg-value-starts-sched-region.mir
+++ b/llvm/test/CodeGen/AMDGPU/dbg-value-starts-sched-region.mir
@@ -10,10 +10,10 @@ tracksRegLiveness: true
body: |
bb.0:
; CHECK-LABEL: name: sched
- ; CHECK: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: S_NOP 0
+ ; CHECK: S_NOP 0
; CHECK-NEXT: SCHED_BARRIER 0
; CHECK-NEXT: DBG_VALUE
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: dead [[COPY:%[0-9]+]]:sgpr_32 = COPY [[DEF]]
; CHECK-NEXT: S_NOP 0
; CHECK-NEXT: S_ENDPGM 0
diff --git a/llvm/test/CodeGen/AMDGPU/debug-value-scheduler-crash.mir b/llvm/test/CodeGen/AMDGPU/debug-value-scheduler-crash.mir
index 156979d6d06a5..5444c2b3eda01 100644
--- a/llvm/test/CodeGen/AMDGPU/debug-value-scheduler-crash.mir
+++ b/llvm/test/CodeGen/AMDGPU/debug-value-scheduler-crash.mir
@@ -28,14 +28,6 @@ body: |
; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: [[V_MUL_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F32_e32 1082130432, [[DEF]], implicit $mode, implicit $exec
- ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.2(0x80000000)
@@ -51,34 +43,42 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B32_e32_]]
- ; CHECK-NEXT: [[V_MUL_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F32_e32 [[DEF6]], [[DEF6]], implicit $mode, implicit $exec
- ; CHECK-NEXT: [[V_MUL_F32_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F32_e32 [[DEF6]], [[DEF6]], implicit $mode, implicit $exec
- ; CHECK-NEXT: [[V_MUL_F32_e32_3:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F32_e32 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_]], implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MUL_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F32_e32 [[DEF1]], [[DEF1]], implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MUL_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F32_e32 [[DEF1]], [[DEF1]], implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MUL_F32_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F32_e32 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_]], implicit $mode, implicit $exec
; CHECK-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1092616192, implicit $exec
- ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[V_ADD_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e32 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_]], implicit $mode, implicit $exec
- ; CHECK-NEXT: [[V_MUL_F32_e32_4:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F32_e32 [[DEF6]], [[DEF6]], implicit $mode, implicit $exec
- ; CHECK-NEXT: dead [[V_MUL_F32_e32_5:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F32_e32 [[V_MUL_F32_e32_4]], [[DEF12]], implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MUL_F32_e32_3:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F32_e32 [[DEF1]], [[DEF1]], implicit $mode, implicit $exec
+ ; CHECK-NEXT: dead [[V_MUL_F32_e32_4:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F32_e32 [[V_MUL_F32_e32_3]], [[DEF4]], implicit $mode, implicit $exec
; CHECK-NEXT: dead [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_MAC_F32_e32 [[V_ADD_F32_e32_]], [[COPY]], [[V_MOV_B32_e32_1]], implicit $mode, implicit $exec
- ; CHECK-NEXT: [[DEF13:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF5:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
; CHECK-NEXT: $sgpr4 = IMPLICIT_DEF
- ; CHECK-NEXT: $vgpr0 = COPY [[DEF10]]
+ ; CHECK-NEXT: $vgpr0 = COPY [[DEF3]]
; CHECK-NEXT: $vgpr0 = COPY [[V_MOV_B32_e32_]]
- ; CHECK-NEXT: $vgpr1 = COPY [[DEF6]]
- ; CHECK-NEXT: $vgpr0 = COPY [[V_MUL_F32_e32_1]]
- ; CHECK-NEXT: $vgpr1 = COPY [[V_MUL_F32_e32_2]]
- ; CHECK-NEXT: $vgpr2 = COPY [[V_MUL_F32_e32_3]]
- ; CHECK-NEXT: dead $sgpr30_sgpr31 = SI_CALL [[DEF13]], @foo, csr_amdgpu, implicit undef $sgpr0_sgpr1_sgpr2_sgpr3, implicit killed $sgpr4, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit-def $vgpr0
- ; CHECK-NEXT: [[V_ADD_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e32 [[V_MUL_F32_e32_]], [[DEF7]], implicit $mode, implicit $exec
- ; CHECK-NEXT: [[V_ADD_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_MAC_F32_e32 [[DEF11]], [[DEF8]], [[V_ADD_F32_e32_1]], implicit $mode, implicit $exec
- ; CHECK-NEXT: dead [[V_MAD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAD_F32_e64 0, [[V_ADD_F32_e32_1]], 0, [[DEF3]], 0, [[DEF]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: dead [[V_MAD_F32_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_MAD_F32_e64 0, [[V_ADD_F32_e32_1]], 0, [[DEF4]], 0, [[DEF1]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: dead [[V_MAD_F32_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_MAD_F32_e64 0, [[V_ADD_F32_e32_1]], 0, [[DEF5]], 0, [[DEF2]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: $vgpr1 = COPY [[DEF1]]
+ ; CHECK-NEXT: $vgpr0 = COPY [[V_MUL_F32_e32_]]
+ ; CHECK-NEXT: $vgpr1 = COPY [[V_MUL_F32_e32_1]]
+ ; CHECK-NEXT: $vgpr2 = COPY [[V_MUL_F32_e32_2]]
+ ; CHECK-NEXT: dead $sgpr30_sgpr31 = SI_CALL [[DEF5]], @foo, csr_amdgpu, implicit undef $sgpr0_sgpr1_sgpr2_sgpr3, implicit killed $sgpr4, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit-def $vgpr0
+ ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[V_MUL_F32_e32_5:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F32_e32 1082130432, [[DEF]], implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[V_ADD_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e32 [[V_MUL_F32_e32_5]], [[DEF6]], implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_ADD_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_MAC_F32_e32 [[DEF8]], [[DEF7]], [[V_ADD_F32_e32_1]], implicit $mode, implicit $exec
+ ; CHECK-NEXT: dead [[V_MAD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAD_F32_e64 0, [[V_ADD_F32_e32_1]], 0, [[DEF9]], 0, [[DEF]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: dead [[V_MAD_F32_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_MAD_F32_e64 0, [[V_ADD_F32_e32_1]], 0, [[DEF11]], 0, [[DEF10]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: dead [[V_MAD_F32_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_MAD_F32_e64 0, [[V_ADD_F32_e32_1]], 0, [[DEF13]], 0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
- ; CHECK-NEXT: GLOBAL_STORE_DWORD [[DEF14]], [[DEF9]], 0, 0, implicit $exec
+ ; CHECK-NEXT: GLOBAL_STORE_DWORD [[DEF14]], [[DEF2]], 0, 0, implicit $exec
; CHECK-NEXT: S_ENDPGM 0
bb.0:
successors: %bb.1
diff --git a/llvm/test/CodeGen/AMDGPU/ds_read2-gfx1250.ll b/llvm/test/CodeGen/AMDGPU/ds_read2-gfx1250.ll
index 23d2b18f5311b..0e64b12ba3716 100644
--- a/llvm/test/CodeGen/AMDGPU/ds_read2-gfx1250.ll
+++ b/llvm/test/CodeGen/AMDGPU/ds_read2-gfx1250.ll
@@ -817,7 +817,7 @@ define amdgpu_kernel void @ds_read_call_read(ptr addrspace(1) %out, ptr addrspac
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_load_b96 s[36:38], s[4:5], 0x0
; GFX1250-NEXT: v_and_b32_e32 v1, 0x3ff, v0
-; GFX1250-NEXT: v_dual_mov_b32 v42, 0 :: v_dual_mov_b32 v31, v0
+; GFX1250-NEXT: v_mov_b32_e32 v31, v0
; GFX1250-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX1250-NEXT: s_add_nc_u64 s[8:9], s[4:5], 12
; GFX1250-NEXT: s_mov_b64 s[12:13], void_func_void at abs64
@@ -831,8 +831,8 @@ define amdgpu_kernel void @ds_read_call_read(ptr addrspace(1) %out, ptr addrspac
; GFX1250-NEXT: s_swap_pc_i64 s[30:31], s[12:13]
; GFX1250-NEXT: ds_load_b32 v0, v40 offset:4
; GFX1250-NEXT: s_wait_dscnt 0x0
-; GFX1250-NEXT: v_add_nc_u32_e32 v0, v41, v0
-; GFX1250-NEXT: global_store_b32 v42, v0, s[36:37]
+; GFX1250-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_add_nc_u32 v0, v41, v0
+; GFX1250-NEXT: global_store_b32 v1, v0, s[36:37]
; GFX1250-NEXT: s_endpgm
%x = call i32 @llvm.amdgcn.workitem.id.x()
%arrayidx0 = getelementptr i32, ptr addrspace(3) %arg, i32 %x
diff --git a/llvm/test/CodeGen/AMDGPU/ds_read2.ll b/llvm/test/CodeGen/AMDGPU/ds_read2.ll
index 9f1b55ea3b1ef..a89cd9ceff154 100644
--- a/llvm/test/CodeGen/AMDGPU/ds_read2.ll
+++ b/llvm/test/CodeGen/AMDGPU/ds_read2.ll
@@ -1384,8 +1384,8 @@ define amdgpu_kernel void @ds_read_call_read(ptr addrspace(1) %out, ptr addrspac
; GFX9-NEXT: s_add_u32 s8, s4, 12
; GFX9-NEXT: s_addc_u32 s9, s5, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: v_lshl_add_u32 v41, v0, 2, s6
-; GFX9-NEXT: ds_read_b32 v42, v41
+; GFX9-NEXT: v_lshl_add_u32 v40, v0, 2, s6
+; GFX9-NEXT: ds_read_b32 v41, v40
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3]
@@ -1395,12 +1395,12 @@ define amdgpu_kernel void @ds_read_call_read(ptr addrspace(1) %out, ptr addrspac
; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2
; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-NEXT: s_mov_b32 s32, 0
-; GFX9-NEXT: v_mov_b32_e32 v40, 0
; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
-; GFX9-NEXT: ds_read_b32 v0, v41 offset:4
+; GFX9-NEXT: ds_read_b32 v0, v40 offset:4
+; GFX9-NEXT: v_mov_b32_e32 v1, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: v_add_u32_e32 v0, v42, v0
-; GFX9-NEXT: global_store_dword v40, v0, s[34:35]
+; GFX9-NEXT: v_add_u32_e32 v0, v41, v0
+; GFX9-NEXT: global_store_dword v1, v0, s[34:35]
; GFX9-NEXT: s_endpgm
%x = call i32 @llvm.amdgcn.workitem.id.x()
%arrayidx0 = getelementptr i32, ptr addrspace(3) %arg, i32 %x
diff --git a/llvm/test/CodeGen/AMDGPU/dynamic_stackalloc.ll b/llvm/test/CodeGen/AMDGPU/dynamic_stackalloc.ll
index c5db7a33f70e0..fda27243625cf 100644
--- a/llvm/test/CodeGen/AMDGPU/dynamic_stackalloc.ll
+++ b/llvm/test/CodeGen/AMDGPU/dynamic_stackalloc.ll
@@ -1894,7 +1894,6 @@ define void @test_dynamic_stackalloc_device_multiple_allocas(i32 %n, i32 %m) {
; GFX9-SDAG-NEXT: .LBB14_6: ; %bb.1
; GFX9-SDAG-NEXT: s_or_b64 exec, exec, s[4:5]
; GFX9-SDAG-NEXT: v_lshl_add_u32 v0, v0, 2, 15
-; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, 2
; GFX9-SDAG-NEXT: v_and_b32_e32 v0, -16, v0
; GFX9-SDAG-NEXT: s_mov_b64 s[4:5], exec
; GFX9-SDAG-NEXT: .LBB14_7: ; =>This Inner Loop Header: Depth=1
@@ -1912,7 +1911,8 @@ define void @test_dynamic_stackalloc_device_multiple_allocas(i32 %n, i32 %m) {
; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, 1
; GFX9-SDAG-NEXT: buffer_store_dword v0, off, s[0:3], s33
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX9-SDAG-NEXT: buffer_store_dword v1, off, s[0:3], s4
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, 2
+; GFX9-SDAG-NEXT: buffer_store_dword v0, off, s[0:3], s4
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX9-SDAG-NEXT: s_mov_b32 s32, s34
; GFX9-SDAG-NEXT: s_mov_b32 s34, s14
@@ -2059,31 +2059,30 @@ define void @test_dynamic_stackalloc_device_multiple_allocas(i32 %n, i32 %m) {
; GFX11-SDAG-NEXT: v_readfirstlane_b32 s32, v1
; GFX11-SDAG-NEXT: .LBB14_6: ; %bb.1
; GFX11-SDAG-NEXT: s_or_b32 exec_lo, exec_lo, s1
-; GFX11-SDAG-NEXT: v_lshl_add_u32 v1, v0, 2, 15
-; GFX11-SDAG-NEXT: v_mov_b32_e32 v0, 2
+; GFX11-SDAG-NEXT: v_lshl_add_u32 v0, v0, 2, 15
; GFX11-SDAG-NEXT: s_mov_b32 s1, exec_lo
-; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-SDAG-NEXT: v_and_b32_e32 v1, -16, v1
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-NEXT: v_and_b32_e32 v0, -16, v0
; GFX11-SDAG-NEXT: .LBB14_7: ; =>This Inner Loop Header: Depth=1
; GFX11-SDAG-NEXT: s_ctz_i32_b32 s2, s1
; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
-; GFX11-SDAG-NEXT: v_readlane_b32 s3, v1, s2
+; GFX11-SDAG-NEXT: v_readlane_b32 s3, v0, s2
; GFX11-SDAG-NEXT: s_bitset0_b32 s1, s2
; GFX11-SDAG-NEXT: s_max_u32 s0, s0, s3
; GFX11-SDAG-NEXT: s_cmp_lg_u32 s1, 0
; GFX11-SDAG-NEXT: s_cbranch_scc1 .LBB14_7
; GFX11-SDAG-NEXT: ; %bb.8:
; GFX11-SDAG-NEXT: s_mov_b32 s1, s32
-; GFX11-SDAG-NEXT: v_mov_b32_e32 v2, 1
-; GFX11-SDAG-NEXT: v_lshl_add_u32 v1, s0, 5, s1
-; GFX11-SDAG-NEXT: scratch_store_b32 off, v2, s33 dlc
+; GFX11-SDAG-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_mov_b32 v2, 2
+; GFX11-SDAG-NEXT: v_lshl_add_u32 v0, s0, 5, s1
+; GFX11-SDAG-NEXT: scratch_store_b32 off, v1, s33 dlc
; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX11-SDAG-NEXT: scratch_store_b32 off, v0, s1 dlc
+; GFX11-SDAG-NEXT: scratch_store_b32 off, v2, s1 dlc
; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX11-SDAG-NEXT: v_readfirstlane_b32 s32, v1
+; GFX11-SDAG-NEXT: s_mov_b32 s33, s7
+; GFX11-SDAG-NEXT: v_readfirstlane_b32 s32, v0
; GFX11-SDAG-NEXT: s_mov_b32 s32, s34
; GFX11-SDAG-NEXT: s_mov_b32 s34, s8
-; GFX11-SDAG-NEXT: s_mov_b32 s33, s7
; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-GISEL-LABEL: test_dynamic_stackalloc_device_multiple_allocas:
diff --git a/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-wwm.ll b/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-wwm.ll
index db32135939a5d..0f5f9a54c11be 100644
--- a/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-wwm.ll
+++ b/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-wwm.ll
@@ -5,14 +5,15 @@ define amdgpu_gs i32 @main() {
; CHECK-LABEL: main:
; CHECK: ; %bb.0: ; %bb
; CHECK-NEXT: s_bitcmp1_b32 0, 0
-; CHECK-NEXT: s_mov_b32 s0, 0
-; CHECK-NEXT: s_cselect_b32 s1, -1, 0
-; CHECK-NEXT: s_or_saveexec_b32 s2, -1
-; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s1
+; CHECK-NEXT: s_cselect_b32 s0, -1, 0
+; CHECK-NEXT: s_or_saveexec_b32 s1, -1
+; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; CHECK-NEXT: v_readfirstlane_b32 s1, v0
-; CHECK-NEXT: s_mov_b32 exec_lo, s2
-; CHECK-NEXT: s_or_b32 s0, s0, s1
+; CHECK-NEXT: v_readfirstlane_b32 s0, v0
+; CHECK-NEXT: s_mov_b32 exec_lo, s1
+; CHECK-NEXT: s_mov_b32 s1, 0
+; CHECK-NEXT: s_wait_alu 0xfffe
+; CHECK-NEXT: s_or_b32 s0, s1, s0
; CHECK-NEXT: s_wait_alu 0xfffe
; CHECK-NEXT: s_bitcmp1_b32 s0, 0
; CHECK-NEXT: s_cselect_b32 s0, -1, 0
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.add.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.add.ll
index a7ebf458d2591..8efcc55a58ef6 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.add.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.add.ll
@@ -244,7 +244,6 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX9DAGISEL-LABEL: divergent_value:
; GFX9DAGISEL: ; %bb.0: ; %entry
; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX9DAGISEL-NEXT: s_mov_b32 s4, 0
; GFX9DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
@@ -256,6 +255,7 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX9DAGISEL-NEXT: ; %bb.2:
; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, s4
+; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX9DAGISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX9DAGISEL-NEXT: s_endpgm
@@ -282,7 +282,6 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1064DAGISEL-LABEL: divergent_value:
; GFX1064DAGISEL: ; %bb.0: ; %entry
; GFX1064DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX1064DAGISEL-NEXT: s_mov_b32 s4, 0
; GFX1064DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
@@ -294,6 +293,7 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX1064DAGISEL-NEXT: ; %bb.2:
; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, s4
+; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064DAGISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX1064DAGISEL-NEXT: s_endpgm
@@ -320,7 +320,6 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1032DAGISEL-LABEL: divergent_value:
; GFX1032DAGISEL: ; %bb.0: ; %entry
; GFX1032DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1032DAGISEL-NEXT: s_mov_b32 s3, exec_lo
; GFX1032DAGISEL-NEXT: s_mov_b32 s2, 0
; GFX1032DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
@@ -332,6 +331,7 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX1032DAGISEL-NEXT: ; %bb.2:
; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, s2
+; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032DAGISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX1032DAGISEL-NEXT: s_endpgm
@@ -358,7 +358,6 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1164DAGISEL-LABEL: divergent_value:
; GFX1164DAGISEL: ; %bb.0: ; %entry
; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1164DAGISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX1164DAGISEL-NEXT: s_mov_b32 s4, 0
@@ -372,6 +371,7 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX1164DAGISEL-NEXT: ; %bb.2:
; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, s4
+; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164DAGISEL-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX1164DAGISEL-NEXT: s_endpgm
@@ -400,7 +400,7 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1132DAGISEL-LABEL: divergent_value:
; GFX1132DAGISEL: ; %bb.0: ; %entry
; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0x3ff, v0
+; GFX1132DAGISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX1132DAGISEL-NEXT: s_mov_b32 s3, exec_lo
; GFX1132DAGISEL-NEXT: s_mov_b32 s2, 0
; GFX1132DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
@@ -412,7 +412,7 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1132DAGISEL-NEXT: s_cmp_lg_u32 s3, 0
; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX1132DAGISEL-NEXT: ; %bb.2:
-; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, s2
+; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 0
; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132DAGISEL-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX1132DAGISEL-NEXT: s_endpgm
@@ -440,7 +440,7 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX12DAGISEL-LABEL: divergent_value:
; GFX12DAGISEL: ; %bb.0: ; %entry
; GFX12DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX12DAGISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0x3ff, v0
+; GFX12DAGISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX12DAGISEL-NEXT: s_mov_b32 s3, exec_lo
; GFX12DAGISEL-NEXT: s_mov_b32 s2, 0
; GFX12DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
@@ -452,7 +452,7 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX12DAGISEL-NEXT: s_cmp_lg_u32 s3, 0
; GFX12DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX12DAGISEL-NEXT: ; %bb.2:
-; GFX12DAGISEL-NEXT: v_mov_b32_e32 v0, s2
+; GFX12DAGISEL-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 0
; GFX12DAGISEL-NEXT: s_wait_kmcnt 0x0
; GFX12DAGISEL-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX12DAGISEL-NEXT: s_endpgm
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.and.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.and.ll
index f39dd867f9580..ec27806c2da85 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.and.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.and.ll
@@ -168,7 +168,6 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX9DAGISEL-LABEL: divergent_value:
; GFX9DAGISEL: ; %bb.0: ; %entry
; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX9DAGISEL-NEXT: s_mov_b32 s4, -1
; GFX9DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
@@ -180,6 +179,7 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX9DAGISEL-NEXT: ; %bb.2:
; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, s4
+; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX9DAGISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX9DAGISEL-NEXT: s_endpgm
@@ -206,7 +206,6 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1064DAGISEL-LABEL: divergent_value:
; GFX1064DAGISEL: ; %bb.0: ; %entry
; GFX1064DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX1064DAGISEL-NEXT: s_mov_b32 s4, -1
; GFX1064DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
@@ -218,6 +217,7 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX1064DAGISEL-NEXT: ; %bb.2:
; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, s4
+; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064DAGISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX1064DAGISEL-NEXT: s_endpgm
@@ -244,7 +244,6 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1032DAGISEL-LABEL: divergent_value:
; GFX1032DAGISEL: ; %bb.0: ; %entry
; GFX1032DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1032DAGISEL-NEXT: s_mov_b32 s3, exec_lo
; GFX1032DAGISEL-NEXT: s_mov_b32 s2, -1
; GFX1032DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
@@ -256,6 +255,7 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX1032DAGISEL-NEXT: ; %bb.2:
; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, s2
+; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032DAGISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX1032DAGISEL-NEXT: s_endpgm
@@ -282,7 +282,6 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1164DAGISEL-LABEL: divergent_value:
; GFX1164DAGISEL: ; %bb.0: ; %entry
; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1164DAGISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX1164DAGISEL-NEXT: s_mov_b32 s4, -1
@@ -296,6 +295,7 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX1164DAGISEL-NEXT: ; %bb.2:
; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, s4
+; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164DAGISEL-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX1164DAGISEL-NEXT: s_endpgm
@@ -324,7 +324,7 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1132DAGISEL-LABEL: divergent_value:
; GFX1132DAGISEL: ; %bb.0: ; %entry
; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0x3ff, v0
+; GFX1132DAGISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX1132DAGISEL-NEXT: s_mov_b32 s3, exec_lo
; GFX1132DAGISEL-NEXT: s_mov_b32 s2, -1
; GFX1132DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
@@ -336,7 +336,7 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1132DAGISEL-NEXT: s_cmp_lg_u32 s3, 0
; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX1132DAGISEL-NEXT: ; %bb.2:
-; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, s2
+; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 0
; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132DAGISEL-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX1132DAGISEL-NEXT: s_endpgm
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.max.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.max.ll
index 6f299ab8bb9cf..3c84348ab7064 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.max.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.max.ll
@@ -168,7 +168,6 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX9DAGISEL-LABEL: divergent_value:
; GFX9DAGISEL: ; %bb.0: ; %entry
; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX9DAGISEL-NEXT: s_brev_b32 s4, 1
; GFX9DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
@@ -180,6 +179,7 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX9DAGISEL-NEXT: ; %bb.2:
; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, s4
+; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX9DAGISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX9DAGISEL-NEXT: s_endpgm
@@ -206,7 +206,6 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1064DAGISEL-LABEL: divergent_value:
; GFX1064DAGISEL: ; %bb.0: ; %entry
; GFX1064DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX1064DAGISEL-NEXT: s_brev_b32 s4, 1
; GFX1064DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
@@ -218,6 +217,7 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX1064DAGISEL-NEXT: ; %bb.2:
; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, s4
+; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064DAGISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX1064DAGISEL-NEXT: s_endpgm
@@ -244,7 +244,6 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1032DAGISEL-LABEL: divergent_value:
; GFX1032DAGISEL: ; %bb.0: ; %entry
; GFX1032DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1032DAGISEL-NEXT: s_mov_b32 s3, exec_lo
; GFX1032DAGISEL-NEXT: s_brev_b32 s2, 1
; GFX1032DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
@@ -256,6 +255,7 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX1032DAGISEL-NEXT: ; %bb.2:
; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, s2
+; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032DAGISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX1032DAGISEL-NEXT: s_endpgm
@@ -282,7 +282,6 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1164DAGISEL-LABEL: divergent_value:
; GFX1164DAGISEL: ; %bb.0: ; %entry
; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1164DAGISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX1164DAGISEL-NEXT: s_brev_b32 s4, 1
@@ -296,6 +295,7 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX1164DAGISEL-NEXT: ; %bb.2:
; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, s4
+; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164DAGISEL-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX1164DAGISEL-NEXT: s_endpgm
@@ -324,7 +324,7 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1132DAGISEL-LABEL: divergent_value:
; GFX1132DAGISEL: ; %bb.0: ; %entry
; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0x3ff, v0
+; GFX1132DAGISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX1132DAGISEL-NEXT: s_mov_b32 s3, exec_lo
; GFX1132DAGISEL-NEXT: s_brev_b32 s2, 1
; GFX1132DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
@@ -336,7 +336,7 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1132DAGISEL-NEXT: s_cmp_lg_u32 s3, 0
; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX1132DAGISEL-NEXT: ; %bb.2:
-; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, s2
+; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 0
; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132DAGISEL-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX1132DAGISEL-NEXT: s_endpgm
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.min.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.min.ll
index 3c4cbc74aedc1..29a558c1ee169 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.min.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.min.ll
@@ -168,7 +168,6 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX9DAGISEL-LABEL: divergent_value:
; GFX9DAGISEL: ; %bb.0: ; %entry
; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX9DAGISEL-NEXT: s_brev_b32 s4, -2
; GFX9DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
@@ -180,6 +179,7 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX9DAGISEL-NEXT: ; %bb.2:
; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, s4
+; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX9DAGISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX9DAGISEL-NEXT: s_endpgm
@@ -206,7 +206,6 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1064DAGISEL-LABEL: divergent_value:
; GFX1064DAGISEL: ; %bb.0: ; %entry
; GFX1064DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX1064DAGISEL-NEXT: s_brev_b32 s4, -2
; GFX1064DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
@@ -218,6 +217,7 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX1064DAGISEL-NEXT: ; %bb.2:
; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, s4
+; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064DAGISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX1064DAGISEL-NEXT: s_endpgm
@@ -244,7 +244,6 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1032DAGISEL-LABEL: divergent_value:
; GFX1032DAGISEL: ; %bb.0: ; %entry
; GFX1032DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1032DAGISEL-NEXT: s_mov_b32 s3, exec_lo
; GFX1032DAGISEL-NEXT: s_brev_b32 s2, -2
; GFX1032DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
@@ -256,6 +255,7 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX1032DAGISEL-NEXT: ; %bb.2:
; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, s2
+; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032DAGISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX1032DAGISEL-NEXT: s_endpgm
@@ -282,7 +282,6 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1164DAGISEL-LABEL: divergent_value:
; GFX1164DAGISEL: ; %bb.0: ; %entry
; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1164DAGISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX1164DAGISEL-NEXT: s_brev_b32 s4, -2
@@ -296,6 +295,7 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX1164DAGISEL-NEXT: ; %bb.2:
; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, s4
+; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164DAGISEL-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX1164DAGISEL-NEXT: s_endpgm
@@ -324,7 +324,7 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1132DAGISEL-LABEL: divergent_value:
; GFX1132DAGISEL: ; %bb.0: ; %entry
; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0x3ff, v0
+; GFX1132DAGISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX1132DAGISEL-NEXT: s_mov_b32 s3, exec_lo
; GFX1132DAGISEL-NEXT: s_brev_b32 s2, -2
; GFX1132DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
@@ -336,7 +336,7 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1132DAGISEL-NEXT: s_cmp_lg_u32 s3, 0
; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX1132DAGISEL-NEXT: ; %bb.2:
-; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, s2
+; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 0
; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132DAGISEL-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX1132DAGISEL-NEXT: s_endpgm
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.or.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.or.ll
index d6ccf7ce2831d..2a3119ea97ff9 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.or.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.or.ll
@@ -168,7 +168,6 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX9DAGISEL-LABEL: divergent_value:
; GFX9DAGISEL: ; %bb.0: ; %entry
; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX9DAGISEL-NEXT: s_mov_b32 s4, 0
; GFX9DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
@@ -180,6 +179,7 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX9DAGISEL-NEXT: ; %bb.2:
; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, s4
+; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX9DAGISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX9DAGISEL-NEXT: s_endpgm
@@ -206,7 +206,6 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1064DAGISEL-LABEL: divergent_value:
; GFX1064DAGISEL: ; %bb.0: ; %entry
; GFX1064DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX1064DAGISEL-NEXT: s_mov_b32 s4, 0
; GFX1064DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
@@ -218,6 +217,7 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX1064DAGISEL-NEXT: ; %bb.2:
; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, s4
+; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064DAGISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX1064DAGISEL-NEXT: s_endpgm
@@ -244,7 +244,6 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1032DAGISEL-LABEL: divergent_value:
; GFX1032DAGISEL: ; %bb.0: ; %entry
; GFX1032DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1032DAGISEL-NEXT: s_mov_b32 s3, exec_lo
; GFX1032DAGISEL-NEXT: s_mov_b32 s2, 0
; GFX1032DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
@@ -256,6 +255,7 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX1032DAGISEL-NEXT: ; %bb.2:
; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, s2
+; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032DAGISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX1032DAGISEL-NEXT: s_endpgm
@@ -282,7 +282,6 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1164DAGISEL-LABEL: divergent_value:
; GFX1164DAGISEL: ; %bb.0: ; %entry
; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1164DAGISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX1164DAGISEL-NEXT: s_mov_b32 s4, 0
@@ -296,6 +295,7 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX1164DAGISEL-NEXT: ; %bb.2:
; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, s4
+; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164DAGISEL-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX1164DAGISEL-NEXT: s_endpgm
@@ -324,7 +324,7 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1132DAGISEL-LABEL: divergent_value:
; GFX1132DAGISEL: ; %bb.0: ; %entry
; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0x3ff, v0
+; GFX1132DAGISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX1132DAGISEL-NEXT: s_mov_b32 s3, exec_lo
; GFX1132DAGISEL-NEXT: s_mov_b32 s2, 0
; GFX1132DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
@@ -336,7 +336,7 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1132DAGISEL-NEXT: s_cmp_lg_u32 s3, 0
; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX1132DAGISEL-NEXT: ; %bb.2:
-; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, s2
+; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 0
; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132DAGISEL-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX1132DAGISEL-NEXT: s_endpgm
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.sub.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.sub.ll
index fab269ea8cfb9..5f1bcec126938 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.sub.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.sub.ll
@@ -259,7 +259,6 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX9DAGISEL-LABEL: divergent_value:
; GFX9DAGISEL: ; %bb.0: ; %entry
; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX9DAGISEL-NEXT: s_mov_b32 s4, 0
; GFX9DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
@@ -271,6 +270,7 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX9DAGISEL-NEXT: ; %bb.2:
; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, s4
+; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX9DAGISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX9DAGISEL-NEXT: s_endpgm
@@ -297,7 +297,6 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1064DAGISEL-LABEL: divergent_value:
; GFX1064DAGISEL: ; %bb.0: ; %entry
; GFX1064DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX1064DAGISEL-NEXT: s_mov_b32 s4, 0
; GFX1064DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
@@ -309,6 +308,7 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX1064DAGISEL-NEXT: ; %bb.2:
; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, s4
+; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064DAGISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX1064DAGISEL-NEXT: s_endpgm
@@ -335,7 +335,6 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1032DAGISEL-LABEL: divergent_value:
; GFX1032DAGISEL: ; %bb.0: ; %entry
; GFX1032DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1032DAGISEL-NEXT: s_mov_b32 s3, exec_lo
; GFX1032DAGISEL-NEXT: s_mov_b32 s2, 0
; GFX1032DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
@@ -347,6 +346,7 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX1032DAGISEL-NEXT: ; %bb.2:
; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, s2
+; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032DAGISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX1032DAGISEL-NEXT: s_endpgm
@@ -373,7 +373,6 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1164DAGISEL-LABEL: divergent_value:
; GFX1164DAGISEL: ; %bb.0: ; %entry
; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1164DAGISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX1164DAGISEL-NEXT: s_mov_b32 s4, 0
@@ -387,6 +386,7 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX1164DAGISEL-NEXT: ; %bb.2:
; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, s4
+; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164DAGISEL-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX1164DAGISEL-NEXT: s_endpgm
@@ -415,7 +415,7 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1132DAGISEL-LABEL: divergent_value:
; GFX1132DAGISEL: ; %bb.0: ; %entry
; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0x3ff, v0
+; GFX1132DAGISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX1132DAGISEL-NEXT: s_mov_b32 s3, exec_lo
; GFX1132DAGISEL-NEXT: s_mov_b32 s2, 0
; GFX1132DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
@@ -427,7 +427,7 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1132DAGISEL-NEXT: s_cmp_lg_u32 s3, 0
; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX1132DAGISEL-NEXT: ; %bb.2:
-; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, s2
+; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 0
; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132DAGISEL-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX1132DAGISEL-NEXT: s_endpgm
@@ -455,7 +455,7 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX12DAGISEL-LABEL: divergent_value:
; GFX12DAGISEL: ; %bb.0: ; %entry
; GFX12DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX12DAGISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0x3ff, v0
+; GFX12DAGISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX12DAGISEL-NEXT: s_mov_b32 s3, exec_lo
; GFX12DAGISEL-NEXT: s_mov_b32 s2, 0
; GFX12DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
@@ -467,7 +467,7 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX12DAGISEL-NEXT: s_cmp_lg_u32 s3, 0
; GFX12DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX12DAGISEL-NEXT: ; %bb.2:
-; GFX12DAGISEL-NEXT: v_mov_b32_e32 v0, s2
+; GFX12DAGISEL-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 0
; GFX12DAGISEL-NEXT: s_wait_kmcnt 0x0
; GFX12DAGISEL-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX12DAGISEL-NEXT: s_endpgm
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.umax.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.umax.ll
index 54c8e2e248f57..791f1b59fe83e 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.umax.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.umax.ll
@@ -169,7 +169,6 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out, i32 %in) {
; GFX9DAGISEL-LABEL: divergent_value:
; GFX9DAGISEL: ; %bb.0: ; %entry
; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX9DAGISEL-NEXT: s_mov_b32 s4, 0
; GFX9DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
@@ -181,6 +180,7 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out, i32 %in) {
; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX9DAGISEL-NEXT: ; %bb.2:
; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, s4
+; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX9DAGISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX9DAGISEL-NEXT: s_endpgm
@@ -207,7 +207,6 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out, i32 %in) {
; GFX1064DAGISEL-LABEL: divergent_value:
; GFX1064DAGISEL: ; %bb.0: ; %entry
; GFX1064DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX1064DAGISEL-NEXT: s_mov_b32 s4, 0
; GFX1064DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
@@ -219,6 +218,7 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out, i32 %in) {
; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX1064DAGISEL-NEXT: ; %bb.2:
; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, s4
+; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064DAGISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX1064DAGISEL-NEXT: s_endpgm
@@ -245,7 +245,6 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out, i32 %in) {
; GFX1032DAGISEL-LABEL: divergent_value:
; GFX1032DAGISEL: ; %bb.0: ; %entry
; GFX1032DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1032DAGISEL-NEXT: s_mov_b32 s3, exec_lo
; GFX1032DAGISEL-NEXT: s_mov_b32 s2, 0
; GFX1032DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
@@ -257,6 +256,7 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out, i32 %in) {
; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX1032DAGISEL-NEXT: ; %bb.2:
; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, s2
+; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032DAGISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX1032DAGISEL-NEXT: s_endpgm
@@ -283,7 +283,6 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out, i32 %in) {
; GFX1164DAGISEL-LABEL: divergent_value:
; GFX1164DAGISEL: ; %bb.0: ; %entry
; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1164DAGISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX1164DAGISEL-NEXT: s_mov_b32 s4, 0
@@ -297,6 +296,7 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out, i32 %in) {
; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX1164DAGISEL-NEXT: ; %bb.2:
; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, s4
+; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164DAGISEL-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX1164DAGISEL-NEXT: s_endpgm
@@ -325,7 +325,7 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out, i32 %in) {
; GFX1132DAGISEL-LABEL: divergent_value:
; GFX1132DAGISEL: ; %bb.0: ; %entry
; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0x3ff, v0
+; GFX1132DAGISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX1132DAGISEL-NEXT: s_mov_b32 s3, exec_lo
; GFX1132DAGISEL-NEXT: s_mov_b32 s2, 0
; GFX1132DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
@@ -337,7 +337,7 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out, i32 %in) {
; GFX1132DAGISEL-NEXT: s_cmp_lg_u32 s3, 0
; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX1132DAGISEL-NEXT: ; %bb.2:
-; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, s2
+; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 0
; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132DAGISEL-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX1132DAGISEL-NEXT: s_endpgm
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.umin.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.umin.ll
index 502ef84449751..25ba296a76524 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.umin.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.umin.ll
@@ -169,7 +169,6 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX9DAGISEL-LABEL: divergent_value:
; GFX9DAGISEL: ; %bb.0: ; %entry
; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX9DAGISEL-NEXT: s_mov_b32 s4, -1
; GFX9DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
@@ -181,6 +180,7 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX9DAGISEL-NEXT: ; %bb.2:
; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, s4
+; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX9DAGISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX9DAGISEL-NEXT: s_endpgm
@@ -207,7 +207,6 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1064DAGISEL-LABEL: divergent_value:
; GFX1064DAGISEL: ; %bb.0: ; %entry
; GFX1064DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX1064DAGISEL-NEXT: s_mov_b32 s4, -1
; GFX1064DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
@@ -219,6 +218,7 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX1064DAGISEL-NEXT: ; %bb.2:
; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, s4
+; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064DAGISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX1064DAGISEL-NEXT: s_endpgm
@@ -245,7 +245,6 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1032DAGISEL-LABEL: divergent_value:
; GFX1032DAGISEL: ; %bb.0: ; %entry
; GFX1032DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1032DAGISEL-NEXT: s_mov_b32 s3, exec_lo
; GFX1032DAGISEL-NEXT: s_mov_b32 s2, -1
; GFX1032DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
@@ -257,6 +256,7 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX1032DAGISEL-NEXT: ; %bb.2:
; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, s2
+; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032DAGISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX1032DAGISEL-NEXT: s_endpgm
@@ -283,7 +283,6 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1164DAGISEL-LABEL: divergent_value:
; GFX1164DAGISEL: ; %bb.0: ; %entry
; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1164DAGISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX1164DAGISEL-NEXT: s_mov_b32 s4, -1
@@ -297,6 +296,7 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX1164DAGISEL-NEXT: ; %bb.2:
; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, s4
+; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164DAGISEL-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX1164DAGISEL-NEXT: s_endpgm
@@ -325,7 +325,7 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1132DAGISEL-LABEL: divergent_value:
; GFX1132DAGISEL: ; %bb.0: ; %entry
; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0x3ff, v0
+; GFX1132DAGISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX1132DAGISEL-NEXT: s_mov_b32 s3, exec_lo
; GFX1132DAGISEL-NEXT: s_mov_b32 s2, -1
; GFX1132DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
@@ -337,7 +337,7 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1132DAGISEL-NEXT: s_cmp_lg_u32 s3, 0
; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX1132DAGISEL-NEXT: ; %bb.2:
-; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, s2
+; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 0
; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132DAGISEL-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX1132DAGISEL-NEXT: s_endpgm
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.xor.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.xor.ll
index d5f1750c268ab..23f46ab185f98 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.xor.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.xor.ll
@@ -244,7 +244,6 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX9DAGISEL-LABEL: divergent_value:
; GFX9DAGISEL: ; %bb.0: ; %entry
; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX9DAGISEL-NEXT: s_mov_b32 s4, 0
; GFX9DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
@@ -256,6 +255,7 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX9DAGISEL-NEXT: ; %bb.2:
; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, s4
+; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX9DAGISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX9DAGISEL-NEXT: s_endpgm
@@ -282,7 +282,6 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1064DAGISEL-LABEL: divergent_value:
; GFX1064DAGISEL: ; %bb.0: ; %entry
; GFX1064DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX1064DAGISEL-NEXT: s_mov_b32 s4, 0
; GFX1064DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
@@ -294,6 +293,7 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX1064DAGISEL-NEXT: ; %bb.2:
; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, s4
+; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064DAGISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX1064DAGISEL-NEXT: s_endpgm
@@ -320,7 +320,6 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1032DAGISEL-LABEL: divergent_value:
; GFX1032DAGISEL: ; %bb.0: ; %entry
; GFX1032DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1032DAGISEL-NEXT: s_mov_b32 s3, exec_lo
; GFX1032DAGISEL-NEXT: s_mov_b32 s2, 0
; GFX1032DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
@@ -332,6 +331,7 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX1032DAGISEL-NEXT: ; %bb.2:
; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, s2
+; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032DAGISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX1032DAGISEL-NEXT: s_endpgm
@@ -358,7 +358,6 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1164DAGISEL-LABEL: divergent_value:
; GFX1164DAGISEL: ; %bb.0: ; %entry
; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1164DAGISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX1164DAGISEL-NEXT: s_mov_b32 s4, 0
@@ -372,6 +371,7 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX1164DAGISEL-NEXT: ; %bb.2:
; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, s4
+; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164DAGISEL-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX1164DAGISEL-NEXT: s_endpgm
@@ -400,7 +400,7 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1132DAGISEL-LABEL: divergent_value:
; GFX1132DAGISEL: ; %bb.0: ; %entry
; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0x3ff, v0
+; GFX1132DAGISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX1132DAGISEL-NEXT: s_mov_b32 s3, exec_lo
; GFX1132DAGISEL-NEXT: s_mov_b32 s2, 0
; GFX1132DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
@@ -412,7 +412,7 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1132DAGISEL-NEXT: s_cmp_lg_u32 s3, 0
; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX1132DAGISEL-NEXT: ; %bb.2:
-; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, s2
+; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 0
; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132DAGISEL-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX1132DAGISEL-NEXT: s_endpgm
diff --git a/llvm/test/CodeGen/AMDGPU/machine-scheduler-rematerialization-scoring.mir b/llvm/test/CodeGen/AMDGPU/machine-scheduler-rematerialization-scoring.mir
new file mode 100644
index 0000000000000..2fd9abace39a8
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/machine-scheduler-rematerialization-scoring.mir
@@ -0,0 +1,535 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn -mcpu=gfx908 -run-pass=machine-scheduler -amdgpu-disable-unclustered-high-rp-reschedule -verify-machineinstrs %s -o - | FileCheck %s
+
+# All tests are almost identical, the only differences being that some
+# VGPR-defining instructions are progressively made artificially
+# unrematerializable with an implicit def to test rematerialization
+# priorities. The CFG is the following for all tests in the file.
+#
+# +---+
+# | 0 |
+# +---+
+# |
+# v
+# +---+
+# +------>| 1 |-----+
+# | +---+ |
+# | | v
+# | | +---+
+# | | | 2 |
+# | | +-+-+
+# | v |
+# +---+ +---+ |
+# | 4 |<----| 3 |<----+
+# +---+ +---+
+# |
+# v
+# +---+
+# | 5 |
+# +---+
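+#
+# With the branch weights used in the tests (bb.3 branches to bb.4 with
+# weight 0x7c000000 out of 0x80000000, and bb.1 branches to bb.2 with even
+# weights), the loop blocks bb.1, bb.3, and bb.4 are expected to execute
+# roughly 32 times per function execution, bb.2 about half as often, and
+# bb.0 and bb.5 exactly once.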
+
+--- |
+ define void @favor_always_benef() {
+ ret void
+ }
+ define void @favor_live_through_in_high_freq_region() {
+ ret void
+ }
+ define void @use_only_region_possible() {
+ ret void
+ }
+---
+# Rematerializing %32 is always beneficial because the defining and using
+# regions have the same frequency. It should be rematerialized first.
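+# Its defining region (bb.0) and its use in bb.5 each execute exactly once,
+# so sinking the definition cannot increase the dynamic instruction count.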
+name: favor_always_benef
+tracksRegLiveness: true
+machineFunctionInfo:
+ isEntryFunction: true
+body: |
+ ; CHECK-LABEL: name: favor_always_benef
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: liveins: $vgpr0, $sgpr0_sgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %mem_addr:sgpr_64(p4) = COPY $sgpr0_sgpr1
+ ; CHECK-NEXT: %loop_if_bound:vgpr_32(s32) = COPY $vgpr0
+ ; CHECK-NEXT: %mem_data:sreg_64_xexec = S_LOAD_DWORDX2_IMM %mem_addr(p4), 52, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4)
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 0, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode
+ ; CHECK-NEXT: %exec_loop_mask:sreg_64 = V_CMP_GT_U32_e64 %mem_data.sub0, %loop_if_bound(s32), implicit $exec
+ ; CHECK-NEXT: %loop_counter:sreg_32 = COPY %mem_data.sub1
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_3:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_4:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 4, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_5:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 5, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_6:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 6, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_7:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 7, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_8:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 8, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_9:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 9, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_10:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 10, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_11:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 11, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_12:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 12, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_13:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 13, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_14:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 14, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_15:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 15, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_16:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 16, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_17:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 17, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_18:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 18, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_19:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 19, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_20:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 20, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_21:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 21, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 25, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_26:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 26, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 27, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 28, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 29, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 30, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 31, implicit $exec, implicit $mode
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %exec_save_if:sreg_64 = COPY $exec, implicit-def $exec
+ ; CHECK-NEXT: %exec_if:sreg_64 = S_AND_B64 %exec_save_if, %exec_loop_mask, implicit-def dead $scc
+ ; CHECK-NEXT: $exec = S_MOV_B64_term %exec_if
+ ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.3, implicit $exec
+ ; CHECK-NEXT: S_BRANCH %bb.2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_24]], implicit [[V_CVT_I32_F64_e32_25]], implicit [[V_CVT_I32_F64_e32_26]], implicit [[V_CVT_I32_F64_e32_27]], implicit [[V_CVT_I32_F64_e32_28]], implicit [[V_CVT_I32_F64_e32_29]], implicit [[V_CVT_I32_F64_e32_30]], implicit [[V_CVT_I32_F64_e32_31]]
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3:
+ ; CHECK-NEXT: successors: %bb.4(0x7c000000), %bb.5(0x04000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $exec = S_OR_B64 $exec, %exec_save_if, implicit-def $scc
+ ; CHECK-NEXT: %loop_counter:sreg_32 = S_ADD_I32 %loop_counter, -1, implicit-def dead $scc
+ ; CHECK-NEXT: S_CMP_LG_U32 %loop_counter, 0, implicit-def $scc
+ ; CHECK-NEXT: S_CBRANCH_SCC0 %bb.5, implicit killed $scc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.4:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_]], implicit [[V_CVT_I32_F64_e32_1]], implicit [[V_CVT_I32_F64_e32_2]], implicit [[V_CVT_I32_F64_e32_3]], implicit [[V_CVT_I32_F64_e32_4]], implicit [[V_CVT_I32_F64_e32_5]], implicit [[V_CVT_I32_F64_e32_6]], implicit [[V_CVT_I32_F64_e32_7]]
+ ; CHECK-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_8]], implicit [[V_CVT_I32_F64_e32_9]], implicit [[V_CVT_I32_F64_e32_10]], implicit [[V_CVT_I32_F64_e32_11]], implicit [[V_CVT_I32_F64_e32_12]], implicit [[V_CVT_I32_F64_e32_13]], implicit [[V_CVT_I32_F64_e32_14]], implicit [[V_CVT_I32_F64_e32_15]]
+ ; CHECK-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_16]], implicit [[V_CVT_I32_F64_e32_17]], implicit [[V_CVT_I32_F64_e32_18]], implicit [[V_CVT_I32_F64_e32_19]], implicit [[V_CVT_I32_F64_e32_20]], implicit [[V_CVT_I32_F64_e32_21]], implicit [[V_CVT_I32_F64_e32_22]], implicit [[V_CVT_I32_F64_e32_23]]
+ ; CHECK-NEXT: S_BRANCH %bb.1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.5:
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_32:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 32, implicit $exec, implicit $mode
+ ; CHECK-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_32]]
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ liveins: $vgpr0, $sgpr0_sgpr1
+
+ %mem_addr:sgpr_64(p4) = COPY $sgpr0_sgpr1
+ %loop_if_bound:vgpr_32(s32) = COPY $vgpr0
+ %mem_data:sreg_64_xexec = S_LOAD_DWORDX2_IMM %mem_addr(p4), 52, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4)
+ %exec_loop_mask:sreg_64 = V_CMP_GT_U32_e64 %mem_data.sub0, killed %loop_if_bound(s32), implicit $exec
+ %loop_counter:sreg_32 = COPY %mem_data.sub1
+
+ %0:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 0, implicit $exec, implicit $mode
+ %1:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode
+ %2:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode
+ %3:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode
+ %4:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 4, implicit $exec, implicit $mode
+ %5:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 5, implicit $exec, implicit $mode
+ %6:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 6, implicit $exec, implicit $mode
+ %7:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 7, implicit $exec, implicit $mode
+ %8:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 8, implicit $exec, implicit $mode
+ %9:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 9, implicit $exec, implicit $mode
+ %10:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 10, implicit $exec, implicit $mode
+ %11:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 11, implicit $exec, implicit $mode
+ %12:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 12, implicit $exec, implicit $mode
+ %13:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 13, implicit $exec, implicit $mode
+ %14:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 14, implicit $exec, implicit $mode
+ %15:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 15, implicit $exec, implicit $mode
+ %16:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 16, implicit $exec, implicit $mode
+ %17:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 17, implicit $exec, implicit $mode
+ %18:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 18, implicit $exec, implicit $mode
+ %19:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 19, implicit $exec, implicit $mode
+ %20:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 20, implicit $exec, implicit $mode
+ %21:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 21, implicit $exec, implicit $mode
+ %22:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode
+ %23:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode
+ %24:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode
+ %25:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 25, implicit $exec, implicit $mode
+ %26:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 26, implicit $exec, implicit $mode
+ %27:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 27, implicit $exec, implicit $mode
+ %28:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 28, implicit $exec, implicit $mode
+ %29:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 29, implicit $exec, implicit $mode
+ %30:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 30, implicit $exec, implicit $mode
+ %31:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 31, implicit $exec, implicit $mode
+ %32:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 32, implicit $exec, implicit $mode
+
+ bb.1:
+ successors: %bb.2, %bb.3
+
+ %exec_save_if:sreg_64 = COPY $exec, implicit-def $exec
+ %exec_if:sreg_64 = S_AND_B64 %exec_save_if, %exec_loop_mask, implicit-def dead $scc
+ $exec = S_MOV_B64_term %exec_if
+ S_CBRANCH_EXECZ %bb.3, implicit $exec
+ S_BRANCH %bb.2
+
+ bb.2:
+ successors: %bb.3
+
+ S_NOP 0, implicit %24, implicit %25, implicit %26, implicit %27, implicit %28, implicit %29, implicit %30, implicit %31
+
+ bb.3:
+ successors: %bb.4(0x7c000000), %bb.5(0x04000000)
+
+ $exec = S_OR_B64 $exec, %exec_save_if, implicit-def $scc
+ %loop_counter:sreg_32 = S_ADD_I32 %loop_counter, -1, implicit-def dead $scc
+ S_CMP_LG_U32 %loop_counter, 0, implicit-def $scc
+ S_CBRANCH_SCC0 %bb.5, implicit killed $scc
+
+ bb.4:
+ successors: %bb.1
+
+ S_NOP 0, implicit %0, implicit %1, implicit %2, implicit %3, implicit %4, implicit %5, implicit %6, implicit %7
+ S_NOP 0, implicit %8, implicit %9, implicit %10, implicit %11, implicit %12, implicit %13, implicit %14, implicit %15
+ S_NOP 0, implicit %16, implicit %17, implicit %18, implicit %19, implicit %20, implicit %21, implicit %22, implicit %23
+
+ S_BRANCH %bb.1
+
+ bb.5:
+
+ S_NOP 0, implicit %32
+
+ S_ENDPGM 0
+...
+---
+# Rematerializing the registers used in bb.2 is more beneficial than
+# rematerializing those used in bb.4: the former are live through the
+# higher-frequency region (bb.4), which contributes more to the score.
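+# For instance, with the branch weights below bb.4 is expected to execute
+# roughly 32 times per function execution while bb.2 only executes about
+# half as often, so removing a live-through VGPR from bb.4 saves more
+# frequency-weighted register pressure than removing one from bb.2.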
+name: favor_live_through_in_high_freq_region
+tracksRegLiveness: true
+machineFunctionInfo:
+ isEntryFunction: true
+body: |
+ ; CHECK-LABEL: name: favor_live_through_in_high_freq_region
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: liveins: $vgpr0, $sgpr0_sgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %mem_addr:sgpr_64(p4) = COPY $sgpr0_sgpr1
+ ; CHECK-NEXT: %loop_if_bound:vgpr_32(s32) = COPY $vgpr0
+ ; CHECK-NEXT: %mem_data:sreg_64_xexec = S_LOAD_DWORDX2_IMM %mem_addr(p4), 52, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4)
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 0, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode
+ ; CHECK-NEXT: %exec_loop_mask:sreg_64 = V_CMP_GT_U32_e64 %mem_data.sub0, %loop_if_bound(s32), implicit $exec
+ ; CHECK-NEXT: %loop_counter:sreg_32 = COPY %mem_data.sub1
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_3:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_4:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 4, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_5:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 5, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_6:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 6, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_7:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 7, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_8:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 8, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_9:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 9, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_10:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 10, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_11:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 11, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_12:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 12, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_13:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 13, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_14:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 14, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_15:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 15, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_16:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 16, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_17:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 17, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_18:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 18, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_19:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 19, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_20:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 20, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_21:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 21, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 25, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 26, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_26:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 27, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 28, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 29, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 30, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 32, implicit $exec, implicit $mode, implicit-def $m0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %exec_save_if:sreg_64 = COPY $exec, implicit-def $exec
+ ; CHECK-NEXT: %exec_if:sreg_64 = S_AND_B64 %exec_save_if, %exec_loop_mask, implicit-def dead $scc
+ ; CHECK-NEXT: $exec = S_MOV_B64_term %exec_if
+ ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.3, implicit $exec
+ ; CHECK-NEXT: S_BRANCH %bb.2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 31, implicit $exec, implicit $mode
+ ; CHECK-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_23]], implicit [[V_CVT_I32_F64_e32_24]], implicit [[V_CVT_I32_F64_e32_25]], implicit [[V_CVT_I32_F64_e32_26]], implicit [[V_CVT_I32_F64_e32_27]], implicit [[V_CVT_I32_F64_e32_28]], implicit [[V_CVT_I32_F64_e32_29]], implicit [[V_CVT_I32_F64_e32_31]]
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3:
+ ; CHECK-NEXT: successors: %bb.4(0x7c000000), %bb.5(0x04000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $exec = S_OR_B64 $exec, %exec_save_if, implicit-def $scc
+ ; CHECK-NEXT: %loop_counter:sreg_32 = S_ADD_I32 %loop_counter, -1, implicit-def dead $scc
+ ; CHECK-NEXT: S_CMP_LG_U32 %loop_counter, 0, implicit-def $scc
+ ; CHECK-NEXT: S_CBRANCH_SCC0 %bb.5, implicit killed $scc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.4:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_32:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode
+ ; CHECK-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_]], implicit [[V_CVT_I32_F64_e32_1]], implicit [[V_CVT_I32_F64_e32_2]], implicit [[V_CVT_I32_F64_e32_3]], implicit [[V_CVT_I32_F64_e32_4]], implicit [[V_CVT_I32_F64_e32_5]], implicit [[V_CVT_I32_F64_e32_6]], implicit [[V_CVT_I32_F64_e32_7]]
+ ; CHECK-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_8]], implicit [[V_CVT_I32_F64_e32_9]], implicit [[V_CVT_I32_F64_e32_10]], implicit [[V_CVT_I32_F64_e32_11]], implicit [[V_CVT_I32_F64_e32_12]], implicit [[V_CVT_I32_F64_e32_13]], implicit [[V_CVT_I32_F64_e32_14]], implicit [[V_CVT_I32_F64_e32_15]]
+ ; CHECK-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_16]], implicit [[V_CVT_I32_F64_e32_17]], implicit [[V_CVT_I32_F64_e32_18]], implicit [[V_CVT_I32_F64_e32_19]], implicit [[V_CVT_I32_F64_e32_20]], implicit [[V_CVT_I32_F64_e32_21]], implicit [[V_CVT_I32_F64_e32_22]], implicit [[V_CVT_I32_F64_e32_32]]
+ ; CHECK-NEXT: S_BRANCH %bb.1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.5:
+ ; CHECK-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_30]]
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ liveins: $vgpr0, $sgpr0_sgpr1
+
+ %mem_addr:sgpr_64(p4) = COPY $sgpr0_sgpr1
+ %loop_if_bound:vgpr_32(s32) = COPY $vgpr0
+ %mem_data:sreg_64_xexec = S_LOAD_DWORDX2_IMM %mem_addr(p4), 52, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4)
+ %exec_loop_mask:sreg_64 = V_CMP_GT_U32_e64 %mem_data.sub0, killed %loop_if_bound(s32), implicit $exec
+ %loop_counter:sreg_32 = COPY %mem_data.sub1
+
+ %0:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 0, implicit $exec, implicit $mode
+ %1:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode
+ %2:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode
+ %3:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode
+ %4:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 4, implicit $exec, implicit $mode
+ %5:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 5, implicit $exec, implicit $mode
+ %6:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 6, implicit $exec, implicit $mode
+ %7:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 7, implicit $exec, implicit $mode
+ %8:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 8, implicit $exec, implicit $mode
+ %9:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 9, implicit $exec, implicit $mode
+ %10:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 10, implicit $exec, implicit $mode
+ %11:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 11, implicit $exec, implicit $mode
+ %12:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 12, implicit $exec, implicit $mode
+ %13:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 13, implicit $exec, implicit $mode
+ %14:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 14, implicit $exec, implicit $mode
+ %15:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 15, implicit $exec, implicit $mode
+ %16:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 16, implicit $exec, implicit $mode
+ %17:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 17, implicit $exec, implicit $mode
+ %18:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 18, implicit $exec, implicit $mode
+ %19:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 19, implicit $exec, implicit $mode
+ %20:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 20, implicit $exec, implicit $mode
+ %21:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 21, implicit $exec, implicit $mode
+ %22:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode
+ %23:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode
+ %24:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode
+ %25:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 25, implicit $exec, implicit $mode
+ %26:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 26, implicit $exec, implicit $mode
+ %27:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 27, implicit $exec, implicit $mode
+ %28:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 28, implicit $exec, implicit $mode
+ %29:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 29, implicit $exec, implicit $mode
+ %30:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 30, implicit $exec, implicit $mode
+ %31:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 31, implicit $exec, implicit $mode
+ %32:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 32, implicit $exec, implicit $mode, implicit-def $m0
+
+ bb.1:
+ successors: %bb.2, %bb.3
+
+ %exec_save_if:sreg_64 = COPY $exec, implicit-def $exec
+ %exec_if:sreg_64 = S_AND_B64 %exec_save_if, %exec_loop_mask, implicit-def dead $scc
+ $exec = S_MOV_B64_term %exec_if
+ S_CBRANCH_EXECZ %bb.3, implicit $exec
+ S_BRANCH %bb.2
+
+ bb.2:
+ successors: %bb.3
+
+ S_NOP 0, implicit %24, implicit %25, implicit %26, implicit %27, implicit %28, implicit %29, implicit %30, implicit %31
+
+ bb.3:
+ successors: %bb.4(0x7c000000), %bb.5(0x04000000)
+
+ $exec = S_OR_B64 $exec, %exec_save_if, implicit-def $scc
+ %loop_counter:sreg_32 = S_ADD_I32 %loop_counter, -1, implicit-def dead $scc
+ S_CMP_LG_U32 %loop_counter, 0, implicit-def $scc
+ S_CBRANCH_SCC0 %bb.5, implicit killed $scc
+
+ bb.4:
+ successors: %bb.1
+
+ S_NOP 0, implicit %0, implicit %1, implicit %2, implicit %3, implicit %4, implicit %5, implicit %6, implicit %7
+ S_NOP 0, implicit %8, implicit %9, implicit %10, implicit %11, implicit %12, implicit %13, implicit %14, implicit %15
+ S_NOP 0, implicit %16, implicit %17, implicit %18, implicit %19, implicit %20, implicit %21, implicit %22, implicit %23
+
+ S_BRANCH %bb.1
+
+ bb.5:
+
+ S_NOP 0, implicit %32
+
+ S_ENDPGM 0
+...
+---
+# Rematerializing registers used in bb.4 is the only option.
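+# All other VGPR-defining instructions (%24 through %32) carry an artificial
+# implicit-def of $m0 and are therefore not rematerialization candidates,
+# leaving only the registers used in bb.4 (%0 through %23).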
+name: use_only_region_possible
+tracksRegLiveness: true
+machineFunctionInfo:
+ isEntryFunction: true
+body: |
+ ; CHECK-LABEL: name: use_only_region_possible
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: liveins: $vgpr0, $sgpr0_sgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %mem_addr:sgpr_64(p4) = COPY $sgpr0_sgpr1
+ ; CHECK-NEXT: %loop_if_bound:vgpr_32(s32) = COPY $vgpr0
+ ; CHECK-NEXT: %mem_data:sreg_64_xexec = S_LOAD_DWORDX2_IMM %mem_addr(p4), 52, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4)
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 0, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_3:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_4:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 4, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_5:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 5, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_6:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 6, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_7:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 7, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_8:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 8, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_9:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 9, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_10:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 10, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_11:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 11, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_12:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 12, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_13:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 13, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_14:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 14, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_15:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_16:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 25, implicit $exec, implicit $mode, implicit-def $m0
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_17:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 26, implicit $exec, implicit $mode, implicit-def $m0
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_18:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 27, implicit $exec, implicit $mode, implicit-def $m0
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_19:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 28, implicit $exec, implicit $mode, implicit-def $m0
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_20:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 29, implicit $exec, implicit $mode, implicit-def $m0
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_21:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 30, implicit $exec, implicit $mode, implicit-def $m0
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 31, implicit $exec, implicit $mode, implicit-def $m0
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 32, implicit $exec, implicit $mode, implicit-def $m0
+ ; CHECK-NEXT: %exec_loop_mask:sreg_64 = V_CMP_GT_U32_e64 %mem_data.sub0, %loop_if_bound(s32), implicit $exec
+ ; CHECK-NEXT: %loop_counter:sreg_32 = COPY %mem_data.sub1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %exec_save_if:sreg_64 = COPY $exec, implicit-def $exec
+ ; CHECK-NEXT: %exec_if:sreg_64 = S_AND_B64 %exec_save_if, %exec_loop_mask, implicit-def dead $scc
+ ; CHECK-NEXT: $exec = S_MOV_B64_term %exec_if
+ ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.3, implicit $exec
+ ; CHECK-NEXT: S_BRANCH %bb.2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_15]], implicit [[V_CVT_I32_F64_e32_16]], implicit [[V_CVT_I32_F64_e32_17]], implicit [[V_CVT_I32_F64_e32_18]], implicit [[V_CVT_I32_F64_e32_19]], implicit [[V_CVT_I32_F64_e32_20]], implicit [[V_CVT_I32_F64_e32_21]], implicit [[V_CVT_I32_F64_e32_22]]
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3:
+ ; CHECK-NEXT: successors: %bb.4(0x7c000000), %bb.5(0x04000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $exec = S_OR_B64 $exec, %exec_save_if, implicit-def $scc
+ ; CHECK-NEXT: %loop_counter:sreg_32 = S_ADD_I32 %loop_counter, -1, implicit-def dead $scc
+ ; CHECK-NEXT: S_CMP_LG_U32 %loop_counter, 0, implicit-def $scc
+ ; CHECK-NEXT: S_CBRANCH_SCC0 %bb.5, implicit killed $scc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.4:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_]], implicit [[V_CVT_I32_F64_e32_1]], implicit [[V_CVT_I32_F64_e32_2]], implicit [[V_CVT_I32_F64_e32_3]], implicit [[V_CVT_I32_F64_e32_4]], implicit [[V_CVT_I32_F64_e32_5]], implicit [[V_CVT_I32_F64_e32_6]], implicit [[V_CVT_I32_F64_e32_7]]
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 15, implicit $exec, implicit $mode
+ ; CHECK-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_8]], implicit [[V_CVT_I32_F64_e32_9]], implicit [[V_CVT_I32_F64_e32_10]], implicit [[V_CVT_I32_F64_e32_11]], implicit [[V_CVT_I32_F64_e32_12]], implicit [[V_CVT_I32_F64_e32_13]], implicit [[V_CVT_I32_F64_e32_14]], implicit [[V_CVT_I32_F64_e32_24]]
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_26:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 21, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 20, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 19, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 18, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 17, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_32:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 16, implicit $exec, implicit $mode
+ ; CHECK-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_32]], implicit [[V_CVT_I32_F64_e32_31]], implicit [[V_CVT_I32_F64_e32_30]], implicit [[V_CVT_I32_F64_e32_29]], implicit [[V_CVT_I32_F64_e32_28]], implicit [[V_CVT_I32_F64_e32_27]], implicit [[V_CVT_I32_F64_e32_26]], implicit [[V_CVT_I32_F64_e32_25]]
+ ; CHECK-NEXT: S_BRANCH %bb.1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.5:
+ ; CHECK-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_23]]
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ liveins: $vgpr0, $sgpr0_sgpr1
+
+ %mem_addr:sgpr_64(p4) = COPY $sgpr0_sgpr1
+ %loop_if_bound:vgpr_32(s32) = COPY $vgpr0
+ %mem_data:sreg_64_xexec = S_LOAD_DWORDX2_IMM %mem_addr(p4), 52, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4)
+ %exec_loop_mask:sreg_64 = V_CMP_GT_U32_e64 %mem_data.sub0, killed %loop_if_bound(s32), implicit $exec
+ %loop_counter:sreg_32 = COPY %mem_data.sub1
+
+ %0:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 0, implicit $exec, implicit $mode
+ %1:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode
+ %2:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode
+ %3:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode
+ %4:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 4, implicit $exec, implicit $mode
+ %5:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 5, implicit $exec, implicit $mode
+ %6:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 6, implicit $exec, implicit $mode
+ %7:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 7, implicit $exec, implicit $mode
+ %8:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 8, implicit $exec, implicit $mode
+ %9:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 9, implicit $exec, implicit $mode
+ %10:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 10, implicit $exec, implicit $mode
+ %11:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 11, implicit $exec, implicit $mode
+ %12:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 12, implicit $exec, implicit $mode
+ %13:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 13, implicit $exec, implicit $mode
+ %14:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 14, implicit $exec, implicit $mode
+ %15:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 15, implicit $exec, implicit $mode
+ %16:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 16, implicit $exec, implicit $mode
+ %17:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 17, implicit $exec, implicit $mode
+ %18:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 18, implicit $exec, implicit $mode
+ %19:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 19, implicit $exec, implicit $mode
+ %20:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 20, implicit $exec, implicit $mode
+ %21:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 21, implicit $exec, implicit $mode
+ %22:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode
+ %23:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode
+ %24:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
+ %25:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 25, implicit $exec, implicit $mode, implicit-def $m0
+ %26:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 26, implicit $exec, implicit $mode, implicit-def $m0
+ %27:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 27, implicit $exec, implicit $mode, implicit-def $m0
+ %28:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 28, implicit $exec, implicit $mode, implicit-def $m0
+ %29:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 29, implicit $exec, implicit $mode, implicit-def $m0
+ %30:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 30, implicit $exec, implicit $mode, implicit-def $m0
+ %31:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 31, implicit $exec, implicit $mode, implicit-def $m0
+ %32:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 32, implicit $exec, implicit $mode, implicit-def $m0
+
+ bb.1:
+ successors: %bb.2, %bb.3
+
+ %exec_save_if:sreg_64 = COPY $exec, implicit-def $exec
+ %exec_if:sreg_64 = S_AND_B64 %exec_save_if, %exec_loop_mask, implicit-def dead $scc
+ $exec = S_MOV_B64_term %exec_if
+ S_CBRANCH_EXECZ %bb.3, implicit $exec
+ S_BRANCH %bb.2
+
+ bb.2:
+ successors: %bb.3
+
+ S_NOP 0, implicit %24, implicit %25, implicit %26, implicit %27, implicit %28, implicit %29, implicit %30, implicit %31
+
+ bb.3:
+ successors: %bb.4(0x7c000000), %bb.5(0x04000000)
+
+ $exec = S_OR_B64 $exec, %exec_save_if, implicit-def $scc
+ %loop_counter:sreg_32 = S_ADD_I32 %loop_counter, -1, implicit-def dead $scc
+ S_CMP_LG_U32 %loop_counter, 0, implicit-def $scc
+ S_CBRANCH_SCC0 %bb.5, implicit killed $scc
+
+ bb.4:
+ successors: %bb.1
+
+ S_NOP 0, implicit %0, implicit %1, implicit %2, implicit %3, implicit %4, implicit %5, implicit %6, implicit %7
+ S_NOP 0, implicit %8, implicit %9, implicit %10, implicit %11, implicit %12, implicit %13, implicit %14, implicit %15
+ S_NOP 0, implicit %16, implicit %17, implicit %18, implicit %19, implicit %20, implicit %21, implicit %22, implicit %23
+
+ S_BRANCH %bb.1
+
+ bb.5:
+
+ S_NOP 0, implicit %32
+
+ S_ENDPGM 0
+...
diff --git a/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats-attr.mir b/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats-attr.mir
index 3b3ea3f37db80..7f14afce3ff96 100644
--- a/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats-attr.mir
+++ b/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats-attr.mir
@@ -37,208 +37,206 @@ body: |
; GFX908: bb.0:
; GFX908-NEXT: successors: %bb.1(0x80000000)
; GFX908-NEXT: {{ $}}
- ; GFX908-NEXT: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 1
- ; GFX908-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 2
- ; GFX908-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 3
- ; GFX908-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sgpr_32 = S_MOV_B32 4
- ; GFX908-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sgpr_32 = S_MOV_B32 5
- ; GFX908-NEXT: [[S_MOV_B32_5:%[0-9]+]]:sgpr_32 = S_MOV_B32 6
- ; GFX908-NEXT: [[S_MOV_B32_6:%[0-9]+]]:sgpr_32 = S_MOV_B32 7
- ; GFX908-NEXT: [[S_MOV_B32_7:%[0-9]+]]:sgpr_32 = S_MOV_B32 8
- ; GFX908-NEXT: [[S_MOV_B32_8:%[0-9]+]]:sgpr_32 = S_MOV_B32 9
- ; GFX908-NEXT: [[S_MOV_B32_9:%[0-9]+]]:sgpr_32 = S_MOV_B32 10
- ; GFX908-NEXT: [[S_MOV_B32_10:%[0-9]+]]:sgpr_32 = S_MOV_B32 11
- ; GFX908-NEXT: [[S_MOV_B32_11:%[0-9]+]]:sgpr_32 = S_MOV_B32 12
- ; GFX908-NEXT: [[S_MOV_B32_12:%[0-9]+]]:sgpr_32 = S_MOV_B32 13
- ; GFX908-NEXT: [[S_MOV_B32_13:%[0-9]+]]:sgpr_32 = S_MOV_B32 14
- ; GFX908-NEXT: [[S_MOV_B32_14:%[0-9]+]]:sgpr_32 = S_MOV_B32 15
- ; GFX908-NEXT: [[S_MOV_B32_15:%[0-9]+]]:sgpr_32 = S_MOV_B32 16
- ; GFX908-NEXT: [[S_MOV_B32_16:%[0-9]+]]:sgpr_32 = S_MOV_B32 17
- ; GFX908-NEXT: [[S_MOV_B32_17:%[0-9]+]]:sgpr_32 = S_MOV_B32 18
- ; GFX908-NEXT: [[S_MOV_B32_18:%[0-9]+]]:sgpr_32 = S_MOV_B32 19
- ; GFX908-NEXT: [[S_MOV_B32_19:%[0-9]+]]:sgpr_32 = S_MOV_B32 20
- ; GFX908-NEXT: [[S_MOV_B32_20:%[0-9]+]]:sgpr_32 = S_MOV_B32 21
- ; GFX908-NEXT: [[S_MOV_B32_21:%[0-9]+]]:sgpr_32 = S_MOV_B32 22
- ; GFX908-NEXT: [[S_MOV_B32_22:%[0-9]+]]:sgpr_32 = S_MOV_B32 23
- ; GFX908-NEXT: [[S_MOV_B32_23:%[0-9]+]]:sgpr_32 = S_MOV_B32 24
- ; GFX908-NEXT: [[S_MOV_B32_24:%[0-9]+]]:sgpr_32 = S_MOV_B32 25
- ; GFX908-NEXT: [[S_MOV_B32_25:%[0-9]+]]:sgpr_32 = S_MOV_B32 26
- ; GFX908-NEXT: [[S_MOV_B32_26:%[0-9]+]]:sgpr_32 = S_MOV_B32 27
- ; GFX908-NEXT: [[S_MOV_B32_27:%[0-9]+]]:sgpr_32 = S_MOV_B32 28
- ; GFX908-NEXT: [[S_MOV_B32_28:%[0-9]+]]:sgpr_32 = S_MOV_B32 29
- ; GFX908-NEXT: [[S_MOV_B32_29:%[0-9]+]]:sgpr_32 = S_MOV_B32 30
- ; GFX908-NEXT: [[S_MOV_B32_30:%[0-9]+]]:sgpr_32 = S_MOV_B32 31
- ; GFX908-NEXT: [[S_MOV_B32_31:%[0-9]+]]:sgpr_32 = S_MOV_B32 32
- ; GFX908-NEXT: [[S_MOV_B32_32:%[0-9]+]]:sgpr_32 = S_MOV_B32 33
- ; GFX908-NEXT: [[S_MOV_B32_33:%[0-9]+]]:sgpr_32 = S_MOV_B32 34
- ; GFX908-NEXT: [[S_MOV_B32_34:%[0-9]+]]:sgpr_32 = S_MOV_B32 35
- ; GFX908-NEXT: [[S_MOV_B32_35:%[0-9]+]]:sgpr_32 = S_MOV_B32 36
- ; GFX908-NEXT: [[S_MOV_B32_36:%[0-9]+]]:sgpr_32 = S_MOV_B32 37
- ; GFX908-NEXT: [[S_MOV_B32_37:%[0-9]+]]:sgpr_32 = S_MOV_B32 38
- ; GFX908-NEXT: [[S_MOV_B32_38:%[0-9]+]]:sgpr_32 = S_MOV_B32 39
- ; GFX908-NEXT: [[S_MOV_B32_39:%[0-9]+]]:sgpr_32 = S_MOV_B32 40
- ; GFX908-NEXT: [[S_MOV_B32_40:%[0-9]+]]:sgpr_32 = S_MOV_B32 41
- ; GFX908-NEXT: [[S_MOV_B32_41:%[0-9]+]]:sgpr_32 = S_MOV_B32 42
- ; GFX908-NEXT: [[S_MOV_B32_42:%[0-9]+]]:sgpr_32 = S_MOV_B32 43
- ; GFX908-NEXT: [[S_MOV_B32_43:%[0-9]+]]:sgpr_32 = S_MOV_B32 44
- ; GFX908-NEXT: [[S_MOV_B32_44:%[0-9]+]]:sgpr_32 = S_MOV_B32 45
- ; GFX908-NEXT: [[S_MOV_B32_45:%[0-9]+]]:sgpr_32 = S_MOV_B32 46
- ; GFX908-NEXT: [[S_MOV_B32_46:%[0-9]+]]:sgpr_32 = S_MOV_B32 47
- ; GFX908-NEXT: [[S_MOV_B32_47:%[0-9]+]]:sgpr_32 = S_MOV_B32 48
- ; GFX908-NEXT: [[S_MOV_B32_48:%[0-9]+]]:sgpr_32 = S_MOV_B32 49
- ; GFX908-NEXT: [[S_MOV_B32_49:%[0-9]+]]:sgpr_32 = S_MOV_B32 50
- ; GFX908-NEXT: [[S_MOV_B32_50:%[0-9]+]]:sgpr_32 = S_MOV_B32 51
- ; GFX908-NEXT: [[S_MOV_B32_51:%[0-9]+]]:sgpr_32 = S_MOV_B32 52
- ; GFX908-NEXT: [[S_MOV_B32_52:%[0-9]+]]:sgpr_32 = S_MOV_B32 53
- ; GFX908-NEXT: [[S_MOV_B32_53:%[0-9]+]]:sgpr_32 = S_MOV_B32 54
- ; GFX908-NEXT: [[S_MOV_B32_54:%[0-9]+]]:sgpr_32 = S_MOV_B32 55
- ; GFX908-NEXT: [[S_MOV_B32_55:%[0-9]+]]:sgpr_32 = S_MOV_B32 56
- ; GFX908-NEXT: [[S_MOV_B32_56:%[0-9]+]]:sgpr_32 = S_MOV_B32 57
- ; GFX908-NEXT: [[S_MOV_B32_57:%[0-9]+]]:sgpr_32 = S_MOV_B32 58
- ; GFX908-NEXT: [[S_MOV_B32_58:%[0-9]+]]:sgpr_32 = S_MOV_B32 59
- ; GFX908-NEXT: [[S_MOV_B32_59:%[0-9]+]]:sgpr_32 = S_MOV_B32 60
- ; GFX908-NEXT: [[S_MOV_B32_60:%[0-9]+]]:sgpr_32 = S_MOV_B32 61
- ; GFX908-NEXT: [[S_MOV_B32_61:%[0-9]+]]:sgpr_32 = S_MOV_B32 62
- ; GFX908-NEXT: [[S_MOV_B32_62:%[0-9]+]]:sgpr_32 = S_MOV_B32 63
- ; GFX908-NEXT: [[S_MOV_B32_63:%[0-9]+]]:sgpr_32 = S_MOV_B32 64
- ; GFX908-NEXT: [[S_MOV_B32_64:%[0-9]+]]:sgpr_32 = S_MOV_B32 65
- ; GFX908-NEXT: [[S_MOV_B32_65:%[0-9]+]]:sgpr_32 = S_MOV_B32 66
- ; GFX908-NEXT: [[S_MOV_B32_66:%[0-9]+]]:sgpr_32 = S_MOV_B32 67
- ; GFX908-NEXT: [[S_MOV_B32_67:%[0-9]+]]:sgpr_32 = S_MOV_B32 68
- ; GFX908-NEXT: [[S_MOV_B32_68:%[0-9]+]]:sgpr_32 = S_MOV_B32 69
- ; GFX908-NEXT: [[S_MOV_B32_69:%[0-9]+]]:sgpr_32 = S_MOV_B32 70
- ; GFX908-NEXT: [[S_MOV_B32_70:%[0-9]+]]:sgpr_32 = S_MOV_B32 71
- ; GFX908-NEXT: [[S_MOV_B32_71:%[0-9]+]]:sgpr_32 = S_MOV_B32 72
- ; GFX908-NEXT: [[S_MOV_B32_72:%[0-9]+]]:sgpr_32 = S_MOV_B32 73
- ; GFX908-NEXT: [[S_MOV_B32_73:%[0-9]+]]:sgpr_32 = S_MOV_B32 74
- ; GFX908-NEXT: [[S_MOV_B32_74:%[0-9]+]]:sgpr_32 = S_MOV_B32 75
- ; GFX908-NEXT: [[S_MOV_B32_75:%[0-9]+]]:sgpr_32 = S_MOV_B32 76
- ; GFX908-NEXT: [[S_MOV_B32_76:%[0-9]+]]:sgpr_32 = S_MOV_B32 77
- ; GFX908-NEXT: [[S_MOV_B32_77:%[0-9]+]]:sgpr_32 = S_MOV_B32 78
- ; GFX908-NEXT: [[S_MOV_B32_78:%[0-9]+]]:sgpr_32 = S_MOV_B32 79
- ; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.1:
- ; GFX908-NEXT: [[S_MOV_B32_79:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_79]], implicit [[S_MOV_B32_]], implicit [[S_MOV_B32_1]], implicit [[S_MOV_B32_2]], implicit [[S_MOV_B32_3]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_4]], implicit [[S_MOV_B32_5]], implicit [[S_MOV_B32_6]], implicit [[S_MOV_B32_7]], implicit [[S_MOV_B32_8]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_9]], implicit [[S_MOV_B32_10]], implicit [[S_MOV_B32_11]], implicit [[S_MOV_B32_12]], implicit [[S_MOV_B32_13]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_14]], implicit [[S_MOV_B32_15]], implicit [[S_MOV_B32_16]], implicit [[S_MOV_B32_17]], implicit [[S_MOV_B32_18]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_19]], implicit [[S_MOV_B32_20]], implicit [[S_MOV_B32_21]], implicit [[S_MOV_B32_22]], implicit [[S_MOV_B32_23]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_24]], implicit [[S_MOV_B32_25]], implicit [[S_MOV_B32_26]], implicit [[S_MOV_B32_27]], implicit [[S_MOV_B32_28]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_29]], implicit [[S_MOV_B32_30]], implicit [[S_MOV_B32_31]], implicit [[S_MOV_B32_32]], implicit [[S_MOV_B32_33]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_34]], implicit [[S_MOV_B32_35]], implicit [[S_MOV_B32_36]], implicit [[S_MOV_B32_37]], implicit [[S_MOV_B32_38]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_39]], implicit [[S_MOV_B32_40]], implicit [[S_MOV_B32_41]], implicit [[S_MOV_B32_42]], implicit [[S_MOV_B32_43]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_44]], implicit [[S_MOV_B32_45]], implicit [[S_MOV_B32_46]], implicit [[S_MOV_B32_47]], implicit [[S_MOV_B32_48]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_49]], implicit [[S_MOV_B32_50]], implicit [[S_MOV_B32_51]], implicit [[S_MOV_B32_52]], implicit [[S_MOV_B32_53]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_54]], implicit [[S_MOV_B32_55]], implicit [[S_MOV_B32_56]], implicit [[S_MOV_B32_57]], implicit [[S_MOV_B32_58]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_59]], implicit [[S_MOV_B32_60]], implicit [[S_MOV_B32_61]], implicit [[S_MOV_B32_62]], implicit [[S_MOV_B32_63]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_64]], implicit [[S_MOV_B32_65]], implicit [[S_MOV_B32_66]], implicit [[S_MOV_B32_67]], implicit [[S_MOV_B32_68]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_69]], implicit [[S_MOV_B32_70]], implicit [[S_MOV_B32_71]], implicit [[S_MOV_B32_72]], implicit [[S_MOV_B32_73]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_74]], implicit [[S_MOV_B32_75]], implicit [[S_MOV_B32_76]], implicit [[S_MOV_B32_77]], implicit [[S_MOV_B32_78]]
+ ; GFX908-NEXT: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
+ ; GFX908-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 1
+ ; GFX908-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 2
+ ; GFX908-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sgpr_32 = S_MOV_B32 3
+ ; GFX908-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sgpr_32 = S_MOV_B32 4
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_]], implicit [[S_MOV_B32_1]], implicit [[S_MOV_B32_2]], implicit [[S_MOV_B32_3]], implicit [[S_MOV_B32_4]]
+ ; GFX908-NEXT: [[S_MOV_B32_5:%[0-9]+]]:sgpr_32 = S_MOV_B32 5
+ ; GFX908-NEXT: [[S_MOV_B32_6:%[0-9]+]]:sgpr_32 = S_MOV_B32 6
+ ; GFX908-NEXT: [[S_MOV_B32_7:%[0-9]+]]:sgpr_32 = S_MOV_B32 7
+ ; GFX908-NEXT: [[S_MOV_B32_8:%[0-9]+]]:sgpr_32 = S_MOV_B32 8
+ ; GFX908-NEXT: [[S_MOV_B32_9:%[0-9]+]]:sgpr_32 = S_MOV_B32 9
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_5]], implicit [[S_MOV_B32_6]], implicit [[S_MOV_B32_7]], implicit [[S_MOV_B32_8]], implicit [[S_MOV_B32_9]]
+ ; GFX908-NEXT: [[S_MOV_B32_10:%[0-9]+]]:sgpr_32 = S_MOV_B32 10
+ ; GFX908-NEXT: [[S_MOV_B32_11:%[0-9]+]]:sgpr_32 = S_MOV_B32 11
+ ; GFX908-NEXT: [[S_MOV_B32_12:%[0-9]+]]:sgpr_32 = S_MOV_B32 12
+ ; GFX908-NEXT: [[S_MOV_B32_13:%[0-9]+]]:sgpr_32 = S_MOV_B32 13
+ ; GFX908-NEXT: [[S_MOV_B32_14:%[0-9]+]]:sgpr_32 = S_MOV_B32 14
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_10]], implicit [[S_MOV_B32_11]], implicit [[S_MOV_B32_12]], implicit [[S_MOV_B32_13]], implicit [[S_MOV_B32_14]]
+ ; GFX908-NEXT: [[S_MOV_B32_15:%[0-9]+]]:sgpr_32 = S_MOV_B32 15
+ ; GFX908-NEXT: [[S_MOV_B32_16:%[0-9]+]]:sgpr_32 = S_MOV_B32 16
+ ; GFX908-NEXT: [[S_MOV_B32_17:%[0-9]+]]:sgpr_32 = S_MOV_B32 17
+ ; GFX908-NEXT: [[S_MOV_B32_18:%[0-9]+]]:sgpr_32 = S_MOV_B32 18
+ ; GFX908-NEXT: [[S_MOV_B32_19:%[0-9]+]]:sgpr_32 = S_MOV_B32 19
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_15]], implicit [[S_MOV_B32_16]], implicit [[S_MOV_B32_17]], implicit [[S_MOV_B32_18]], implicit [[S_MOV_B32_19]]
+ ; GFX908-NEXT: [[S_MOV_B32_20:%[0-9]+]]:sgpr_32 = S_MOV_B32 20
+ ; GFX908-NEXT: [[S_MOV_B32_21:%[0-9]+]]:sgpr_32 = S_MOV_B32 21
+ ; GFX908-NEXT: [[S_MOV_B32_22:%[0-9]+]]:sgpr_32 = S_MOV_B32 22
+ ; GFX908-NEXT: [[S_MOV_B32_23:%[0-9]+]]:sgpr_32 = S_MOV_B32 23
+ ; GFX908-NEXT: [[S_MOV_B32_24:%[0-9]+]]:sgpr_32 = S_MOV_B32 24
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_20]], implicit [[S_MOV_B32_21]], implicit [[S_MOV_B32_22]], implicit [[S_MOV_B32_23]], implicit [[S_MOV_B32_24]]
+ ; GFX908-NEXT: [[S_MOV_B32_25:%[0-9]+]]:sgpr_32 = S_MOV_B32 25
+ ; GFX908-NEXT: [[S_MOV_B32_26:%[0-9]+]]:sgpr_32 = S_MOV_B32 26
+ ; GFX908-NEXT: [[S_MOV_B32_27:%[0-9]+]]:sgpr_32 = S_MOV_B32 27
+ ; GFX908-NEXT: [[S_MOV_B32_28:%[0-9]+]]:sgpr_32 = S_MOV_B32 28
+ ; GFX908-NEXT: [[S_MOV_B32_29:%[0-9]+]]:sgpr_32 = S_MOV_B32 29
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_25]], implicit [[S_MOV_B32_26]], implicit [[S_MOV_B32_27]], implicit [[S_MOV_B32_28]], implicit [[S_MOV_B32_29]]
+ ; GFX908-NEXT: [[S_MOV_B32_30:%[0-9]+]]:sgpr_32 = S_MOV_B32 30
+ ; GFX908-NEXT: [[S_MOV_B32_31:%[0-9]+]]:sgpr_32 = S_MOV_B32 31
+ ; GFX908-NEXT: [[S_MOV_B32_32:%[0-9]+]]:sgpr_32 = S_MOV_B32 32
+ ; GFX908-NEXT: [[S_MOV_B32_33:%[0-9]+]]:sgpr_32 = S_MOV_B32 33
+ ; GFX908-NEXT: [[S_MOV_B32_34:%[0-9]+]]:sgpr_32 = S_MOV_B32 34
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_30]], implicit [[S_MOV_B32_31]], implicit [[S_MOV_B32_32]], implicit [[S_MOV_B32_33]], implicit [[S_MOV_B32_34]]
+ ; GFX908-NEXT: [[S_MOV_B32_35:%[0-9]+]]:sgpr_32 = S_MOV_B32 35
+ ; GFX908-NEXT: [[S_MOV_B32_36:%[0-9]+]]:sgpr_32 = S_MOV_B32 36
+ ; GFX908-NEXT: [[S_MOV_B32_37:%[0-9]+]]:sgpr_32 = S_MOV_B32 37
+ ; GFX908-NEXT: [[S_MOV_B32_38:%[0-9]+]]:sgpr_32 = S_MOV_B32 38
+ ; GFX908-NEXT: [[S_MOV_B32_39:%[0-9]+]]:sgpr_32 = S_MOV_B32 39
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_35]], implicit [[S_MOV_B32_36]], implicit [[S_MOV_B32_37]], implicit [[S_MOV_B32_38]], implicit [[S_MOV_B32_39]]
+ ; GFX908-NEXT: [[S_MOV_B32_40:%[0-9]+]]:sgpr_32 = S_MOV_B32 40
+ ; GFX908-NEXT: [[S_MOV_B32_41:%[0-9]+]]:sgpr_32 = S_MOV_B32 41
+ ; GFX908-NEXT: [[S_MOV_B32_42:%[0-9]+]]:sgpr_32 = S_MOV_B32 42
+ ; GFX908-NEXT: [[S_MOV_B32_43:%[0-9]+]]:sgpr_32 = S_MOV_B32 43
+ ; GFX908-NEXT: [[S_MOV_B32_44:%[0-9]+]]:sgpr_32 = S_MOV_B32 44
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_40]], implicit [[S_MOV_B32_41]], implicit [[S_MOV_B32_42]], implicit [[S_MOV_B32_43]], implicit [[S_MOV_B32_44]]
+ ; GFX908-NEXT: [[S_MOV_B32_45:%[0-9]+]]:sgpr_32 = S_MOV_B32 45
+ ; GFX908-NEXT: [[S_MOV_B32_46:%[0-9]+]]:sgpr_32 = S_MOV_B32 46
+ ; GFX908-NEXT: [[S_MOV_B32_47:%[0-9]+]]:sgpr_32 = S_MOV_B32 47
+ ; GFX908-NEXT: [[S_MOV_B32_48:%[0-9]+]]:sgpr_32 = S_MOV_B32 48
+ ; GFX908-NEXT: [[S_MOV_B32_49:%[0-9]+]]:sgpr_32 = S_MOV_B32 49
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_45]], implicit [[S_MOV_B32_46]], implicit [[S_MOV_B32_47]], implicit [[S_MOV_B32_48]], implicit [[S_MOV_B32_49]]
+ ; GFX908-NEXT: [[S_MOV_B32_50:%[0-9]+]]:sgpr_32 = S_MOV_B32 50
+ ; GFX908-NEXT: [[S_MOV_B32_51:%[0-9]+]]:sgpr_32 = S_MOV_B32 51
+ ; GFX908-NEXT: [[S_MOV_B32_52:%[0-9]+]]:sgpr_32 = S_MOV_B32 52
+ ; GFX908-NEXT: [[S_MOV_B32_53:%[0-9]+]]:sgpr_32 = S_MOV_B32 53
+ ; GFX908-NEXT: [[S_MOV_B32_54:%[0-9]+]]:sgpr_32 = S_MOV_B32 54
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_50]], implicit [[S_MOV_B32_51]], implicit [[S_MOV_B32_52]], implicit [[S_MOV_B32_53]], implicit [[S_MOV_B32_54]]
+ ; GFX908-NEXT: [[S_MOV_B32_55:%[0-9]+]]:sgpr_32 = S_MOV_B32 55
+ ; GFX908-NEXT: [[S_MOV_B32_56:%[0-9]+]]:sgpr_32 = S_MOV_B32 56
+ ; GFX908-NEXT: [[S_MOV_B32_57:%[0-9]+]]:sgpr_32 = S_MOV_B32 57
+ ; GFX908-NEXT: [[S_MOV_B32_58:%[0-9]+]]:sgpr_32 = S_MOV_B32 58
+ ; GFX908-NEXT: [[S_MOV_B32_59:%[0-9]+]]:sgpr_32 = S_MOV_B32 59
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_55]], implicit [[S_MOV_B32_56]], implicit [[S_MOV_B32_57]], implicit [[S_MOV_B32_58]], implicit [[S_MOV_B32_59]]
+ ; GFX908-NEXT: [[S_MOV_B32_60:%[0-9]+]]:sgpr_32 = S_MOV_B32 60
+ ; GFX908-NEXT: [[S_MOV_B32_61:%[0-9]+]]:sgpr_32 = S_MOV_B32 61
+ ; GFX908-NEXT: [[S_MOV_B32_62:%[0-9]+]]:sgpr_32 = S_MOV_B32 62
+ ; GFX908-NEXT: [[S_MOV_B32_63:%[0-9]+]]:sgpr_32 = S_MOV_B32 63
+ ; GFX908-NEXT: [[S_MOV_B32_64:%[0-9]+]]:sgpr_32 = S_MOV_B32 64
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_60]], implicit [[S_MOV_B32_61]], implicit [[S_MOV_B32_62]], implicit [[S_MOV_B32_63]], implicit [[S_MOV_B32_64]]
+ ; GFX908-NEXT: [[S_MOV_B32_65:%[0-9]+]]:sgpr_32 = S_MOV_B32 65
+ ; GFX908-NEXT: [[S_MOV_B32_66:%[0-9]+]]:sgpr_32 = S_MOV_B32 66
+ ; GFX908-NEXT: [[S_MOV_B32_67:%[0-9]+]]:sgpr_32 = S_MOV_B32 67
+ ; GFX908-NEXT: [[S_MOV_B32_68:%[0-9]+]]:sgpr_32 = S_MOV_B32 68
+ ; GFX908-NEXT: [[S_MOV_B32_69:%[0-9]+]]:sgpr_32 = S_MOV_B32 69
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_65]], implicit [[S_MOV_B32_66]], implicit [[S_MOV_B32_67]], implicit [[S_MOV_B32_68]], implicit [[S_MOV_B32_69]]
+ ; GFX908-NEXT: [[S_MOV_B32_70:%[0-9]+]]:sgpr_32 = S_MOV_B32 70
+ ; GFX908-NEXT: [[S_MOV_B32_71:%[0-9]+]]:sgpr_32 = S_MOV_B32 71
+ ; GFX908-NEXT: [[S_MOV_B32_72:%[0-9]+]]:sgpr_32 = S_MOV_B32 72
+ ; GFX908-NEXT: [[S_MOV_B32_73:%[0-9]+]]:sgpr_32 = S_MOV_B32 73
+ ; GFX908-NEXT: [[S_MOV_B32_74:%[0-9]+]]:sgpr_32 = S_MOV_B32 74
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_70]], implicit [[S_MOV_B32_71]], implicit [[S_MOV_B32_72]], implicit [[S_MOV_B32_73]], implicit [[S_MOV_B32_74]]
+ ; GFX908-NEXT: [[S_MOV_B32_75:%[0-9]+]]:sgpr_32 = S_MOV_B32 75
+ ; GFX908-NEXT: [[S_MOV_B32_76:%[0-9]+]]:sgpr_32 = S_MOV_B32 76
+ ; GFX908-NEXT: [[S_MOV_B32_77:%[0-9]+]]:sgpr_32 = S_MOV_B32 77
+ ; GFX908-NEXT: [[S_MOV_B32_78:%[0-9]+]]:sgpr_32 = S_MOV_B32 78
+ ; GFX908-NEXT: [[S_MOV_B32_79:%[0-9]+]]:sgpr_32 = S_MOV_B32 79
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_75]], implicit [[S_MOV_B32_76]], implicit [[S_MOV_B32_77]], implicit [[S_MOV_B32_78]], implicit [[S_MOV_B32_79]]
; GFX908-NEXT: S_ENDPGM 0
;
; GFX90A-LABEL: name: small_num_sgprs_as_spill
; GFX90A: bb.0:
; GFX90A-NEXT: successors: %bb.1(0x80000000)
; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 1
- ; GFX90A-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 2
- ; GFX90A-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 3
- ; GFX90A-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sgpr_32 = S_MOV_B32 4
- ; GFX90A-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sgpr_32 = S_MOV_B32 5
- ; GFX90A-NEXT: [[S_MOV_B32_5:%[0-9]+]]:sgpr_32 = S_MOV_B32 6
- ; GFX90A-NEXT: [[S_MOV_B32_6:%[0-9]+]]:sgpr_32 = S_MOV_B32 7
- ; GFX90A-NEXT: [[S_MOV_B32_7:%[0-9]+]]:sgpr_32 = S_MOV_B32 8
- ; GFX90A-NEXT: [[S_MOV_B32_8:%[0-9]+]]:sgpr_32 = S_MOV_B32 9
- ; GFX90A-NEXT: [[S_MOV_B32_9:%[0-9]+]]:sgpr_32 = S_MOV_B32 10
- ; GFX90A-NEXT: [[S_MOV_B32_10:%[0-9]+]]:sgpr_32 = S_MOV_B32 11
- ; GFX90A-NEXT: [[S_MOV_B32_11:%[0-9]+]]:sgpr_32 = S_MOV_B32 12
- ; GFX90A-NEXT: [[S_MOV_B32_12:%[0-9]+]]:sgpr_32 = S_MOV_B32 13
- ; GFX90A-NEXT: [[S_MOV_B32_13:%[0-9]+]]:sgpr_32 = S_MOV_B32 14
- ; GFX90A-NEXT: [[S_MOV_B32_14:%[0-9]+]]:sgpr_32 = S_MOV_B32 15
- ; GFX90A-NEXT: [[S_MOV_B32_15:%[0-9]+]]:sgpr_32 = S_MOV_B32 16
- ; GFX90A-NEXT: [[S_MOV_B32_16:%[0-9]+]]:sgpr_32 = S_MOV_B32 17
- ; GFX90A-NEXT: [[S_MOV_B32_17:%[0-9]+]]:sgpr_32 = S_MOV_B32 18
- ; GFX90A-NEXT: [[S_MOV_B32_18:%[0-9]+]]:sgpr_32 = S_MOV_B32 19
- ; GFX90A-NEXT: [[S_MOV_B32_19:%[0-9]+]]:sgpr_32 = S_MOV_B32 20
- ; GFX90A-NEXT: [[S_MOV_B32_20:%[0-9]+]]:sgpr_32 = S_MOV_B32 21
- ; GFX90A-NEXT: [[S_MOV_B32_21:%[0-9]+]]:sgpr_32 = S_MOV_B32 22
- ; GFX90A-NEXT: [[S_MOV_B32_22:%[0-9]+]]:sgpr_32 = S_MOV_B32 23
- ; GFX90A-NEXT: [[S_MOV_B32_23:%[0-9]+]]:sgpr_32 = S_MOV_B32 24
- ; GFX90A-NEXT: [[S_MOV_B32_24:%[0-9]+]]:sgpr_32 = S_MOV_B32 25
- ; GFX90A-NEXT: [[S_MOV_B32_25:%[0-9]+]]:sgpr_32 = S_MOV_B32 26
- ; GFX90A-NEXT: [[S_MOV_B32_26:%[0-9]+]]:sgpr_32 = S_MOV_B32 27
- ; GFX90A-NEXT: [[S_MOV_B32_27:%[0-9]+]]:sgpr_32 = S_MOV_B32 28
- ; GFX90A-NEXT: [[S_MOV_B32_28:%[0-9]+]]:sgpr_32 = S_MOV_B32 29
- ; GFX90A-NEXT: [[S_MOV_B32_29:%[0-9]+]]:sgpr_32 = S_MOV_B32 30
- ; GFX90A-NEXT: [[S_MOV_B32_30:%[0-9]+]]:sgpr_32 = S_MOV_B32 31
- ; GFX90A-NEXT: [[S_MOV_B32_31:%[0-9]+]]:sgpr_32 = S_MOV_B32 32
- ; GFX90A-NEXT: [[S_MOV_B32_32:%[0-9]+]]:sgpr_32 = S_MOV_B32 33
- ; GFX90A-NEXT: [[S_MOV_B32_33:%[0-9]+]]:sgpr_32 = S_MOV_B32 34
- ; GFX90A-NEXT: [[S_MOV_B32_34:%[0-9]+]]:sgpr_32 = S_MOV_B32 35
- ; GFX90A-NEXT: [[S_MOV_B32_35:%[0-9]+]]:sgpr_32 = S_MOV_B32 36
- ; GFX90A-NEXT: [[S_MOV_B32_36:%[0-9]+]]:sgpr_32 = S_MOV_B32 37
- ; GFX90A-NEXT: [[S_MOV_B32_37:%[0-9]+]]:sgpr_32 = S_MOV_B32 38
- ; GFX90A-NEXT: [[S_MOV_B32_38:%[0-9]+]]:sgpr_32 = S_MOV_B32 39
- ; GFX90A-NEXT: [[S_MOV_B32_39:%[0-9]+]]:sgpr_32 = S_MOV_B32 40
- ; GFX90A-NEXT: [[S_MOV_B32_40:%[0-9]+]]:sgpr_32 = S_MOV_B32 41
- ; GFX90A-NEXT: [[S_MOV_B32_41:%[0-9]+]]:sgpr_32 = S_MOV_B32 42
- ; GFX90A-NEXT: [[S_MOV_B32_42:%[0-9]+]]:sgpr_32 = S_MOV_B32 43
- ; GFX90A-NEXT: [[S_MOV_B32_43:%[0-9]+]]:sgpr_32 = S_MOV_B32 44
- ; GFX90A-NEXT: [[S_MOV_B32_44:%[0-9]+]]:sgpr_32 = S_MOV_B32 45
- ; GFX90A-NEXT: [[S_MOV_B32_45:%[0-9]+]]:sgpr_32 = S_MOV_B32 46
- ; GFX90A-NEXT: [[S_MOV_B32_46:%[0-9]+]]:sgpr_32 = S_MOV_B32 47
- ; GFX90A-NEXT: [[S_MOV_B32_47:%[0-9]+]]:sgpr_32 = S_MOV_B32 48
- ; GFX90A-NEXT: [[S_MOV_B32_48:%[0-9]+]]:sgpr_32 = S_MOV_B32 49
- ; GFX90A-NEXT: [[S_MOV_B32_49:%[0-9]+]]:sgpr_32 = S_MOV_B32 50
- ; GFX90A-NEXT: [[S_MOV_B32_50:%[0-9]+]]:sgpr_32 = S_MOV_B32 51
- ; GFX90A-NEXT: [[S_MOV_B32_51:%[0-9]+]]:sgpr_32 = S_MOV_B32 52
- ; GFX90A-NEXT: [[S_MOV_B32_52:%[0-9]+]]:sgpr_32 = S_MOV_B32 53
- ; GFX90A-NEXT: [[S_MOV_B32_53:%[0-9]+]]:sgpr_32 = S_MOV_B32 54
- ; GFX90A-NEXT: [[S_MOV_B32_54:%[0-9]+]]:sgpr_32 = S_MOV_B32 55
- ; GFX90A-NEXT: [[S_MOV_B32_55:%[0-9]+]]:sgpr_32 = S_MOV_B32 56
- ; GFX90A-NEXT: [[S_MOV_B32_56:%[0-9]+]]:sgpr_32 = S_MOV_B32 57
- ; GFX90A-NEXT: [[S_MOV_B32_57:%[0-9]+]]:sgpr_32 = S_MOV_B32 58
- ; GFX90A-NEXT: [[S_MOV_B32_58:%[0-9]+]]:sgpr_32 = S_MOV_B32 59
- ; GFX90A-NEXT: [[S_MOV_B32_59:%[0-9]+]]:sgpr_32 = S_MOV_B32 60
- ; GFX90A-NEXT: [[S_MOV_B32_60:%[0-9]+]]:sgpr_32 = S_MOV_B32 61
- ; GFX90A-NEXT: [[S_MOV_B32_61:%[0-9]+]]:sgpr_32 = S_MOV_B32 62
- ; GFX90A-NEXT: [[S_MOV_B32_62:%[0-9]+]]:sgpr_32 = S_MOV_B32 63
- ; GFX90A-NEXT: [[S_MOV_B32_63:%[0-9]+]]:sgpr_32 = S_MOV_B32 64
- ; GFX90A-NEXT: [[S_MOV_B32_64:%[0-9]+]]:sgpr_32 = S_MOV_B32 65
- ; GFX90A-NEXT: [[S_MOV_B32_65:%[0-9]+]]:sgpr_32 = S_MOV_B32 66
- ; GFX90A-NEXT: [[S_MOV_B32_66:%[0-9]+]]:sgpr_32 = S_MOV_B32 67
- ; GFX90A-NEXT: [[S_MOV_B32_67:%[0-9]+]]:sgpr_32 = S_MOV_B32 68
- ; GFX90A-NEXT: [[S_MOV_B32_68:%[0-9]+]]:sgpr_32 = S_MOV_B32 69
- ; GFX90A-NEXT: [[S_MOV_B32_69:%[0-9]+]]:sgpr_32 = S_MOV_B32 70
- ; GFX90A-NEXT: [[S_MOV_B32_70:%[0-9]+]]:sgpr_32 = S_MOV_B32 71
- ; GFX90A-NEXT: [[S_MOV_B32_71:%[0-9]+]]:sgpr_32 = S_MOV_B32 72
- ; GFX90A-NEXT: [[S_MOV_B32_72:%[0-9]+]]:sgpr_32 = S_MOV_B32 73
- ; GFX90A-NEXT: [[S_MOV_B32_73:%[0-9]+]]:sgpr_32 = S_MOV_B32 74
- ; GFX90A-NEXT: [[S_MOV_B32_74:%[0-9]+]]:sgpr_32 = S_MOV_B32 75
- ; GFX90A-NEXT: [[S_MOV_B32_75:%[0-9]+]]:sgpr_32 = S_MOV_B32 76
- ; GFX90A-NEXT: [[S_MOV_B32_76:%[0-9]+]]:sgpr_32 = S_MOV_B32 77
- ; GFX90A-NEXT: [[S_MOV_B32_77:%[0-9]+]]:sgpr_32 = S_MOV_B32 78
- ; GFX90A-NEXT: [[S_MOV_B32_78:%[0-9]+]]:sgpr_32 = S_MOV_B32 79
- ; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.1:
- ; GFX90A-NEXT: [[S_MOV_B32_79:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
- ; GFX90A-NEXT: S_NOP 0, implicit [[S_MOV_B32_79]], implicit [[S_MOV_B32_]], implicit [[S_MOV_B32_1]], implicit [[S_MOV_B32_2]], implicit [[S_MOV_B32_3]]
- ; GFX90A-NEXT: S_NOP 0, implicit [[S_MOV_B32_4]], implicit [[S_MOV_B32_5]], implicit [[S_MOV_B32_6]], implicit [[S_MOV_B32_7]], implicit [[S_MOV_B32_8]]
- ; GFX90A-NEXT: S_NOP 0, implicit [[S_MOV_B32_9]], implicit [[S_MOV_B32_10]], implicit [[S_MOV_B32_11]], implicit [[S_MOV_B32_12]], implicit [[S_MOV_B32_13]]
- ; GFX90A-NEXT: S_NOP 0, implicit [[S_MOV_B32_14]], implicit [[S_MOV_B32_15]], implicit [[S_MOV_B32_16]], implicit [[S_MOV_B32_17]], implicit [[S_MOV_B32_18]]
- ; GFX90A-NEXT: S_NOP 0, implicit [[S_MOV_B32_19]], implicit [[S_MOV_B32_20]], implicit [[S_MOV_B32_21]], implicit [[S_MOV_B32_22]], implicit [[S_MOV_B32_23]]
- ; GFX90A-NEXT: S_NOP 0, implicit [[S_MOV_B32_24]], implicit [[S_MOV_B32_25]], implicit [[S_MOV_B32_26]], implicit [[S_MOV_B32_27]], implicit [[S_MOV_B32_28]]
- ; GFX90A-NEXT: S_NOP 0, implicit [[S_MOV_B32_29]], implicit [[S_MOV_B32_30]], implicit [[S_MOV_B32_31]], implicit [[S_MOV_B32_32]], implicit [[S_MOV_B32_33]]
- ; GFX90A-NEXT: S_NOP 0, implicit [[S_MOV_B32_34]], implicit [[S_MOV_B32_35]], implicit [[S_MOV_B32_36]], implicit [[S_MOV_B32_37]], implicit [[S_MOV_B32_38]]
- ; GFX90A-NEXT: S_NOP 0, implicit [[S_MOV_B32_39]], implicit [[S_MOV_B32_40]], implicit [[S_MOV_B32_41]], implicit [[S_MOV_B32_42]], implicit [[S_MOV_B32_43]]
- ; GFX90A-NEXT: S_NOP 0, implicit [[S_MOV_B32_44]], implicit [[S_MOV_B32_45]], implicit [[S_MOV_B32_46]], implicit [[S_MOV_B32_47]], implicit [[S_MOV_B32_48]]
- ; GFX90A-NEXT: S_NOP 0, implicit [[S_MOV_B32_49]], implicit [[S_MOV_B32_50]], implicit [[S_MOV_B32_51]], implicit [[S_MOV_B32_52]], implicit [[S_MOV_B32_53]]
- ; GFX90A-NEXT: S_NOP 0, implicit [[S_MOV_B32_54]], implicit [[S_MOV_B32_55]], implicit [[S_MOV_B32_56]], implicit [[S_MOV_B32_57]], implicit [[S_MOV_B32_58]]
- ; GFX90A-NEXT: S_NOP 0, implicit [[S_MOV_B32_59]], implicit [[S_MOV_B32_60]], implicit [[S_MOV_B32_61]], implicit [[S_MOV_B32_62]], implicit [[S_MOV_B32_63]]
- ; GFX90A-NEXT: S_NOP 0, implicit [[S_MOV_B32_64]], implicit [[S_MOV_B32_65]], implicit [[S_MOV_B32_66]], implicit [[S_MOV_B32_67]], implicit [[S_MOV_B32_68]]
- ; GFX90A-NEXT: S_NOP 0, implicit [[S_MOV_B32_69]], implicit [[S_MOV_B32_70]], implicit [[S_MOV_B32_71]], implicit [[S_MOV_B32_72]], implicit [[S_MOV_B32_73]]
- ; GFX90A-NEXT: S_NOP 0, implicit [[S_MOV_B32_74]], implicit [[S_MOV_B32_75]], implicit [[S_MOV_B32_76]], implicit [[S_MOV_B32_77]], implicit [[S_MOV_B32_78]]
+ ; GFX90A-NEXT: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
+ ; GFX90A-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 1
+ ; GFX90A-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 2
+ ; GFX90A-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sgpr_32 = S_MOV_B32 3
+ ; GFX90A-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sgpr_32 = S_MOV_B32 4
+ ; GFX90A-NEXT: S_NOP 0, implicit [[S_MOV_B32_]], implicit [[S_MOV_B32_1]], implicit [[S_MOV_B32_2]], implicit [[S_MOV_B32_3]], implicit [[S_MOV_B32_4]]
+ ; GFX90A-NEXT: [[S_MOV_B32_5:%[0-9]+]]:sgpr_32 = S_MOV_B32 5
+ ; GFX90A-NEXT: [[S_MOV_B32_6:%[0-9]+]]:sgpr_32 = S_MOV_B32 6
+ ; GFX90A-NEXT: [[S_MOV_B32_7:%[0-9]+]]:sgpr_32 = S_MOV_B32 7
+ ; GFX90A-NEXT: [[S_MOV_B32_8:%[0-9]+]]:sgpr_32 = S_MOV_B32 8
+ ; GFX90A-NEXT: [[S_MOV_B32_9:%[0-9]+]]:sgpr_32 = S_MOV_B32 9
+ ; GFX90A-NEXT: S_NOP 0, implicit [[S_MOV_B32_5]], implicit [[S_MOV_B32_6]], implicit [[S_MOV_B32_7]], implicit [[S_MOV_B32_8]], implicit [[S_MOV_B32_9]]
+ ; GFX90A-NEXT: [[S_MOV_B32_10:%[0-9]+]]:sgpr_32 = S_MOV_B32 10
+ ; GFX90A-NEXT: [[S_MOV_B32_11:%[0-9]+]]:sgpr_32 = S_MOV_B32 11
+ ; GFX90A-NEXT: [[S_MOV_B32_12:%[0-9]+]]:sgpr_32 = S_MOV_B32 12
+ ; GFX90A-NEXT: [[S_MOV_B32_13:%[0-9]+]]:sgpr_32 = S_MOV_B32 13
+ ; GFX90A-NEXT: [[S_MOV_B32_14:%[0-9]+]]:sgpr_32 = S_MOV_B32 14
+ ; GFX90A-NEXT: S_NOP 0, implicit [[S_MOV_B32_10]], implicit [[S_MOV_B32_11]], implicit [[S_MOV_B32_12]], implicit [[S_MOV_B32_13]], implicit [[S_MOV_B32_14]]
+ ; GFX90A-NEXT: [[S_MOV_B32_15:%[0-9]+]]:sgpr_32 = S_MOV_B32 15
+ ; GFX90A-NEXT: [[S_MOV_B32_16:%[0-9]+]]:sgpr_32 = S_MOV_B32 16
+ ; GFX90A-NEXT: [[S_MOV_B32_17:%[0-9]+]]:sgpr_32 = S_MOV_B32 17
+ ; GFX90A-NEXT: [[S_MOV_B32_18:%[0-9]+]]:sgpr_32 = S_MOV_B32 18
+ ; GFX90A-NEXT: [[S_MOV_B32_19:%[0-9]+]]:sgpr_32 = S_MOV_B32 19
+ ; GFX90A-NEXT: S_NOP 0, implicit [[S_MOV_B32_15]], implicit [[S_MOV_B32_16]], implicit [[S_MOV_B32_17]], implicit [[S_MOV_B32_18]], implicit [[S_MOV_B32_19]]
+ ; GFX90A-NEXT: [[S_MOV_B32_20:%[0-9]+]]:sgpr_32 = S_MOV_B32 20
+ ; GFX90A-NEXT: [[S_MOV_B32_21:%[0-9]+]]:sgpr_32 = S_MOV_B32 21
+ ; GFX90A-NEXT: [[S_MOV_B32_22:%[0-9]+]]:sgpr_32 = S_MOV_B32 22
+ ; GFX90A-NEXT: [[S_MOV_B32_23:%[0-9]+]]:sgpr_32 = S_MOV_B32 23
+ ; GFX90A-NEXT: [[S_MOV_B32_24:%[0-9]+]]:sgpr_32 = S_MOV_B32 24
+ ; GFX90A-NEXT: S_NOP 0, implicit [[S_MOV_B32_20]], implicit [[S_MOV_B32_21]], implicit [[S_MOV_B32_22]], implicit [[S_MOV_B32_23]], implicit [[S_MOV_B32_24]]
+ ; GFX90A-NEXT: [[S_MOV_B32_25:%[0-9]+]]:sgpr_32 = S_MOV_B32 25
+ ; GFX90A-NEXT: [[S_MOV_B32_26:%[0-9]+]]:sgpr_32 = S_MOV_B32 26
+ ; GFX90A-NEXT: [[S_MOV_B32_27:%[0-9]+]]:sgpr_32 = S_MOV_B32 27
+ ; GFX90A-NEXT: [[S_MOV_B32_28:%[0-9]+]]:sgpr_32 = S_MOV_B32 28
+ ; GFX90A-NEXT: [[S_MOV_B32_29:%[0-9]+]]:sgpr_32 = S_MOV_B32 29
+ ; GFX90A-NEXT: S_NOP 0, implicit [[S_MOV_B32_25]], implicit [[S_MOV_B32_26]], implicit [[S_MOV_B32_27]], implicit [[S_MOV_B32_28]], implicit [[S_MOV_B32_29]]
+ ; GFX90A-NEXT: [[S_MOV_B32_30:%[0-9]+]]:sgpr_32 = S_MOV_B32 30
+ ; GFX90A-NEXT: [[S_MOV_B32_31:%[0-9]+]]:sgpr_32 = S_MOV_B32 31
+ ; GFX90A-NEXT: [[S_MOV_B32_32:%[0-9]+]]:sgpr_32 = S_MOV_B32 32
+ ; GFX90A-NEXT: [[S_MOV_B32_33:%[0-9]+]]:sgpr_32 = S_MOV_B32 33
+ ; GFX90A-NEXT: [[S_MOV_B32_34:%[0-9]+]]:sgpr_32 = S_MOV_B32 34
+ ; GFX90A-NEXT: S_NOP 0, implicit [[S_MOV_B32_30]], implicit [[S_MOV_B32_31]], implicit [[S_MOV_B32_32]], implicit [[S_MOV_B32_33]], implicit [[S_MOV_B32_34]]
+ ; GFX90A-NEXT: [[S_MOV_B32_35:%[0-9]+]]:sgpr_32 = S_MOV_B32 35
+ ; GFX90A-NEXT: [[S_MOV_B32_36:%[0-9]+]]:sgpr_32 = S_MOV_B32 36
+ ; GFX90A-NEXT: [[S_MOV_B32_37:%[0-9]+]]:sgpr_32 = S_MOV_B32 37
+ ; GFX90A-NEXT: [[S_MOV_B32_38:%[0-9]+]]:sgpr_32 = S_MOV_B32 38
+ ; GFX90A-NEXT: [[S_MOV_B32_39:%[0-9]+]]:sgpr_32 = S_MOV_B32 39
+ ; GFX90A-NEXT: S_NOP 0, implicit [[S_MOV_B32_35]], implicit [[S_MOV_B32_36]], implicit [[S_MOV_B32_37]], implicit [[S_MOV_B32_38]], implicit [[S_MOV_B32_39]]
+ ; GFX90A-NEXT: [[S_MOV_B32_40:%[0-9]+]]:sgpr_32 = S_MOV_B32 40
+ ; GFX90A-NEXT: [[S_MOV_B32_41:%[0-9]+]]:sgpr_32 = S_MOV_B32 41
+ ; GFX90A-NEXT: [[S_MOV_B32_42:%[0-9]+]]:sgpr_32 = S_MOV_B32 42
+ ; GFX90A-NEXT: [[S_MOV_B32_43:%[0-9]+]]:sgpr_32 = S_MOV_B32 43
+ ; GFX90A-NEXT: [[S_MOV_B32_44:%[0-9]+]]:sgpr_32 = S_MOV_B32 44
+ ; GFX90A-NEXT: S_NOP 0, implicit [[S_MOV_B32_40]], implicit [[S_MOV_B32_41]], implicit [[S_MOV_B32_42]], implicit [[S_MOV_B32_43]], implicit [[S_MOV_B32_44]]
+ ; GFX90A-NEXT: [[S_MOV_B32_45:%[0-9]+]]:sgpr_32 = S_MOV_B32 45
+ ; GFX90A-NEXT: [[S_MOV_B32_46:%[0-9]+]]:sgpr_32 = S_MOV_B32 46
+ ; GFX90A-NEXT: [[S_MOV_B32_47:%[0-9]+]]:sgpr_32 = S_MOV_B32 47
+ ; GFX90A-NEXT: [[S_MOV_B32_48:%[0-9]+]]:sgpr_32 = S_MOV_B32 48
+ ; GFX90A-NEXT: [[S_MOV_B32_49:%[0-9]+]]:sgpr_32 = S_MOV_B32 49
+ ; GFX90A-NEXT: S_NOP 0, implicit [[S_MOV_B32_45]], implicit [[S_MOV_B32_46]], implicit [[S_MOV_B32_47]], implicit [[S_MOV_B32_48]], implicit [[S_MOV_B32_49]]
+ ; GFX90A-NEXT: [[S_MOV_B32_50:%[0-9]+]]:sgpr_32 = S_MOV_B32 50
+ ; GFX90A-NEXT: [[S_MOV_B32_51:%[0-9]+]]:sgpr_32 = S_MOV_B32 51
+ ; GFX90A-NEXT: [[S_MOV_B32_52:%[0-9]+]]:sgpr_32 = S_MOV_B32 52
+ ; GFX90A-NEXT: [[S_MOV_B32_53:%[0-9]+]]:sgpr_32 = S_MOV_B32 53
+ ; GFX90A-NEXT: [[S_MOV_B32_54:%[0-9]+]]:sgpr_32 = S_MOV_B32 54
+ ; GFX90A-NEXT: S_NOP 0, implicit [[S_MOV_B32_50]], implicit [[S_MOV_B32_51]], implicit [[S_MOV_B32_52]], implicit [[S_MOV_B32_53]], implicit [[S_MOV_B32_54]]
+ ; GFX90A-NEXT: [[S_MOV_B32_55:%[0-9]+]]:sgpr_32 = S_MOV_B32 55
+ ; GFX90A-NEXT: [[S_MOV_B32_56:%[0-9]+]]:sgpr_32 = S_MOV_B32 56
+ ; GFX90A-NEXT: [[S_MOV_B32_57:%[0-9]+]]:sgpr_32 = S_MOV_B32 57
+ ; GFX90A-NEXT: [[S_MOV_B32_58:%[0-9]+]]:sgpr_32 = S_MOV_B32 58
+ ; GFX90A-NEXT: [[S_MOV_B32_59:%[0-9]+]]:sgpr_32 = S_MOV_B32 59
+ ; GFX90A-NEXT: S_NOP 0, implicit [[S_MOV_B32_55]], implicit [[S_MOV_B32_56]], implicit [[S_MOV_B32_57]], implicit [[S_MOV_B32_58]], implicit [[S_MOV_B32_59]]
+ ; GFX90A-NEXT: [[S_MOV_B32_60:%[0-9]+]]:sgpr_32 = S_MOV_B32 60
+ ; GFX90A-NEXT: [[S_MOV_B32_61:%[0-9]+]]:sgpr_32 = S_MOV_B32 61
+ ; GFX90A-NEXT: [[S_MOV_B32_62:%[0-9]+]]:sgpr_32 = S_MOV_B32 62
+ ; GFX90A-NEXT: [[S_MOV_B32_63:%[0-9]+]]:sgpr_32 = S_MOV_B32 63
+ ; GFX90A-NEXT: [[S_MOV_B32_64:%[0-9]+]]:sgpr_32 = S_MOV_B32 64
+ ; GFX90A-NEXT: S_NOP 0, implicit [[S_MOV_B32_60]], implicit [[S_MOV_B32_61]], implicit [[S_MOV_B32_62]], implicit [[S_MOV_B32_63]], implicit [[S_MOV_B32_64]]
+ ; GFX90A-NEXT: [[S_MOV_B32_65:%[0-9]+]]:sgpr_32 = S_MOV_B32 65
+ ; GFX90A-NEXT: [[S_MOV_B32_66:%[0-9]+]]:sgpr_32 = S_MOV_B32 66
+ ; GFX90A-NEXT: [[S_MOV_B32_67:%[0-9]+]]:sgpr_32 = S_MOV_B32 67
+ ; GFX90A-NEXT: [[S_MOV_B32_68:%[0-9]+]]:sgpr_32 = S_MOV_B32 68
+ ; GFX90A-NEXT: [[S_MOV_B32_69:%[0-9]+]]:sgpr_32 = S_MOV_B32 69
+ ; GFX90A-NEXT: S_NOP 0, implicit [[S_MOV_B32_65]], implicit [[S_MOV_B32_66]], implicit [[S_MOV_B32_67]], implicit [[S_MOV_B32_68]], implicit [[S_MOV_B32_69]]
+ ; GFX90A-NEXT: [[S_MOV_B32_70:%[0-9]+]]:sgpr_32 = S_MOV_B32 70
+ ; GFX90A-NEXT: [[S_MOV_B32_71:%[0-9]+]]:sgpr_32 = S_MOV_B32 71
+ ; GFX90A-NEXT: [[S_MOV_B32_72:%[0-9]+]]:sgpr_32 = S_MOV_B32 72
+ ; GFX90A-NEXT: [[S_MOV_B32_73:%[0-9]+]]:sgpr_32 = S_MOV_B32 73
+ ; GFX90A-NEXT: [[S_MOV_B32_74:%[0-9]+]]:sgpr_32 = S_MOV_B32 74
+ ; GFX90A-NEXT: S_NOP 0, implicit [[S_MOV_B32_70]], implicit [[S_MOV_B32_71]], implicit [[S_MOV_B32_72]], implicit [[S_MOV_B32_73]], implicit [[S_MOV_B32_74]]
+ ; GFX90A-NEXT: [[S_MOV_B32_75:%[0-9]+]]:sgpr_32 = S_MOV_B32 75
+ ; GFX90A-NEXT: [[S_MOV_B32_76:%[0-9]+]]:sgpr_32 = S_MOV_B32 76
+ ; GFX90A-NEXT: [[S_MOV_B32_77:%[0-9]+]]:sgpr_32 = S_MOV_B32 77
+ ; GFX90A-NEXT: [[S_MOV_B32_78:%[0-9]+]]:sgpr_32 = S_MOV_B32 78
+ ; GFX90A-NEXT: [[S_MOV_B32_79:%[0-9]+]]:sgpr_32 = S_MOV_B32 79
+ ; GFX90A-NEXT: S_NOP 0, implicit [[S_MOV_B32_75]], implicit [[S_MOV_B32_76]], implicit [[S_MOV_B32_77]], implicit [[S_MOV_B32_78]], implicit [[S_MOV_B32_79]]
; GFX90A-NEXT: S_ENDPGM 0
bb.0:
successors: %bb.1
@@ -373,15 +371,15 @@ body: |
; GFX908-NEXT: [[V_CVT_I32_F64_e32_10:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 10, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_11:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 11, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_12:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 12, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_13:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 15, implicit $exec, implicit $mode
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.1:
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_]], implicit [[V_CVT_I32_F64_e32_1]], implicit [[V_CVT_I32_F64_e32_2]], implicit [[V_CVT_I32_F64_e32_3]], implicit [[V_CVT_I32_F64_e32_4]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_5]], implicit [[V_CVT_I32_F64_e32_6]], implicit [[V_CVT_I32_F64_e32_7]], implicit [[V_CVT_I32_F64_e32_8]], implicit [[V_CVT_I32_F64_e32_9]]
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_14:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 13, implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_15:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 14, implicit $exec, implicit $mode
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_10]], implicit [[V_CVT_I32_F64_e32_11]], implicit [[V_CVT_I32_F64_e32_12]], implicit [[V_CVT_I32_F64_e32_14]], implicit [[V_CVT_I32_F64_e32_15]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_13]]
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_13:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 13, implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_14:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 14, implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_15:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 15, implicit $exec, implicit $mode
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_10]], implicit [[V_CVT_I32_F64_e32_11]], implicit [[V_CVT_I32_F64_e32_12]], implicit [[V_CVT_I32_F64_e32_13]], implicit [[V_CVT_I32_F64_e32_14]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_15]]
; GFX908-NEXT: S_ENDPGM 0
;
; GFX90A-LABEL: name: small_num_vgprs_as_spill
@@ -401,14 +399,14 @@ body: |
; GFX90A-NEXT: [[V_CVT_I32_F64_e32_10:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 10, implicit $exec, implicit $mode, implicit-def $m0
; GFX90A-NEXT: [[V_CVT_I32_F64_e32_11:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 11, implicit $exec, implicit $mode, implicit-def $m0
; GFX90A-NEXT: [[V_CVT_I32_F64_e32_12:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 12, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_13:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 13, implicit $exec, implicit $mode
- ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_14:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 14, implicit $exec, implicit $mode
- ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_15:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 15, implicit $exec, implicit $mode
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.1:
; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_]], implicit [[V_CVT_I32_F64_e32_1]], implicit [[V_CVT_I32_F64_e32_2]], implicit [[V_CVT_I32_F64_e32_3]], implicit [[V_CVT_I32_F64_e32_4]]
; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_5]], implicit [[V_CVT_I32_F64_e32_6]], implicit [[V_CVT_I32_F64_e32_7]], implicit [[V_CVT_I32_F64_e32_8]], implicit [[V_CVT_I32_F64_e32_9]]
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_13:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 13, implicit $exec, implicit $mode
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_14:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 14, implicit $exec, implicit $mode
; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_10]], implicit [[V_CVT_I32_F64_e32_11]], implicit [[V_CVT_I32_F64_e32_12]], implicit [[V_CVT_I32_F64_e32_13]], implicit [[V_CVT_I32_F64_e32_14]]
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_15:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 15, implicit $exec, implicit $mode
; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_15]]
; GFX90A-NEXT: S_ENDPGM 0
bb.0:
@@ -475,6 +473,12 @@ body: |
; GFX908-NEXT: [[V_CVT_I32_F64_e32_21:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 21, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: {{ $}}
+ ; GFX908-NEXT: bb.1:
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_]], implicit [[V_CVT_I32_F64_e32_1]], implicit [[V_CVT_I32_F64_e32_2]], implicit [[V_CVT_I32_F64_e32_3]], implicit [[V_CVT_I32_F64_e32_4]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_5]], implicit [[V_CVT_I32_F64_e32_6]], implicit [[V_CVT_I32_F64_e32_7]], implicit [[V_CVT_I32_F64_e32_8]], implicit [[V_CVT_I32_F64_e32_9]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_10]], implicit [[V_CVT_I32_F64_e32_11]], implicit [[V_CVT_I32_F64_e32_12]], implicit [[V_CVT_I32_F64_e32_13]], implicit [[V_CVT_I32_F64_e32_14]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_15]], implicit [[V_CVT_I32_F64_e32_16]], implicit [[V_CVT_I32_F64_e32_17]], implicit [[V_CVT_I32_F64_e32_18]], implicit [[V_CVT_I32_F64_e32_19]]
; GFX908-NEXT: [[V_CVT_I32_F64_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode
; GFX908-NEXT: [[V_CVT_I32_F64_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 25, implicit $exec, implicit $mode
; GFX908-NEXT: [[V_CVT_I32_F64_e32_26:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 26, implicit $exec, implicit $mode
@@ -487,12 +491,6 @@ body: |
; GFX908-NEXT: [[V_CVT_I32_F64_e32_33:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 33, implicit $exec, implicit $mode
; GFX908-NEXT: [[V_CVT_I32_F64_e32_34:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 34, implicit $exec, implicit $mode
; GFX908-NEXT: [[V_CVT_I32_F64_e32_35:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 35, implicit $exec, implicit $mode
- ; GFX908-NEXT: {{ $}}
- ; GFX908-NEXT: bb.1:
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_]], implicit [[V_CVT_I32_F64_e32_1]], implicit [[V_CVT_I32_F64_e32_2]], implicit [[V_CVT_I32_F64_e32_3]], implicit [[V_CVT_I32_F64_e32_4]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_5]], implicit [[V_CVT_I32_F64_e32_6]], implicit [[V_CVT_I32_F64_e32_7]], implicit [[V_CVT_I32_F64_e32_8]], implicit [[V_CVT_I32_F64_e32_9]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_10]], implicit [[V_CVT_I32_F64_e32_11]], implicit [[V_CVT_I32_F64_e32_12]], implicit [[V_CVT_I32_F64_e32_13]], implicit [[V_CVT_I32_F64_e32_14]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_15]], implicit [[V_CVT_I32_F64_e32_16]], implicit [[V_CVT_I32_F64_e32_17]], implicit [[V_CVT_I32_F64_e32_18]], implicit [[V_CVT_I32_F64_e32_19]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_20]], implicit [[V_CVT_I32_F64_e32_21]], implicit [[V_CVT_I32_F64_e32_22]], implicit [[V_CVT_I32_F64_e32_23]], implicit [[V_CVT_I32_F64_e32_24]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_25]], implicit [[V_CVT_I32_F64_e32_26]], implicit [[V_CVT_I32_F64_e32_27]], implicit [[V_CVT_I32_F64_e32_28]], implicit [[V_CVT_I32_F64_e32_29]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_30]], implicit [[V_CVT_I32_F64_e32_31]], implicit [[V_CVT_I32_F64_e32_32]], implicit [[V_CVT_I32_F64_e32_33]], implicit [[V_CVT_I32_F64_e32_34]]
@@ -527,27 +525,27 @@ body: |
; GFX90A-NEXT: [[V_CVT_I32_F64_e32_21:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 21, implicit $exec, implicit $mode, implicit-def $m0
; GFX90A-NEXT: [[V_CVT_I32_F64_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode, implicit-def $m0
; GFX90A-NEXT: [[V_CVT_I32_F64_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: {{ $}}
+ ; GFX90A-NEXT: bb.1:
+ ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_]], implicit [[V_CVT_I32_F64_e32_1]], implicit [[V_CVT_I32_F64_e32_2]], implicit [[V_CVT_I32_F64_e32_3]], implicit [[V_CVT_I32_F64_e32_4]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_5]], implicit [[V_CVT_I32_F64_e32_6]], implicit [[V_CVT_I32_F64_e32_7]], implicit [[V_CVT_I32_F64_e32_8]], implicit [[V_CVT_I32_F64_e32_9]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_10]], implicit [[V_CVT_I32_F64_e32_11]], implicit [[V_CVT_I32_F64_e32_12]], implicit [[V_CVT_I32_F64_e32_13]], implicit [[V_CVT_I32_F64_e32_14]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_15]], implicit [[V_CVT_I32_F64_e32_16]], implicit [[V_CVT_I32_F64_e32_17]], implicit [[V_CVT_I32_F64_e32_18]], implicit [[V_CVT_I32_F64_e32_19]]
; GFX90A-NEXT: [[V_CVT_I32_F64_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode
+ ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_20]], implicit [[V_CVT_I32_F64_e32_21]], implicit [[V_CVT_I32_F64_e32_22]], implicit [[V_CVT_I32_F64_e32_23]], implicit [[V_CVT_I32_F64_e32_24]]
; GFX90A-NEXT: [[V_CVT_I32_F64_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 25, implicit $exec, implicit $mode
; GFX90A-NEXT: [[V_CVT_I32_F64_e32_26:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 26, implicit $exec, implicit $mode
; GFX90A-NEXT: [[V_CVT_I32_F64_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 27, implicit $exec, implicit $mode
; GFX90A-NEXT: [[V_CVT_I32_F64_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 28, implicit $exec, implicit $mode
; GFX90A-NEXT: [[V_CVT_I32_F64_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 29, implicit $exec, implicit $mode
+ ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_25]], implicit [[V_CVT_I32_F64_e32_26]], implicit [[V_CVT_I32_F64_e32_27]], implicit [[V_CVT_I32_F64_e32_28]], implicit [[V_CVT_I32_F64_e32_29]]
; GFX90A-NEXT: [[V_CVT_I32_F64_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 30, implicit $exec, implicit $mode
; GFX90A-NEXT: [[V_CVT_I32_F64_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 31, implicit $exec, implicit $mode
; GFX90A-NEXT: [[V_CVT_I32_F64_e32_32:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 32, implicit $exec, implicit $mode
; GFX90A-NEXT: [[V_CVT_I32_F64_e32_33:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 33, implicit $exec, implicit $mode
; GFX90A-NEXT: [[V_CVT_I32_F64_e32_34:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 34, implicit $exec, implicit $mode
- ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_35:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 35, implicit $exec, implicit $mode
- ; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: bb.1:
- ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_]], implicit [[V_CVT_I32_F64_e32_1]], implicit [[V_CVT_I32_F64_e32_2]], implicit [[V_CVT_I32_F64_e32_3]], implicit [[V_CVT_I32_F64_e32_4]]
- ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_5]], implicit [[V_CVT_I32_F64_e32_6]], implicit [[V_CVT_I32_F64_e32_7]], implicit [[V_CVT_I32_F64_e32_8]], implicit [[V_CVT_I32_F64_e32_9]]
- ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_10]], implicit [[V_CVT_I32_F64_e32_11]], implicit [[V_CVT_I32_F64_e32_12]], implicit [[V_CVT_I32_F64_e32_13]], implicit [[V_CVT_I32_F64_e32_14]]
- ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_15]], implicit [[V_CVT_I32_F64_e32_16]], implicit [[V_CVT_I32_F64_e32_17]], implicit [[V_CVT_I32_F64_e32_18]], implicit [[V_CVT_I32_F64_e32_19]]
- ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_20]], implicit [[V_CVT_I32_F64_e32_21]], implicit [[V_CVT_I32_F64_e32_22]], implicit [[V_CVT_I32_F64_e32_23]], implicit [[V_CVT_I32_F64_e32_24]]
- ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_25]], implicit [[V_CVT_I32_F64_e32_26]], implicit [[V_CVT_I32_F64_e32_27]], implicit [[V_CVT_I32_F64_e32_28]], implicit [[V_CVT_I32_F64_e32_29]]
; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_30]], implicit [[V_CVT_I32_F64_e32_31]], implicit [[V_CVT_I32_F64_e32_32]], implicit [[V_CVT_I32_F64_e32_33]], implicit [[V_CVT_I32_F64_e32_34]]
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_35:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 35, implicit $exec, implicit $mode
; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_35]]
; GFX90A-NEXT: S_ENDPGM 0
bb.0:
@@ -639,6 +637,11 @@ body: |
; GFX908-NEXT: [[V_CVT_I32_F64_e32_21:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 21, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: {{ $}}
+ ; GFX908-NEXT: bb.1:
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_]], implicit [[V_CVT_I32_F64_e32_1]], implicit [[V_CVT_I32_F64_e32_2]], implicit [[V_CVT_I32_F64_e32_3]], implicit [[V_CVT_I32_F64_e32_4]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_5]], implicit [[V_CVT_I32_F64_e32_6]], implicit [[V_CVT_I32_F64_e32_7]], implicit [[V_CVT_I32_F64_e32_8]], implicit [[V_CVT_I32_F64_e32_9]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_10]], implicit [[V_CVT_I32_F64_e32_11]], implicit [[V_CVT_I32_F64_e32_12]], implicit [[V_CVT_I32_F64_e32_13]], implicit [[V_CVT_I32_F64_e32_14]]
; GFX908-NEXT: [[V_CVT_I32_F64_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode
; GFX908-NEXT: [[V_CVT_I32_F64_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 25, implicit $exec, implicit $mode
; GFX908-NEXT: [[V_CVT_I32_F64_e32_26:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 26, implicit $exec, implicit $mode
@@ -647,11 +650,6 @@ body: |
; GFX908-NEXT: [[V_CVT_I32_F64_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 29, implicit $exec, implicit $mode
; GFX908-NEXT: [[V_CVT_I32_F64_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 30, implicit $exec, implicit $mode
; GFX908-NEXT: [[V_CVT_I32_F64_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 31, implicit $exec, implicit $mode
- ; GFX908-NEXT: {{ $}}
- ; GFX908-NEXT: bb.1:
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_]], implicit [[V_CVT_I32_F64_e32_1]], implicit [[V_CVT_I32_F64_e32_2]], implicit [[V_CVT_I32_F64_e32_3]], implicit [[V_CVT_I32_F64_e32_4]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_5]], implicit [[V_CVT_I32_F64_e32_6]], implicit [[V_CVT_I32_F64_e32_7]], implicit [[V_CVT_I32_F64_e32_8]], implicit [[V_CVT_I32_F64_e32_9]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_10]], implicit [[V_CVT_I32_F64_e32_11]], implicit [[V_CVT_I32_F64_e32_12]], implicit [[V_CVT_I32_F64_e32_13]], implicit [[V_CVT_I32_F64_e32_14]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_15]], implicit [[V_CVT_I32_F64_e32_16]], implicit [[V_CVT_I32_F64_e32_17]], implicit [[V_CVT_I32_F64_e32_18]], implicit [[V_CVT_I32_F64_e32_19]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_20]], implicit [[V_CVT_I32_F64_e32_21]], implicit [[V_CVT_I32_F64_e32_22]], implicit [[V_CVT_I32_F64_e32_23]], implicit [[V_CVT_I32_F64_e32_24]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_25]], implicit [[V_CVT_I32_F64_e32_26]], implicit [[V_CVT_I32_F64_e32_27]], implicit [[V_CVT_I32_F64_e32_28]], implicit [[V_CVT_I32_F64_e32_29]]
@@ -686,22 +684,22 @@ body: |
; GFX90A-NEXT: [[V_CVT_I32_F64_e32_21:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 21, implicit $exec, implicit $mode, implicit-def $m0
; GFX90A-NEXT: [[V_CVT_I32_F64_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode, implicit-def $m0
; GFX90A-NEXT: [[V_CVT_I32_F64_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode
- ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 25, implicit $exec, implicit $mode
- ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_26:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 26, implicit $exec, implicit $mode
- ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 27, implicit $exec, implicit $mode
- ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 28, implicit $exec, implicit $mode
- ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 29, implicit $exec, implicit $mode
- ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 30, implicit $exec, implicit $mode
- ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 31, implicit $exec, implicit $mode
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.1:
; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_]], implicit [[V_CVT_I32_F64_e32_1]], implicit [[V_CVT_I32_F64_e32_2]], implicit [[V_CVT_I32_F64_e32_3]], implicit [[V_CVT_I32_F64_e32_4]]
; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_5]], implicit [[V_CVT_I32_F64_e32_6]], implicit [[V_CVT_I32_F64_e32_7]], implicit [[V_CVT_I32_F64_e32_8]], implicit [[V_CVT_I32_F64_e32_9]]
; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_10]], implicit [[V_CVT_I32_F64_e32_11]], implicit [[V_CVT_I32_F64_e32_12]], implicit [[V_CVT_I32_F64_e32_13]], implicit [[V_CVT_I32_F64_e32_14]]
; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_15]], implicit [[V_CVT_I32_F64_e32_16]], implicit [[V_CVT_I32_F64_e32_17]], implicit [[V_CVT_I32_F64_e32_18]], implicit [[V_CVT_I32_F64_e32_19]]
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode
; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_20]], implicit [[V_CVT_I32_F64_e32_21]], implicit [[V_CVT_I32_F64_e32_22]], implicit [[V_CVT_I32_F64_e32_23]], implicit [[V_CVT_I32_F64_e32_24]]
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 25, implicit $exec, implicit $mode
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_26:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 26, implicit $exec, implicit $mode
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 27, implicit $exec, implicit $mode
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 28, implicit $exec, implicit $mode
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 29, implicit $exec, implicit $mode
; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_25]], implicit [[V_CVT_I32_F64_e32_26]], implicit [[V_CVT_I32_F64_e32_27]], implicit [[V_CVT_I32_F64_e32_28]], implicit [[V_CVT_I32_F64_e32_29]]
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 30, implicit $exec, implicit $mode
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 31, implicit $exec, implicit $mode
; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_30]], implicit [[V_CVT_I32_F64_e32_31]]
; GFX90A-NEXT: S_ENDPGM 0
bb.0:
@@ -767,38 +765,6 @@ body: |
; GFX908-NEXT: successors: %bb.1(0x80000000)
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: [[V_CVT_I32_F64_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 0, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[DEF:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF1:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF2:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF3:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF4:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF5:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF6:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF7:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF8:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF9:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF10:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF11:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF12:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF13:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF14:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF15:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF16:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF17:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF18:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF19:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF20:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF21:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF22:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF23:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF24:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF25:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF26:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF27:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF28:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF29:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF30:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF31:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[V_CVT_I32_F64_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_3:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode, implicit-def $m0
@@ -825,29 +791,61 @@ body: |
; GFX908-NEXT: [[V_CVT_I32_F64_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 25, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_26:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 26, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 28, implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 29, implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 30, implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 31, implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 64, implicit $exec, implicit $mode
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.1:
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_]], implicit [[V_CVT_I32_F64_e32_1]], implicit [[V_CVT_I32_F64_e32_2]], implicit [[V_CVT_I32_F64_e32_3]], implicit [[V_CVT_I32_F64_e32_4]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_5]], implicit [[V_CVT_I32_F64_e32_6]], implicit [[V_CVT_I32_F64_e32_7]], implicit [[V_CVT_I32_F64_e32_8]], implicit [[V_CVT_I32_F64_e32_9]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_10]], implicit [[V_CVT_I32_F64_e32_11]], implicit [[V_CVT_I32_F64_e32_12]], implicit [[V_CVT_I32_F64_e32_13]], implicit [[V_CVT_I32_F64_e32_14]]
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 27, implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 28, implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 29, implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 30, implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 31, implicit $exec, implicit $mode
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_15]], implicit [[V_CVT_I32_F64_e32_16]], implicit [[V_CVT_I32_F64_e32_17]], implicit [[V_CVT_I32_F64_e32_18]], implicit [[V_CVT_I32_F64_e32_19]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_20]], implicit [[V_CVT_I32_F64_e32_21]], implicit [[V_CVT_I32_F64_e32_22]], implicit [[V_CVT_I32_F64_e32_23]], implicit [[V_CVT_I32_F64_e32_24]]
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_32:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 27, implicit $exec, implicit $mode
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_25]], implicit [[V_CVT_I32_F64_e32_26]], implicit [[V_CVT_I32_F64_e32_32]], implicit [[V_CVT_I32_F64_e32_27]], implicit [[V_CVT_I32_F64_e32_28]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_25]], implicit [[V_CVT_I32_F64_e32_26]], implicit [[V_CVT_I32_F64_e32_27]], implicit [[V_CVT_I32_F64_e32_28]], implicit [[V_CVT_I32_F64_e32_29]]
+ ; GFX908-NEXT: [[DEF:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF1:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF2:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_30]], implicit [[V_CVT_I32_F64_e32_31]], implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]]
+ ; GFX908-NEXT: [[DEF3:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF4:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF5:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF6:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF7:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF3]], implicit [[DEF4]], implicit [[DEF5]], implicit [[DEF6]], implicit [[DEF7]]
+ ; GFX908-NEXT: [[DEF8:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF9:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF10:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF11:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF12:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF8]], implicit [[DEF9]], implicit [[DEF10]], implicit [[DEF11]], implicit [[DEF12]]
+ ; GFX908-NEXT: [[DEF13:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF14:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF15:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF16:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF17:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_32:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 64, implicit $exec, implicit $mode
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF13]], implicit [[DEF14]], implicit [[DEF15]], implicit [[DEF16]], implicit [[DEF17]]
+ ; GFX908-NEXT: [[DEF18:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF19:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF20:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF21:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF22:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF18]], implicit [[DEF19]], implicit [[DEF20]], implicit [[DEF21]], implicit [[DEF22]]
+ ; GFX908-NEXT: [[DEF23:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF24:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF25:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF26:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF27:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF23]], implicit [[DEF24]], implicit [[DEF25]], implicit [[DEF26]], implicit [[DEF27]]
+ ; GFX908-NEXT: [[DEF28:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF29:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF30:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF31:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF28]], implicit [[DEF29]], implicit [[DEF30]], implicit [[DEF31]], implicit [[V_CVT_I32_F64_e32_32]]
; GFX908-NEXT: [[DEF32:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_29]], implicit [[V_CVT_I32_F64_e32_30]], implicit [[DEF32]], implicit [[DEF]], implicit [[DEF1]]
- ; GFX908-NEXT: S_NOP 0, implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]], implicit [[DEF5]], implicit [[DEF6]]
- ; GFX908-NEXT: S_NOP 0, implicit [[DEF7]], implicit [[DEF8]], implicit [[DEF9]], implicit [[DEF10]], implicit [[DEF11]]
- ; GFX908-NEXT: S_NOP 0, implicit [[DEF12]], implicit [[DEF13]], implicit [[DEF14]], implicit [[DEF15]], implicit [[DEF16]]
- ; GFX908-NEXT: S_NOP 0, implicit [[DEF17]], implicit [[DEF18]], implicit [[DEF19]], implicit [[DEF20]], implicit [[DEF21]]
- ; GFX908-NEXT: S_NOP 0, implicit [[DEF22]], implicit [[DEF23]], implicit [[DEF24]], implicit [[DEF25]], implicit [[DEF26]]
- ; GFX908-NEXT: S_NOP 0, implicit [[DEF27]], implicit [[DEF28]], implicit [[DEF29]], implicit [[DEF30]], implicit [[V_CVT_I32_F64_e32_31]]
- ; GFX908-NEXT: S_NOP 0, implicit [[DEF31]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF32]]
; GFX908-NEXT: S_ENDPGM 0
;
; GFX90A-LABEL: name: reduce_arch_and_acc_vgrp_spill
@@ -881,61 +879,61 @@ body: |
; GFX90A-NEXT: [[V_CVT_I32_F64_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
; GFX90A-NEXT: [[V_CVT_I32_F64_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 25, implicit $exec, implicit $mode, implicit-def $m0
; GFX90A-NEXT: [[V_CVT_I32_F64_e32_26:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 26, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: {{ $}}
+ ; GFX90A-NEXT: bb.1:
+ ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_]], implicit [[V_CVT_I32_F64_e32_1]], implicit [[V_CVT_I32_F64_e32_2]], implicit [[V_CVT_I32_F64_e32_3]], implicit [[V_CVT_I32_F64_e32_4]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_5]], implicit [[V_CVT_I32_F64_e32_6]], implicit [[V_CVT_I32_F64_e32_7]], implicit [[V_CVT_I32_F64_e32_8]], implicit [[V_CVT_I32_F64_e32_9]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_10]], implicit [[V_CVT_I32_F64_e32_11]], implicit [[V_CVT_I32_F64_e32_12]], implicit [[V_CVT_I32_F64_e32_13]], implicit [[V_CVT_I32_F64_e32_14]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_15]], implicit [[V_CVT_I32_F64_e32_16]], implicit [[V_CVT_I32_F64_e32_17]], implicit [[V_CVT_I32_F64_e32_18]], implicit [[V_CVT_I32_F64_e32_19]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_20]], implicit [[V_CVT_I32_F64_e32_21]], implicit [[V_CVT_I32_F64_e32_22]], implicit [[V_CVT_I32_F64_e32_23]], implicit [[V_CVT_I32_F64_e32_24]]
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 27, implicit $exec, implicit $mode
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 28, implicit $exec, implicit $mode
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 29, implicit $exec, implicit $mode
+ ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_25]], implicit [[V_CVT_I32_F64_e32_26]], implicit [[V_CVT_I32_F64_e32_27]], implicit [[V_CVT_I32_F64_e32_28]], implicit [[V_CVT_I32_F64_e32_29]]
; GFX90A-NEXT: [[DEF:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF1:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF2:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 30, implicit $exec, implicit $mode
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 31, implicit $exec, implicit $mode
+ ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_30]], implicit [[V_CVT_I32_F64_e32_31]], implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]]
; GFX90A-NEXT: [[DEF3:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF4:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF5:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF6:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF7:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF3]], implicit [[DEF4]], implicit [[DEF5]], implicit [[DEF6]], implicit [[DEF7]]
; GFX90A-NEXT: [[DEF8:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF9:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF10:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF11:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF12:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF8]], implicit [[DEF9]], implicit [[DEF10]], implicit [[DEF11]], implicit [[DEF12]]
; GFX90A-NEXT: [[DEF13:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF14:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF15:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF16:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF17:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF13]], implicit [[DEF14]], implicit [[DEF15]], implicit [[DEF16]], implicit [[DEF17]]
; GFX90A-NEXT: [[DEF18:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF19:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF20:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF21:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF22:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF18]], implicit [[DEF19]], implicit [[DEF20]], implicit [[DEF21]], implicit [[DEF22]]
; GFX90A-NEXT: [[DEF23:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF24:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF25:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF26:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF27:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF23]], implicit [[DEF24]], implicit [[DEF25]], implicit [[DEF26]], implicit [[DEF27]]
; GFX90A-NEXT: [[DEF28:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF29:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF30:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF31:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 28, implicit $exec, implicit $mode
- ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 29, implicit $exec, implicit $mode
- ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 30, implicit $exec, implicit $mode
- ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 31, implicit $exec, implicit $mode
- ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 64, implicit $exec, implicit $mode
- ; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: bb.1:
- ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_]], implicit [[V_CVT_I32_F64_e32_1]], implicit [[V_CVT_I32_F64_e32_2]], implicit [[V_CVT_I32_F64_e32_3]], implicit [[V_CVT_I32_F64_e32_4]]
- ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_5]], implicit [[V_CVT_I32_F64_e32_6]], implicit [[V_CVT_I32_F64_e32_7]], implicit [[V_CVT_I32_F64_e32_8]], implicit [[V_CVT_I32_F64_e32_9]]
- ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_10]], implicit [[V_CVT_I32_F64_e32_11]], implicit [[V_CVT_I32_F64_e32_12]], implicit [[V_CVT_I32_F64_e32_13]], implicit [[V_CVT_I32_F64_e32_14]]
- ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_15]], implicit [[V_CVT_I32_F64_e32_16]], implicit [[V_CVT_I32_F64_e32_17]], implicit [[V_CVT_I32_F64_e32_18]], implicit [[V_CVT_I32_F64_e32_19]]
- ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_20]], implicit [[V_CVT_I32_F64_e32_21]], implicit [[V_CVT_I32_F64_e32_22]], implicit [[V_CVT_I32_F64_e32_23]], implicit [[V_CVT_I32_F64_e32_24]]
- ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_32:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 27, implicit $exec, implicit $mode
- ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_25]], implicit [[V_CVT_I32_F64_e32_26]], implicit [[V_CVT_I32_F64_e32_32]], implicit [[V_CVT_I32_F64_e32_27]], implicit [[V_CVT_I32_F64_e32_28]]
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_32:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 64, implicit $exec, implicit $mode
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF28]], implicit [[DEF29]], implicit [[DEF30]], implicit [[DEF31]], implicit [[V_CVT_I32_F64_e32_32]]
; GFX90A-NEXT: [[DEF32:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_29]], implicit [[V_CVT_I32_F64_e32_30]], implicit [[DEF32]], implicit [[DEF]], implicit [[DEF1]]
- ; GFX90A-NEXT: S_NOP 0, implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]], implicit [[DEF5]], implicit [[DEF6]]
- ; GFX90A-NEXT: S_NOP 0, implicit [[DEF7]], implicit [[DEF8]], implicit [[DEF9]], implicit [[DEF10]], implicit [[DEF11]]
- ; GFX90A-NEXT: S_NOP 0, implicit [[DEF12]], implicit [[DEF13]], implicit [[DEF14]], implicit [[DEF15]], implicit [[DEF16]]
- ; GFX90A-NEXT: S_NOP 0, implicit [[DEF17]], implicit [[DEF18]], implicit [[DEF19]], implicit [[DEF20]], implicit [[DEF21]]
- ; GFX90A-NEXT: S_NOP 0, implicit [[DEF22]], implicit [[DEF23]], implicit [[DEF24]], implicit [[DEF25]], implicit [[DEF26]]
- ; GFX90A-NEXT: S_NOP 0, implicit [[DEF27]], implicit [[DEF28]], implicit [[DEF29]], implicit [[DEF30]], implicit [[V_CVT_I32_F64_e32_31]]
- ; GFX90A-NEXT: S_NOP 0, implicit [[DEF31]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF32]]
; GFX90A-NEXT: S_ENDPGM 0
bb.0:
successors: %bb.1
@@ -1041,7 +1039,6 @@ body: |
; GFX908-NEXT: successors: %bb.1(0x80000000)
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: [[V_CVT_I32_F64_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 0, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[DEF:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[V_CVT_I32_F64_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_3:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode, implicit-def $m0
@@ -1296,7 +1293,6 @@ body: |
; GFX908-NEXT: [[V_CVT_I32_F64_e32_252:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 252, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_253:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 253, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_254:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 254, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_255:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 256, implicit $exec, implicit $mode
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.1:
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_]], implicit [[V_CVT_I32_F64_e32_1]], implicit [[V_CVT_I32_F64_e32_2]], implicit [[V_CVT_I32_F64_e32_3]], implicit [[V_CVT_I32_F64_e32_4]], implicit [[V_CVT_I32_F64_e32_5]], implicit [[V_CVT_I32_F64_e32_6]], implicit [[V_CVT_I32_F64_e32_7]], implicit [[V_CVT_I32_F64_e32_8]], implicit [[V_CVT_I32_F64_e32_9]]
@@ -1324,8 +1320,10 @@ body: |
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_220]], implicit [[V_CVT_I32_F64_e32_221]], implicit [[V_CVT_I32_F64_e32_222]], implicit [[V_CVT_I32_F64_e32_223]], implicit [[V_CVT_I32_F64_e32_224]], implicit [[V_CVT_I32_F64_e32_225]], implicit [[V_CVT_I32_F64_e32_226]], implicit [[V_CVT_I32_F64_e32_227]], implicit [[V_CVT_I32_F64_e32_228]], implicit [[V_CVT_I32_F64_e32_229]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_230]], implicit [[V_CVT_I32_F64_e32_231]], implicit [[V_CVT_I32_F64_e32_232]], implicit [[V_CVT_I32_F64_e32_233]], implicit [[V_CVT_I32_F64_e32_234]], implicit [[V_CVT_I32_F64_e32_235]], implicit [[V_CVT_I32_F64_e32_236]], implicit [[V_CVT_I32_F64_e32_237]], implicit [[V_CVT_I32_F64_e32_238]], implicit [[V_CVT_I32_F64_e32_239]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_240]], implicit [[V_CVT_I32_F64_e32_241]], implicit [[V_CVT_I32_F64_e32_242]], implicit [[V_CVT_I32_F64_e32_243]], implicit [[V_CVT_I32_F64_e32_244]], implicit [[V_CVT_I32_F64_e32_245]], implicit [[V_CVT_I32_F64_e32_246]], implicit [[V_CVT_I32_F64_e32_247]], implicit [[V_CVT_I32_F64_e32_248]], implicit [[V_CVT_I32_F64_e32_249]]
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_256:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 255, implicit $exec, implicit $mode
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_250]], implicit [[V_CVT_I32_F64_e32_251]], implicit [[V_CVT_I32_F64_e32_252]], implicit [[V_CVT_I32_F64_e32_253]], implicit [[V_CVT_I32_F64_e32_254]], implicit [[V_CVT_I32_F64_e32_256]], implicit [[V_CVT_I32_F64_e32_255]], implicit [[DEF]]
+ ; GFX908-NEXT: [[DEF:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_255:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 255, implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_256:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 256, implicit $exec, implicit $mode
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_250]], implicit [[V_CVT_I32_F64_e32_251]], implicit [[V_CVT_I32_F64_e32_252]], implicit [[V_CVT_I32_F64_e32_253]], implicit [[V_CVT_I32_F64_e32_254]], implicit [[V_CVT_I32_F64_e32_255]], implicit [[V_CVT_I32_F64_e32_256]], implicit [[DEF]]
; GFX908-NEXT: S_ENDPGM 0
;
; GFX90A-LABEL: name: reduce_spill_archvgpr_above_addressable_limit
@@ -1333,7 +1331,6 @@ body: |
; GFX90A-NEXT: successors: %bb.1(0x80000000)
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: [[V_CVT_I32_F64_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 0, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX90A-NEXT: [[DEF:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[V_CVT_I32_F64_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode, implicit-def $m0
; GFX90A-NEXT: [[V_CVT_I32_F64_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode, implicit-def $m0
; GFX90A-NEXT: [[V_CVT_I32_F64_e32_3:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode, implicit-def $m0
@@ -1588,7 +1585,6 @@ body: |
; GFX90A-NEXT: [[V_CVT_I32_F64_e32_252:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 252, implicit $exec, implicit $mode, implicit-def $m0
; GFX90A-NEXT: [[V_CVT_I32_F64_e32_253:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 253, implicit $exec, implicit $mode, implicit-def $m0
; GFX90A-NEXT: [[V_CVT_I32_F64_e32_254:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 254, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_255:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 256, implicit $exec, implicit $mode
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.1:
; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_]], implicit [[V_CVT_I32_F64_e32_1]], implicit [[V_CVT_I32_F64_e32_2]], implicit [[V_CVT_I32_F64_e32_3]], implicit [[V_CVT_I32_F64_e32_4]], implicit [[V_CVT_I32_F64_e32_5]], implicit [[V_CVT_I32_F64_e32_6]], implicit [[V_CVT_I32_F64_e32_7]], implicit [[V_CVT_I32_F64_e32_8]], implicit [[V_CVT_I32_F64_e32_9]]
@@ -1616,8 +1612,10 @@ body: |
; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_220]], implicit [[V_CVT_I32_F64_e32_221]], implicit [[V_CVT_I32_F64_e32_222]], implicit [[V_CVT_I32_F64_e32_223]], implicit [[V_CVT_I32_F64_e32_224]], implicit [[V_CVT_I32_F64_e32_225]], implicit [[V_CVT_I32_F64_e32_226]], implicit [[V_CVT_I32_F64_e32_227]], implicit [[V_CVT_I32_F64_e32_228]], implicit [[V_CVT_I32_F64_e32_229]]
; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_230]], implicit [[V_CVT_I32_F64_e32_231]], implicit [[V_CVT_I32_F64_e32_232]], implicit [[V_CVT_I32_F64_e32_233]], implicit [[V_CVT_I32_F64_e32_234]], implicit [[V_CVT_I32_F64_e32_235]], implicit [[V_CVT_I32_F64_e32_236]], implicit [[V_CVT_I32_F64_e32_237]], implicit [[V_CVT_I32_F64_e32_238]], implicit [[V_CVT_I32_F64_e32_239]]
; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_240]], implicit [[V_CVT_I32_F64_e32_241]], implicit [[V_CVT_I32_F64_e32_242]], implicit [[V_CVT_I32_F64_e32_243]], implicit [[V_CVT_I32_F64_e32_244]], implicit [[V_CVT_I32_F64_e32_245]], implicit [[V_CVT_I32_F64_e32_246]], implicit [[V_CVT_I32_F64_e32_247]], implicit [[V_CVT_I32_F64_e32_248]], implicit [[V_CVT_I32_F64_e32_249]]
- ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_256:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 255, implicit $exec, implicit $mode
- ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_250]], implicit [[V_CVT_I32_F64_e32_251]], implicit [[V_CVT_I32_F64_e32_252]], implicit [[V_CVT_I32_F64_e32_253]], implicit [[V_CVT_I32_F64_e32_254]], implicit [[V_CVT_I32_F64_e32_256]], implicit [[V_CVT_I32_F64_e32_255]], implicit [[DEF]]
+ ; GFX90A-NEXT: [[DEF:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_255:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 255, implicit $exec, implicit $mode
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_256:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 256, implicit $exec, implicit $mode
+ ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_250]], implicit [[V_CVT_I32_F64_e32_251]], implicit [[V_CVT_I32_F64_e32_252]], implicit [[V_CVT_I32_F64_e32_253]], implicit [[V_CVT_I32_F64_e32_254]], implicit [[V_CVT_I32_F64_e32_255]], implicit [[V_CVT_I32_F64_e32_256]], implicit [[DEF]]
; GFX90A-NEXT: S_ENDPGM 0
bb.0:
successors: %bb.1
@@ -1926,8 +1924,7 @@ body: |
; GFX908: bb.0:
; GFX908-NEXT: successors: %bb.1(0x80000000)
; GFX908-NEXT: {{ $}}
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 257, implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 258, implicit $exec, implicit $mode
+ ; GFX908-NEXT: bb.1:
; GFX908-NEXT: [[DEF:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF1:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF2:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
@@ -1938,6 +1935,7 @@ body: |
; GFX908-NEXT: [[DEF7:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF8:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF9:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]], implicit [[DEF5]], implicit [[DEF6]], implicit [[DEF7]], implicit [[DEF8]], implicit [[DEF9]]
; GFX908-NEXT: [[DEF10:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF11:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF12:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
@@ -1948,6 +1946,7 @@ body: |
; GFX908-NEXT: [[DEF17:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF18:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF19:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF10]], implicit [[DEF11]], implicit [[DEF12]], implicit [[DEF13]], implicit [[DEF14]], implicit [[DEF15]], implicit [[DEF16]], implicit [[DEF17]], implicit [[DEF18]], implicit [[DEF19]]
; GFX908-NEXT: [[DEF20:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF21:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF22:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
@@ -1958,6 +1957,7 @@ body: |
; GFX908-NEXT: [[DEF27:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF28:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF29:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF20]], implicit [[DEF21]], implicit [[DEF22]], implicit [[DEF23]], implicit [[DEF24]], implicit [[DEF25]], implicit [[DEF26]], implicit [[DEF27]], implicit [[DEF28]], implicit [[DEF29]]
; GFX908-NEXT: [[DEF30:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF31:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF32:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
@@ -1968,6 +1968,7 @@ body: |
; GFX908-NEXT: [[DEF37:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF38:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF39:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF30]], implicit [[DEF31]], implicit [[DEF32]], implicit [[DEF33]], implicit [[DEF34]], implicit [[DEF35]], implicit [[DEF36]], implicit [[DEF37]], implicit [[DEF38]], implicit [[DEF39]]
; GFX908-NEXT: [[DEF40:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF41:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF42:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
@@ -1978,6 +1979,7 @@ body: |
; GFX908-NEXT: [[DEF47:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF48:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF49:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF40]], implicit [[DEF41]], implicit [[DEF42]], implicit [[DEF43]], implicit [[DEF44]], implicit [[DEF45]], implicit [[DEF46]], implicit [[DEF47]], implicit [[DEF48]], implicit [[DEF49]]
; GFX908-NEXT: [[DEF50:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF51:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF52:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
@@ -1988,6 +1990,7 @@ body: |
; GFX908-NEXT: [[DEF57:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF58:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF59:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF50]], implicit [[DEF51]], implicit [[DEF52]], implicit [[DEF53]], implicit [[DEF54]], implicit [[DEF55]], implicit [[DEF56]], implicit [[DEF57]], implicit [[DEF58]], implicit [[DEF59]]
; GFX908-NEXT: [[DEF60:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF61:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF62:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
@@ -1998,6 +2001,7 @@ body: |
; GFX908-NEXT: [[DEF67:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF68:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF69:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF60]], implicit [[DEF61]], implicit [[DEF62]], implicit [[DEF63]], implicit [[DEF64]], implicit [[DEF65]], implicit [[DEF66]], implicit [[DEF67]], implicit [[DEF68]], implicit [[DEF69]]
; GFX908-NEXT: [[DEF70:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF71:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF72:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
@@ -2008,6 +2012,7 @@ body: |
; GFX908-NEXT: [[DEF77:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF78:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF79:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF70]], implicit [[DEF71]], implicit [[DEF72]], implicit [[DEF73]], implicit [[DEF74]], implicit [[DEF75]], implicit [[DEF76]], implicit [[DEF77]], implicit [[DEF78]], implicit [[DEF79]]
; GFX908-NEXT: [[DEF80:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF81:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF82:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
@@ -2018,6 +2023,7 @@ body: |
; GFX908-NEXT: [[DEF87:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF88:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF89:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF80]], implicit [[DEF81]], implicit [[DEF82]], implicit [[DEF83]], implicit [[DEF84]], implicit [[DEF85]], implicit [[DEF86]], implicit [[DEF87]], implicit [[DEF88]], implicit [[DEF89]]
; GFX908-NEXT: [[DEF90:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF91:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF92:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
@@ -2028,6 +2034,7 @@ body: |
; GFX908-NEXT: [[DEF97:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF98:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF99:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF90]], implicit [[DEF91]], implicit [[DEF92]], implicit [[DEF93]], implicit [[DEF94]], implicit [[DEF95]], implicit [[DEF96]], implicit [[DEF97]], implicit [[DEF98]], implicit [[DEF99]]
; GFX908-NEXT: [[DEF100:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF101:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF102:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
@@ -2038,6 +2045,7 @@ body: |
; GFX908-NEXT: [[DEF107:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF108:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF109:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF100]], implicit [[DEF101]], implicit [[DEF102]], implicit [[DEF103]], implicit [[DEF104]], implicit [[DEF105]], implicit [[DEF106]], implicit [[DEF107]], implicit [[DEF108]], implicit [[DEF109]]
; GFX908-NEXT: [[DEF110:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF111:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF112:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
@@ -2048,6 +2056,7 @@ body: |
; GFX908-NEXT: [[DEF117:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF118:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF119:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF110]], implicit [[DEF111]], implicit [[DEF112]], implicit [[DEF113]], implicit [[DEF114]], implicit [[DEF115]], implicit [[DEF116]], implicit [[DEF117]], implicit [[DEF118]], implicit [[DEF119]]
; GFX908-NEXT: [[DEF120:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF121:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF122:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
@@ -2058,6 +2067,7 @@ body: |
; GFX908-NEXT: [[DEF127:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF128:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF129:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF120]], implicit [[DEF121]], implicit [[DEF122]], implicit [[DEF123]], implicit [[DEF124]], implicit [[DEF125]], implicit [[DEF126]], implicit [[DEF127]], implicit [[DEF128]], implicit [[DEF129]]
; GFX908-NEXT: [[DEF130:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF131:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF132:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
@@ -2068,6 +2078,7 @@ body: |
; GFX908-NEXT: [[DEF137:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF138:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF139:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF130]], implicit [[DEF131]], implicit [[DEF132]], implicit [[DEF133]], implicit [[DEF134]], implicit [[DEF135]], implicit [[DEF136]], implicit [[DEF137]], implicit [[DEF138]], implicit [[DEF139]]
; GFX908-NEXT: [[DEF140:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF141:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF142:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
@@ -2078,6 +2089,7 @@ body: |
; GFX908-NEXT: [[DEF147:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF148:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF149:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF140]], implicit [[DEF141]], implicit [[DEF142]], implicit [[DEF143]], implicit [[DEF144]], implicit [[DEF145]], implicit [[DEF146]], implicit [[DEF147]], implicit [[DEF148]], implicit [[DEF149]]
; GFX908-NEXT: [[DEF150:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF151:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF152:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
@@ -2088,6 +2100,7 @@ body: |
; GFX908-NEXT: [[DEF157:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF158:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF159:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF150]], implicit [[DEF151]], implicit [[DEF152]], implicit [[DEF153]], implicit [[DEF154]], implicit [[DEF155]], implicit [[DEF156]], implicit [[DEF157]], implicit [[DEF158]], implicit [[DEF159]]
; GFX908-NEXT: [[DEF160:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF161:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF162:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
@@ -2098,6 +2111,7 @@ body: |
; GFX908-NEXT: [[DEF167:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF168:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF169:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF160]], implicit [[DEF161]], implicit [[DEF162]], implicit [[DEF163]], implicit [[DEF164]], implicit [[DEF165]], implicit [[DEF166]], implicit [[DEF167]], implicit [[DEF168]], implicit [[DEF169]]
; GFX908-NEXT: [[DEF170:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF171:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF172:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
@@ -2108,6 +2122,7 @@ body: |
; GFX908-NEXT: [[DEF177:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF178:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF179:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF170]], implicit [[DEF171]], implicit [[DEF172]], implicit [[DEF173]], implicit [[DEF174]], implicit [[DEF175]], implicit [[DEF176]], implicit [[DEF177]], implicit [[DEF178]], implicit [[DEF179]]
; GFX908-NEXT: [[DEF180:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF181:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF182:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
@@ -2118,6 +2133,7 @@ body: |
; GFX908-NEXT: [[DEF187:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF188:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF189:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF180]], implicit [[DEF181]], implicit [[DEF182]], implicit [[DEF183]], implicit [[DEF184]], implicit [[DEF185]], implicit [[DEF186]], implicit [[DEF187]], implicit [[DEF188]], implicit [[DEF189]]
; GFX908-NEXT: [[DEF190:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF191:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF192:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
@@ -2128,6 +2144,7 @@ body: |
; GFX908-NEXT: [[DEF197:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF198:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF199:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF190]], implicit [[DEF191]], implicit [[DEF192]], implicit [[DEF193]], implicit [[DEF194]], implicit [[DEF195]], implicit [[DEF196]], implicit [[DEF197]], implicit [[DEF198]], implicit [[DEF199]]
; GFX908-NEXT: [[DEF200:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF201:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF202:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
@@ -2138,6 +2155,7 @@ body: |
; GFX908-NEXT: [[DEF207:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF208:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF209:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF200]], implicit [[DEF201]], implicit [[DEF202]], implicit [[DEF203]], implicit [[DEF204]], implicit [[DEF205]], implicit [[DEF206]], implicit [[DEF207]], implicit [[DEF208]], implicit [[DEF209]]
; GFX908-NEXT: [[DEF210:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF211:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF212:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
@@ -2148,6 +2166,7 @@ body: |
; GFX908-NEXT: [[DEF217:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF218:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF219:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF210]], implicit [[DEF211]], implicit [[DEF212]], implicit [[DEF213]], implicit [[DEF214]], implicit [[DEF215]], implicit [[DEF216]], implicit [[DEF217]], implicit [[DEF218]], implicit [[DEF219]]
; GFX908-NEXT: [[DEF220:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF221:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF222:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
@@ -2158,6 +2177,9 @@ body: |
; GFX908-NEXT: [[DEF227:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF228:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF229:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 257, implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 258, implicit $exec, implicit $mode
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF220]], implicit [[DEF221]], implicit [[DEF222]], implicit [[DEF223]], implicit [[DEF224]], implicit [[DEF225]], implicit [[DEF226]], implicit [[DEF227]], implicit [[DEF228]], implicit [[DEF229]]
; GFX908-NEXT: [[DEF230:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF231:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF232:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
@@ -2168,6 +2190,7 @@ body: |
; GFX908-NEXT: [[DEF237:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF238:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF239:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF230]], implicit [[DEF231]], implicit [[DEF232]], implicit [[DEF233]], implicit [[DEF234]], implicit [[DEF235]], implicit [[DEF236]], implicit [[DEF237]], implicit [[DEF238]], implicit [[DEF239]]
; GFX908-NEXT: [[DEF240:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF241:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF242:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
@@ -2178,47 +2201,22 @@ body: |
; GFX908-NEXT: [[DEF247:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF248:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF249:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF240]], implicit [[DEF241]], implicit [[DEF242]], implicit [[DEF243]], implicit [[DEF244]], implicit [[DEF245]], implicit [[DEF246]], implicit [[DEF247]], implicit [[DEF248]], implicit [[DEF249]]
; GFX908-NEXT: [[DEF250:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF251:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF252:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF253:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF254:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF255:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: {{ $}}
- ; GFX908-NEXT: bb.1:
; GFX908-NEXT: [[DEF256:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: S_NOP 0, implicit [[DEF256]], implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]], implicit [[DEF5]], implicit [[DEF6]], implicit [[DEF7]], implicit [[DEF8]]
- ; GFX908-NEXT: S_NOP 0, implicit [[DEF9]], implicit [[DEF10]], implicit [[DEF11]], implicit [[DEF12]], implicit [[DEF13]], implicit [[DEF14]], implicit [[DEF15]], implicit [[DEF16]], implicit [[DEF17]], implicit [[DEF18]]
- ; GFX908-NEXT: S_NOP 0, implicit [[DEF19]], implicit [[DEF20]], implicit [[DEF21]], implicit [[DEF22]], implicit [[DEF23]], implicit [[DEF24]], implicit [[DEF25]], implicit [[DEF26]], implicit [[DEF27]], implicit [[DEF28]]
- ; GFX908-NEXT: S_NOP 0, implicit [[DEF29]], implicit [[DEF30]], implicit [[DEF31]], implicit [[DEF32]], implicit [[DEF33]], implicit [[DEF34]], implicit [[DEF35]], implicit [[DEF36]], implicit [[DEF37]], implicit [[DEF38]]
- ; GFX908-NEXT: S_NOP 0, implicit [[DEF39]], implicit [[DEF40]], implicit [[DEF41]], implicit [[DEF42]], implicit [[DEF43]], implicit [[DEF44]], implicit [[DEF45]], implicit [[DEF46]], implicit [[DEF47]], implicit [[DEF48]]
- ; GFX908-NEXT: S_NOP 0, implicit [[DEF49]], implicit [[DEF50]], implicit [[DEF51]], implicit [[DEF52]], implicit [[DEF53]], implicit [[DEF54]], implicit [[DEF55]], implicit [[DEF56]], implicit [[DEF57]], implicit [[DEF58]]
- ; GFX908-NEXT: S_NOP 0, implicit [[DEF59]], implicit [[DEF60]], implicit [[DEF61]], implicit [[DEF62]], implicit [[DEF63]], implicit [[DEF64]], implicit [[DEF65]], implicit [[DEF66]], implicit [[DEF67]], implicit [[DEF68]]
- ; GFX908-NEXT: S_NOP 0, implicit [[DEF69]], implicit [[DEF70]], implicit [[DEF71]], implicit [[DEF72]], implicit [[DEF73]], implicit [[DEF74]], implicit [[DEF75]], implicit [[DEF76]], implicit [[DEF77]], implicit [[DEF78]]
- ; GFX908-NEXT: S_NOP 0, implicit [[DEF79]], implicit [[DEF80]], implicit [[DEF81]], implicit [[DEF82]], implicit [[DEF83]], implicit [[DEF84]], implicit [[DEF85]], implicit [[DEF86]], implicit [[DEF87]], implicit [[DEF88]]
- ; GFX908-NEXT: S_NOP 0, implicit [[DEF89]], implicit [[DEF90]], implicit [[DEF91]], implicit [[DEF92]], implicit [[DEF93]], implicit [[DEF94]], implicit [[DEF95]], implicit [[DEF96]], implicit [[DEF97]], implicit [[DEF98]]
- ; GFX908-NEXT: S_NOP 0, implicit [[DEF99]], implicit [[DEF100]], implicit [[DEF101]], implicit [[DEF102]], implicit [[DEF103]], implicit [[DEF104]], implicit [[DEF105]], implicit [[DEF106]], implicit [[DEF107]], implicit [[DEF108]]
- ; GFX908-NEXT: S_NOP 0, implicit [[DEF109]], implicit [[DEF110]], implicit [[DEF111]], implicit [[DEF112]], implicit [[DEF113]], implicit [[DEF114]], implicit [[DEF115]], implicit [[DEF116]], implicit [[DEF117]], implicit [[DEF118]]
- ; GFX908-NEXT: S_NOP 0, implicit [[DEF119]], implicit [[DEF120]], implicit [[DEF121]], implicit [[DEF122]], implicit [[DEF123]], implicit [[DEF124]], implicit [[DEF125]], implicit [[DEF126]], implicit [[DEF127]], implicit [[DEF128]]
- ; GFX908-NEXT: S_NOP 0, implicit [[DEF129]], implicit [[DEF130]], implicit [[DEF131]], implicit [[DEF132]], implicit [[DEF133]], implicit [[DEF134]], implicit [[DEF135]], implicit [[DEF136]], implicit [[DEF137]], implicit [[DEF138]]
- ; GFX908-NEXT: S_NOP 0, implicit [[DEF139]], implicit [[DEF140]], implicit [[DEF141]], implicit [[DEF142]], implicit [[DEF143]], implicit [[DEF144]], implicit [[DEF145]], implicit [[DEF146]], implicit [[DEF147]], implicit [[DEF148]]
- ; GFX908-NEXT: S_NOP 0, implicit [[DEF149]], implicit [[DEF150]], implicit [[DEF151]], implicit [[DEF152]], implicit [[DEF153]], implicit [[DEF154]], implicit [[DEF155]], implicit [[DEF156]], implicit [[DEF157]], implicit [[DEF158]]
- ; GFX908-NEXT: S_NOP 0, implicit [[DEF159]], implicit [[DEF160]], implicit [[DEF161]], implicit [[DEF162]], implicit [[DEF163]], implicit [[DEF164]], implicit [[DEF165]], implicit [[DEF166]], implicit [[DEF167]], implicit [[DEF168]]
- ; GFX908-NEXT: S_NOP 0, implicit [[DEF169]], implicit [[DEF170]], implicit [[DEF171]], implicit [[DEF172]], implicit [[DEF173]], implicit [[DEF174]], implicit [[DEF175]], implicit [[DEF176]], implicit [[DEF177]], implicit [[DEF178]]
- ; GFX908-NEXT: S_NOP 0, implicit [[DEF179]], implicit [[DEF180]], implicit [[DEF181]], implicit [[DEF182]], implicit [[DEF183]], implicit [[DEF184]], implicit [[DEF185]], implicit [[DEF186]], implicit [[DEF187]], implicit [[DEF188]]
- ; GFX908-NEXT: S_NOP 0, implicit [[DEF189]], implicit [[DEF190]], implicit [[DEF191]], implicit [[DEF192]], implicit [[DEF193]], implicit [[DEF194]], implicit [[DEF195]], implicit [[DEF196]], implicit [[DEF197]], implicit [[DEF198]]
- ; GFX908-NEXT: S_NOP 0, implicit [[DEF199]], implicit [[DEF200]], implicit [[DEF201]], implicit [[DEF202]], implicit [[DEF203]], implicit [[DEF204]], implicit [[DEF205]], implicit [[DEF206]], implicit [[DEF207]], implicit [[DEF208]]
- ; GFX908-NEXT: S_NOP 0, implicit [[DEF209]], implicit [[DEF210]], implicit [[DEF211]], implicit [[DEF212]], implicit [[DEF213]], implicit [[DEF214]], implicit [[DEF215]], implicit [[DEF216]], implicit [[DEF217]], implicit [[DEF218]]
- ; GFX908-NEXT: S_NOP 0, implicit [[DEF219]], implicit [[DEF220]], implicit [[DEF221]], implicit [[DEF222]], implicit [[DEF223]], implicit [[DEF224]], implicit [[DEF225]], implicit [[DEF226]], implicit [[DEF227]], implicit [[DEF228]]
- ; GFX908-NEXT: S_NOP 0, implicit [[DEF229]], implicit [[DEF230]], implicit [[DEF231]], implicit [[DEF232]], implicit [[DEF233]], implicit [[DEF234]], implicit [[DEF235]], implicit [[DEF236]], implicit [[DEF237]], implicit [[DEF238]]
- ; GFX908-NEXT: S_NOP 0, implicit [[DEF239]], implicit [[DEF240]], implicit [[DEF241]], implicit [[DEF242]], implicit [[DEF243]], implicit [[DEF244]], implicit [[DEF245]], implicit [[DEF246]], implicit [[DEF247]], implicit [[DEF248]]
- ; GFX908-NEXT: S_NOP 0, implicit [[DEF249]], implicit [[DEF250]], implicit [[DEF251]], implicit [[DEF252]], implicit [[DEF253]], implicit [[DEF254]], implicit [[DEF255]], implicit [[V_CVT_I32_F64_e32_]], implicit [[V_CVT_I32_F64_e32_1]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF250]], implicit [[DEF251]], implicit [[DEF252]], implicit [[DEF253]], implicit [[DEF254]], implicit [[DEF255]], implicit [[DEF256]], implicit [[V_CVT_I32_F64_e32_]], implicit [[V_CVT_I32_F64_e32_1]]
; GFX908-NEXT: S_ENDPGM 0
;
; GFX90A-LABEL: name: reduce_spill_agpr_above_addressable_limit
; GFX90A: bb.0:
; GFX90A-NEXT: successors: %bb.1(0x80000000)
; GFX90A-NEXT: {{ $}}
+ ; GFX90A-NEXT: bb.1:
; GFX90A-NEXT: [[DEF:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF1:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF2:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
@@ -2229,6 +2227,7 @@ body: |
; GFX90A-NEXT: [[DEF7:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF8:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF9:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]], implicit [[DEF5]], implicit [[DEF6]], implicit [[DEF7]], implicit [[DEF8]], implicit [[DEF9]]
; GFX90A-NEXT: [[DEF10:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF11:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF12:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
@@ -2239,6 +2238,7 @@ body: |
; GFX90A-NEXT: [[DEF17:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF18:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF19:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF10]], implicit [[DEF11]], implicit [[DEF12]], implicit [[DEF13]], implicit [[DEF14]], implicit [[DEF15]], implicit [[DEF16]], implicit [[DEF17]], implicit [[DEF18]], implicit [[DEF19]]
; GFX90A-NEXT: [[DEF20:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF21:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF22:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
@@ -2249,6 +2249,7 @@ body: |
; GFX90A-NEXT: [[DEF27:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF28:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF29:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF20]], implicit [[DEF21]], implicit [[DEF22]], implicit [[DEF23]], implicit [[DEF24]], implicit [[DEF25]], implicit [[DEF26]], implicit [[DEF27]], implicit [[DEF28]], implicit [[DEF29]]
; GFX90A-NEXT: [[DEF30:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF31:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF32:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
@@ -2259,6 +2260,7 @@ body: |
; GFX90A-NEXT: [[DEF37:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF38:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF39:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF30]], implicit [[DEF31]], implicit [[DEF32]], implicit [[DEF33]], implicit [[DEF34]], implicit [[DEF35]], implicit [[DEF36]], implicit [[DEF37]], implicit [[DEF38]], implicit [[DEF39]]
; GFX90A-NEXT: [[DEF40:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF41:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF42:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
@@ -2269,6 +2271,7 @@ body: |
; GFX90A-NEXT: [[DEF47:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF48:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF49:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF40]], implicit [[DEF41]], implicit [[DEF42]], implicit [[DEF43]], implicit [[DEF44]], implicit [[DEF45]], implicit [[DEF46]], implicit [[DEF47]], implicit [[DEF48]], implicit [[DEF49]]
; GFX90A-NEXT: [[DEF50:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF51:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF52:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
@@ -2279,6 +2282,7 @@ body: |
; GFX90A-NEXT: [[DEF57:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF58:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF59:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF50]], implicit [[DEF51]], implicit [[DEF52]], implicit [[DEF53]], implicit [[DEF54]], implicit [[DEF55]], implicit [[DEF56]], implicit [[DEF57]], implicit [[DEF58]], implicit [[DEF59]]
; GFX90A-NEXT: [[DEF60:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF61:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF62:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
@@ -2289,6 +2293,7 @@ body: |
; GFX90A-NEXT: [[DEF67:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF68:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF69:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF60]], implicit [[DEF61]], implicit [[DEF62]], implicit [[DEF63]], implicit [[DEF64]], implicit [[DEF65]], implicit [[DEF66]], implicit [[DEF67]], implicit [[DEF68]], implicit [[DEF69]]
; GFX90A-NEXT: [[DEF70:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF71:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF72:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
@@ -2299,6 +2304,7 @@ body: |
; GFX90A-NEXT: [[DEF77:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF78:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF79:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF70]], implicit [[DEF71]], implicit [[DEF72]], implicit [[DEF73]], implicit [[DEF74]], implicit [[DEF75]], implicit [[DEF76]], implicit [[DEF77]], implicit [[DEF78]], implicit [[DEF79]]
; GFX90A-NEXT: [[DEF80:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF81:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF82:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
@@ -2309,6 +2315,7 @@ body: |
; GFX90A-NEXT: [[DEF87:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF88:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF89:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF80]], implicit [[DEF81]], implicit [[DEF82]], implicit [[DEF83]], implicit [[DEF84]], implicit [[DEF85]], implicit [[DEF86]], implicit [[DEF87]], implicit [[DEF88]], implicit [[DEF89]]
; GFX90A-NEXT: [[DEF90:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF91:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF92:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
@@ -2319,6 +2326,7 @@ body: |
; GFX90A-NEXT: [[DEF97:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF98:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF99:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF90]], implicit [[DEF91]], implicit [[DEF92]], implicit [[DEF93]], implicit [[DEF94]], implicit [[DEF95]], implicit [[DEF96]], implicit [[DEF97]], implicit [[DEF98]], implicit [[DEF99]]
; GFX90A-NEXT: [[DEF100:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF101:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF102:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
@@ -2329,6 +2337,7 @@ body: |
; GFX90A-NEXT: [[DEF107:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF108:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF109:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF100]], implicit [[DEF101]], implicit [[DEF102]], implicit [[DEF103]], implicit [[DEF104]], implicit [[DEF105]], implicit [[DEF106]], implicit [[DEF107]], implicit [[DEF108]], implicit [[DEF109]]
; GFX90A-NEXT: [[DEF110:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF111:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF112:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
@@ -2339,6 +2348,7 @@ body: |
; GFX90A-NEXT: [[DEF117:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF118:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF119:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF110]], implicit [[DEF111]], implicit [[DEF112]], implicit [[DEF113]], implicit [[DEF114]], implicit [[DEF115]], implicit [[DEF116]], implicit [[DEF117]], implicit [[DEF118]], implicit [[DEF119]]
; GFX90A-NEXT: [[DEF120:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF121:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF122:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
@@ -2349,6 +2359,7 @@ body: |
; GFX90A-NEXT: [[DEF127:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF128:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF129:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF120]], implicit [[DEF121]], implicit [[DEF122]], implicit [[DEF123]], implicit [[DEF124]], implicit [[DEF125]], implicit [[DEF126]], implicit [[DEF127]], implicit [[DEF128]], implicit [[DEF129]]
; GFX90A-NEXT: [[DEF130:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF131:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF132:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
@@ -2359,6 +2370,7 @@ body: |
; GFX90A-NEXT: [[DEF137:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF138:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF139:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF130]], implicit [[DEF131]], implicit [[DEF132]], implicit [[DEF133]], implicit [[DEF134]], implicit [[DEF135]], implicit [[DEF136]], implicit [[DEF137]], implicit [[DEF138]], implicit [[DEF139]]
; GFX90A-NEXT: [[DEF140:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF141:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF142:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
@@ -2369,6 +2381,7 @@ body: |
; GFX90A-NEXT: [[DEF147:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF148:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF149:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF140]], implicit [[DEF141]], implicit [[DEF142]], implicit [[DEF143]], implicit [[DEF144]], implicit [[DEF145]], implicit [[DEF146]], implicit [[DEF147]], implicit [[DEF148]], implicit [[DEF149]]
; GFX90A-NEXT: [[DEF150:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF151:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF152:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
@@ -2379,6 +2392,7 @@ body: |
; GFX90A-NEXT: [[DEF157:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF158:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF159:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF150]], implicit [[DEF151]], implicit [[DEF152]], implicit [[DEF153]], implicit [[DEF154]], implicit [[DEF155]], implicit [[DEF156]], implicit [[DEF157]], implicit [[DEF158]], implicit [[DEF159]]
; GFX90A-NEXT: [[DEF160:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF161:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF162:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
@@ -2389,6 +2403,7 @@ body: |
; GFX90A-NEXT: [[DEF167:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF168:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF169:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF160]], implicit [[DEF161]], implicit [[DEF162]], implicit [[DEF163]], implicit [[DEF164]], implicit [[DEF165]], implicit [[DEF166]], implicit [[DEF167]], implicit [[DEF168]], implicit [[DEF169]]
; GFX90A-NEXT: [[DEF170:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF171:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF172:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
@@ -2399,6 +2414,7 @@ body: |
; GFX90A-NEXT: [[DEF177:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF178:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF179:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF170]], implicit [[DEF171]], implicit [[DEF172]], implicit [[DEF173]], implicit [[DEF174]], implicit [[DEF175]], implicit [[DEF176]], implicit [[DEF177]], implicit [[DEF178]], implicit [[DEF179]]
; GFX90A-NEXT: [[DEF180:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF181:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF182:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
@@ -2409,6 +2425,7 @@ body: |
; GFX90A-NEXT: [[DEF187:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF188:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF189:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF180]], implicit [[DEF181]], implicit [[DEF182]], implicit [[DEF183]], implicit [[DEF184]], implicit [[DEF185]], implicit [[DEF186]], implicit [[DEF187]], implicit [[DEF188]], implicit [[DEF189]]
; GFX90A-NEXT: [[DEF190:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF191:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF192:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
@@ -2419,6 +2436,7 @@ body: |
; GFX90A-NEXT: [[DEF197:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF198:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF199:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF190]], implicit [[DEF191]], implicit [[DEF192]], implicit [[DEF193]], implicit [[DEF194]], implicit [[DEF195]], implicit [[DEF196]], implicit [[DEF197]], implicit [[DEF198]], implicit [[DEF199]]
; GFX90A-NEXT: [[DEF200:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF201:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF202:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
@@ -2429,6 +2447,7 @@ body: |
; GFX90A-NEXT: [[DEF207:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF208:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF209:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF200]], implicit [[DEF201]], implicit [[DEF202]], implicit [[DEF203]], implicit [[DEF204]], implicit [[DEF205]], implicit [[DEF206]], implicit [[DEF207]], implicit [[DEF208]], implicit [[DEF209]]
; GFX90A-NEXT: [[DEF210:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF211:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF212:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
@@ -2439,6 +2458,7 @@ body: |
; GFX90A-NEXT: [[DEF217:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF218:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF219:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF210]], implicit [[DEF211]], implicit [[DEF212]], implicit [[DEF213]], implicit [[DEF214]], implicit [[DEF215]], implicit [[DEF216]], implicit [[DEF217]], implicit [[DEF218]], implicit [[DEF219]]
; GFX90A-NEXT: [[DEF220:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF221:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF222:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
@@ -2449,6 +2469,7 @@ body: |
; GFX90A-NEXT: [[DEF227:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF228:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF229:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF220]], implicit [[DEF221]], implicit [[DEF222]], implicit [[DEF223]], implicit [[DEF224]], implicit [[DEF225]], implicit [[DEF226]], implicit [[DEF227]], implicit [[DEF228]], implicit [[DEF229]]
; GFX90A-NEXT: [[DEF230:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF231:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF232:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
@@ -2459,6 +2480,7 @@ body: |
; GFX90A-NEXT: [[DEF237:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF238:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF239:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF230]], implicit [[DEF231]], implicit [[DEF232]], implicit [[DEF233]], implicit [[DEF234]], implicit [[DEF235]], implicit [[DEF236]], implicit [[DEF237]], implicit [[DEF238]], implicit [[DEF239]]
; GFX90A-NEXT: [[DEF240:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF241:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF242:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
@@ -2469,43 +2491,17 @@ body: |
; GFX90A-NEXT: [[DEF247:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF248:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF249:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF240]], implicit [[DEF241]], implicit [[DEF242]], implicit [[DEF243]], implicit [[DEF244]], implicit [[DEF245]], implicit [[DEF246]], implicit [[DEF247]], implicit [[DEF248]], implicit [[DEF249]]
; GFX90A-NEXT: [[DEF250:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF251:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF252:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF253:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF254:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF255:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF256:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[V_CVT_I32_F64_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 257, implicit $exec, implicit $mode
; GFX90A-NEXT: [[V_CVT_I32_F64_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 258, implicit $exec, implicit $mode
- ; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: bb.1:
- ; GFX90A-NEXT: [[DEF256:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX90A-NEXT: S_NOP 0, implicit [[DEF256]], implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]], implicit [[DEF5]], implicit [[DEF6]], implicit [[DEF7]], implicit [[DEF8]]
- ; GFX90A-NEXT: S_NOP 0, implicit [[DEF9]], implicit [[DEF10]], implicit [[DEF11]], implicit [[DEF12]], implicit [[DEF13]], implicit [[DEF14]], implicit [[DEF15]], implicit [[DEF16]], implicit [[DEF17]], implicit [[DEF18]]
- ; GFX90A-NEXT: S_NOP 0, implicit [[DEF19]], implicit [[DEF20]], implicit [[DEF21]], implicit [[DEF22]], implicit [[DEF23]], implicit [[DEF24]], implicit [[DEF25]], implicit [[DEF26]], implicit [[DEF27]], implicit [[DEF28]]
- ; GFX90A-NEXT: S_NOP 0, implicit [[DEF29]], implicit [[DEF30]], implicit [[DEF31]], implicit [[DEF32]], implicit [[DEF33]], implicit [[DEF34]], implicit [[DEF35]], implicit [[DEF36]], implicit [[DEF37]], implicit [[DEF38]]
- ; GFX90A-NEXT: S_NOP 0, implicit [[DEF39]], implicit [[DEF40]], implicit [[DEF41]], implicit [[DEF42]], implicit [[DEF43]], implicit [[DEF44]], implicit [[DEF45]], implicit [[DEF46]], implicit [[DEF47]], implicit [[DEF48]]
- ; GFX90A-NEXT: S_NOP 0, implicit [[DEF49]], implicit [[DEF50]], implicit [[DEF51]], implicit [[DEF52]], implicit [[DEF53]], implicit [[DEF54]], implicit [[DEF55]], implicit [[DEF56]], implicit [[DEF57]], implicit [[DEF58]]
- ; GFX90A-NEXT: S_NOP 0, implicit [[DEF59]], implicit [[DEF60]], implicit [[DEF61]], implicit [[DEF62]], implicit [[DEF63]], implicit [[DEF64]], implicit [[DEF65]], implicit [[DEF66]], implicit [[DEF67]], implicit [[DEF68]]
- ; GFX90A-NEXT: S_NOP 0, implicit [[DEF69]], implicit [[DEF70]], implicit [[DEF71]], implicit [[DEF72]], implicit [[DEF73]], implicit [[DEF74]], implicit [[DEF75]], implicit [[DEF76]], implicit [[DEF77]], implicit [[DEF78]]
- ; GFX90A-NEXT: S_NOP 0, implicit [[DEF79]], implicit [[DEF80]], implicit [[DEF81]], implicit [[DEF82]], implicit [[DEF83]], implicit [[DEF84]], implicit [[DEF85]], implicit [[DEF86]], implicit [[DEF87]], implicit [[DEF88]]
- ; GFX90A-NEXT: S_NOP 0, implicit [[DEF89]], implicit [[DEF90]], implicit [[DEF91]], implicit [[DEF92]], implicit [[DEF93]], implicit [[DEF94]], implicit [[DEF95]], implicit [[DEF96]], implicit [[DEF97]], implicit [[DEF98]]
- ; GFX90A-NEXT: S_NOP 0, implicit [[DEF99]], implicit [[DEF100]], implicit [[DEF101]], implicit [[DEF102]], implicit [[DEF103]], implicit [[DEF104]], implicit [[DEF105]], implicit [[DEF106]], implicit [[DEF107]], implicit [[DEF108]]
- ; GFX90A-NEXT: S_NOP 0, implicit [[DEF109]], implicit [[DEF110]], implicit [[DEF111]], implicit [[DEF112]], implicit [[DEF113]], implicit [[DEF114]], implicit [[DEF115]], implicit [[DEF116]], implicit [[DEF117]], implicit [[DEF118]]
- ; GFX90A-NEXT: S_NOP 0, implicit [[DEF119]], implicit [[DEF120]], implicit [[DEF121]], implicit [[DEF122]], implicit [[DEF123]], implicit [[DEF124]], implicit [[DEF125]], implicit [[DEF126]], implicit [[DEF127]], implicit [[DEF128]]
- ; GFX90A-NEXT: S_NOP 0, implicit [[DEF129]], implicit [[DEF130]], implicit [[DEF131]], implicit [[DEF132]], implicit [[DEF133]], implicit [[DEF134]], implicit [[DEF135]], implicit [[DEF136]], implicit [[DEF137]], implicit [[DEF138]]
- ; GFX90A-NEXT: S_NOP 0, implicit [[DEF139]], implicit [[DEF140]], implicit [[DEF141]], implicit [[DEF142]], implicit [[DEF143]], implicit [[DEF144]], implicit [[DEF145]], implicit [[DEF146]], implicit [[DEF147]], implicit [[DEF148]]
- ; GFX90A-NEXT: S_NOP 0, implicit [[DEF149]], implicit [[DEF150]], implicit [[DEF151]], implicit [[DEF152]], implicit [[DEF153]], implicit [[DEF154]], implicit [[DEF155]], implicit [[DEF156]], implicit [[DEF157]], implicit [[DEF158]]
- ; GFX90A-NEXT: S_NOP 0, implicit [[DEF159]], implicit [[DEF160]], implicit [[DEF161]], implicit [[DEF162]], implicit [[DEF163]], implicit [[DEF164]], implicit [[DEF165]], implicit [[DEF166]], implicit [[DEF167]], implicit [[DEF168]]
- ; GFX90A-NEXT: S_NOP 0, implicit [[DEF169]], implicit [[DEF170]], implicit [[DEF171]], implicit [[DEF172]], implicit [[DEF173]], implicit [[DEF174]], implicit [[DEF175]], implicit [[DEF176]], implicit [[DEF177]], implicit [[DEF178]]
- ; GFX90A-NEXT: S_NOP 0, implicit [[DEF179]], implicit [[DEF180]], implicit [[DEF181]], implicit [[DEF182]], implicit [[DEF183]], implicit [[DEF184]], implicit [[DEF185]], implicit [[DEF186]], implicit [[DEF187]], implicit [[DEF188]]
- ; GFX90A-NEXT: S_NOP 0, implicit [[DEF189]], implicit [[DEF190]], implicit [[DEF191]], implicit [[DEF192]], implicit [[DEF193]], implicit [[DEF194]], implicit [[DEF195]], implicit [[DEF196]], implicit [[DEF197]], implicit [[DEF198]]
- ; GFX90A-NEXT: S_NOP 0, implicit [[DEF199]], implicit [[DEF200]], implicit [[DEF201]], implicit [[DEF202]], implicit [[DEF203]], implicit [[DEF204]], implicit [[DEF205]], implicit [[DEF206]], implicit [[DEF207]], implicit [[DEF208]]
- ; GFX90A-NEXT: S_NOP 0, implicit [[DEF209]], implicit [[DEF210]], implicit [[DEF211]], implicit [[DEF212]], implicit [[DEF213]], implicit [[DEF214]], implicit [[DEF215]], implicit [[DEF216]], implicit [[DEF217]], implicit [[DEF218]]
- ; GFX90A-NEXT: S_NOP 0, implicit [[DEF219]], implicit [[DEF220]], implicit [[DEF221]], implicit [[DEF222]], implicit [[DEF223]], implicit [[DEF224]], implicit [[DEF225]], implicit [[DEF226]], implicit [[DEF227]], implicit [[DEF228]]
- ; GFX90A-NEXT: S_NOP 0, implicit [[DEF229]], implicit [[DEF230]], implicit [[DEF231]], implicit [[DEF232]], implicit [[DEF233]], implicit [[DEF234]], implicit [[DEF235]], implicit [[DEF236]], implicit [[DEF237]], implicit [[DEF238]]
- ; GFX90A-NEXT: S_NOP 0, implicit [[DEF239]], implicit [[DEF240]], implicit [[DEF241]], implicit [[DEF242]], implicit [[DEF243]], implicit [[DEF244]], implicit [[DEF245]], implicit [[DEF246]], implicit [[DEF247]], implicit [[DEF248]]
- ; GFX90A-NEXT: S_NOP 0, implicit [[DEF249]], implicit [[DEF250]], implicit [[DEF251]], implicit [[DEF252]], implicit [[DEF253]], implicit [[DEF254]], implicit [[DEF255]], implicit [[V_CVT_I32_F64_e32_]], implicit [[V_CVT_I32_F64_e32_1]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF250]], implicit [[DEF251]], implicit [[DEF252]], implicit [[DEF253]], implicit [[DEF254]], implicit [[DEF255]], implicit [[DEF256]], implicit [[V_CVT_I32_F64_e32_]], implicit [[V_CVT_I32_F64_e32_1]]
; GFX90A-NEXT: S_ENDPGM 0
bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats.mir b/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats.mir
index c117473581746..b6a13809a620b 100644
--- a/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats.mir
+++ b/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats.mir
@@ -35,7 +35,6 @@ body: |
; GFX908-NEXT: [[V_CVT_I32_F64_e32_20:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 20, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_21:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 21, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.1:
; GFX908-NEXT: successors: %bb.2(0x80000000)
@@ -51,6 +50,7 @@ body: |
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_10]], implicit [[V_CVT_I32_F64_e32_11]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_12]], implicit [[V_CVT_I32_F64_e32_13]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_14]], implicit [[V_CVT_I32_F64_e32_15]]
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_16]], implicit [[V_CVT_I32_F64_e32_17]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_18]], implicit [[V_CVT_I32_F64_e32_19]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_20]], implicit [[V_CVT_I32_F64_e32_21]]
@@ -351,15 +351,15 @@ body: |
; GFX908-NEXT: [[V_CVT_I32_F64_e32_19:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 19, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_20:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 20, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_21:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 21, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.1:
; GFX908-NEXT: successors: %bb.2(0x80000000)
; GFX908-NEXT: {{ $}}
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_23]]
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_24]], implicit [[V_CVT_I32_F64_e32_22]]
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_22]]
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_23]], implicit [[V_CVT_I32_F64_e32_24]]
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.2:
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_]], implicit [[V_CVT_I32_F64_e32_1]]
@@ -589,29 +589,29 @@ body: |
; GFX908-NEXT: [[V_CVT_I32_F64_e32_19:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 19, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_20:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 20, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_21:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 21, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.1:
; GFX908-NEXT: successors: %bb.2(0x80000000)
; GFX908-NEXT: {{ $}}
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_23]]
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_22]], implicit [[V_CVT_I32_F64_e32_24]]
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_22]]
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_24]], implicit [[V_CVT_I32_F64_e32_23]]
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.2:
; GFX908-NEXT: successors: %bb.3(0x80000000)
; GFX908-NEXT: {{ $}}
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 26, implicit $exec, implicit $mode
; GFX908-NEXT: S_NOP 0
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.3:
; GFX908-NEXT: successors: %bb.4(0x80000000)
; GFX908-NEXT: {{ $}}
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_26:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 27, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_26]]
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 25, implicit $exec, implicit $mode
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_27]], implicit [[V_CVT_I32_F64_e32_25]]
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 27, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_25]]
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_26:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 25, implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 26, implicit $exec, implicit $mode
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_26]], implicit [[V_CVT_I32_F64_e32_27]]
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.4:
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_]], implicit [[V_CVT_I32_F64_e32_1]]
@@ -1095,95 +1095,14 @@ body: |
; GFX908-NEXT: [[V_CVT_I32_F64_e32_17:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 17, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_18:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 18, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_19:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 19, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
- ; GFX908-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 1
- ; GFX908-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sgpr_32 = S_MOV_B32 2
- ; GFX908-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sgpr_32 = S_MOV_B32 3
- ; GFX908-NEXT: [[S_MOV_B32_5:%[0-9]+]]:sgpr_32 = S_MOV_B32 4
- ; GFX908-NEXT: [[S_MOV_B32_6:%[0-9]+]]:sgpr_32 = S_MOV_B32 5
- ; GFX908-NEXT: [[S_MOV_B32_7:%[0-9]+]]:sgpr_32 = S_MOV_B32 6
- ; GFX908-NEXT: [[S_MOV_B32_8:%[0-9]+]]:sgpr_32 = S_MOV_B32 7
- ; GFX908-NEXT: [[S_MOV_B32_9:%[0-9]+]]:sgpr_32 = S_MOV_B32 8
- ; GFX908-NEXT: [[S_MOV_B32_10:%[0-9]+]]:sgpr_32 = S_MOV_B32 9
- ; GFX908-NEXT: [[S_MOV_B32_11:%[0-9]+]]:sgpr_32 = S_MOV_B32 10
- ; GFX908-NEXT: [[S_MOV_B32_12:%[0-9]+]]:sgpr_32 = S_MOV_B32 11
- ; GFX908-NEXT: [[S_MOV_B32_13:%[0-9]+]]:sgpr_32 = S_MOV_B32 12
- ; GFX908-NEXT: [[S_MOV_B32_14:%[0-9]+]]:sgpr_32 = S_MOV_B32 13
- ; GFX908-NEXT: [[S_MOV_B32_15:%[0-9]+]]:sgpr_32 = S_MOV_B32 14
- ; GFX908-NEXT: [[S_MOV_B32_16:%[0-9]+]]:sgpr_32 = S_MOV_B32 15
- ; GFX908-NEXT: [[S_MOV_B32_17:%[0-9]+]]:sgpr_32 = S_MOV_B32 16
- ; GFX908-NEXT: [[S_MOV_B32_18:%[0-9]+]]:sgpr_32 = S_MOV_B32 17
- ; GFX908-NEXT: [[S_MOV_B32_19:%[0-9]+]]:sgpr_32 = S_MOV_B32 18
- ; GFX908-NEXT: [[S_MOV_B32_20:%[0-9]+]]:sgpr_32 = S_MOV_B32 19
- ; GFX908-NEXT: [[S_MOV_B32_21:%[0-9]+]]:sgpr_32 = S_MOV_B32 20
- ; GFX908-NEXT: [[S_MOV_B32_22:%[0-9]+]]:sgpr_32 = S_MOV_B32 21
- ; GFX908-NEXT: [[S_MOV_B32_23:%[0-9]+]]:sgpr_32 = S_MOV_B32 22
- ; GFX908-NEXT: [[S_MOV_B32_24:%[0-9]+]]:sgpr_32 = S_MOV_B32 23
- ; GFX908-NEXT: [[S_MOV_B32_25:%[0-9]+]]:sgpr_32 = S_MOV_B32 24
- ; GFX908-NEXT: [[S_MOV_B32_26:%[0-9]+]]:sgpr_32 = S_MOV_B32 25
- ; GFX908-NEXT: [[S_MOV_B32_27:%[0-9]+]]:sgpr_32 = S_MOV_B32 26
- ; GFX908-NEXT: [[S_MOV_B32_28:%[0-9]+]]:sgpr_32 = S_MOV_B32 27
- ; GFX908-NEXT: [[S_MOV_B32_29:%[0-9]+]]:sgpr_32 = S_MOV_B32 28
- ; GFX908-NEXT: [[S_MOV_B32_30:%[0-9]+]]:sgpr_32 = S_MOV_B32 29
- ; GFX908-NEXT: [[S_MOV_B32_31:%[0-9]+]]:sgpr_32 = S_MOV_B32 30
- ; GFX908-NEXT: [[S_MOV_B32_32:%[0-9]+]]:sgpr_32 = S_MOV_B32 31
- ; GFX908-NEXT: [[S_MOV_B32_33:%[0-9]+]]:sgpr_32 = S_MOV_B32 32
- ; GFX908-NEXT: [[S_MOV_B32_34:%[0-9]+]]:sgpr_32 = S_MOV_B32 33
- ; GFX908-NEXT: [[S_MOV_B32_35:%[0-9]+]]:sgpr_32 = S_MOV_B32 34
- ; GFX908-NEXT: [[S_MOV_B32_36:%[0-9]+]]:sgpr_32 = S_MOV_B32 35
- ; GFX908-NEXT: [[S_MOV_B32_37:%[0-9]+]]:sgpr_32 = S_MOV_B32 36
- ; GFX908-NEXT: [[S_MOV_B32_38:%[0-9]+]]:sgpr_32 = S_MOV_B32 37
- ; GFX908-NEXT: [[S_MOV_B32_39:%[0-9]+]]:sgpr_32 = S_MOV_B32 38
- ; GFX908-NEXT: [[S_MOV_B32_40:%[0-9]+]]:sgpr_32 = S_MOV_B32 39
- ; GFX908-NEXT: [[S_MOV_B32_41:%[0-9]+]]:sgpr_32 = S_MOV_B32 40
- ; GFX908-NEXT: [[S_MOV_B32_42:%[0-9]+]]:sgpr_32 = S_MOV_B32 41
- ; GFX908-NEXT: [[S_MOV_B32_43:%[0-9]+]]:sgpr_32 = S_MOV_B32 42
- ; GFX908-NEXT: [[S_MOV_B32_44:%[0-9]+]]:sgpr_32 = S_MOV_B32 43
- ; GFX908-NEXT: [[S_MOV_B32_45:%[0-9]+]]:sgpr_32 = S_MOV_B32 44
- ; GFX908-NEXT: [[S_MOV_B32_46:%[0-9]+]]:sgpr_32 = S_MOV_B32 45
- ; GFX908-NEXT: [[S_MOV_B32_47:%[0-9]+]]:sgpr_32 = S_MOV_B32 46
- ; GFX908-NEXT: [[S_MOV_B32_48:%[0-9]+]]:sgpr_32 = S_MOV_B32 47
- ; GFX908-NEXT: [[S_MOV_B32_49:%[0-9]+]]:sgpr_32 = S_MOV_B32 48
- ; GFX908-NEXT: [[S_MOV_B32_50:%[0-9]+]]:sgpr_32 = S_MOV_B32 49
- ; GFX908-NEXT: [[S_MOV_B32_51:%[0-9]+]]:sgpr_32 = S_MOV_B32 50
- ; GFX908-NEXT: [[S_MOV_B32_52:%[0-9]+]]:sgpr_32 = S_MOV_B32 51
- ; GFX908-NEXT: [[S_MOV_B32_53:%[0-9]+]]:sgpr_32 = S_MOV_B32 52
- ; GFX908-NEXT: [[S_MOV_B32_54:%[0-9]+]]:sgpr_32 = S_MOV_B32 53
- ; GFX908-NEXT: [[S_MOV_B32_55:%[0-9]+]]:sgpr_32 = S_MOV_B32 54
- ; GFX908-NEXT: [[S_MOV_B32_56:%[0-9]+]]:sgpr_32 = S_MOV_B32 55
- ; GFX908-NEXT: [[S_MOV_B32_57:%[0-9]+]]:sgpr_32 = S_MOV_B32 56
- ; GFX908-NEXT: [[S_MOV_B32_58:%[0-9]+]]:sgpr_32 = S_MOV_B32 57
- ; GFX908-NEXT: [[S_MOV_B32_59:%[0-9]+]]:sgpr_32 = S_MOV_B32 58
- ; GFX908-NEXT: [[S_MOV_B32_60:%[0-9]+]]:sgpr_32 = S_MOV_B32 59
- ; GFX908-NEXT: [[S_MOV_B32_61:%[0-9]+]]:sgpr_32 = S_MOV_B32 60
- ; GFX908-NEXT: [[S_MOV_B32_62:%[0-9]+]]:sgpr_32 = S_MOV_B32 61
- ; GFX908-NEXT: [[S_MOV_B32_63:%[0-9]+]]:sgpr_32 = S_MOV_B32 62
- ; GFX908-NEXT: [[S_MOV_B32_64:%[0-9]+]]:sgpr_32 = S_MOV_B32 63
- ; GFX908-NEXT: [[S_MOV_B32_65:%[0-9]+]]:sgpr_32 = S_MOV_B32 64
- ; GFX908-NEXT: [[S_MOV_B32_66:%[0-9]+]]:sgpr_32 = S_MOV_B32 65
- ; GFX908-NEXT: [[S_MOV_B32_67:%[0-9]+]]:sgpr_32 = S_MOV_B32 66
- ; GFX908-NEXT: [[S_MOV_B32_68:%[0-9]+]]:sgpr_32 = S_MOV_B32 67
- ; GFX908-NEXT: [[S_MOV_B32_69:%[0-9]+]]:sgpr_32 = S_MOV_B32 68
- ; GFX908-NEXT: [[S_MOV_B32_70:%[0-9]+]]:sgpr_32 = S_MOV_B32 69
- ; GFX908-NEXT: [[S_MOV_B32_71:%[0-9]+]]:sgpr_32 = S_MOV_B32 70
- ; GFX908-NEXT: [[S_MOV_B32_72:%[0-9]+]]:sgpr_32 = S_MOV_B32 71
; GFX908-NEXT: [[V_CVT_I32_F64_e32_20:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 20, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[S_MOV_B32_73:%[0-9]+]]:sgpr_32 = S_MOV_B32 72
; GFX908-NEXT: [[V_CVT_I32_F64_e32_21:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 21, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[S_MOV_B32_74:%[0-9]+]]:sgpr_32 = S_MOV_B32 73
; GFX908-NEXT: [[V_CVT_I32_F64_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[S_MOV_B32_75:%[0-9]+]]:sgpr_32 = S_MOV_B32 74
; GFX908-NEXT: [[V_CVT_I32_F64_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[S_MOV_B32_76:%[0-9]+]]:sgpr_32 = S_MOV_B32 75
; GFX908-NEXT: [[V_CVT_I32_F64_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[S_MOV_B32_77:%[0-9]+]]:sgpr_32 = S_MOV_B32 76
; GFX908-NEXT: [[V_CVT_I32_F64_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 25, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[S_MOV_B32_78:%[0-9]+]]:sgpr_32 = S_MOV_B32 77
; GFX908-NEXT: [[V_CVT_I32_F64_e32_26:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 26, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[S_MOV_B32_79:%[0-9]+]]:sgpr_32 = S_MOV_B32 78
; GFX908-NEXT: [[V_CVT_I32_F64_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 27, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[S_MOV_B32_80:%[0-9]+]]:sgpr_32 = S_MOV_B32 79
- ; GFX908-NEXT: [[S_MOV_B32_81:%[0-9]+]]:sgpr_32 = S_MOV_B32 80
; GFX908-NEXT: [[V_CVT_I32_F64_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 28, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 29, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 30, implicit $exec, implicit $mode, implicit-def $m0
@@ -1237,46 +1156,127 @@ body: |
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_28]], implicit [[V_CVT_I32_F64_e32_29]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_30]], implicit [[V_CVT_I32_F64_e32_31]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_32]]
+ ; GFX908-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
+ ; GFX908-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 1
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_1]], implicit [[S_MOV_B32_2]]
+ ; GFX908-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sgpr_32 = S_MOV_B32 2
+ ; GFX908-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sgpr_32 = S_MOV_B32 3
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_3]], implicit [[S_MOV_B32_4]]
+ ; GFX908-NEXT: [[S_MOV_B32_5:%[0-9]+]]:sgpr_32 = S_MOV_B32 4
+ ; GFX908-NEXT: [[S_MOV_B32_6:%[0-9]+]]:sgpr_32 = S_MOV_B32 5
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_5]], implicit [[S_MOV_B32_6]]
+ ; GFX908-NEXT: [[S_MOV_B32_7:%[0-9]+]]:sgpr_32 = S_MOV_B32 6
+ ; GFX908-NEXT: [[S_MOV_B32_8:%[0-9]+]]:sgpr_32 = S_MOV_B32 7
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_7]], implicit [[S_MOV_B32_8]]
+ ; GFX908-NEXT: [[S_MOV_B32_9:%[0-9]+]]:sgpr_32 = S_MOV_B32 8
+ ; GFX908-NEXT: [[S_MOV_B32_10:%[0-9]+]]:sgpr_32 = S_MOV_B32 9
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_9]], implicit [[S_MOV_B32_10]]
+ ; GFX908-NEXT: [[S_MOV_B32_11:%[0-9]+]]:sgpr_32 = S_MOV_B32 10
+ ; GFX908-NEXT: [[S_MOV_B32_12:%[0-9]+]]:sgpr_32 = S_MOV_B32 11
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_11]], implicit [[S_MOV_B32_12]]
+ ; GFX908-NEXT: [[S_MOV_B32_13:%[0-9]+]]:sgpr_32 = S_MOV_B32 12
+ ; GFX908-NEXT: [[S_MOV_B32_14:%[0-9]+]]:sgpr_32 = S_MOV_B32 13
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_13]], implicit [[S_MOV_B32_14]]
+ ; GFX908-NEXT: [[S_MOV_B32_15:%[0-9]+]]:sgpr_32 = S_MOV_B32 14
+ ; GFX908-NEXT: [[S_MOV_B32_16:%[0-9]+]]:sgpr_32 = S_MOV_B32 15
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_15]], implicit [[S_MOV_B32_16]]
+ ; GFX908-NEXT: [[S_MOV_B32_17:%[0-9]+]]:sgpr_32 = S_MOV_B32 16
+ ; GFX908-NEXT: [[S_MOV_B32_18:%[0-9]+]]:sgpr_32 = S_MOV_B32 17
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_17]], implicit [[S_MOV_B32_18]]
+ ; GFX908-NEXT: [[S_MOV_B32_19:%[0-9]+]]:sgpr_32 = S_MOV_B32 18
+ ; GFX908-NEXT: [[S_MOV_B32_20:%[0-9]+]]:sgpr_32 = S_MOV_B32 19
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_19]], implicit [[S_MOV_B32_20]]
+ ; GFX908-NEXT: [[S_MOV_B32_21:%[0-9]+]]:sgpr_32 = S_MOV_B32 20
+ ; GFX908-NEXT: [[S_MOV_B32_22:%[0-9]+]]:sgpr_32 = S_MOV_B32 21
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_21]], implicit [[S_MOV_B32_22]]
+ ; GFX908-NEXT: [[S_MOV_B32_23:%[0-9]+]]:sgpr_32 = S_MOV_B32 22
+ ; GFX908-NEXT: [[S_MOV_B32_24:%[0-9]+]]:sgpr_32 = S_MOV_B32 23
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_23]], implicit [[S_MOV_B32_24]]
+ ; GFX908-NEXT: [[S_MOV_B32_25:%[0-9]+]]:sgpr_32 = S_MOV_B32 24
+ ; GFX908-NEXT: [[S_MOV_B32_26:%[0-9]+]]:sgpr_32 = S_MOV_B32 25
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_25]], implicit [[S_MOV_B32_26]]
+ ; GFX908-NEXT: [[S_MOV_B32_27:%[0-9]+]]:sgpr_32 = S_MOV_B32 26
+ ; GFX908-NEXT: [[S_MOV_B32_28:%[0-9]+]]:sgpr_32 = S_MOV_B32 27
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_27]], implicit [[S_MOV_B32_28]]
+ ; GFX908-NEXT: [[S_MOV_B32_29:%[0-9]+]]:sgpr_32 = S_MOV_B32 28
+ ; GFX908-NEXT: [[S_MOV_B32_30:%[0-9]+]]:sgpr_32 = S_MOV_B32 29
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_29]], implicit [[S_MOV_B32_30]]
+ ; GFX908-NEXT: [[S_MOV_B32_31:%[0-9]+]]:sgpr_32 = S_MOV_B32 30
+ ; GFX908-NEXT: [[S_MOV_B32_32:%[0-9]+]]:sgpr_32 = S_MOV_B32 31
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_31]], implicit [[S_MOV_B32_32]]
+ ; GFX908-NEXT: [[S_MOV_B32_33:%[0-9]+]]:sgpr_32 = S_MOV_B32 32
+ ; GFX908-NEXT: [[S_MOV_B32_34:%[0-9]+]]:sgpr_32 = S_MOV_B32 33
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_33]], implicit [[S_MOV_B32_34]]
+ ; GFX908-NEXT: [[S_MOV_B32_35:%[0-9]+]]:sgpr_32 = S_MOV_B32 34
+ ; GFX908-NEXT: [[S_MOV_B32_36:%[0-9]+]]:sgpr_32 = S_MOV_B32 35
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_35]], implicit [[S_MOV_B32_36]]
+ ; GFX908-NEXT: [[S_MOV_B32_37:%[0-9]+]]:sgpr_32 = S_MOV_B32 36
+ ; GFX908-NEXT: [[S_MOV_B32_38:%[0-9]+]]:sgpr_32 = S_MOV_B32 37
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_37]], implicit [[S_MOV_B32_38]]
+ ; GFX908-NEXT: [[S_MOV_B32_39:%[0-9]+]]:sgpr_32 = S_MOV_B32 38
+ ; GFX908-NEXT: [[S_MOV_B32_40:%[0-9]+]]:sgpr_32 = S_MOV_B32 39
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_39]], implicit [[S_MOV_B32_40]]
+ ; GFX908-NEXT: [[S_MOV_B32_41:%[0-9]+]]:sgpr_32 = S_MOV_B32 40
+ ; GFX908-NEXT: [[S_MOV_B32_42:%[0-9]+]]:sgpr_32 = S_MOV_B32 41
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_41]], implicit [[S_MOV_B32_42]]
+ ; GFX908-NEXT: [[S_MOV_B32_43:%[0-9]+]]:sgpr_32 = S_MOV_B32 42
+ ; GFX908-NEXT: [[S_MOV_B32_44:%[0-9]+]]:sgpr_32 = S_MOV_B32 43
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_43]], implicit [[S_MOV_B32_44]]
+ ; GFX908-NEXT: [[S_MOV_B32_45:%[0-9]+]]:sgpr_32 = S_MOV_B32 44
+ ; GFX908-NEXT: [[S_MOV_B32_46:%[0-9]+]]:sgpr_32 = S_MOV_B32 45
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_45]], implicit [[S_MOV_B32_46]]
+ ; GFX908-NEXT: [[S_MOV_B32_47:%[0-9]+]]:sgpr_32 = S_MOV_B32 46
+ ; GFX908-NEXT: [[S_MOV_B32_48:%[0-9]+]]:sgpr_32 = S_MOV_B32 47
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_47]], implicit [[S_MOV_B32_48]]
+ ; GFX908-NEXT: [[S_MOV_B32_49:%[0-9]+]]:sgpr_32 = S_MOV_B32 48
+ ; GFX908-NEXT: [[S_MOV_B32_50:%[0-9]+]]:sgpr_32 = S_MOV_B32 49
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_49]], implicit [[S_MOV_B32_50]]
+ ; GFX908-NEXT: [[S_MOV_B32_51:%[0-9]+]]:sgpr_32 = S_MOV_B32 50
+ ; GFX908-NEXT: [[S_MOV_B32_52:%[0-9]+]]:sgpr_32 = S_MOV_B32 51
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_51]], implicit [[S_MOV_B32_52]]
+ ; GFX908-NEXT: [[S_MOV_B32_53:%[0-9]+]]:sgpr_32 = S_MOV_B32 52
+ ; GFX908-NEXT: [[S_MOV_B32_54:%[0-9]+]]:sgpr_32 = S_MOV_B32 53
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_53]], implicit [[S_MOV_B32_54]]
+ ; GFX908-NEXT: [[S_MOV_B32_55:%[0-9]+]]:sgpr_32 = S_MOV_B32 54
+ ; GFX908-NEXT: [[S_MOV_B32_56:%[0-9]+]]:sgpr_32 = S_MOV_B32 55
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_55]], implicit [[S_MOV_B32_56]]
+ ; GFX908-NEXT: [[S_MOV_B32_57:%[0-9]+]]:sgpr_32 = S_MOV_B32 56
+ ; GFX908-NEXT: [[S_MOV_B32_58:%[0-9]+]]:sgpr_32 = S_MOV_B32 57
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_57]], implicit [[S_MOV_B32_58]]
+ ; GFX908-NEXT: [[S_MOV_B32_59:%[0-9]+]]:sgpr_32 = S_MOV_B32 58
+ ; GFX908-NEXT: [[S_MOV_B32_60:%[0-9]+]]:sgpr_32 = S_MOV_B32 59
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_59]], implicit [[S_MOV_B32_60]]
+ ; GFX908-NEXT: [[S_MOV_B32_61:%[0-9]+]]:sgpr_32 = S_MOV_B32 60
+ ; GFX908-NEXT: [[S_MOV_B32_62:%[0-9]+]]:sgpr_32 = S_MOV_B32 61
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_61]], implicit [[S_MOV_B32_62]]
+ ; GFX908-NEXT: [[S_MOV_B32_63:%[0-9]+]]:sgpr_32 = S_MOV_B32 62
+ ; GFX908-NEXT: [[S_MOV_B32_64:%[0-9]+]]:sgpr_32 = S_MOV_B32 63
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_63]], implicit [[S_MOV_B32_64]]
+ ; GFX908-NEXT: [[S_MOV_B32_65:%[0-9]+]]:sgpr_32 = S_MOV_B32 64
+ ; GFX908-NEXT: [[S_MOV_B32_66:%[0-9]+]]:sgpr_32 = S_MOV_B32 65
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_65]], implicit [[S_MOV_B32_66]]
+ ; GFX908-NEXT: [[S_MOV_B32_67:%[0-9]+]]:sgpr_32 = S_MOV_B32 66
+ ; GFX908-NEXT: [[S_MOV_B32_68:%[0-9]+]]:sgpr_32 = S_MOV_B32 67
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_67]], implicit [[S_MOV_B32_68]]
+ ; GFX908-NEXT: [[S_MOV_B32_69:%[0-9]+]]:sgpr_32 = S_MOV_B32 68
+ ; GFX908-NEXT: [[S_MOV_B32_70:%[0-9]+]]:sgpr_32 = S_MOV_B32 69
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_69]], implicit [[S_MOV_B32_70]]
+ ; GFX908-NEXT: [[S_MOV_B32_71:%[0-9]+]]:sgpr_32 = S_MOV_B32 70
+ ; GFX908-NEXT: [[S_MOV_B32_72:%[0-9]+]]:sgpr_32 = S_MOV_B32 71
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_71]], implicit [[S_MOV_B32_72]]
+ ; GFX908-NEXT: [[S_MOV_B32_73:%[0-9]+]]:sgpr_32 = S_MOV_B32 72
+ ; GFX908-NEXT: [[S_MOV_B32_74:%[0-9]+]]:sgpr_32 = S_MOV_B32 73
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_73]], implicit [[S_MOV_B32_74]]
+ ; GFX908-NEXT: [[S_MOV_B32_75:%[0-9]+]]:sgpr_32 = S_MOV_B32 74
+ ; GFX908-NEXT: [[S_MOV_B32_76:%[0-9]+]]:sgpr_32 = S_MOV_B32 75
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_75]], implicit [[S_MOV_B32_76]]
+ ; GFX908-NEXT: [[S_MOV_B32_77:%[0-9]+]]:sgpr_32 = S_MOV_B32 76
+ ; GFX908-NEXT: [[S_MOV_B32_78:%[0-9]+]]:sgpr_32 = S_MOV_B32 77
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_77]], implicit [[S_MOV_B32_78]]
+ ; GFX908-NEXT: [[S_MOV_B32_79:%[0-9]+]]:sgpr_32 = S_MOV_B32 78
+ ; GFX908-NEXT: [[S_MOV_B32_80:%[0-9]+]]:sgpr_32 = S_MOV_B32 79
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_79]], implicit [[S_MOV_B32_80]]
+ ; GFX908-NEXT: [[S_MOV_B32_81:%[0-9]+]]:sgpr_32 = S_MOV_B32 80
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_81]]
; GFX908-NEXT: S_ENDPGM 0
bb.0:
@@ -1525,17 +1525,17 @@ body: |
; GFX908-NEXT: [[V_CVT_I32_F64_e32_20:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 20, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_21:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 21, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.1:
; GFX908-NEXT: successors: %bb.2(0x80000000)
; GFX908-NEXT: {{ $}}
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 25, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_24]], implicit [[V_CVT_I32_F64_e32_25]]
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 25, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_23]], implicit [[V_CVT_I32_F64_e32_24]]
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.2:
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_23]]
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_25]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_]], implicit [[V_CVT_I32_F64_e32_1]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_2]], implicit [[V_CVT_I32_F64_e32_3]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_4]], implicit [[V_CVT_I32_F64_e32_5]]
@@ -1638,80 +1638,7 @@ body: |
; GFX908-NEXT: [[V_CVT_I32_F64_e32_17:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 17, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_18:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 18, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_19:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 19, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 8
- ; GFX908-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 9
- ; GFX908-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sgpr_32 = S_MOV_B32 10
- ; GFX908-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sgpr_32 = S_MOV_B32 11
- ; GFX908-NEXT: [[S_MOV_B32_5:%[0-9]+]]:sgpr_32 = S_MOV_B32 12
- ; GFX908-NEXT: [[S_MOV_B32_6:%[0-9]+]]:sgpr_32 = S_MOV_B32 13
- ; GFX908-NEXT: [[S_MOV_B32_7:%[0-9]+]]:sgpr_32 = S_MOV_B32 14
- ; GFX908-NEXT: [[S_MOV_B32_8:%[0-9]+]]:sgpr_32 = S_MOV_B32 15
- ; GFX908-NEXT: [[S_MOV_B32_9:%[0-9]+]]:sgpr_32 = S_MOV_B32 16
- ; GFX908-NEXT: [[S_MOV_B32_10:%[0-9]+]]:sgpr_32 = S_MOV_B32 17
- ; GFX908-NEXT: [[S_MOV_B32_11:%[0-9]+]]:sgpr_32 = S_MOV_B32 18
- ; GFX908-NEXT: [[S_MOV_B32_12:%[0-9]+]]:sgpr_32 = S_MOV_B32 19
- ; GFX908-NEXT: [[S_MOV_B32_13:%[0-9]+]]:sgpr_32 = S_MOV_B32 20
- ; GFX908-NEXT: [[S_MOV_B32_14:%[0-9]+]]:sgpr_32 = S_MOV_B32 21
- ; GFX908-NEXT: [[S_MOV_B32_15:%[0-9]+]]:sgpr_32 = S_MOV_B32 22
- ; GFX908-NEXT: [[S_MOV_B32_16:%[0-9]+]]:sgpr_32 = S_MOV_B32 23
- ; GFX908-NEXT: [[S_MOV_B32_17:%[0-9]+]]:sgpr_32 = S_MOV_B32 24
- ; GFX908-NEXT: [[S_MOV_B32_18:%[0-9]+]]:sgpr_32 = S_MOV_B32 25
- ; GFX908-NEXT: [[S_MOV_B32_19:%[0-9]+]]:sgpr_32 = S_MOV_B32 26
- ; GFX908-NEXT: [[S_MOV_B32_20:%[0-9]+]]:sgpr_32 = S_MOV_B32 27
- ; GFX908-NEXT: [[S_MOV_B32_21:%[0-9]+]]:sgpr_32 = S_MOV_B32 28
- ; GFX908-NEXT: [[S_MOV_B32_22:%[0-9]+]]:sgpr_32 = S_MOV_B32 29
- ; GFX908-NEXT: [[S_MOV_B32_23:%[0-9]+]]:sgpr_32 = S_MOV_B32 30
- ; GFX908-NEXT: [[S_MOV_B32_24:%[0-9]+]]:sgpr_32 = S_MOV_B32 31
- ; GFX908-NEXT: [[S_MOV_B32_25:%[0-9]+]]:sgpr_32 = S_MOV_B32 32
- ; GFX908-NEXT: [[S_MOV_B32_26:%[0-9]+]]:sgpr_32 = S_MOV_B32 33
- ; GFX908-NEXT: [[S_MOV_B32_27:%[0-9]+]]:sgpr_32 = S_MOV_B32 34
- ; GFX908-NEXT: [[S_MOV_B32_28:%[0-9]+]]:sgpr_32 = S_MOV_B32 35
- ; GFX908-NEXT: [[S_MOV_B32_29:%[0-9]+]]:sgpr_32 = S_MOV_B32 36
- ; GFX908-NEXT: [[S_MOV_B32_30:%[0-9]+]]:sgpr_32 = S_MOV_B32 37
- ; GFX908-NEXT: [[S_MOV_B32_31:%[0-9]+]]:sgpr_32 = S_MOV_B32 38
- ; GFX908-NEXT: [[S_MOV_B32_32:%[0-9]+]]:sgpr_32 = S_MOV_B32 39
- ; GFX908-NEXT: [[S_MOV_B32_33:%[0-9]+]]:sgpr_32 = S_MOV_B32 40
- ; GFX908-NEXT: [[S_MOV_B32_34:%[0-9]+]]:sgpr_32 = S_MOV_B32 41
- ; GFX908-NEXT: [[S_MOV_B32_35:%[0-9]+]]:sgpr_32 = S_MOV_B32 42
- ; GFX908-NEXT: [[S_MOV_B32_36:%[0-9]+]]:sgpr_32 = S_MOV_B32 43
- ; GFX908-NEXT: [[S_MOV_B32_37:%[0-9]+]]:sgpr_32 = S_MOV_B32 44
- ; GFX908-NEXT: [[S_MOV_B32_38:%[0-9]+]]:sgpr_32 = S_MOV_B32 45
- ; GFX908-NEXT: [[S_MOV_B32_39:%[0-9]+]]:sgpr_32 = S_MOV_B32 46
- ; GFX908-NEXT: [[S_MOV_B32_40:%[0-9]+]]:sgpr_32 = S_MOV_B32 47
- ; GFX908-NEXT: [[S_MOV_B32_41:%[0-9]+]]:sgpr_32 = S_MOV_B32 48
- ; GFX908-NEXT: [[S_MOV_B32_42:%[0-9]+]]:sgpr_32 = S_MOV_B32 49
- ; GFX908-NEXT: [[S_MOV_B32_43:%[0-9]+]]:sgpr_32 = S_MOV_B32 50
- ; GFX908-NEXT: [[S_MOV_B32_44:%[0-9]+]]:sgpr_32 = S_MOV_B32 51
- ; GFX908-NEXT: [[S_MOV_B32_45:%[0-9]+]]:sgpr_32 = S_MOV_B32 52
- ; GFX908-NEXT: [[S_MOV_B32_46:%[0-9]+]]:sgpr_32 = S_MOV_B32 53
- ; GFX908-NEXT: [[S_MOV_B32_47:%[0-9]+]]:sgpr_32 = S_MOV_B32 54
- ; GFX908-NEXT: [[S_MOV_B32_48:%[0-9]+]]:sgpr_32 = S_MOV_B32 55
- ; GFX908-NEXT: [[S_MOV_B32_49:%[0-9]+]]:sgpr_32 = S_MOV_B32 56
- ; GFX908-NEXT: [[S_MOV_B32_50:%[0-9]+]]:sgpr_32 = S_MOV_B32 57
- ; GFX908-NEXT: [[S_MOV_B32_51:%[0-9]+]]:sgpr_32 = S_MOV_B32 58
- ; GFX908-NEXT: [[S_MOV_B32_52:%[0-9]+]]:sgpr_32 = S_MOV_B32 59
- ; GFX908-NEXT: [[S_MOV_B32_53:%[0-9]+]]:sgpr_32 = S_MOV_B32 60
- ; GFX908-NEXT: [[S_MOV_B32_54:%[0-9]+]]:sgpr_32 = S_MOV_B32 61
- ; GFX908-NEXT: [[S_MOV_B32_55:%[0-9]+]]:sgpr_32 = S_MOV_B32 62
- ; GFX908-NEXT: [[S_MOV_B32_56:%[0-9]+]]:sgpr_32 = S_MOV_B32 63
- ; GFX908-NEXT: [[S_MOV_B32_57:%[0-9]+]]:sgpr_32 = S_MOV_B32 64
- ; GFX908-NEXT: [[S_MOV_B32_58:%[0-9]+]]:sgpr_32 = S_MOV_B32 65
- ; GFX908-NEXT: [[S_MOV_B32_59:%[0-9]+]]:sgpr_32 = S_MOV_B32 66
- ; GFX908-NEXT: [[S_MOV_B32_60:%[0-9]+]]:sgpr_32 = S_MOV_B32 67
- ; GFX908-NEXT: [[S_MOV_B32_61:%[0-9]+]]:sgpr_32 = S_MOV_B32 68
- ; GFX908-NEXT: [[S_MOV_B32_62:%[0-9]+]]:sgpr_32 = S_MOV_B32 69
- ; GFX908-NEXT: [[S_MOV_B32_63:%[0-9]+]]:sgpr_32 = S_MOV_B32 70
- ; GFX908-NEXT: [[S_MOV_B32_64:%[0-9]+]]:sgpr_32 = S_MOV_B32 71
- ; GFX908-NEXT: [[S_MOV_B32_65:%[0-9]+]]:sgpr_32 = S_MOV_B32 72
- ; GFX908-NEXT: [[S_MOV_B32_66:%[0-9]+]]:sgpr_32 = S_MOV_B32 73
- ; GFX908-NEXT: [[S_MOV_B32_67:%[0-9]+]]:sgpr_32 = S_MOV_B32 74
- ; GFX908-NEXT: [[S_MOV_B32_68:%[0-9]+]]:sgpr_32 = S_MOV_B32 75
- ; GFX908-NEXT: [[S_MOV_B32_69:%[0-9]+]]:sgpr_32 = S_MOV_B32 76
- ; GFX908-NEXT: [[S_MOV_B32_70:%[0-9]+]]:sgpr_32 = S_MOV_B32 77
- ; GFX908-NEXT: [[S_MOV_B32_71:%[0-9]+]]:sgpr_32 = S_MOV_B32 78
- ; GFX908-NEXT: [[S_MOV_B32_72:%[0-9]+]]:sgpr_32 = S_MOV_B32 79
; GFX908-NEXT: [[V_CVT_I32_F64_e32_20:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 20, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[S_MOV_B32_73:%[0-9]+]]:sgpr_32 = S_MOV_B32 80
; GFX908-NEXT: [[V_CVT_I32_F64_e32_21:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 21, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: {{ $}}
@@ -1758,55 +1685,128 @@ body: |
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_9]], implicit [[V_CVT_I32_F64_e32_19]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_20]], implicit [[V_CVT_I32_F64_e32_21]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_22]]
- ; GFX908-NEXT: [[S_MOV_B32_74:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
- ; GFX908-NEXT: [[S_MOV_B32_75:%[0-9]+]]:sgpr_32 = S_MOV_B32 1
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_74]], implicit [[S_MOV_B32_75]]
- ; GFX908-NEXT: [[S_MOV_B32_76:%[0-9]+]]:sgpr_32 = S_MOV_B32 2
- ; GFX908-NEXT: [[S_MOV_B32_77:%[0-9]+]]:sgpr_32 = S_MOV_B32 3
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_76]], implicit [[S_MOV_B32_77]]
- ; GFX908-NEXT: [[S_MOV_B32_78:%[0-9]+]]:sgpr_32 = S_MOV_B32 4
- ; GFX908-NEXT: [[S_MOV_B32_79:%[0-9]+]]:sgpr_32 = S_MOV_B32 5
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_78]], implicit [[S_MOV_B32_79]]
- ; GFX908-NEXT: [[S_MOV_B32_80:%[0-9]+]]:sgpr_32 = S_MOV_B32 6
- ; GFX908-NEXT: [[S_MOV_B32_81:%[0-9]+]]:sgpr_32 = S_MOV_B32 7
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_80]], implicit [[S_MOV_B32_81]]
+ ; GFX908-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
+ ; GFX908-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 1
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_1]], implicit [[S_MOV_B32_2]]
+ ; GFX908-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sgpr_32 = S_MOV_B32 2
+ ; GFX908-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sgpr_32 = S_MOV_B32 3
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_3]], implicit [[S_MOV_B32_4]]
+ ; GFX908-NEXT: [[S_MOV_B32_5:%[0-9]+]]:sgpr_32 = S_MOV_B32 4
+ ; GFX908-NEXT: [[S_MOV_B32_6:%[0-9]+]]:sgpr_32 = S_MOV_B32 5
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_5]], implicit [[S_MOV_B32_6]]
+ ; GFX908-NEXT: [[S_MOV_B32_7:%[0-9]+]]:sgpr_32 = S_MOV_B32 6
+ ; GFX908-NEXT: [[S_MOV_B32_8:%[0-9]+]]:sgpr_32 = S_MOV_B32 7
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_7]], implicit [[S_MOV_B32_8]]
+ ; GFX908-NEXT: [[S_MOV_B32_9:%[0-9]+]]:sgpr_32 = S_MOV_B32 8
+ ; GFX908-NEXT: [[S_MOV_B32_10:%[0-9]+]]:sgpr_32 = S_MOV_B32 9
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_9]], implicit [[S_MOV_B32_10]]
+ ; GFX908-NEXT: [[S_MOV_B32_11:%[0-9]+]]:sgpr_32 = S_MOV_B32 10
+ ; GFX908-NEXT: [[S_MOV_B32_12:%[0-9]+]]:sgpr_32 = S_MOV_B32 11
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_11]], implicit [[S_MOV_B32_12]]
+ ; GFX908-NEXT: [[S_MOV_B32_13:%[0-9]+]]:sgpr_32 = S_MOV_B32 12
+ ; GFX908-NEXT: [[S_MOV_B32_14:%[0-9]+]]:sgpr_32 = S_MOV_B32 13
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_13]], implicit [[S_MOV_B32_14]]
+ ; GFX908-NEXT: [[S_MOV_B32_15:%[0-9]+]]:sgpr_32 = S_MOV_B32 14
+ ; GFX908-NEXT: [[S_MOV_B32_16:%[0-9]+]]:sgpr_32 = S_MOV_B32 15
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_15]], implicit [[S_MOV_B32_16]]
+ ; GFX908-NEXT: [[S_MOV_B32_17:%[0-9]+]]:sgpr_32 = S_MOV_B32 16
+ ; GFX908-NEXT: [[S_MOV_B32_18:%[0-9]+]]:sgpr_32 = S_MOV_B32 17
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_17]], implicit [[S_MOV_B32_18]]
+ ; GFX908-NEXT: [[S_MOV_B32_19:%[0-9]+]]:sgpr_32 = S_MOV_B32 18
+ ; GFX908-NEXT: [[S_MOV_B32_20:%[0-9]+]]:sgpr_32 = S_MOV_B32 19
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_19]], implicit [[S_MOV_B32_20]]
+ ; GFX908-NEXT: [[S_MOV_B32_21:%[0-9]+]]:sgpr_32 = S_MOV_B32 20
+ ; GFX908-NEXT: [[S_MOV_B32_22:%[0-9]+]]:sgpr_32 = S_MOV_B32 21
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_21]], implicit [[S_MOV_B32_22]]
+ ; GFX908-NEXT: [[S_MOV_B32_23:%[0-9]+]]:sgpr_32 = S_MOV_B32 22
+ ; GFX908-NEXT: [[S_MOV_B32_24:%[0-9]+]]:sgpr_32 = S_MOV_B32 23
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_23]], implicit [[S_MOV_B32_24]]
+ ; GFX908-NEXT: [[S_MOV_B32_25:%[0-9]+]]:sgpr_32 = S_MOV_B32 24
+ ; GFX908-NEXT: [[S_MOV_B32_26:%[0-9]+]]:sgpr_32 = S_MOV_B32 25
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_25]], implicit [[S_MOV_B32_26]]
+ ; GFX908-NEXT: [[S_MOV_B32_27:%[0-9]+]]:sgpr_32 = S_MOV_B32 26
+ ; GFX908-NEXT: [[S_MOV_B32_28:%[0-9]+]]:sgpr_32 = S_MOV_B32 27
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_27]], implicit [[S_MOV_B32_28]]
+ ; GFX908-NEXT: [[S_MOV_B32_29:%[0-9]+]]:sgpr_32 = S_MOV_B32 28
+ ; GFX908-NEXT: [[S_MOV_B32_30:%[0-9]+]]:sgpr_32 = S_MOV_B32 29
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_29]], implicit [[S_MOV_B32_30]]
+ ; GFX908-NEXT: [[S_MOV_B32_31:%[0-9]+]]:sgpr_32 = S_MOV_B32 30
+ ; GFX908-NEXT: [[S_MOV_B32_32:%[0-9]+]]:sgpr_32 = S_MOV_B32 31
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_31]], implicit [[S_MOV_B32_32]]
+ ; GFX908-NEXT: [[S_MOV_B32_33:%[0-9]+]]:sgpr_32 = S_MOV_B32 32
+ ; GFX908-NEXT: [[S_MOV_B32_34:%[0-9]+]]:sgpr_32 = S_MOV_B32 33
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_33]], implicit [[S_MOV_B32_34]]
+ ; GFX908-NEXT: [[S_MOV_B32_35:%[0-9]+]]:sgpr_32 = S_MOV_B32 34
+ ; GFX908-NEXT: [[S_MOV_B32_36:%[0-9]+]]:sgpr_32 = S_MOV_B32 35
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_35]], implicit [[S_MOV_B32_36]]
+ ; GFX908-NEXT: [[S_MOV_B32_37:%[0-9]+]]:sgpr_32 = S_MOV_B32 36
+ ; GFX908-NEXT: [[S_MOV_B32_38:%[0-9]+]]:sgpr_32 = S_MOV_B32 37
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_37]], implicit [[S_MOV_B32_38]]
+ ; GFX908-NEXT: [[S_MOV_B32_39:%[0-9]+]]:sgpr_32 = S_MOV_B32 38
+ ; GFX908-NEXT: [[S_MOV_B32_40:%[0-9]+]]:sgpr_32 = S_MOV_B32 39
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_39]], implicit [[S_MOV_B32_40]]
+ ; GFX908-NEXT: [[S_MOV_B32_41:%[0-9]+]]:sgpr_32 = S_MOV_B32 40
+ ; GFX908-NEXT: [[S_MOV_B32_42:%[0-9]+]]:sgpr_32 = S_MOV_B32 41
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_41]], implicit [[S_MOV_B32_42]]
+ ; GFX908-NEXT: [[S_MOV_B32_43:%[0-9]+]]:sgpr_32 = S_MOV_B32 42
+ ; GFX908-NEXT: [[S_MOV_B32_44:%[0-9]+]]:sgpr_32 = S_MOV_B32 43
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_43]], implicit [[S_MOV_B32_44]]
+ ; GFX908-NEXT: [[S_MOV_B32_45:%[0-9]+]]:sgpr_32 = S_MOV_B32 44
+ ; GFX908-NEXT: [[S_MOV_B32_46:%[0-9]+]]:sgpr_32 = S_MOV_B32 45
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_45]], implicit [[S_MOV_B32_46]]
+ ; GFX908-NEXT: [[S_MOV_B32_47:%[0-9]+]]:sgpr_32 = S_MOV_B32 46
+ ; GFX908-NEXT: [[S_MOV_B32_48:%[0-9]+]]:sgpr_32 = S_MOV_B32 47
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_47]], implicit [[S_MOV_B32_48]]
+ ; GFX908-NEXT: [[S_MOV_B32_49:%[0-9]+]]:sgpr_32 = S_MOV_B32 48
+ ; GFX908-NEXT: [[S_MOV_B32_50:%[0-9]+]]:sgpr_32 = S_MOV_B32 49
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_49]], implicit [[S_MOV_B32_50]]
+ ; GFX908-NEXT: [[S_MOV_B32_51:%[0-9]+]]:sgpr_32 = S_MOV_B32 50
+ ; GFX908-NEXT: [[S_MOV_B32_52:%[0-9]+]]:sgpr_32 = S_MOV_B32 51
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_51]], implicit [[S_MOV_B32_52]]
+ ; GFX908-NEXT: [[S_MOV_B32_53:%[0-9]+]]:sgpr_32 = S_MOV_B32 52
+ ; GFX908-NEXT: [[S_MOV_B32_54:%[0-9]+]]:sgpr_32 = S_MOV_B32 53
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_53]], implicit [[S_MOV_B32_54]]
+ ; GFX908-NEXT: [[S_MOV_B32_55:%[0-9]+]]:sgpr_32 = S_MOV_B32 54
+ ; GFX908-NEXT: [[S_MOV_B32_56:%[0-9]+]]:sgpr_32 = S_MOV_B32 55
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_55]], implicit [[S_MOV_B32_56]]
+ ; GFX908-NEXT: [[S_MOV_B32_57:%[0-9]+]]:sgpr_32 = S_MOV_B32 56
+ ; GFX908-NEXT: [[S_MOV_B32_58:%[0-9]+]]:sgpr_32 = S_MOV_B32 57
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_57]], implicit [[S_MOV_B32_58]]
+ ; GFX908-NEXT: [[S_MOV_B32_59:%[0-9]+]]:sgpr_32 = S_MOV_B32 58
+ ; GFX908-NEXT: [[S_MOV_B32_60:%[0-9]+]]:sgpr_32 = S_MOV_B32 59
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_59]], implicit [[S_MOV_B32_60]]
+ ; GFX908-NEXT: [[S_MOV_B32_61:%[0-9]+]]:sgpr_32 = S_MOV_B32 60
+ ; GFX908-NEXT: [[S_MOV_B32_62:%[0-9]+]]:sgpr_32 = S_MOV_B32 61
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_61]], implicit [[S_MOV_B32_62]]
+ ; GFX908-NEXT: [[S_MOV_B32_63:%[0-9]+]]:sgpr_32 = S_MOV_B32 62
+ ; GFX908-NEXT: [[S_MOV_B32_64:%[0-9]+]]:sgpr_32 = S_MOV_B32 63
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_63]], implicit [[S_MOV_B32_64]]
+ ; GFX908-NEXT: [[S_MOV_B32_65:%[0-9]+]]:sgpr_32 = S_MOV_B32 64
+ ; GFX908-NEXT: [[S_MOV_B32_66:%[0-9]+]]:sgpr_32 = S_MOV_B32 65
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_65]], implicit [[S_MOV_B32_66]]
+ ; GFX908-NEXT: [[S_MOV_B32_67:%[0-9]+]]:sgpr_32 = S_MOV_B32 66
+ ; GFX908-NEXT: [[S_MOV_B32_68:%[0-9]+]]:sgpr_32 = S_MOV_B32 67
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_67]], implicit [[S_MOV_B32_68]]
+ ; GFX908-NEXT: [[S_MOV_B32_69:%[0-9]+]]:sgpr_32 = S_MOV_B32 68
+ ; GFX908-NEXT: [[S_MOV_B32_70:%[0-9]+]]:sgpr_32 = S_MOV_B32 69
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_69]], implicit [[S_MOV_B32_70]]
+ ; GFX908-NEXT: [[S_MOV_B32_71:%[0-9]+]]:sgpr_32 = S_MOV_B32 70
+ ; GFX908-NEXT: [[S_MOV_B32_72:%[0-9]+]]:sgpr_32 = S_MOV_B32 71
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_71]], implicit [[S_MOV_B32_72]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_73]]
+ ; GFX908-NEXT: [[S_MOV_B32_73:%[0-9]+]]:sgpr_32 = S_MOV_B32 72
+ ; GFX908-NEXT: [[S_MOV_B32_74:%[0-9]+]]:sgpr_32 = S_MOV_B32 73
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_73]], implicit [[S_MOV_B32_74]]
+ ; GFX908-NEXT: [[S_MOV_B32_75:%[0-9]+]]:sgpr_32 = S_MOV_B32 74
+ ; GFX908-NEXT: [[S_MOV_B32_76:%[0-9]+]]:sgpr_32 = S_MOV_B32 75
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_75]], implicit [[S_MOV_B32_76]]
+ ; GFX908-NEXT: [[S_MOV_B32_77:%[0-9]+]]:sgpr_32 = S_MOV_B32 76
+ ; GFX908-NEXT: [[S_MOV_B32_78:%[0-9]+]]:sgpr_32 = S_MOV_B32 77
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_77]], implicit [[S_MOV_B32_78]]
+ ; GFX908-NEXT: [[S_MOV_B32_79:%[0-9]+]]:sgpr_32 = S_MOV_B32 78
+ ; GFX908-NEXT: [[S_MOV_B32_80:%[0-9]+]]:sgpr_32 = S_MOV_B32 79
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_79]], implicit [[S_MOV_B32_80]]
+ ; GFX908-NEXT: [[S_MOV_B32_81:%[0-9]+]]:sgpr_32 = S_MOV_B32 80
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_81]]
; GFX908-NEXT: S_ENDPGM 0
bb.0:
liveins: $vgpr0, $sgpr0_sgpr1
@@ -2044,105 +2044,87 @@ body: |
; GFX908-NEXT: [[V_CVT_I32_F64_e32_17:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 17, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_18:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 18, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_19:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 19, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 12
- ; GFX908-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 13
- ; GFX908-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sgpr_32 = S_MOV_B32 14
- ; GFX908-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sgpr_32 = S_MOV_B32 15
- ; GFX908-NEXT: [[S_MOV_B32_5:%[0-9]+]]:sgpr_32 = S_MOV_B32 16
- ; GFX908-NEXT: [[S_MOV_B32_6:%[0-9]+]]:sgpr_32 = S_MOV_B32 17
- ; GFX908-NEXT: [[S_MOV_B32_7:%[0-9]+]]:sgpr_32 = S_MOV_B32 18
- ; GFX908-NEXT: [[S_MOV_B32_8:%[0-9]+]]:sgpr_32 = S_MOV_B32 19
- ; GFX908-NEXT: [[S_MOV_B32_9:%[0-9]+]]:sgpr_32 = S_MOV_B32 20
- ; GFX908-NEXT: [[S_MOV_B32_10:%[0-9]+]]:sgpr_32 = S_MOV_B32 21
- ; GFX908-NEXT: [[S_MOV_B32_11:%[0-9]+]]:sgpr_32 = S_MOV_B32 22
- ; GFX908-NEXT: [[S_MOV_B32_12:%[0-9]+]]:sgpr_32 = S_MOV_B32 23
- ; GFX908-NEXT: [[S_MOV_B32_13:%[0-9]+]]:sgpr_32 = S_MOV_B32 24
- ; GFX908-NEXT: [[S_MOV_B32_14:%[0-9]+]]:sgpr_32 = S_MOV_B32 25
- ; GFX908-NEXT: [[S_MOV_B32_15:%[0-9]+]]:sgpr_32 = S_MOV_B32 26
- ; GFX908-NEXT: [[S_MOV_B32_16:%[0-9]+]]:sgpr_32 = S_MOV_B32 27
- ; GFX908-NEXT: [[S_MOV_B32_17:%[0-9]+]]:sgpr_32 = S_MOV_B32 28
- ; GFX908-NEXT: [[S_MOV_B32_18:%[0-9]+]]:sgpr_32 = S_MOV_B32 29
- ; GFX908-NEXT: [[S_MOV_B32_19:%[0-9]+]]:sgpr_32 = S_MOV_B32 30
- ; GFX908-NEXT: [[S_MOV_B32_20:%[0-9]+]]:sgpr_32 = S_MOV_B32 31
- ; GFX908-NEXT: [[S_MOV_B32_21:%[0-9]+]]:sgpr_32 = S_MOV_B32 32
- ; GFX908-NEXT: [[S_MOV_B32_22:%[0-9]+]]:sgpr_32 = S_MOV_B32 33
- ; GFX908-NEXT: [[S_MOV_B32_23:%[0-9]+]]:sgpr_32 = S_MOV_B32 34
- ; GFX908-NEXT: [[S_MOV_B32_24:%[0-9]+]]:sgpr_32 = S_MOV_B32 35
- ; GFX908-NEXT: [[S_MOV_B32_25:%[0-9]+]]:sgpr_32 = S_MOV_B32 36
- ; GFX908-NEXT: [[S_MOV_B32_26:%[0-9]+]]:sgpr_32 = S_MOV_B32 37
- ; GFX908-NEXT: [[S_MOV_B32_27:%[0-9]+]]:sgpr_32 = S_MOV_B32 38
- ; GFX908-NEXT: [[S_MOV_B32_28:%[0-9]+]]:sgpr_32 = S_MOV_B32 39
- ; GFX908-NEXT: [[S_MOV_B32_29:%[0-9]+]]:sgpr_32 = S_MOV_B32 40
- ; GFX908-NEXT: [[S_MOV_B32_30:%[0-9]+]]:sgpr_32 = S_MOV_B32 41
- ; GFX908-NEXT: [[S_MOV_B32_31:%[0-9]+]]:sgpr_32 = S_MOV_B32 42
- ; GFX908-NEXT: [[S_MOV_B32_32:%[0-9]+]]:sgpr_32 = S_MOV_B32 43
- ; GFX908-NEXT: [[S_MOV_B32_33:%[0-9]+]]:sgpr_32 = S_MOV_B32 44
- ; GFX908-NEXT: [[S_MOV_B32_34:%[0-9]+]]:sgpr_32 = S_MOV_B32 45
- ; GFX908-NEXT: [[S_MOV_B32_35:%[0-9]+]]:sgpr_32 = S_MOV_B32 46
- ; GFX908-NEXT: [[S_MOV_B32_36:%[0-9]+]]:sgpr_32 = S_MOV_B32 47
- ; GFX908-NEXT: [[S_MOV_B32_37:%[0-9]+]]:sgpr_32 = S_MOV_B32 48
- ; GFX908-NEXT: [[S_MOV_B32_38:%[0-9]+]]:sgpr_32 = S_MOV_B32 49
- ; GFX908-NEXT: [[S_MOV_B32_39:%[0-9]+]]:sgpr_32 = S_MOV_B32 50
- ; GFX908-NEXT: [[S_MOV_B32_40:%[0-9]+]]:sgpr_32 = S_MOV_B32 51
- ; GFX908-NEXT: [[S_MOV_B32_41:%[0-9]+]]:sgpr_32 = S_MOV_B32 52
- ; GFX908-NEXT: [[S_MOV_B32_42:%[0-9]+]]:sgpr_32 = S_MOV_B32 53
- ; GFX908-NEXT: [[S_MOV_B32_43:%[0-9]+]]:sgpr_32 = S_MOV_B32 54
- ; GFX908-NEXT: [[S_MOV_B32_44:%[0-9]+]]:sgpr_32 = S_MOV_B32 55
- ; GFX908-NEXT: [[S_MOV_B32_45:%[0-9]+]]:sgpr_32 = S_MOV_B32 56
- ; GFX908-NEXT: [[S_MOV_B32_46:%[0-9]+]]:sgpr_32 = S_MOV_B32 57
- ; GFX908-NEXT: [[S_MOV_B32_47:%[0-9]+]]:sgpr_32 = S_MOV_B32 58
- ; GFX908-NEXT: [[S_MOV_B32_48:%[0-9]+]]:sgpr_32 = S_MOV_B32 59
- ; GFX908-NEXT: [[S_MOV_B32_49:%[0-9]+]]:sgpr_32 = S_MOV_B32 60
- ; GFX908-NEXT: [[S_MOV_B32_50:%[0-9]+]]:sgpr_32 = S_MOV_B32 61
- ; GFX908-NEXT: [[S_MOV_B32_51:%[0-9]+]]:sgpr_32 = S_MOV_B32 62
- ; GFX908-NEXT: [[S_MOV_B32_52:%[0-9]+]]:sgpr_32 = S_MOV_B32 63
- ; GFX908-NEXT: [[S_MOV_B32_53:%[0-9]+]]:sgpr_32 = S_MOV_B32 64
- ; GFX908-NEXT: [[S_MOV_B32_54:%[0-9]+]]:sgpr_32 = S_MOV_B32 65
- ; GFX908-NEXT: [[S_MOV_B32_55:%[0-9]+]]:sgpr_32 = S_MOV_B32 66
- ; GFX908-NEXT: [[S_MOV_B32_56:%[0-9]+]]:sgpr_32 = S_MOV_B32 67
- ; GFX908-NEXT: [[S_MOV_B32_57:%[0-9]+]]:sgpr_32 = S_MOV_B32 68
- ; GFX908-NEXT: [[S_MOV_B32_58:%[0-9]+]]:sgpr_32 = S_MOV_B32 69
- ; GFX908-NEXT: [[S_MOV_B32_59:%[0-9]+]]:sgpr_32 = S_MOV_B32 70
- ; GFX908-NEXT: [[S_MOV_B32_60:%[0-9]+]]:sgpr_32 = S_MOV_B32 71
- ; GFX908-NEXT: [[S_MOV_B32_61:%[0-9]+]]:sgpr_32 = S_MOV_B32 72
- ; GFX908-NEXT: [[S_MOV_B32_62:%[0-9]+]]:sgpr_32 = S_MOV_B32 73
- ; GFX908-NEXT: [[S_MOV_B32_63:%[0-9]+]]:sgpr_32 = S_MOV_B32 74
- ; GFX908-NEXT: [[S_MOV_B32_64:%[0-9]+]]:sgpr_32 = S_MOV_B32 75
- ; GFX908-NEXT: [[S_MOV_B32_65:%[0-9]+]]:sgpr_32 = S_MOV_B32 76
- ; GFX908-NEXT: [[S_MOV_B32_66:%[0-9]+]]:sgpr_32 = S_MOV_B32 77
- ; GFX908-NEXT: [[S_MOV_B32_67:%[0-9]+]]:sgpr_32 = S_MOV_B32 78
- ; GFX908-NEXT: [[S_MOV_B32_68:%[0-9]+]]:sgpr_32 = S_MOV_B32 79
- ; GFX908-NEXT: [[S_MOV_B32_69:%[0-9]+]]:sgpr_32 = S_MOV_B32 80
- ; GFX908-NEXT: [[S_MOV_B32_70:%[0-9]+]]:sgpr_32 = S_MOV_B32 81
- ; GFX908-NEXT: [[S_MOV_B32_71:%[0-9]+]]:sgpr_32 = S_MOV_B32 82
- ; GFX908-NEXT: [[S_MOV_B32_72:%[0-9]+]]:sgpr_32 = S_MOV_B32 83
+ ; GFX908-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
+ ; GFX908-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 1
+ ; GFX908-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sgpr_32 = S_MOV_B32 2
+ ; GFX908-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sgpr_32 = S_MOV_B32 3
+ ; GFX908-NEXT: [[S_MOV_B32_5:%[0-9]+]]:sgpr_32 = S_MOV_B32 4
+ ; GFX908-NEXT: [[S_MOV_B32_6:%[0-9]+]]:sgpr_32 = S_MOV_B32 5
+ ; GFX908-NEXT: [[S_MOV_B32_7:%[0-9]+]]:sgpr_32 = S_MOV_B32 6
+ ; GFX908-NEXT: [[S_MOV_B32_8:%[0-9]+]]:sgpr_32 = S_MOV_B32 7
+ ; GFX908-NEXT: [[S_MOV_B32_9:%[0-9]+]]:sgpr_32 = S_MOV_B32 8
+ ; GFX908-NEXT: [[S_MOV_B32_10:%[0-9]+]]:sgpr_32 = S_MOV_B32 9
+ ; GFX908-NEXT: [[S_MOV_B32_11:%[0-9]+]]:sgpr_32 = S_MOV_B32 10
+ ; GFX908-NEXT: [[S_MOV_B32_12:%[0-9]+]]:sgpr_32 = S_MOV_B32 11
+ ; GFX908-NEXT: [[S_MOV_B32_13:%[0-9]+]]:sgpr_32 = S_MOV_B32 12
+ ; GFX908-NEXT: [[S_MOV_B32_14:%[0-9]+]]:sgpr_32 = S_MOV_B32 13
+ ; GFX908-NEXT: [[S_MOV_B32_15:%[0-9]+]]:sgpr_32 = S_MOV_B32 14
+ ; GFX908-NEXT: [[S_MOV_B32_16:%[0-9]+]]:sgpr_32 = S_MOV_B32 15
+ ; GFX908-NEXT: [[S_MOV_B32_17:%[0-9]+]]:sgpr_32 = S_MOV_B32 16
+ ; GFX908-NEXT: [[S_MOV_B32_18:%[0-9]+]]:sgpr_32 = S_MOV_B32 17
+ ; GFX908-NEXT: [[S_MOV_B32_19:%[0-9]+]]:sgpr_32 = S_MOV_B32 18
+ ; GFX908-NEXT: [[S_MOV_B32_20:%[0-9]+]]:sgpr_32 = S_MOV_B32 19
+ ; GFX908-NEXT: [[S_MOV_B32_21:%[0-9]+]]:sgpr_32 = S_MOV_B32 20
+ ; GFX908-NEXT: [[S_MOV_B32_22:%[0-9]+]]:sgpr_32 = S_MOV_B32 21
+ ; GFX908-NEXT: [[S_MOV_B32_23:%[0-9]+]]:sgpr_32 = S_MOV_B32 22
+ ; GFX908-NEXT: [[S_MOV_B32_24:%[0-9]+]]:sgpr_32 = S_MOV_B32 23
+ ; GFX908-NEXT: [[S_MOV_B32_25:%[0-9]+]]:sgpr_32 = S_MOV_B32 24
+ ; GFX908-NEXT: [[S_MOV_B32_26:%[0-9]+]]:sgpr_32 = S_MOV_B32 25
+ ; GFX908-NEXT: [[S_MOV_B32_27:%[0-9]+]]:sgpr_32 = S_MOV_B32 26
+ ; GFX908-NEXT: [[S_MOV_B32_28:%[0-9]+]]:sgpr_32 = S_MOV_B32 27
+ ; GFX908-NEXT: [[S_MOV_B32_29:%[0-9]+]]:sgpr_32 = S_MOV_B32 28
+ ; GFX908-NEXT: [[S_MOV_B32_30:%[0-9]+]]:sgpr_32 = S_MOV_B32 29
+ ; GFX908-NEXT: [[S_MOV_B32_31:%[0-9]+]]:sgpr_32 = S_MOV_B32 30
+ ; GFX908-NEXT: [[S_MOV_B32_32:%[0-9]+]]:sgpr_32 = S_MOV_B32 31
+ ; GFX908-NEXT: [[S_MOV_B32_33:%[0-9]+]]:sgpr_32 = S_MOV_B32 32
+ ; GFX908-NEXT: [[S_MOV_B32_34:%[0-9]+]]:sgpr_32 = S_MOV_B32 33
+ ; GFX908-NEXT: [[S_MOV_B32_35:%[0-9]+]]:sgpr_32 = S_MOV_B32 34
+ ; GFX908-NEXT: [[S_MOV_B32_36:%[0-9]+]]:sgpr_32 = S_MOV_B32 35
+ ; GFX908-NEXT: [[S_MOV_B32_37:%[0-9]+]]:sgpr_32 = S_MOV_B32 36
+ ; GFX908-NEXT: [[S_MOV_B32_38:%[0-9]+]]:sgpr_32 = S_MOV_B32 37
+ ; GFX908-NEXT: [[S_MOV_B32_39:%[0-9]+]]:sgpr_32 = S_MOV_B32 38
+ ; GFX908-NEXT: [[S_MOV_B32_40:%[0-9]+]]:sgpr_32 = S_MOV_B32 39
+ ; GFX908-NEXT: [[S_MOV_B32_41:%[0-9]+]]:sgpr_32 = S_MOV_B32 40
+ ; GFX908-NEXT: [[S_MOV_B32_42:%[0-9]+]]:sgpr_32 = S_MOV_B32 41
+ ; GFX908-NEXT: [[S_MOV_B32_43:%[0-9]+]]:sgpr_32 = S_MOV_B32 42
+ ; GFX908-NEXT: [[S_MOV_B32_44:%[0-9]+]]:sgpr_32 = S_MOV_B32 43
+ ; GFX908-NEXT: [[S_MOV_B32_45:%[0-9]+]]:sgpr_32 = S_MOV_B32 44
+ ; GFX908-NEXT: [[S_MOV_B32_46:%[0-9]+]]:sgpr_32 = S_MOV_B32 45
+ ; GFX908-NEXT: [[S_MOV_B32_47:%[0-9]+]]:sgpr_32 = S_MOV_B32 46
+ ; GFX908-NEXT: [[S_MOV_B32_48:%[0-9]+]]:sgpr_32 = S_MOV_B32 47
+ ; GFX908-NEXT: [[S_MOV_B32_49:%[0-9]+]]:sgpr_32 = S_MOV_B32 48
+ ; GFX908-NEXT: [[S_MOV_B32_50:%[0-9]+]]:sgpr_32 = S_MOV_B32 49
+ ; GFX908-NEXT: [[S_MOV_B32_51:%[0-9]+]]:sgpr_32 = S_MOV_B32 50
+ ; GFX908-NEXT: [[S_MOV_B32_52:%[0-9]+]]:sgpr_32 = S_MOV_B32 51
+ ; GFX908-NEXT: [[S_MOV_B32_53:%[0-9]+]]:sgpr_32 = S_MOV_B32 52
+ ; GFX908-NEXT: [[S_MOV_B32_54:%[0-9]+]]:sgpr_32 = S_MOV_B32 53
+ ; GFX908-NEXT: [[S_MOV_B32_55:%[0-9]+]]:sgpr_32 = S_MOV_B32 54
+ ; GFX908-NEXT: [[S_MOV_B32_56:%[0-9]+]]:sgpr_32 = S_MOV_B32 55
+ ; GFX908-NEXT: [[S_MOV_B32_57:%[0-9]+]]:sgpr_32 = S_MOV_B32 56
+ ; GFX908-NEXT: [[S_MOV_B32_58:%[0-9]+]]:sgpr_32 = S_MOV_B32 57
+ ; GFX908-NEXT: [[S_MOV_B32_59:%[0-9]+]]:sgpr_32 = S_MOV_B32 58
+ ; GFX908-NEXT: [[S_MOV_B32_60:%[0-9]+]]:sgpr_32 = S_MOV_B32 59
+ ; GFX908-NEXT: [[S_MOV_B32_61:%[0-9]+]]:sgpr_32 = S_MOV_B32 60
+ ; GFX908-NEXT: [[S_MOV_B32_62:%[0-9]+]]:sgpr_32 = S_MOV_B32 61
+ ; GFX908-NEXT: [[S_MOV_B32_63:%[0-9]+]]:sgpr_32 = S_MOV_B32 62
+ ; GFX908-NEXT: [[S_MOV_B32_64:%[0-9]+]]:sgpr_32 = S_MOV_B32 63
+ ; GFX908-NEXT: [[S_MOV_B32_65:%[0-9]+]]:sgpr_32 = S_MOV_B32 64
+ ; GFX908-NEXT: [[S_MOV_B32_66:%[0-9]+]]:sgpr_32 = S_MOV_B32 65
+ ; GFX908-NEXT: [[S_MOV_B32_67:%[0-9]+]]:sgpr_32 = S_MOV_B32 66
+ ; GFX908-NEXT: [[S_MOV_B32_68:%[0-9]+]]:sgpr_32 = S_MOV_B32 67
+ ; GFX908-NEXT: [[S_MOV_B32_69:%[0-9]+]]:sgpr_32 = S_MOV_B32 68
+ ; GFX908-NEXT: [[S_MOV_B32_70:%[0-9]+]]:sgpr_32 = S_MOV_B32 69
; GFX908-NEXT: [[V_CVT_I32_F64_e32_20:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 20, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[S_MOV_B32_73:%[0-9]+]]:sgpr_32 = S_MOV_B32 84
; GFX908-NEXT: [[V_CVT_I32_F64_e32_21:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 21, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[S_MOV_B32_71:%[0-9]+]]:sgpr_32 = S_MOV_B32 70
+ ; GFX908-NEXT: [[S_MOV_B32_72:%[0-9]+]]:sgpr_32 = S_MOV_B32 71
+ ; GFX908-NEXT: [[S_MOV_B32_73:%[0-9]+]]:sgpr_32 = S_MOV_B32 72
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.1:
; GFX908-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000)
; GFX908-NEXT: {{ $}}
- ; GFX908-NEXT: [[S_MOV_B32_74:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
- ; GFX908-NEXT: [[S_MOV_B32_75:%[0-9]+]]:sgpr_32 = S_MOV_B32 1
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_74]], implicit [[S_MOV_B32_75]]
- ; GFX908-NEXT: [[S_MOV_B32_76:%[0-9]+]]:sgpr_32 = S_MOV_B32 2
- ; GFX908-NEXT: [[S_MOV_B32_77:%[0-9]+]]:sgpr_32 = S_MOV_B32 3
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_76]], implicit [[S_MOV_B32_77]]
- ; GFX908-NEXT: [[S_MOV_B32_78:%[0-9]+]]:sgpr_32 = S_MOV_B32 4
- ; GFX908-NEXT: [[S_MOV_B32_79:%[0-9]+]]:sgpr_32 = S_MOV_B32 5
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_78]], implicit [[S_MOV_B32_79]]
- ; GFX908-NEXT: [[S_MOV_B32_80:%[0-9]+]]:sgpr_32 = S_MOV_B32 6
- ; GFX908-NEXT: [[S_MOV_B32_81:%[0-9]+]]:sgpr_32 = S_MOV_B32 7
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_80]], implicit [[S_MOV_B32_81]]
- ; GFX908-NEXT: [[S_MOV_B32_82:%[0-9]+]]:sgpr_32 = S_MOV_B32 8
- ; GFX908-NEXT: [[S_MOV_B32_83:%[0-9]+]]:sgpr_32 = S_MOV_B32 9
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_82]], implicit [[S_MOV_B32_83]]
- ; GFX908-NEXT: [[S_MOV_B32_84:%[0-9]+]]:sgpr_32 = S_MOV_B32 10
- ; GFX908-NEXT: [[S_MOV_B32_85:%[0-9]+]]:sgpr_32 = S_MOV_B32 11
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_84]], implicit [[S_MOV_B32_85]]
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_1]], implicit [[S_MOV_B32_2]]
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_3]], implicit [[S_MOV_B32_4]]
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_5]], implicit [[S_MOV_B32_6]]
@@ -2179,7 +2161,25 @@ body: |
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_67]], implicit [[S_MOV_B32_68]]
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_69]], implicit [[S_MOV_B32_70]]
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_71]], implicit [[S_MOV_B32_72]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_73]]
+ ; GFX908-NEXT: [[S_MOV_B32_74:%[0-9]+]]:sgpr_32 = S_MOV_B32 73
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_73]], implicit [[S_MOV_B32_74]]
+ ; GFX908-NEXT: [[S_MOV_B32_75:%[0-9]+]]:sgpr_32 = S_MOV_B32 75
+ ; GFX908-NEXT: [[S_MOV_B32_76:%[0-9]+]]:sgpr_32 = S_MOV_B32 74
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_76]], implicit [[S_MOV_B32_75]]
+ ; GFX908-NEXT: [[S_MOV_B32_77:%[0-9]+]]:sgpr_32 = S_MOV_B32 77
+ ; GFX908-NEXT: [[S_MOV_B32_78:%[0-9]+]]:sgpr_32 = S_MOV_B32 76
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_78]], implicit [[S_MOV_B32_77]]
+ ; GFX908-NEXT: [[S_MOV_B32_79:%[0-9]+]]:sgpr_32 = S_MOV_B32 79
+ ; GFX908-NEXT: [[S_MOV_B32_80:%[0-9]+]]:sgpr_32 = S_MOV_B32 78
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_80]], implicit [[S_MOV_B32_79]]
+ ; GFX908-NEXT: [[S_MOV_B32_81:%[0-9]+]]:sgpr_32 = S_MOV_B32 81
+ ; GFX908-NEXT: [[S_MOV_B32_82:%[0-9]+]]:sgpr_32 = S_MOV_B32 80
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_82]], implicit [[S_MOV_B32_81]]
+ ; GFX908-NEXT: [[S_MOV_B32_83:%[0-9]+]]:sgpr_32 = S_MOV_B32 83
+ ; GFX908-NEXT: [[S_MOV_B32_84:%[0-9]+]]:sgpr_32 = S_MOV_B32 82
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_84]], implicit [[S_MOV_B32_83]]
+ ; GFX908-NEXT: [[S_MOV_B32_85:%[0-9]+]]:sgpr_32 = S_MOV_B32 84
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_85]]
; GFX908-NEXT: [[COPY2:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec
; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY2]], [[V_CMP_GT_U32_e64_]], implicit-def dead $scc
; GFX908-NEXT: $exec = S_MOV_B64_term [[S_AND_B64_]]
@@ -2457,30 +2457,30 @@ body: |
; GFX908-NEXT: [[V_CVT_I32_F64_e32_20:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 20, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_21:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 21, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.1:
; GFX908-NEXT: successors: %bb.2(0x80000000)
; GFX908-NEXT: {{ $}}
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_24]]
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_23]]
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.2:
; GFX908-NEXT: successors: %bb.3(0x80000000)
; GFX908-NEXT: {{ $}}
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 25, implicit $exec, implicit $mode
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_23]]
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_24]]
; GFX908-NEXT: S_NOP 0
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.3:
; GFX908-NEXT: successors: %bb.4(0x80000000)
; GFX908-NEXT: {{ $}}
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_26:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 26, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 27, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_26]], implicit [[V_CVT_I32_F64_e32_27]]
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 26, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_26:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 27, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_25]], implicit [[V_CVT_I32_F64_e32_26]]
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.4:
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_25]]
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 25, implicit $exec, implicit $mode
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_27]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_]], implicit [[V_CVT_I32_F64_e32_1]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_2]], implicit [[V_CVT_I32_F64_e32_3]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_4]], implicit [[V_CVT_I32_F64_e32_5]]
@@ -5669,13 +5669,13 @@ body: |
; GFX908-NEXT: [[V_CVT_I32_F64_e32_20:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 20, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_21:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 21, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.1:
; GFX908-NEXT: successors: %bb.2(0x80000000)
; GFX908-NEXT: {{ $}}
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_23]], implicit [[V_CVT_I32_F64_e32_24]]
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_24]], implicit [[V_CVT_I32_F64_e32_23]]
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.2:
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_]], implicit [[V_CVT_I32_F64_e32_1]]
@@ -5981,7 +5981,6 @@ body: |
; GFX908-NEXT: [[V_CVT_I32_F64_e32_19:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 19, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_20:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 20, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_21:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 21, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[V_CVT_F64_I32_e32_:%[0-9]+]]:vreg_64 = nofpexcept V_CVT_F64_I32_e32 22, implicit $exec, implicit $mode
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.1:
; GFX908-NEXT: successors: %bb.2(0x80000000)
@@ -5990,6 +5989,7 @@ body: |
; GFX908-NEXT: [[V_CVT_I32_F64_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 25, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_22]], implicit [[V_CVT_I32_F64_e32_23]], implicit [[V_CVT_I32_F64_e32_24]]
+ ; GFX908-NEXT: [[V_CVT_F64_I32_e32_:%[0-9]+]]:vreg_64 = nofpexcept V_CVT_F64_I32_e32 22, implicit $exec, implicit $mode
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_F64_I32_e32_]]
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.2:
@@ -6088,12 +6088,12 @@ body: |
; GFX908-NEXT: [[V_CVT_I32_F64_e32_19:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 19, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_20:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 20, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_21:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 21, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[V_CVT_F64_I32_e32_:%[0-9]+]]:vreg_64 = nofpexcept V_CVT_F64_I32_e32 22, implicit $exec, implicit $mode
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.1:
; GFX908-NEXT: successors: %bb.2(0x80000000)
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: [[V_CVT_I32_F64_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_F64_I32_e32_:%[0-9]+]]:vreg_64 = nofpexcept V_CVT_F64_I32_e32 22, implicit $exec, implicit $mode
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_F64_I32_e32_]], implicit [[V_CVT_I32_F64_e32_22]]
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.2:
@@ -6329,7 +6329,7 @@ body: |
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_23]]
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.2:
- ; GFX908-NEXT: DBG_VALUE %23, 0, 0
+ ; GFX908-NEXT: DBG_VALUE %23:vgpr_32, 0, 0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_24]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_]], implicit [[V_CVT_I32_F64_e32_1]]
@@ -6514,56 +6514,55 @@ body: |
; GFX908: bb.0:
; GFX908-NEXT: successors: %bb.1(0x80000000)
; GFX908-NEXT: {{ $}}
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode
- ; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.1:
; GFX908-NEXT: successors: %bb.2(0x80000000)
; GFX908-NEXT: {{ $}}
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_3:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 4, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_4:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 5, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_5:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 6, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_6:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 7, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_7:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 8, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_8:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 9, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_9:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 10, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_10:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 11, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_11:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 12, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_12:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 13, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_13:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 14, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_14:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 15, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_15:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 16, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_16:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 17, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_17:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 18, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_18:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 19, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_19:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 20, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_20:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 21, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_21:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 25, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 26, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_26:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 27, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 28, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 29, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 30, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 31, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: dead [[V_CVT_I32_F64_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 32, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 4, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_3:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 5, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_4:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 6, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_5:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 7, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_6:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 8, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_7:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 9, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_8:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 10, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_9:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 11, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_10:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 12, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_11:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 13, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_12:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 14, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_13:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 15, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_14:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 16, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_15:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 17, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_16:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 18, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_17:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 19, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_18:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 20, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_19:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 21, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_20:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_21:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 25, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 26, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 27, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_26:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 28, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 29, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 30, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 31, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: dead [[V_CVT_I32_F64_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 32, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.2:
; GFX908-NEXT: successors: %bb.3(0x80000000)
; GFX908-NEXT: {{ $}}
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_1]], implicit [[V_CVT_I32_F64_e32_2]], implicit [[V_CVT_I32_F64_e32_3]], implicit [[V_CVT_I32_F64_e32_4]], implicit [[V_CVT_I32_F64_e32_5]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_6]], implicit [[V_CVT_I32_F64_e32_7]], implicit [[V_CVT_I32_F64_e32_8]], implicit [[V_CVT_I32_F64_e32_9]], implicit [[V_CVT_I32_F64_e32_10]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_11]], implicit [[V_CVT_I32_F64_e32_12]], implicit [[V_CVT_I32_F64_e32_13]], implicit [[V_CVT_I32_F64_e32_14]], implicit [[V_CVT_I32_F64_e32_15]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_16]], implicit [[V_CVT_I32_F64_e32_17]], implicit [[V_CVT_I32_F64_e32_18]], implicit [[V_CVT_I32_F64_e32_19]], implicit [[V_CVT_I32_F64_e32_20]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_21]], implicit [[V_CVT_I32_F64_e32_22]], implicit [[V_CVT_I32_F64_e32_23]], implicit [[V_CVT_I32_F64_e32_24]], implicit [[V_CVT_I32_F64_e32_25]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_26]], implicit [[V_CVT_I32_F64_e32_27]], implicit [[V_CVT_I32_F64_e32_28]], implicit [[V_CVT_I32_F64_e32_29]], implicit [[V_CVT_I32_F64_e32_30]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_]], implicit [[V_CVT_I32_F64_e32_1]], implicit [[V_CVT_I32_F64_e32_2]], implicit [[V_CVT_I32_F64_e32_3]], implicit [[V_CVT_I32_F64_e32_4]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_5]], implicit [[V_CVT_I32_F64_e32_6]], implicit [[V_CVT_I32_F64_e32_7]], implicit [[V_CVT_I32_F64_e32_8]], implicit [[V_CVT_I32_F64_e32_9]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_10]], implicit [[V_CVT_I32_F64_e32_11]], implicit [[V_CVT_I32_F64_e32_12]], implicit [[V_CVT_I32_F64_e32_13]], implicit [[V_CVT_I32_F64_e32_14]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_15]], implicit [[V_CVT_I32_F64_e32_16]], implicit [[V_CVT_I32_F64_e32_17]], implicit [[V_CVT_I32_F64_e32_18]], implicit [[V_CVT_I32_F64_e32_19]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_20]], implicit [[V_CVT_I32_F64_e32_21]], implicit [[V_CVT_I32_F64_e32_22]], implicit [[V_CVT_I32_F64_e32_23]], implicit [[V_CVT_I32_F64_e32_24]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_25]], implicit [[V_CVT_I32_F64_e32_26]], implicit [[V_CVT_I32_F64_e32_27]], implicit [[V_CVT_I32_F64_e32_28]], implicit [[V_CVT_I32_F64_e32_29]]
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.3:
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_32:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 0, implicit $exec, implicit $mode
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_32]], implicit [[V_CVT_I32_F64_e32_]]
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 0, implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_32:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_31]], implicit [[V_CVT_I32_F64_e32_32]]
; GFX908-NEXT: S_ENDPGM 0
bb.0:
successors: %bb.1
@@ -6920,7 +6919,6 @@ body: |
; GFX908-NEXT: [[V_CVT_I32_F64_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 29, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 30, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 31, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_32:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 32, implicit $exec, implicit $mode
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.1:
; GFX908-NEXT: successors: %bb.2(0x80000000)
@@ -6931,6 +6929,7 @@ body: |
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_15]], implicit [[V_CVT_I32_F64_e32_16]], implicit [[V_CVT_I32_F64_e32_17]], implicit [[V_CVT_I32_F64_e32_18]], implicit [[V_CVT_I32_F64_e32_19]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_20]], implicit [[V_CVT_I32_F64_e32_21]], implicit [[V_CVT_I32_F64_e32_22]], implicit [[V_CVT_I32_F64_e32_23]], implicit [[V_CVT_I32_F64_e32_24]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_25]], implicit [[V_CVT_I32_F64_e32_26]], implicit [[V_CVT_I32_F64_e32_27]], implicit [[V_CVT_I32_F64_e32_28]], implicit [[V_CVT_I32_F64_e32_29]]
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_32:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 32, implicit $exec, implicit $mode
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_30]], implicit [[V_CVT_I32_F64_e32_31]], implicit [[V_CVT_I32_F64_e32_32]]
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.2:
@@ -7056,14 +7055,14 @@ body: |
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_25]], implicit [[V_CVT_I32_F64_e32_26]], implicit [[V_CVT_I32_F64_e32_27]], implicit [[V_CVT_I32_F64_e32_28]], implicit [[V_CVT_I32_F64_e32_29]]
; GFX908-NEXT: [[V_CVT_I32_F64_e32_32:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 32, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_30]], implicit [[V_CVT_I32_F64_e32_31]], implicit [[V_CVT_I32_F64_e32_32]]
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_33:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 33, implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_34:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 34, implicit $exec, implicit $mode
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.2:
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_]], implicit [[V_CVT_I32_F64_e32_1]], implicit [[V_CVT_I32_F64_e32_2]], implicit [[V_CVT_I32_F64_e32_3]], implicit [[V_CVT_I32_F64_e32_4]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_5]], implicit [[V_CVT_I32_F64_e32_6]], implicit [[V_CVT_I32_F64_e32_7]], implicit [[V_CVT_I32_F64_e32_8]], implicit [[V_CVT_I32_F64_e32_9]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_10]], implicit [[V_CVT_I32_F64_e32_11]], implicit [[V_CVT_I32_F64_e32_12]], implicit [[V_CVT_I32_F64_e32_13]], implicit [[V_CVT_I32_F64_e32_14]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_15]], implicit [[V_CVT_I32_F64_e32_16]], implicit [[V_CVT_I32_F64_e32_17]], implicit [[V_CVT_I32_F64_e32_18]], implicit [[V_CVT_I32_F64_e32_19]]
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_33:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 33, implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_34:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 34, implicit $exec, implicit $mode
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_20]], implicit [[V_CVT_I32_F64_e32_21]], implicit [[V_CVT_I32_F64_e32_22]], implicit [[V_CVT_I32_F64_e32_23]], implicit [[V_CVT_I32_F64_e32_24]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_25]], implicit [[V_CVT_I32_F64_e32_26]], implicit [[V_CVT_I32_F64_e32_27]], implicit [[V_CVT_I32_F64_e32_28]], implicit [[V_CVT_I32_F64_e32_29]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_30]], implicit [[V_CVT_I32_F64_e32_31]]
@@ -7180,9 +7179,6 @@ body: |
; GFX908-NEXT: bb.1:
; GFX908-NEXT: successors: %bb.2(0x80000000)
; GFX908-NEXT: {{ $}}
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_32:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 32, implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_33:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 33, implicit $exec, implicit $mode
- ; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.2:
; GFX908-NEXT: successors: %bb.3(0x80000000)
; GFX908-NEXT: {{ $}}
@@ -7192,6 +7188,8 @@ body: |
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_15]], implicit [[V_CVT_I32_F64_e32_16]], implicit [[V_CVT_I32_F64_e32_17]], implicit [[V_CVT_I32_F64_e32_18]], implicit [[V_CVT_I32_F64_e32_19]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_20]], implicit [[V_CVT_I32_F64_e32_21]], implicit [[V_CVT_I32_F64_e32_22]], implicit [[V_CVT_I32_F64_e32_23]], implicit [[V_CVT_I32_F64_e32_24]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_25]], implicit [[V_CVT_I32_F64_e32_26]], implicit [[V_CVT_I32_F64_e32_27]], implicit [[V_CVT_I32_F64_e32_28]], implicit [[V_CVT_I32_F64_e32_29]]
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_32:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 32, implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_33:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 33, implicit $exec, implicit $mode
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_30]], implicit [[V_CVT_I32_F64_e32_31]], implicit [[V_CVT_I32_F64_e32_32]], implicit [[V_CVT_I32_F64_e32_33]]
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.3:
@@ -7282,10 +7280,9 @@ body: |
; GFX908-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr0_sgpr1
; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
; GFX908-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 52, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4)
- ; GFX908-NEXT: undef [[S_MOV_B32_:%[0-9]+]].sub1:sreg_64 = S_MOV_B32 0
; GFX908-NEXT: $vgpr8 = IMPLICIT_DEF
; GFX908-NEXT: $vgpr9 = IMPLICIT_DEF
- ; GFX908-NEXT: dead [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
@@ -7302,60 +7299,61 @@ body: |
; GFX908-NEXT: [[DEF14:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF15:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: dead [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF16]], implicit $exec, implicit $mode
; GFX908-NEXT: [[DEF17:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: dead [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF17]], implicit $exec, implicit $mode
; GFX908-NEXT: [[DEF18:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF19:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: dead [[V_CMP_GT_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_GT_U32_e64 [[S_LOAD_DWORDX2_IMM]].sub0, [[COPY1]](s32), implicit $exec
- ; GFX908-NEXT: dead undef [[S_MOV_B32_:%[0-9]+]].sub0:sreg_64 = COPY [[S_LOAD_DWORDX2_IMM]].sub1
; GFX908-NEXT: [[V_CVT_I32_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 $vgpr8, implicit $exec, implicit $mode
; GFX908-NEXT: [[V_CVT_I32_F32_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 $vgpr9, implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_3:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF1]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_4:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF2]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_5:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF3]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_6:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF4]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_7:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF5]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_8:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF6]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_9:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF7]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_10:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF8]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_11:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF9]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_12:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF10]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_13:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF11]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_14:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF12]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_15:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF13]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_16:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF14]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_17:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF15]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_18:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF16]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_19:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF18]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_20:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF19]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: dead [[V_CMP_GT_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_GT_U32_e64 [[S_LOAD_DWORDX2_IMM]].sub0, [[COPY1]](s32), implicit $exec
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_3:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_4:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF1]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_5:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF2]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_6:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF3]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_7:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF4]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_8:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF5]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_9:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF6]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_10:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF7]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_11:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF8]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_12:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF9]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_13:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF10]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_14:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF11]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_15:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF12]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_16:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF13]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_17:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF14]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_18:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF15]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_19:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF17]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_20:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF18]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF19:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_21:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF19]], implicit $exec, implicit $mode
; GFX908-NEXT: [[DEF20:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_21:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF20]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF20]], implicit $exec, implicit $mode
; GFX908-NEXT: [[DEF21:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF21]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF21]], implicit $exec, implicit $mode
; GFX908-NEXT: [[DEF22:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF22]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF22]], implicit $exec, implicit $mode
; GFX908-NEXT: [[DEF23:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF23]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF23]], implicit $exec, implicit $mode
; GFX908-NEXT: [[DEF24:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF24]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_26:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF24]], implicit $exec, implicit $mode
; GFX908-NEXT: [[DEF25:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_26:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF25]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF25]], implicit $exec, implicit $mode
; GFX908-NEXT: [[DEF26:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF26]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF26]], implicit $exec, implicit $mode
; GFX908-NEXT: [[DEF27:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF27]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[DEF28:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF28]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF27]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: undef [[S_MOV_B32_:%[0-9]+]].sub1:sreg_64 = S_MOV_B32 0
+ ; GFX908-NEXT: dead [[DEF28:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: dead undef [[S_MOV_B32_:%[0-9]+]].sub0:sreg_64 = COPY [[S_LOAD_DWORDX2_IMM]].sub1
; GFX908-NEXT: [[DEF29:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF29]], implicit $exec, implicit $mode
; GFX908-NEXT: [[DEF30:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GFX908-NEXT: S_BRANCH %bb.1
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.1:
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_3]], implicit [[V_CVT_I32_F32_e32_11]]
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF29]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_1]], implicit [[V_CVT_I32_F32_e32_30]], implicit [[DEF29]]
; GFX908-NEXT: [[V_CVT_I32_F32_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF30]], implicit $exec, implicit $mode
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_1]], implicit [[V_CVT_I32_F32_e32_31]], implicit [[DEF30]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_2]], implicit [[V_CVT_I32_F32_e32_30]], implicit [[DEF29]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_2]], implicit [[V_CVT_I32_F32_e32_31]], implicit [[DEF30]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_4]], implicit [[V_CVT_I32_F32_e32_12]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_5]], implicit [[V_CVT_I32_F32_e32_13]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_6]], implicit [[V_CVT_I32_F32_e32_14]]
@@ -7482,8 +7480,7 @@ body: |
; GFX908-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr0_sgpr1
; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
; GFX908-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 52, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4)
- ; GFX908-NEXT: undef [[S_MOV_B32_:%[0-9]+]].sub1:sreg_64 = S_MOV_B32 0
- ; GFX908-NEXT: dead [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
@@ -7500,64 +7497,65 @@ body: |
; GFX908-NEXT: [[DEF14:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF15:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: dead [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF16]], implicit $exec, implicit $mode
; GFX908-NEXT: [[DEF17:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: dead [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF17]], implicit $exec, implicit $mode
; GFX908-NEXT: [[DEF18:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF1]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_3:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF2]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_4:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF3]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_5:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF4]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_6:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF5]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_7:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF6]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_8:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF7]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_9:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF8]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_10:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF9]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_11:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF10]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_12:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF11]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_13:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF12]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_14:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF13]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_15:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF14]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_16:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF15]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_17:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF17]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_18:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF18]], implicit $exec, implicit $mode
; GFX908-NEXT: [[DEF19:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: dead [[V_CMP_GT_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_GT_U32_e64 [[S_LOAD_DWORDX2_IMM]].sub0, [[COPY1]](s32), implicit $exec
- ; GFX908-NEXT: dead undef [[S_MOV_B32_:%[0-9]+]].sub0:sreg_64 = COPY [[S_LOAD_DWORDX2_IMM]].sub1
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF1]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF2]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_3:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF3]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_4:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF4]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_5:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF5]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_6:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF6]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_7:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF7]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_8:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF8]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_9:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF9]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_10:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF10]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_11:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF11]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_12:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF12]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_13:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF13]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_14:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF14]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_15:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF15]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_16:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF16]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_17:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF18]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_18:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF19]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_19:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF19]], implicit $exec, implicit $mode
; GFX908-NEXT: [[DEF20:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_19:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF20]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_20:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF20]], implicit $exec, implicit $mode
; GFX908-NEXT: [[DEF21:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_20:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF21]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_21:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF21]], implicit $exec, implicit $mode
; GFX908-NEXT: [[DEF22:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_21:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF22]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF22]], implicit $exec, implicit $mode
; GFX908-NEXT: [[DEF23:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF23]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF23]], implicit $exec, implicit $mode
; GFX908-NEXT: [[DEF24:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF24]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF24]], implicit $exec, implicit $mode
; GFX908-NEXT: [[DEF25:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF25]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF25]], implicit $exec, implicit $mode
; GFX908-NEXT: [[DEF26:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF26]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_26:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF26]], implicit $exec, implicit $mode
; GFX908-NEXT: [[DEF27:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_26:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF27]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[DEF28:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF28]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF27]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: undef [[S_MOV_B32_:%[0-9]+]].sub1:sreg_64 = S_MOV_B32 0
+ ; GFX908-NEXT: dead [[DEF28:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: dead [[V_CMP_GT_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_GT_U32_e64 [[S_LOAD_DWORDX2_IMM]].sub0, [[COPY1]](s32), implicit $exec
+ ; GFX908-NEXT: dead undef [[S_MOV_B32_:%[0-9]+]].sub0:sreg_64 = COPY [[S_LOAD_DWORDX2_IMM]].sub1
; GFX908-NEXT: [[DEF29:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GFX908-NEXT: $vgpr8 = IMPLICIT_DEF
; GFX908-NEXT: $vgpr9 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF30:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 2, implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF29]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF30]], implicit $exec, implicit $mode
; GFX908-NEXT: S_BRANCH %bb.1
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.1:
; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 255
; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 [[S_MOV_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_1]], implicit [[V_CVT_I32_F32_e32_9]]
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 1, implicit $exec, implicit $mode
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_31]], implicit [[V_CVT_I32_F32_e32_29]], implicit [[DEF29]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_28]], implicit [[V_CVT_I32_F32_e32_30]], implicit [[DEF30]]
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 1, implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF29]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_28]], implicit [[V_CVT_I32_F32_e32_29]], implicit [[DEF29]]
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 2, implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF30]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_30]], implicit [[V_CVT_I32_F32_e32_31]], implicit [[DEF30]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_2]], implicit [[V_CVT_I32_F32_e32_10]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_3]], implicit [[V_CVT_I32_F32_e32_11]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_4]], implicit [[V_CVT_I32_F32_e32_12]]
@@ -7738,32 +7736,32 @@ body: |
; GFX908-NEXT: [[DEF25:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF26:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF27:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF28:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF29:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF22]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF23]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF24]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF25]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_26:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF26]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF27]], implicit $exec, implicit $mode
; GFX908-NEXT: dead [[V_CMP_GT_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_GT_U32_e64 [[S_LOAD_DWORDX2_IMM]].sub0, [[COPY1]](s32), implicit $exec
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF28]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF28:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GFX908-NEXT: undef [[S_MOV_B32_:%[0-9]+]].sub1:sreg_64 = S_MOV_B32 0
; GFX908-NEXT: dead undef [[S_MOV_B32_:%[0-9]+]].sub0:sreg_64 = COPY [[S_LOAD_DWORDX2_IMM]].sub1
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF29]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF29:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF30:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF31:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GFX908-NEXT: S_BRANCH %bb.1
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.1:
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_1]], implicit [[V_CVT_I32_F32_e32_6]]
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF30]], implicit $exec, implicit $mode
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_30]], implicit [[V_CVT_I32_F32_e32_25]], implicit [[DEF30]], implicit [[DEF25]]
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF22]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF27]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_22]], implicit [[V_CVT_I32_F32_e32_23]], implicit [[DEF22]], implicit [[DEF27]]
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF23]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF28]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_24]], implicit [[V_CVT_I32_F32_e32_25]], implicit [[DEF23]], implicit [[DEF28]]
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_26:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF24]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF29]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_26]], implicit [[V_CVT_I32_F32_e32_27]], implicit [[DEF24]], implicit [[DEF29]]
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF25]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF30]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF26]], implicit $exec, implicit $mode
; GFX908-NEXT: [[V_CVT_I32_F32_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF31]], implicit $exec, implicit $mode
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_31]], implicit [[V_CVT_I32_F32_e32_26]], implicit [[DEF31]], implicit [[DEF26]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_22]], implicit [[V_CVT_I32_F32_e32_27]], implicit [[DEF22]], implicit [[DEF27]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_23]], implicit [[V_CVT_I32_F32_e32_28]], implicit [[DEF23]], implicit [[DEF28]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_24]], implicit [[V_CVT_I32_F32_e32_29]], implicit [[DEF24]], implicit [[DEF29]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_28]], implicit [[V_CVT_I32_F32_e32_29]], implicit [[DEF25]], implicit [[DEF30]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_30]], implicit [[V_CVT_I32_F32_e32_31]], implicit [[DEF26]], implicit [[DEF31]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_2]], implicit [[V_CVT_I32_F32_e32_7]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_3]], implicit [[V_CVT_I32_F32_e32_8]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_4]], implicit [[V_CVT_I32_F32_e32_9]]
@@ -8084,58 +8082,58 @@ body: |
; GFX908-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr0_sgpr1
; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
; GFX908-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 52, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4)
- ; GFX908-NEXT: undef [[S_MOV_B32_:%[0-9]+]].sub1:sreg_64 = S_MOV_B32 0
- ; GFX908-NEXT: undef [[DEF:%[0-9]+]].sub0:vreg_512 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF:%[0-9]+]].sub1:vreg_512 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF:%[0-9]+]].sub2:vreg_512 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF:%[0-9]+]].sub3:vreg_512 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: dead [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF2]], implicit $exec, implicit $mode
; GFX908-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: dead [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF3]], implicit $exec, implicit $mode
; GFX908-NEXT: [[DEF4:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF1]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_3:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF3]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_4:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF4]], implicit $exec, implicit $mode
; GFX908-NEXT: [[DEF5:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: dead [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: dead [[V_CMP_GT_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_GT_U32_e64 [[S_LOAD_DWORDX2_IMM]].sub0, [[COPY1]](s32), implicit $exec
- ; GFX908-NEXT: dead undef [[S_MOV_B32_:%[0-9]+]].sub0:sreg_64 = COPY [[S_LOAD_DWORDX2_IMM]].sub1
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF1]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF2]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_3:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF4]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_4:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF5]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_5:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF5]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_6:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF6]], implicit $exec, implicit $mode
; GFX908-NEXT: [[DEF7:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_5:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF7]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_7:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF7]], implicit $exec, implicit $mode
; GFX908-NEXT: [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_6:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF8]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_8:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF8]], implicit $exec, implicit $mode
; GFX908-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_7:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF9]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_9:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF9]], implicit $exec, implicit $mode
; GFX908-NEXT: [[DEF10:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_8:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF10]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_10:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF10]], implicit $exec, implicit $mode
; GFX908-NEXT: [[DEF11:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_9:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF11]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_11:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF11]], implicit $exec, implicit $mode
; GFX908-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_10:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF12]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_12:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF12]], implicit $exec, implicit $mode
; GFX908-NEXT: [[DEF13:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_11:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF13]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_13:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF13]], implicit $exec, implicit $mode
; GFX908-NEXT: [[DEF14:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_12:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF14]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_14:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF14]], implicit $exec, implicit $mode
; GFX908-NEXT: [[DEF15:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_13:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF15]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_15:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF15]], implicit $exec, implicit $mode
; GFX908-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_14:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF16]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_16:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF16]], implicit $exec, implicit $mode
; GFX908-NEXT: [[DEF17:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_15:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF17]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_17:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF17]], implicit $exec, implicit $mode
; GFX908-NEXT: [[DEF18:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_16:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF18]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_18:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF18]], implicit $exec, implicit $mode
; GFX908-NEXT: [[DEF19:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_17:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF19]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_19:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF19]], implicit $exec, implicit $mode
; GFX908-NEXT: [[DEF20:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_18:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF20]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_20:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF20]], implicit $exec, implicit $mode
; GFX908-NEXT: [[DEF21:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_19:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF21]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[DEF22:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_20:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF22]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[DEF23:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_21:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF23]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_21:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF21]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: undef [[S_MOV_B32_:%[0-9]+]].sub1:sreg_64 = S_MOV_B32 0
+ ; GFX908-NEXT: undef [[DEF22:%[0-9]+]].sub1:vreg_512 = IMPLICIT_DEF
+ ; GFX908-NEXT: dead [[V_CMP_GT_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_GT_U32_e64 [[S_LOAD_DWORDX2_IMM]].sub0, [[COPY1]](s32), implicit $exec
+ ; GFX908-NEXT: undef [[DEF22:%[0-9]+]].sub0:vreg_512 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF22:%[0-9]+]].sub2:vreg_512 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF22:%[0-9]+]].sub3:vreg_512 = IMPLICIT_DEF
+ ; GFX908-NEXT: dead [[DEF23:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: dead undef [[S_MOV_B32_:%[0-9]+]].sub0:sreg_64 = COPY [[S_LOAD_DWORDX2_IMM]].sub1
; GFX908-NEXT: [[DEF24:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF25:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF26:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
@@ -8144,25 +8142,25 @@ body: |
; GFX908-NEXT: [[DEF29:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF30:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF31:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF24]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF25]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF26]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF27]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_26:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF]].sub2, implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF28]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF29]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF30]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF31]], implicit $exec, implicit $mode
; GFX908-NEXT: S_BRANCH %bb.1
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.1:
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_1]], implicit [[V_CVT_I32_F32_e32_6]]
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF]].sub0, implicit $exec, implicit $mode
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_31]], implicit [[V_CVT_I32_F32_e32_26]], implicit [[DEF]].sub0, implicit [[DEF]].sub2
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_22]], implicit [[V_CVT_I32_F32_e32_27]], implicit [[DEF24]], implicit [[DEF28]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_23]], implicit [[V_CVT_I32_F32_e32_28]], implicit [[DEF25]], implicit [[DEF29]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_24]], implicit [[V_CVT_I32_F32_e32_29]], implicit [[DEF26]], implicit [[DEF30]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_25]], implicit [[V_CVT_I32_F32_e32_30]], implicit [[DEF27]], implicit [[DEF31]]
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF22]].sub0, implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF22]].sub2, implicit $exec, implicit $mode
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_22]], implicit [[V_CVT_I32_F32_e32_23]], implicit [[DEF22]].sub0, implicit [[DEF22]].sub2
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF24]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF28]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_24]], implicit [[V_CVT_I32_F32_e32_25]], implicit [[DEF24]], implicit [[DEF28]]
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_26:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF25]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF29]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_26]], implicit [[V_CVT_I32_F32_e32_27]], implicit [[DEF25]], implicit [[DEF29]]
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF26]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF30]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF27]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF31]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_28]], implicit [[V_CVT_I32_F32_e32_29]], implicit [[DEF26]], implicit [[DEF30]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_30]], implicit [[V_CVT_I32_F32_e32_31]], implicit [[DEF27]], implicit [[DEF31]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_2]], implicit [[V_CVT_I32_F32_e32_7]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_3]], implicit [[V_CVT_I32_F32_e32_8]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_4]], implicit [[V_CVT_I32_F32_e32_9]]
@@ -8555,24 +8553,24 @@ body: |
; GFX908-NEXT: [[DEF30:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF31:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[V_CVT_I32_F32_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF25]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF29]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF30]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_26:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF31]], implicit $exec, implicit $mode
; GFX908-NEXT: undef [[V_CVT_I32_F32_e32_1:%[0-9]+]].sub0:vreg_64 = nofpexcept V_CVT_I32_F32_e32 [[DEF]].sub0, implicit $exec, implicit $mode
; GFX908-NEXT: S_BRANCH %bb.1
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.1:
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_2]], implicit [[V_CVT_I32_F32_e32_7]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_3]], implicit [[V_CVT_I32_F32_e32_8]]
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF]].sub2, implicit $exec, implicit $mode
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_1]].sub0, implicit [[V_CVT_I32_F32_e32_27]], implicit [[DEF]].sub0, implicit [[DEF]].sub2
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF28]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF26]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF27]], implicit $exec, implicit $mode
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_23]], implicit [[V_CVT_I32_F32_e32_28]], implicit [[DEF1]], implicit [[DEF28]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_23]], implicit [[V_CVT_I32_F32_e32_24]], implicit [[DEF25]], implicit [[DEF29]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_29]], implicit [[V_CVT_I32_F32_e32_25]], implicit [[DEF26]], implicit [[DEF30]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_30]], implicit [[V_CVT_I32_F32_e32_26]], implicit [[DEF27]], implicit [[DEF31]]
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF]].sub2, implicit $exec, implicit $mode
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_1]].sub0, implicit [[V_CVT_I32_F32_e32_24]], implicit [[DEF]].sub0, implicit [[DEF]].sub2
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF28]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_26:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF29]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF26]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF30]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF27]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF31]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_23]], implicit [[V_CVT_I32_F32_e32_25]], implicit [[DEF1]], implicit [[DEF28]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_23]], implicit [[V_CVT_I32_F32_e32_26]], implicit [[DEF25]], implicit [[DEF29]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_27]], implicit [[V_CVT_I32_F32_e32_28]], implicit [[DEF26]], implicit [[DEF30]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_29]], implicit [[V_CVT_I32_F32_e32_30]], implicit [[DEF27]], implicit [[DEF31]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_4]], implicit [[V_CVT_I32_F32_e32_9]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_5]], implicit [[V_CVT_I32_F32_e32_10]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_6]], implicit [[V_CVT_I32_F32_e32_11]]
@@ -8741,7 +8739,6 @@ body: |
; GFX908-NEXT: [[V_CVT_I32_F64_e32_41:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 51, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_42:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 52, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: undef [[V_RCP_F32_e32_:%[0-9]+]].sub0:vreg_64_align2 = nnan ninf nsz arcp contract afn reassoc nofpexcept V_RCP_F32_e32 [[DEF]].sub0, implicit $mode, implicit $exec
- ; GFX908-NEXT: [[DEF2:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.1:
; GFX908-NEXT: successors: %bb.2(0x80000000)
@@ -8758,8 +8755,9 @@ body: |
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_9]], implicit [[V_CVT_I32_F64_e32_19]], implicit [[V_CVT_I32_F64_e32_29]], implicit [[V_CVT_I32_F64_e32_39]]
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.2:
- ; GFX908-NEXT: dead [[DEF3:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
- ; GFX908-NEXT: undef [[V_FMA_F32_e64_:%[0-9]+]].sub0:vreg_64_align2 = nnan ninf nsz arcp contract afn reassoc nofpexcept V_FMA_F32_e64 0, [[DEF2]].sub1, 0, [[V_RCP_F32_e32_]].sub0, 0, [[DEF2]].sub0, 0, 0, implicit $mode, implicit $exec
+ ; GFX908-NEXT: dead [[DEF2:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF3:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+ ; GFX908-NEXT: undef [[V_FMA_F32_e64_:%[0-9]+]].sub0:vreg_64_align2 = nnan ninf nsz arcp contract afn reassoc nofpexcept V_FMA_F32_e64 0, [[DEF3]].sub1, 0, [[V_RCP_F32_e32_]].sub0, 0, [[DEF3]].sub0, 0, 0, implicit $mode, implicit $exec
; GFX908-NEXT: %temp2:vreg_64_align2 = IMPLICIT_DEF
; GFX908-NEXT: [[V_PK_MUL_F32_:%[0-9]+]]:vreg_64_align2 = nnan ninf nsz arcp contract afn reassoc nofpexcept V_PK_MUL_F32 0, [[V_RCP_F32_e32_]], 8, [[DEF]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec
; GFX908-NEXT: dead [[V_PK_FMA_F32_:%[0-9]+]]:vreg_64_align2 = nnan ninf nsz arcp contract afn reassoc nofpexcept V_PK_FMA_F32 0, [[V_FMA_F32_e64_]], 8, %temp2, 11, [[V_PK_MUL_F32_]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec
diff --git a/llvm/test/CodeGen/AMDGPU/machine-sink-temporal-divergence-swdev407790.ll b/llvm/test/CodeGen/AMDGPU/machine-sink-temporal-divergence-swdev407790.ll
index 680942fcb4d4b..ff43609bfec53 100644
--- a/llvm/test/CodeGen/AMDGPU/machine-sink-temporal-divergence-swdev407790.ll
+++ b/llvm/test/CodeGen/AMDGPU/machine-sink-temporal-divergence-swdev407790.ll
@@ -221,11 +221,11 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
; CHECK-NEXT: s_mov_b32 s12, s51
; CHECK-NEXT: s_mov_b32 s13, s50
; CHECK-NEXT: s_mov_b32 s14, s33
-; CHECK-NEXT: v_add_nc_u32_e32 v60, 1, v58
; CHECK-NEXT: v_add_nc_u32_e32 v47, 1, v47
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
; CHECK-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; CHECK-NEXT: ds_write_b32 v0, v60
+; CHECK-NEXT: v_add_nc_u32_e32 v1, 1, v58
+; CHECK-NEXT: ds_write_b32 v0, v1
; CHECK-NEXT: .LBB0_12: ; in Loop: Header=BB0_8 Depth=2
; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s81
; CHECK-NEXT: ds_read_u8 v0, v59 offset:2
@@ -247,11 +247,11 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
; CHECK-NEXT: s_mov_b32 s12, s51
; CHECK-NEXT: s_mov_b32 s13, s50
; CHECK-NEXT: s_mov_b32 s14, s33
-; CHECK-NEXT: v_add_nc_u32_e32 v60, 2, v58
; CHECK-NEXT: v_add_nc_u32_e32 v47, 1, v47
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
; CHECK-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; CHECK-NEXT: ds_write_b32 v0, v60
+; CHECK-NEXT: v_add_nc_u32_e32 v1, 2, v58
+; CHECK-NEXT: ds_write_b32 v0, v1
; CHECK-NEXT: .LBB0_14: ; in Loop: Header=BB0_8 Depth=2
; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s81
; CHECK-NEXT: ds_read_u8 v0, v59 offset:3
@@ -790,7 +790,6 @@ define protected amdgpu_kernel void @kernel_round1_short(ptr addrspace(1) nocapt
; CHECK-NEXT: s_addc_u32 s13, s13, 0
; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s12
; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s13
-; CHECK-NEXT: s_load_dwordx2 s[54:55], s[8:9], 0x10
; CHECK-NEXT: s_add_u32 s0, s0, s17
; CHECK-NEXT: s_mov_b64 s[38:39], s[8:9]
; CHECK-NEXT: s_addc_u32 s1, s1, 0
@@ -844,22 +843,24 @@ define protected amdgpu_kernel void @kernel_round1_short(ptr addrspace(1) nocapt
; CHECK-NEXT: ds_write_b32 v43, v43 offset:15360
; CHECK-NEXT: v_add_nc_u32_e32 v44, 0x3c04, v46
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
+; CHECK-NEXT: s_load_dwordx2 s[4:5], s[38:39], 0x10
; CHECK-NEXT: v_lshrrev_b32_e32 v0, 1, v42
; CHECK-NEXT: v_lshlrev_b32_e32 v1, 2, v42
; CHECK-NEXT: v_mov_b32_e32 v31, v40
; CHECK-NEXT: s_getpc_b64 s[16:17]
; CHECK-NEXT:    s_add_u32 s16, s16, _Z3minjj@rel32@lo+4
; CHECK-NEXT:    s_addc_u32 s17, s17, _Z3minjj@rel32@hi+12
-; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49]
+; CHECK-NEXT: s_mov_b64 s[6:7], s[36:37]
; CHECK-NEXT: v_and_b32_e32 v0, 0x7ffffffc, v0
; CHECK-NEXT: v_and_b32_e32 v1, 28, v1
-; CHECK-NEXT: s_mov_b64 s[6:7], s[36:37]
; CHECK-NEXT: s_mov_b64 s[8:9], s[52:53]
; CHECK-NEXT: s_mov_b64 s[10:11], s[34:35]
-; CHECK-NEXT: global_load_dword v0, v0, s[54:55]
; CHECK-NEXT: s_mov_b32 s12, s51
; CHECK-NEXT: s_mov_b32 s13, s50
; CHECK-NEXT: s_mov_b32 s14, s33
+; CHECK-NEXT: s_waitcnt lgkmcnt(0)
+; CHECK-NEXT: global_load_dword v0, v0, s[4:5]
+; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49]
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: v_bfe_u32 v0, v0, v1, 4
; CHECK-NEXT: v_mov_b32_e32 v1, 12
diff --git a/llvm/test/CodeGen/AMDGPU/mfma-loop.ll b/llvm/test/CodeGen/AMDGPU/mfma-loop.ll
index 0af655dfbbee9..0cef89867622e 100644
--- a/llvm/test/CodeGen/AMDGPU/mfma-loop.ll
+++ b/llvm/test/CodeGen/AMDGPU/mfma-loop.ll
@@ -43,12 +43,12 @@ define amdgpu_kernel void @test_mfma_loop_zeroinit(ptr addrspace(1) %arg) #0 {
; GFX908-NEXT: v_accvgpr_write_b32 a1, 0
; GFX908-NEXT: v_accvgpr_write_b32 a0, 0
; GFX908-NEXT: s_mov_b32 s0, 16
-; GFX908-NEXT: v_mov_b32_e32 v0, 2.0
-; GFX908-NEXT: v_mov_b32_e32 v1, 1.0
+; GFX908-NEXT: v_mov_b32_e32 v0, 1.0
+; GFX908-NEXT: v_mov_b32_e32 v1, 2.0
; GFX908-NEXT: .LBB0_1: ; %for.cond.preheader
; GFX908-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX908-NEXT: s_nop 1
-; GFX908-NEXT: v_mfma_f32_32x32x1f32 a[0:31], v1, v0, a[0:31]
+; GFX908-NEXT: v_mfma_f32_32x32x1f32 a[0:31], v0, v1, a[0:31]
; GFX908-NEXT: s_add_i32 s0, s0, -1
; GFX908-NEXT: s_cmp_lg_u32 s0, 0
; GFX908-NEXT: s_cbranch_scc1 .LBB0_1
@@ -134,12 +134,12 @@ define amdgpu_kernel void @test_mfma_loop_zeroinit(ptr addrspace(1) %arg) #0 {
; GFX90A-NEXT: v_accvgpr_write_b32 a1, 0
; GFX90A-NEXT: v_accvgpr_write_b32 a0, 0
; GFX90A-NEXT: s_mov_b32 s0, 16
-; GFX90A-NEXT: v_mov_b32_e32 v0, 2.0
-; GFX90A-NEXT: v_mov_b32_e32 v1, 1.0
+; GFX90A-NEXT: v_mov_b32_e32 v0, 1.0
+; GFX90A-NEXT: v_mov_b32_e32 v1, 2.0
; GFX90A-NEXT: .LBB0_1: ; %for.cond.preheader
; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX90A-NEXT: s_nop 1
-; GFX90A-NEXT: v_mfma_f32_32x32x1f32 a[0:31], v1, v0, a[0:31]
+; GFX90A-NEXT: v_mfma_f32_32x32x1f32 a[0:31], v0, v1, a[0:31]
; GFX90A-NEXT: s_add_i32 s0, s0, -1
; GFX90A-NEXT: s_cmp_lg_u32 s0, 0
; GFX90A-NEXT: s_cbranch_scc1 .LBB0_1
@@ -193,12 +193,12 @@ define amdgpu_kernel void @test_mfma_loop_zeroinit(ptr addrspace(1) %arg) #0 {
; GFX942-NEXT: v_accvgpr_write_b32 a1, 0
; GFX942-NEXT: v_accvgpr_write_b32 a0, 0
; GFX942-NEXT: s_mov_b32 s0, 16
-; GFX942-NEXT: v_mov_b32_e32 v0, 2.0
-; GFX942-NEXT: v_mov_b32_e32 v1, 1.0
+; GFX942-NEXT: v_mov_b32_e32 v0, 1.0
+; GFX942-NEXT: v_mov_b32_e32 v1, 2.0
; GFX942-NEXT: .LBB0_1: ; %for.cond.preheader
; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mfma_f32_32x32x1_2b_f32 a[0:31], v1, v0, a[0:31]
+; GFX942-NEXT: v_mfma_f32_32x32x1_2b_f32 a[0:31], v0, v1, a[0:31]
; GFX942-NEXT: s_add_i32 s0, s0, -1
; GFX942-NEXT: s_cmp_lg_u32 s0, 0
; GFX942-NEXT: s_cbranch_scc1 .LBB0_1
@@ -242,7 +242,7 @@ define amdgpu_kernel void @test_mfma_loop_unfoldable_splat(ptr addrspace(1) %arg
; GFX908: ; %bb.0: ; %entry
; GFX908-NEXT: v_mov_b32_e32 v0, 0x42f60000
; GFX908-NEXT: s_mov_b32 s0, 16
-; GFX908-NEXT: v_mov_b32_e32 v1, 1.0
+; GFX908-NEXT: v_mov_b32_e32 v1, 2.0
; GFX908-NEXT: v_accvgpr_write_b32 a31, v0
; GFX908-NEXT: v_accvgpr_write_b32 a30, v0
; GFX908-NEXT: v_accvgpr_write_b32 a29, v0
@@ -275,11 +275,11 @@ define amdgpu_kernel void @test_mfma_loop_unfoldable_splat(ptr addrspace(1) %arg
; GFX908-NEXT: v_accvgpr_write_b32 a2, v0
; GFX908-NEXT: v_accvgpr_write_b32 a1, v0
; GFX908-NEXT: v_accvgpr_write_b32 a0, v0
-; GFX908-NEXT: v_mov_b32_e32 v0, 2.0
+; GFX908-NEXT: v_mov_b32_e32 v0, 1.0
; GFX908-NEXT: .LBB1_1: ; %for.cond.preheader
; GFX908-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX908-NEXT: s_nop 1
-; GFX908-NEXT: v_mfma_f32_32x32x1f32 a[0:31], v1, v0, a[0:31]
+; GFX908-NEXT: v_mfma_f32_32x32x1f32 a[0:31], v0, v1, a[0:31]
; GFX908-NEXT: s_add_i32 s0, s0, -1
; GFX908-NEXT: s_cmp_lg_u32 s0, 0
; GFX908-NEXT: s_cbranch_scc1 .LBB1_1
@@ -366,12 +366,12 @@ define amdgpu_kernel void @test_mfma_loop_unfoldable_splat(ptr addrspace(1) %arg
; GFX90A-NEXT: v_accvgpr_write_b32 a1, v0
; GFX90A-NEXT: v_accvgpr_write_b32 a0, v0
; GFX90A-NEXT: s_mov_b32 s0, 16
-; GFX90A-NEXT: v_mov_b32_e32 v0, 2.0
-; GFX90A-NEXT: v_mov_b32_e32 v1, 1.0
+; GFX90A-NEXT: v_mov_b32_e32 v0, 1.0
+; GFX90A-NEXT: v_mov_b32_e32 v1, 2.0
; GFX90A-NEXT: .LBB1_1: ; %for.cond.preheader
; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX90A-NEXT: s_nop 1
-; GFX90A-NEXT: v_mfma_f32_32x32x1f32 a[0:31], v1, v0, a[0:31]
+; GFX90A-NEXT: v_mfma_f32_32x32x1f32 a[0:31], v0, v1, a[0:31]
; GFX90A-NEXT: s_add_i32 s0, s0, -1
; GFX90A-NEXT: s_cmp_lg_u32 s0, 0
; GFX90A-NEXT: s_cbranch_scc1 .LBB1_1
@@ -426,12 +426,12 @@ define amdgpu_kernel void @test_mfma_loop_unfoldable_splat(ptr addrspace(1) %arg
; GFX942-NEXT: v_accvgpr_write_b32 a1, v0
; GFX942-NEXT: v_accvgpr_write_b32 a0, v0
; GFX942-NEXT: s_mov_b32 s0, 16
-; GFX942-NEXT: v_mov_b32_e32 v0, 2.0
-; GFX942-NEXT: v_mov_b32_e32 v1, 1.0
+; GFX942-NEXT: v_mov_b32_e32 v0, 1.0
+; GFX942-NEXT: v_mov_b32_e32 v1, 2.0
; GFX942-NEXT: .LBB1_1: ; %for.cond.preheader
; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mfma_f32_32x32x1_2b_f32 a[0:31], v1, v0, a[0:31]
+; GFX942-NEXT: v_mfma_f32_32x32x1_2b_f32 a[0:31], v0, v1, a[0:31]
; GFX942-NEXT: s_add_i32 s0, s0, -1
; GFX942-NEXT: s_cmp_lg_u32 s0, 0
; GFX942-NEXT: s_cbranch_scc1 .LBB1_1
@@ -500,13 +500,13 @@ define amdgpu_kernel void @test_mfma_loop_non_splat(ptr addrspace(1) %arg) #0 {
; GFX908-NEXT: v_accvgpr_write_b32 a3, 0
; GFX908-NEXT: v_accvgpr_write_b32 a2, 0
; GFX908-NEXT: v_accvgpr_write_b32 a0, 0
-; GFX908-NEXT: v_mov_b32_e32 v0, 1.0
; GFX908-NEXT: s_mov_b32 s0, 16
-; GFX908-NEXT: v_mov_b32_e32 v1, 2.0
+; GFX908-NEXT: v_mov_b32_e32 v0, 2.0
+; GFX908-NEXT: v_mov_b32_e32 v1, 1.0
; GFX908-NEXT: .LBB2_1: ; %for.cond.preheader
; GFX908-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX908-NEXT: s_nop 1
-; GFX908-NEXT: v_mfma_f32_32x32x1f32 a[0:31], v0, v1, a[0:31]
+; GFX908-NEXT: v_mfma_f32_32x32x1f32 a[0:31], v1, v0, a[0:31]
; GFX908-NEXT: s_add_i32 s0, s0, -1
; GFX908-NEXT: s_cmp_lg_u32 s0, 0
; GFX908-NEXT: s_cbranch_scc1 .LBB2_1
@@ -592,12 +592,12 @@ define amdgpu_kernel void @test_mfma_loop_non_splat(ptr addrspace(1) %arg) #0 {
; GFX90A-NEXT: v_accvgpr_write_b32 a2, 0
; GFX90A-NEXT: v_accvgpr_write_b32 a0, 0
; GFX90A-NEXT: s_mov_b32 s0, 16
-; GFX90A-NEXT: v_mov_b32_e32 v0, 1.0
-; GFX90A-NEXT: v_mov_b32_e32 v1, 2.0
+; GFX90A-NEXT: v_mov_b32_e32 v0, 2.0
+; GFX90A-NEXT: v_mov_b32_e32 v1, 1.0
; GFX90A-NEXT: .LBB2_1: ; %for.cond.preheader
; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX90A-NEXT: s_nop 1
-; GFX90A-NEXT: v_mfma_f32_32x32x1f32 a[0:31], v0, v1, a[0:31]
+; GFX90A-NEXT: v_mfma_f32_32x32x1f32 a[0:31], v1, v0, a[0:31]
; GFX90A-NEXT: s_add_i32 s0, s0, -1
; GFX90A-NEXT: s_cmp_lg_u32 s0, 0
; GFX90A-NEXT: s_cbranch_scc1 .LBB2_1
@@ -651,12 +651,12 @@ define amdgpu_kernel void @test_mfma_loop_non_splat(ptr addrspace(1) %arg) #0 {
; GFX942-NEXT: v_accvgpr_write_b32 a2, 0
; GFX942-NEXT: v_accvgpr_write_b32 a0, 0
; GFX942-NEXT: s_mov_b32 s0, 16
-; GFX942-NEXT: v_mov_b32_e32 v0, 1.0
-; GFX942-NEXT: v_mov_b32_e32 v1, 2.0
+; GFX942-NEXT: v_mov_b32_e32 v0, 2.0
+; GFX942-NEXT: v_mov_b32_e32 v1, 1.0
; GFX942-NEXT: .LBB2_1: ; %for.cond.preheader
; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mfma_f32_32x32x1_2b_f32 a[0:31], v0, v1, a[0:31]
+; GFX942-NEXT: v_mfma_f32_32x32x1_2b_f32 a[0:31], v1, v0, a[0:31]
; GFX942-NEXT: s_add_i32 s0, s0, -1
; GFX942-NEXT: s_cmp_lg_u32 s0, 0
; GFX942-NEXT: s_cbranch_scc1 .LBB2_1
@@ -763,12 +763,12 @@ define amdgpu_kernel void @test_mfma_loop_unfoldable_seq(ptr addrspace(1) %arg)
; GFX908-NEXT: v_accvgpr_write_b32 a1, v1
; GFX908-NEXT: v_accvgpr_write_b32 a0, v2
; GFX908-NEXT: s_mov_b32 s0, 16
-; GFX908-NEXT: v_mov_b32_e32 v0, 2.0
-; GFX908-NEXT: v_mov_b32_e32 v1, 1.0
+; GFX908-NEXT: v_mov_b32_e32 v0, 1.0
+; GFX908-NEXT: v_mov_b32_e32 v1, 2.0
; GFX908-NEXT: .LBB3_1: ; %for.cond.preheader
; GFX908-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX908-NEXT: s_nop 1
-; GFX908-NEXT: v_mfma_f32_32x32x1f32 a[0:31], v1, v0, a[0:31]
+; GFX908-NEXT: v_mfma_f32_32x32x1f32 a[0:31], v0, v1, a[0:31]
; GFX908-NEXT: s_add_i32 s0, s0, -1
; GFX908-NEXT: s_cmp_lg_u32 s0, 0
; GFX908-NEXT: s_cbranch_scc1 .LBB3_1
@@ -886,12 +886,12 @@ define amdgpu_kernel void @test_mfma_loop_unfoldable_seq(ptr addrspace(1) %arg)
; GFX90A-NEXT: v_mov_b32_e32 v0, 0x42f60000
; GFX90A-NEXT: v_accvgpr_write_b32 a0, v0
; GFX90A-NEXT: s_mov_b32 s0, 16
-; GFX90A-NEXT: v_mov_b32_e32 v0, 2.0
-; GFX90A-NEXT: v_mov_b32_e32 v1, 1.0
+; GFX90A-NEXT: v_mov_b32_e32 v0, 1.0
+; GFX90A-NEXT: v_mov_b32_e32 v1, 2.0
; GFX90A-NEXT: .LBB3_1: ; %for.cond.preheader
; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX90A-NEXT: s_nop 1
-; GFX90A-NEXT: v_mfma_f32_32x32x1f32 a[0:31], v1, v0, a[0:31]
+; GFX90A-NEXT: v_mfma_f32_32x32x1f32 a[0:31], v0, v1, a[0:31]
; GFX90A-NEXT: s_add_i32 s0, s0, -1
; GFX90A-NEXT: s_cmp_lg_u32 s0, 0
; GFX90A-NEXT: s_cbranch_scc1 .LBB3_1
@@ -977,12 +977,12 @@ define amdgpu_kernel void @test_mfma_loop_unfoldable_seq(ptr addrspace(1) %arg)
; GFX942-NEXT: v_mov_b32_e32 v0, 0x42f60000
; GFX942-NEXT: v_accvgpr_write_b32 a0, v0
; GFX942-NEXT: s_mov_b32 s0, 16
-; GFX942-NEXT: v_mov_b32_e32 v0, 2.0
-; GFX942-NEXT: v_mov_b32_e32 v1, 1.0
+; GFX942-NEXT: v_mov_b32_e32 v0, 1.0
+; GFX942-NEXT: v_mov_b32_e32 v1, 2.0
; GFX942-NEXT: .LBB3_1: ; %for.cond.preheader
; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mfma_f32_32x32x1_2b_f32 a[0:31], v1, v0, a[0:31]
+; GFX942-NEXT: v_mfma_f32_32x32x1_2b_f32 a[0:31], v0, v1, a[0:31]
; GFX942-NEXT: s_add_i32 s0, s0, -1
; GFX942-NEXT: s_cmp_lg_u32 s0, 0
; GFX942-NEXT: s_cbranch_scc1 .LBB3_1
@@ -1052,12 +1052,12 @@ define amdgpu_kernel void @test_mfma_loop_vgpr_init(ptr addrspace(1) %arg) #0 {
; GFX908-NEXT: v_accvgpr_write_b32 a1, v0
; GFX908-NEXT: v_accvgpr_write_b32 a0, v0
; GFX908-NEXT: s_mov_b32 s0, 16
-; GFX908-NEXT: v_mov_b32_e32 v0, 2.0
-; GFX908-NEXT: v_mov_b32_e32 v1, 1.0
+; GFX908-NEXT: v_mov_b32_e32 v0, 1.0
+; GFX908-NEXT: v_mov_b32_e32 v1, 2.0
; GFX908-NEXT: .LBB4_1: ; %for.cond.preheader
; GFX908-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX908-NEXT: s_nop 1
-; GFX908-NEXT: v_mfma_f32_32x32x1f32 a[0:31], v1, v0, a[0:31]
+; GFX908-NEXT: v_mfma_f32_32x32x1f32 a[0:31], v0, v1, a[0:31]
; GFX908-NEXT: s_add_i32 s0, s0, -1
; GFX908-NEXT: s_cmp_lg_u32 s0, 0
; GFX908-NEXT: s_cbranch_scc1 .LBB4_1
@@ -1144,12 +1144,12 @@ define amdgpu_kernel void @test_mfma_loop_vgpr_init(ptr addrspace(1) %arg) #0 {
; GFX90A-NEXT: v_accvgpr_write_b32 a1, v0
; GFX90A-NEXT: v_accvgpr_write_b32 a0, v0
; GFX90A-NEXT: s_mov_b32 s0, 16
-; GFX90A-NEXT: v_mov_b32_e32 v0, 2.0
-; GFX90A-NEXT: v_mov_b32_e32 v1, 1.0
+; GFX90A-NEXT: v_mov_b32_e32 v0, 1.0
+; GFX90A-NEXT: v_mov_b32_e32 v1, 2.0
; GFX90A-NEXT: .LBB4_1: ; %for.cond.preheader
; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX90A-NEXT: s_nop 1
-; GFX90A-NEXT: v_mfma_f32_32x32x1f32 a[0:31], v1, v0, a[0:31]
+; GFX90A-NEXT: v_mfma_f32_32x32x1f32 a[0:31], v0, v1, a[0:31]
; GFX90A-NEXT: s_add_i32 s0, s0, -1
; GFX90A-NEXT: s_cmp_lg_u32 s0, 0
; GFX90A-NEXT: s_cbranch_scc1 .LBB4_1
@@ -1204,12 +1204,12 @@ define amdgpu_kernel void @test_mfma_loop_vgpr_init(ptr addrspace(1) %arg) #0 {
; GFX942-NEXT: v_accvgpr_write_b32 a1, v0
; GFX942-NEXT: v_accvgpr_write_b32 a0, v0
; GFX942-NEXT: s_mov_b32 s0, 16
-; GFX942-NEXT: v_mov_b32_e32 v0, 2.0
-; GFX942-NEXT: v_mov_b32_e32 v1, 1.0
+; GFX942-NEXT: v_mov_b32_e32 v0, 1.0
+; GFX942-NEXT: v_mov_b32_e32 v1, 2.0
; GFX942-NEXT: .LBB4_1: ; %for.cond.preheader
; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mfma_f32_32x32x1_2b_f32 a[0:31], v1, v0, a[0:31]
+; GFX942-NEXT: v_mfma_f32_32x32x1_2b_f32 a[0:31], v0, v1, a[0:31]
; GFX942-NEXT: s_add_i32 s0, s0, -1
; GFX942-NEXT: s_cmp_lg_u32 s0, 0
; GFX942-NEXT: s_cbranch_scc1 .LBB4_1
@@ -1282,7 +1282,7 @@ define amdgpu_kernel void @test_mfma_loop_sgpr_init(ptr addrspace(1) %arg, float
; GFX908-LABEL: test_mfma_loop_sgpr_init:
; GFX908: ; %bb.0: ; %entry
; GFX908-NEXT: s_load_dword s0, s[4:5], 0x2c
-; GFX908-NEXT: v_mov_b32_e32 v1, 1.0
+; GFX908-NEXT: v_mov_b32_e32 v1, 2.0
; GFX908-NEXT: s_waitcnt lgkmcnt(0)
; GFX908-NEXT: v_mov_b32_e32 v0, s0
; GFX908-NEXT: s_mov_b32 s0, 16
@@ -1319,11 +1319,11 @@ define amdgpu_kernel void @test_mfma_loop_sgpr_init(ptr addrspace(1) %arg, float
; GFX908-NEXT: v_accvgpr_write_b32 a2, v0
; GFX908-NEXT: v_accvgpr_write_b32 a1, v0
; GFX908-NEXT: v_accvgpr_write_b32 a0, v0
-; GFX908-NEXT: v_mov_b32_e32 v0, 2.0
+; GFX908-NEXT: v_mov_b32_e32 v0, 1.0
; GFX908-NEXT: .LBB5_1: ; %for.cond.preheader
; GFX908-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX908-NEXT: s_nop 1
-; GFX908-NEXT: v_mfma_f32_32x32x1f32 a[0:31], v1, v0, a[0:31]
+; GFX908-NEXT: v_mfma_f32_32x32x1f32 a[0:31], v0, v1, a[0:31]
; GFX908-NEXT: s_add_i32 s0, s0, -1
; GFX908-NEXT: s_cmp_lg_u32 s0, 0
; GFX908-NEXT: s_cbranch_scc1 .LBB5_1
@@ -1378,7 +1378,7 @@ define amdgpu_kernel void @test_mfma_loop_sgpr_init(ptr addrspace(1) %arg, float
; GFX90A: ; %bb.0: ; %entry
; GFX90A-NEXT: s_load_dword s1, s[4:5], 0x2c
; GFX90A-NEXT: s_mov_b32 s0, 16
-; GFX90A-NEXT: v_mov_b32_e32 v1, 1.0
+; GFX90A-NEXT: v_mov_b32_e32 v1, 2.0
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NEXT: v_mov_b32_e32 v0, s1
; GFX90A-NEXT: v_accvgpr_write_b32 a31, v0
@@ -1413,11 +1413,11 @@ define amdgpu_kernel void @test_mfma_loop_sgpr_init(ptr addrspace(1) %arg, float
; GFX90A-NEXT: v_accvgpr_write_b32 a2, v0
; GFX90A-NEXT: v_accvgpr_write_b32 a1, v0
; GFX90A-NEXT: v_accvgpr_write_b32 a0, v0
-; GFX90A-NEXT: v_mov_b32_e32 v0, 2.0
+; GFX90A-NEXT: v_mov_b32_e32 v0, 1.0
; GFX90A-NEXT: .LBB5_1: ; %for.cond.preheader
; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX90A-NEXT: s_nop 1
-; GFX90A-NEXT: v_mfma_f32_32x32x1f32 a[0:31], v1, v0, a[0:31]
+; GFX90A-NEXT: v_mfma_f32_32x32x1f32 a[0:31], v0, v1, a[0:31]
; GFX90A-NEXT: s_add_i32 s0, s0, -1
; GFX90A-NEXT: s_cmp_lg_u32 s0, 0
; GFX90A-NEXT: s_cbranch_scc1 .LBB5_1
@@ -1440,7 +1440,7 @@ define amdgpu_kernel void @test_mfma_loop_sgpr_init(ptr addrspace(1) %arg, float
; GFX942: ; %bb.0: ; %entry
; GFX942-NEXT: s_load_dword s1, s[4:5], 0x2c
; GFX942-NEXT: s_mov_b32 s0, 16
-; GFX942-NEXT: v_mov_b32_e32 v1, 1.0
+; GFX942-NEXT: v_mov_b32_e32 v1, 2.0
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-NEXT: v_mov_b32_e32 v0, s1
; GFX942-NEXT: v_accvgpr_write_b32 a31, v0
@@ -1475,11 +1475,11 @@ define amdgpu_kernel void @test_mfma_loop_sgpr_init(ptr addrspace(1) %arg, float
; GFX942-NEXT: v_accvgpr_write_b32 a2, v0
; GFX942-NEXT: v_accvgpr_write_b32 a1, v0
; GFX942-NEXT: v_accvgpr_write_b32 a0, v0
-; GFX942-NEXT: v_mov_b32_e32 v0, 2.0
+; GFX942-NEXT: v_mov_b32_e32 v0, 1.0
; GFX942-NEXT: .LBB5_1: ; %for.cond.preheader
; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mfma_f32_32x32x1_2b_f32 a[0:31], v1, v0, a[0:31]
+; GFX942-NEXT: v_mfma_f32_32x32x1_2b_f32 a[0:31], v0, v1, a[0:31]
; GFX942-NEXT: s_add_i32 s0, s0, -1
; GFX942-NEXT: s_cmp_lg_u32 s0, 0
; GFX942-NEXT: s_cbranch_scc1 .LBB5_1
@@ -1585,12 +1585,12 @@ define amdgpu_kernel void @test_mfma_loop_mixed_init(ptr addrspace(1) %arg, floa
; GFX908-NEXT: v_accvgpr_write_b32 a2, 0
; GFX908-NEXT: v_accvgpr_write_b32 a1, v0
; GFX908-NEXT: s_mov_b32 s0, 16
-; GFX908-NEXT: v_mov_b32_e32 v0, 2.0
-; GFX908-NEXT: v_mov_b32_e32 v1, 1.0
+; GFX908-NEXT: v_mov_b32_e32 v0, 1.0
+; GFX908-NEXT: v_mov_b32_e32 v1, 2.0
; GFX908-NEXT: .LBB6_1: ; %for.cond.preheader
; GFX908-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX908-NEXT: s_nop 1
-; GFX908-NEXT: v_mfma_f32_32x32x1f32 a[0:31], v1, v0, a[0:31]
+; GFX908-NEXT: v_mfma_f32_32x32x1f32 a[0:31], v0, v1, a[0:31]
; GFX908-NEXT: s_add_i32 s0, s0, -1
; GFX908-NEXT: s_cmp_lg_u32 s0, 0
; GFX908-NEXT: s_cbranch_scc1 .LBB6_1
@@ -1680,12 +1680,12 @@ define amdgpu_kernel void @test_mfma_loop_mixed_init(ptr addrspace(1) %arg, floa
; GFX90A-NEXT: v_accvgpr_write_b32 a2, 0
; GFX90A-NEXT: s_mov_b32 s0, 16
; GFX90A-NEXT: v_accvgpr_write_b32 a1, v0
-; GFX90A-NEXT: v_mov_b32_e32 v0, 2.0
-; GFX90A-NEXT: v_mov_b32_e32 v1, 1.0
+; GFX90A-NEXT: v_mov_b32_e32 v0, 1.0
+; GFX90A-NEXT: v_mov_b32_e32 v1, 2.0
; GFX90A-NEXT: .LBB6_1: ; %for.cond.preheader
; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX90A-NEXT: s_nop 1
-; GFX90A-NEXT: v_mfma_f32_32x32x1f32 a[0:31], v1, v0, a[0:31]
+; GFX90A-NEXT: v_mfma_f32_32x32x1f32 a[0:31], v0, v1, a[0:31]
; GFX90A-NEXT: s_add_i32 s0, s0, -1
; GFX90A-NEXT: s_cmp_lg_u32 s0, 0
; GFX90A-NEXT: s_cbranch_scc1 .LBB6_1
@@ -1743,12 +1743,12 @@ define amdgpu_kernel void @test_mfma_loop_mixed_init(ptr addrspace(1) %arg, floa
; GFX942-NEXT: v_accvgpr_write_b32 a2, 0
; GFX942-NEXT: s_mov_b32 s0, 16
; GFX942-NEXT: v_accvgpr_write_b32 a1, v0
-; GFX942-NEXT: v_mov_b32_e32 v0, 2.0
-; GFX942-NEXT: v_mov_b32_e32 v1, 1.0
+; GFX942-NEXT: v_mov_b32_e32 v0, 1.0
+; GFX942-NEXT: v_mov_b32_e32 v1, 2.0
; GFX942-NEXT: .LBB6_1: ; %for.cond.preheader
; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mfma_f32_32x32x1_2b_f32 a[0:31], v1, v0, a[0:31]
+; GFX942-NEXT: v_mfma_f32_32x32x1_2b_f32 a[0:31], v0, v1, a[0:31]
; GFX942-NEXT: s_add_i32 s0, s0, -1
; GFX942-NEXT: s_cmp_lg_u32 s0, 0
; GFX942-NEXT: s_cbranch_scc1 .LBB6_1
@@ -2565,12 +2565,12 @@ define <32 x float> @test_mfma_loop_zeroinit_ret_use() #0 {
; GFX908-NEXT: v_accvgpr_write_b32 a1, 0
; GFX908-NEXT: v_accvgpr_write_b32 a0, 0
; GFX908-NEXT: s_mov_b32 s4, 16
-; GFX908-NEXT: v_mov_b32_e32 v0, 2.0
-; GFX908-NEXT: v_mov_b32_e32 v1, 1.0
+; GFX908-NEXT: v_mov_b32_e32 v0, 1.0
+; GFX908-NEXT: v_mov_b32_e32 v1, 2.0
; GFX908-NEXT: .LBB10_1: ; %for.cond.preheader
; GFX908-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX908-NEXT: s_nop 1
-; GFX908-NEXT: v_mfma_f32_32x32x1f32 a[0:31], v1, v0, a[0:31]
+; GFX908-NEXT: v_mfma_f32_32x32x1f32 a[0:31], v0, v1, a[0:31]
; GFX908-NEXT: s_add_i32 s4, s4, -1
; GFX908-NEXT: s_cmp_lg_u32 s4, 0
; GFX908-NEXT: s_cbranch_scc1 .LBB10_1
@@ -2646,12 +2646,12 @@ define <32 x float> @test_mfma_loop_zeroinit_ret_use() #0 {
; GFX90A-NEXT: v_accvgpr_write_b32 a1, 0
; GFX90A-NEXT: v_accvgpr_write_b32 a0, 0
; GFX90A-NEXT: s_mov_b32 s4, 16
-; GFX90A-NEXT: v_mov_b32_e32 v0, 2.0
-; GFX90A-NEXT: v_mov_b32_e32 v1, 1.0
+; GFX90A-NEXT: v_mov_b32_e32 v0, 1.0
+; GFX90A-NEXT: v_mov_b32_e32 v1, 2.0
; GFX90A-NEXT: .LBB10_1: ; %for.cond.preheader
; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX90A-NEXT: s_nop 1
-; GFX90A-NEXT: v_mfma_f32_32x32x1f32 a[0:31], v1, v0, a[0:31]
+; GFX90A-NEXT: v_mfma_f32_32x32x1f32 a[0:31], v0, v1, a[0:31]
; GFX90A-NEXT: s_add_i32 s4, s4, -1
; GFX90A-NEXT: s_cmp_lg_u32 s4, 0
; GFX90A-NEXT: s_cbranch_scc1 .LBB10_1
@@ -2727,12 +2727,12 @@ define <32 x float> @test_mfma_loop_zeroinit_ret_use() #0 {
; GFX942-NEXT: v_accvgpr_write_b32 a1, 0
; GFX942-NEXT: v_accvgpr_write_b32 a0, 0
; GFX942-NEXT: s_mov_b32 s0, 16
-; GFX942-NEXT: v_mov_b32_e32 v0, 2.0
-; GFX942-NEXT: v_mov_b32_e32 v1, 1.0
+; GFX942-NEXT: v_mov_b32_e32 v0, 1.0
+; GFX942-NEXT: v_mov_b32_e32 v1, 2.0
; GFX942-NEXT: .LBB10_1: ; %for.cond.preheader
; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mfma_f32_32x32x1_2b_f32 a[0:31], v1, v0, a[0:31]
+; GFX942-NEXT: v_mfma_f32_32x32x1_2b_f32 a[0:31], v0, v1, a[0:31]
; GFX942-NEXT: s_add_i32 s0, s0, -1
; GFX942-NEXT: s_cmp_lg_u32 s0, 0
; GFX942-NEXT: s_cbranch_scc1 .LBB10_1
@@ -2823,12 +2823,12 @@ define <32 x float> @test_mfma_loop_non_splat_ret_use() #0 {
; GFX908-NEXT: v_accvgpr_write_b32 a2, 0
; GFX908-NEXT: v_accvgpr_write_b32 a0, 0
; GFX908-NEXT: s_mov_b32 s4, 16
-; GFX908-NEXT: v_mov_b32_e32 v0, 1.0
-; GFX908-NEXT: v_mov_b32_e32 v1, 2.0
+; GFX908-NEXT: v_mov_b32_e32 v0, 2.0
+; GFX908-NEXT: v_mov_b32_e32 v1, 1.0
; GFX908-NEXT: .LBB11_1: ; %for.cond.preheader
; GFX908-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX908-NEXT: s_nop 1
-; GFX908-NEXT: v_mfma_f32_32x32x1f32 a[0:31], v0, v1, a[0:31]
+; GFX908-NEXT: v_mfma_f32_32x32x1f32 a[0:31], v1, v0, a[0:31]
; GFX908-NEXT: s_add_i32 s4, s4, -1
; GFX908-NEXT: s_cmp_lg_u32 s4, 0
; GFX908-NEXT: s_cbranch_scc1 .LBB11_1
@@ -2904,12 +2904,12 @@ define <32 x float> @test_mfma_loop_non_splat_ret_use() #0 {
; GFX90A-NEXT: v_accvgpr_write_b32 a2, 0
; GFX90A-NEXT: v_accvgpr_write_b32 a0, 0
; GFX90A-NEXT: s_mov_b32 s4, 16
-; GFX90A-NEXT: v_mov_b32_e32 v0, 1.0
-; GFX90A-NEXT: v_mov_b32_e32 v1, 2.0
+; GFX90A-NEXT: v_mov_b32_e32 v0, 2.0
+; GFX90A-NEXT: v_mov_b32_e32 v1, 1.0
; GFX90A-NEXT: .LBB11_1: ; %for.cond.preheader
; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX90A-NEXT: s_nop 1
-; GFX90A-NEXT: v_mfma_f32_32x32x1f32 a[0:31], v0, v1, a[0:31]
+; GFX90A-NEXT: v_mfma_f32_32x32x1f32 a[0:31], v1, v0, a[0:31]
; GFX90A-NEXT: s_add_i32 s4, s4, -1
; GFX90A-NEXT: s_cmp_lg_u32 s4, 0
; GFX90A-NEXT: s_cbranch_scc1 .LBB11_1
@@ -2985,12 +2985,12 @@ define <32 x float> @test_mfma_loop_non_splat_ret_use() #0 {
; GFX942-NEXT: v_accvgpr_write_b32 a2, 0
; GFX942-NEXT: v_accvgpr_write_b32 a0, 0
; GFX942-NEXT: s_mov_b32 s0, 16
-; GFX942-NEXT: v_mov_b32_e32 v0, 1.0
-; GFX942-NEXT: v_mov_b32_e32 v1, 2.0
+; GFX942-NEXT: v_mov_b32_e32 v0, 2.0
+; GFX942-NEXT: v_mov_b32_e32 v1, 1.0
; GFX942-NEXT: .LBB11_1: ; %for.cond.preheader
; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mfma_f32_32x32x1_2b_f32 a[0:31], v0, v1, a[0:31]
+; GFX942-NEXT: v_mfma_f32_32x32x1_2b_f32 a[0:31], v1, v0, a[0:31]
; GFX942-NEXT: s_add_i32 s0, s0, -1
; GFX942-NEXT: s_cmp_lg_u32 s0, 0
; GFX942-NEXT: s_cbranch_scc1 .LBB11_1
diff --git a/llvm/test/CodeGen/AMDGPU/sched-assert-dead-def-subreg-use-other-subreg.mir b/llvm/test/CodeGen/AMDGPU/sched-assert-dead-def-subreg-use-other-subreg.mir
index af882c06e1b4e..64a1752aef874 100644
--- a/llvm/test/CodeGen/AMDGPU/sched-assert-dead-def-subreg-use-other-subreg.mir
+++ b/llvm/test/CodeGen/AMDGPU/sched-assert-dead-def-subreg-use-other-subreg.mir
@@ -24,7 +24,7 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: undef [[V_MOV_B32_e32_:%[0-9]+]].sub3:vreg_512 = V_MOV_B32_e32 0, implicit $exec
; CHECK-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- ; CHECK-NEXT: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ ; CHECK-NEXT: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 0, [[V_MOV_B32_e32_1]], implicit $exec
; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_512 = COPY [[V_MOV_B32_e32_]]
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
@@ -33,17 +33,17 @@ body: |
; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN [[V_MOV_B32_e32_]].sub3, undef %5:vgpr_32, $sgpr24_sgpr25_sgpr26_sgpr27, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32), align 8, addrspace 5)
; CHECK-NEXT: dead [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 undef %7:vgpr_32, 0, 0, implicit $exec
; CHECK-NEXT: dead [[DS_READ_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_READ_B64_gfx9 [[V_MOV_B32_e32_1]], 0, 0, implicit $exec
- ; CHECK-NEXT: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 0, [[V_MOV_B32_e32_1]], implicit $exec
; CHECK-NEXT: dead [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128 = DS_READ_B128_gfx9 [[V_ADD_U32_e32_]], 0, 0, implicit $exec
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
; CHECK-NEXT: undef [[COPY2:%[0-9]+]].sub1:vreg_512 = COPY [[COPY]].sub1
; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, 1835018 /* regdef:VGPR_32 */, def dead [[COPY1]], 1835018 /* regdef:VGPR_32 */, def dead [[COPY]].sub1, 1835017 /* reguse:VGPR_32 */, [[COPY1]], 1835017 /* reguse:VGPR_32 */, [[COPY]].sub1
; CHECK-NEXT: [[COPY2:%[0-9]+]].sub0:vreg_512 = COPY [[COPY]].sub0
; CHECK-NEXT: [[COPY2:%[0-9]+]].sub3:vreg_512 = COPY [[COPY]].sub3
+ ; CHECK-NEXT: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ ; CHECK-NEXT: dead [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 4, [[V_MOV_B32_e32_2]], implicit-def dead $vcc, implicit $exec
; CHECK-NEXT: [[COPY2:%[0-9]+]].sub2:vreg_512 = COPY undef [[V_MOV_B32_e32_1]]
; CHECK-NEXT: [[COPY2:%[0-9]+]].sub5:vreg_512 = COPY undef [[V_MOV_B32_e32_1]]
; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_512 = COPY [[COPY2]]
- ; CHECK-NEXT: dead [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 4, [[V_MOV_B32_e32_2]], implicit-def dead $vcc, implicit $exec
; CHECK-NEXT: S_BRANCH %bb.1
bb.0:
liveins: $sgpr6_sgpr7
diff --git a/llvm/test/CodeGen/AMDGPU/sched-assert-onlydbg-value-empty-region.mir b/llvm/test/CodeGen/AMDGPU/sched-assert-onlydbg-value-empty-region.mir
index 2cd78062ccbd7..7eb6e82226c31 100644
--- a/llvm/test/CodeGen/AMDGPU/sched-assert-onlydbg-value-empty-region.mir
+++ b/llvm/test/CodeGen/AMDGPU/sched-assert-onlydbg-value-empty-region.mir
@@ -49,7 +49,6 @@ body: |
; CHECK-NEXT: dead [[GLOBAL_LOAD_DWORD5:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[DEF5]], 0, 0, implicit $exec
; CHECK-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF7:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; CHECK-NEXT: [[V_LSHLREV_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHLREV_B64_e64 2, [[DEF2]], implicit $exec
; CHECK-NEXT: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
@@ -67,6 +66,7 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: S_NOP 0, implicit [[COPY]]
+ ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: S_NOP 0, implicit [[DEF8]]
; CHECK-NEXT: S_ENDPGM 0
bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/schedule-ilp.mir b/llvm/test/CodeGen/AMDGPU/schedule-ilp.mir
index f1a8af42e6347..28f36161727f7 100644
--- a/llvm/test/CodeGen/AMDGPU/schedule-ilp.mir
+++ b/llvm/test/CodeGen/AMDGPU/schedule-ilp.mir
@@ -29,15 +29,15 @@ body: |
; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
; CHECK-NEXT: $exec = S_OR_B64 $exec, [[DEF]], implicit-def $scc
- ; CHECK-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[DEF3]], 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
- ; CHECK-NEXT: [[GLOBAL_LOAD_DWORD1:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[DEF3]], 4, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
- ; CHECK-NEXT: [[GLOBAL_LOAD_DWORD2:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[DEF3]], 8, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
- ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN [[DEF1]], %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
- ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN [[DEF2]], %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 0, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
+ ; CHECK-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[DEF1]], 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
+ ; CHECK-NEXT: [[GLOBAL_LOAD_DWORD1:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[DEF1]], 4, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
+ ; CHECK-NEXT: [[GLOBAL_LOAD_DWORD2:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[DEF1]], 8, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
+ ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN [[DEF2]], %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
+ ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN [[DEF3]], %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 0, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
; CHECK-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 0, implicit $exec
; CHECK-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 -1, [[GLOBAL_LOAD_DWORD]], implicit $exec
; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec
diff --git a/llvm/test/CodeGen/AMDGPU/spill-empty-live-interval.mir b/llvm/test/CodeGen/AMDGPU/spill-empty-live-interval.mir
index 40be0c6b67ee9..dfcd90ba2a067 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-empty-live-interval.mir
+++ b/llvm/test/CodeGen/AMDGPU/spill-empty-live-interval.mir
@@ -57,9 +57,9 @@ body: |
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: undef [[V_MOV_B32_e32_:%[0-9]+]].sub2:vreg_128 = V_MOV_B32_e32 0, implicit $exec
- ; CHECK-NEXT: undef [[V_MOV_B32_e32_1:%[0-9]+]].sub2:vreg_128 = V_MOV_B32_e32 1786773504, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: undef [[V_MOV_B32_e32_1:%[0-9]+]].sub2:vreg_128 = V_MOV_B32_e32 1786773504, implicit $exec
; CHECK-NEXT: S_NOP 0, implicit [[V_MOV_B32_e32_1]].sub2
; CHECK-NEXT: S_NOP 0, implicit undef [[V_MOV_B32_e32_]].sub0
; CHECK-NEXT: S_NOP 0, implicit [[V_MOV_B32_e32_]].sub2
diff --git a/llvm/test/CodeGen/AMDGPU/vni8-across-blocks.ll b/llvm/test/CodeGen/AMDGPU/vni8-across-blocks.ll
index b045c761436de..d24d993d335df 100644
--- a/llvm/test/CodeGen/AMDGPU/vni8-across-blocks.ll
+++ b/llvm/test/CodeGen/AMDGPU/vni8-across-blocks.ll
@@ -57,20 +57,20 @@ define amdgpu_kernel void @v4i8_liveout(ptr addrspace(1) %src1, ptr addrspace(1)
; GFX942: ; %bb.0: ; %entry
; GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX942-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
-; GFX942-NEXT: v_and_b32_e32 v3, 0x3ff, v0
-; GFX942-NEXT: v_lshlrev_b32_e32 v1, 2, v3
-; GFX942-NEXT: v_mov_b32_e32 v0, 0
+; GFX942-NEXT: v_and_b32_e32 v2, 0x3ff, v0
+; GFX942-NEXT: v_lshlrev_b32_e32 v0, 2, v2
+; GFX942-NEXT: v_cmp_gt_u32_e32 vcc, 15, v2
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
-; GFX942-NEXT: global_load_dword v2, v1, s[0:1]
-; GFX942-NEXT: v_cmp_gt_u32_e32 vcc, 15, v3
+; GFX942-NEXT: global_load_dword v1, v0, s[0:1]
; GFX942-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX942-NEXT: s_cbranch_execz .LBB1_2
; GFX942-NEXT: ; %bb.1: ; %bb.1
-; GFX942-NEXT: global_load_dword v2, v1, s[2:3]
+; GFX942-NEXT: global_load_dword v1, v0, s[2:3]
; GFX942-NEXT: .LBB1_2: ; %bb.2
; GFX942-NEXT: s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v0, 0
; GFX942-NEXT: s_waitcnt vmcnt(0)
-; GFX942-NEXT: global_store_dword v0, v2, s[6:7]
+; GFX942-NEXT: global_store_dword v0, v1, s[6:7]
; GFX942-NEXT: s_endpgm
entry:
%idx = call i32 @llvm.amdgcn.workitem.id.x()
@@ -135,18 +135,18 @@ define amdgpu_kernel void @v8i8_liveout(ptr addrspace(1) %src1, ptr addrspace(1)
; GFX942: ; %bb.0: ; %entry
; GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX942-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
-; GFX942-NEXT: v_and_b32_e32 v4, 0x3ff, v0
-; GFX942-NEXT: v_lshlrev_b32_e32 v1, 3, v4
-; GFX942-NEXT: v_mov_b32_e32 v0, 0
+; GFX942-NEXT: v_and_b32_e32 v1, 0x3ff, v0
+; GFX942-NEXT: v_lshlrev_b32_e32 v0, 3, v1
+; GFX942-NEXT: v_cmp_gt_u32_e32 vcc, 15, v1
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
-; GFX942-NEXT: global_load_dwordx2 v[2:3], v1, s[0:1]
-; GFX942-NEXT: v_cmp_gt_u32_e32 vcc, 15, v4
+; GFX942-NEXT: global_load_dwordx2 v[2:3], v0, s[0:1]
; GFX942-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX942-NEXT: s_cbranch_execz .LBB3_2
; GFX942-NEXT: ; %bb.1: ; %bb.1
-; GFX942-NEXT: global_load_dwordx2 v[2:3], v1, s[2:3]
+; GFX942-NEXT: global_load_dwordx2 v[2:3], v0, s[2:3]
; GFX942-NEXT: .LBB3_2: ; %bb.2
; GFX942-NEXT: s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v0, 0
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: global_store_dwordx2 v0, v[2:3], s[6:7]
; GFX942-NEXT: s_endpgm
@@ -172,18 +172,18 @@ define amdgpu_kernel void @v16i8_liveout(ptr addrspace(1) %src1, ptr addrspace(1
; GFX942: ; %bb.0: ; %entry
; GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX942-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
-; GFX942-NEXT: v_and_b32_e32 v6, 0x3ff, v0
-; GFX942-NEXT: v_lshlrev_b32_e32 v1, 4, v6
-; GFX942-NEXT: v_mov_b32_e32 v0, 0
+; GFX942-NEXT: v_and_b32_e32 v1, 0x3ff, v0
+; GFX942-NEXT: v_lshlrev_b32_e32 v0, 4, v1
+; GFX942-NEXT: v_cmp_gt_u32_e32 vcc, 15, v1
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
-; GFX942-NEXT: global_load_dwordx4 v[2:5], v1, s[0:1]
-; GFX942-NEXT: v_cmp_gt_u32_e32 vcc, 15, v6
+; GFX942-NEXT: global_load_dwordx4 v[2:5], v0, s[0:1]
; GFX942-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX942-NEXT: s_cbranch_execz .LBB4_2
; GFX942-NEXT: ; %bb.1: ; %bb.1
-; GFX942-NEXT: global_load_dwordx4 v[2:5], v1, s[2:3]
+; GFX942-NEXT: global_load_dwordx4 v[2:5], v0, s[2:3]
; GFX942-NEXT: .LBB4_2: ; %bb.2
; GFX942-NEXT: s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v0, 0
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: global_store_dwordx4 v0, v[2:5], s[6:7]
; GFX942-NEXT: s_endpgm
@@ -617,30 +617,30 @@ define amdgpu_kernel void @v8i8_multi_block(ptr addrspace(1) %src1, ptr addrspac
; GFX942-LABEL: v8i8_multi_block:
; GFX942: ; %bb.0: ; %entry
; GFX942-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x24
-; GFX942-NEXT: v_and_b32_e32 v3, 0x3ff, v0
-; GFX942-NEXT: v_lshlrev_b32_e32 v4, 3, v3
-; GFX942-NEXT: v_mov_b32_e32 v2, 0
-; GFX942-NEXT: v_cmp_gt_u32_e32 vcc, 15, v3
+; GFX942-NEXT: v_and_b32_e32 v2, 0x3ff, v0
+; GFX942-NEXT: v_lshlrev_b32_e32 v3, 3, v2
+; GFX942-NEXT: v_cmp_gt_u32_e32 vcc, 15, v2
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
-; GFX942-NEXT: global_load_dwordx2 v[0:1], v4, s[8:9]
+; GFX942-NEXT: global_load_dwordx2 v[0:1], v3, s[8:9]
; GFX942-NEXT: s_waitcnt vmcnt(0)
-; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[0:1]
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[0:1]
; GFX942-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX942-NEXT: s_cbranch_execz .LBB11_4
; GFX942-NEXT: ; %bb.1: ; %bb.1
-; GFX942-NEXT: global_load_dwordx2 v[6:7], v4, s[10:11]
-; GFX942-NEXT: v_cmp_gt_u32_e32 vcc, 7, v3
+; GFX942-NEXT: global_load_dwordx2 v[4:5], v3, s[10:11]
+; GFX942-NEXT: v_cmp_gt_u32_e32 vcc, 7, v2
; GFX942-NEXT: s_and_saveexec_b64 s[2:3], vcc
; GFX942-NEXT: s_cbranch_execz .LBB11_3
; GFX942-NEXT: ; %bb.2: ; %bb.2
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[12:13]
+; GFX942-NEXT: v_mov_b32_e32 v2, 0
+; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[12:13]
; GFX942-NEXT: .LBB11_3: ; %Flow
; GFX942-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX942-NEXT: .LBB11_4: ; %bb.3
; GFX942-NEXT: s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v0, 0
; GFX942-NEXT: s_waitcnt vmcnt(0)
-; GFX942-NEXT: global_store_dwordx2 v2, v[6:7], s[14:15]
+; GFX942-NEXT: global_store_dwordx2 v0, v[4:5], s[14:15]
; GFX942-NEXT: s_endpgm
entry:
%idx = call i32 @llvm.amdgcn.workitem.id.x()
@@ -858,19 +858,19 @@ define amdgpu_kernel void @v8i8_mfma_i8(ptr addrspace(1) %src1, ptr addrspace(1)
; GFX942-LABEL: v8i8_mfma_i8:
; GFX942: ; %bb.0: ; %entry
; GFX942-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x24
-; GFX942-NEXT: v_and_b32_e32 v4, 0x3ff, v0
-; GFX942-NEXT: v_lshlrev_b32_e32 v1, 3, v4
-; GFX942-NEXT: v_mov_b32_e32 v0, 0
-; GFX942-NEXT: v_cmp_gt_u32_e32 vcc, 15, v4
+; GFX942-NEXT: v_and_b32_e32 v1, 0x3ff, v0
+; GFX942-NEXT: v_lshlrev_b32_e32 v0, 3, v1
+; GFX942-NEXT: v_cmp_gt_u32_e32 vcc, 15, v1
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
-; GFX942-NEXT: global_load_dwordx2 v[2:3], v1, s[8:9]
+; GFX942-NEXT: global_load_dwordx2 v[2:3], v0, s[8:9]
; GFX942-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX942-NEXT: s_cbranch_execz .LBB14_2
; GFX942-NEXT: ; %bb.1: ; %bb.1
-; GFX942-NEXT: global_load_dwordx2 v[2:3], v1, s[10:11]
+; GFX942-NEXT: global_load_dwordx2 v[2:3], v0, s[10:11]
; GFX942-NEXT: .LBB14_2: ; %bb.2
; GFX942-NEXT: s_or_b64 exec, exec, s[0:1]
; GFX942-NEXT: s_load_dwordx4 s[0:3], s[14:15], 0x0
+; GFX942-NEXT: v_mov_b32_e32 v0, 0
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-NEXT: v_accvgpr_write_b32 a0, s0
; GFX942-NEXT: v_accvgpr_write_b32 a1, s1
@@ -995,20 +995,20 @@ define amdgpu_kernel void @v8i8_intrinsic(ptr addrspace(1) %src1, ptr addrspace(
; GFX942: ; %bb.0: ; %entry
; GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX942-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
-; GFX942-NEXT: v_and_b32_e32 v4, 0x3ff, v0
-; GFX942-NEXT: v_lshlrev_b32_e32 v3, 3, v4
-; GFX942-NEXT: v_mov_b32_e32 v2, 0
+; GFX942-NEXT: v_and_b32_e32 v3, 0x3ff, v0
+; GFX942-NEXT: v_lshlrev_b32_e32 v2, 3, v3
+; GFX942-NEXT: v_cmp_gt_u32_e32 vcc, 15, v3
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
-; GFX942-NEXT: global_load_dwordx2 v[0:1], v3, s[0:1]
-; GFX942-NEXT: v_cmp_gt_u32_e32 vcc, 15, v4
+; GFX942-NEXT: global_load_dwordx2 v[0:1], v2, s[0:1]
; GFX942-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX942-NEXT: s_cbranch_execz .LBB16_2
; GFX942-NEXT: ; %bb.1: ; %bb.1
-; GFX942-NEXT: global_load_dwordx2 v[0:1], v3, s[2:3]
+; GFX942-NEXT: global_load_dwordx2 v[0:1], v2, s[2:3]
; GFX942-NEXT: .LBB16_2: ; %bb.2
; GFX942-NEXT: s_or_b64 exec, exec, s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: v_pk_fma_f32 v[0:1], v[0:1], v[0:1], v[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v2, 0
; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[6:7]
; GFX942-NEXT: s_endpgm
entry:
diff --git a/llvm/test/CodeGen/AMDGPU/wave32.ll b/llvm/test/CodeGen/AMDGPU/wave32.ll
index 75db3879e7b03..c7827ed300ffe 100644
--- a/llvm/test/CodeGen/AMDGPU/wave32.ll
+++ b/llvm/test/CodeGen/AMDGPU/wave32.ll
@@ -2250,13 +2250,13 @@ define amdgpu_ps void @test_wqm_vote(float %a) {
; GFX1032: ; %bb.0:
; GFX1032-NEXT: v_cmp_neq_f32_e32 vcc_lo, 0, v0
; GFX1032-NEXT: s_mov_b32 s0, exec_lo
-; GFX1032-NEXT: v_mov_b32_e32 v0, 0
; GFX1032-NEXT: s_wqm_b32 s1, vcc_lo
; GFX1032-NEXT: s_andn2_b32 s1, exec_lo, s1
; GFX1032-NEXT: s_andn2_b32 s0, s0, s1
; GFX1032-NEXT: s_cbranch_scc0 .LBB44_2
; GFX1032-NEXT: ; %bb.1:
; GFX1032-NEXT: s_and_b32 exec_lo, exec_lo, s0
+; GFX1032-NEXT: v_mov_b32_e32 v0, 0
; GFX1032-NEXT: exp mrt0 off, off, off, off
; GFX1032-NEXT: s_endpgm
; GFX1032-NEXT: .LBB44_2:
@@ -2268,13 +2268,13 @@ define amdgpu_ps void @test_wqm_vote(float %a) {
; GFX1064: ; %bb.0:
; GFX1064-NEXT: v_cmp_neq_f32_e32 vcc, 0, v0
; GFX1064-NEXT: s_mov_b64 s[0:1], exec
-; GFX1064-NEXT: v_mov_b32_e32 v0, 0
; GFX1064-NEXT: s_wqm_b64 s[2:3], vcc
; GFX1064-NEXT: s_andn2_b64 s[2:3], exec, s[2:3]
; GFX1064-NEXT: s_andn2_b64 s[0:1], s[0:1], s[2:3]
; GFX1064-NEXT: s_cbranch_scc0 .LBB44_2
; GFX1064-NEXT: ; %bb.1:
; GFX1064-NEXT: s_and_b64 exec, exec, s[0:1]
+; GFX1064-NEXT: v_mov_b32_e32 v0, 0
; GFX1064-NEXT: exp mrt0 off, off, off, off
; GFX1064-NEXT: s_endpgm
; GFX1064-NEXT: .LBB44_2:
>From 295cafeb935f78496edd5cd35e00ab3653b48f8f Mon Sep 17 00:00:00 2001
From: Lucas Ramirez <lucas.rami at proton.me>
Date: Mon, 11 Aug 2025 21:53:38 +0000
Subject: [PATCH 02/10] Format
---
llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index fca3eda298f7a..04492cb4ff8c8 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -1313,7 +1313,7 @@ bool PreRARematStage::initGCNSchedStage() {
unsigned DynamicVGPRBlockSize = MFI.getDynamicVGPRBlockSize();
AchievedOcc = MFI.getMaxWavesPerEU();
for (unsigned I : RescheduleRegions.set_bits()) {
- const GCNRegPressure &RP = RPTargets[I].getCurrentRP();;
+ const GCNRegPressure &RP = RPTargets[I].getCurrentRP();
DAG.Pressure[I] = RP;
unsigned NewRegionOcc = RP.getOccupancy(ST, DynamicVGPRBlockSize);
AchievedOcc = std::min(AchievedOcc, NewRegionOcc);
>From 98a4392e206660ce784ebe773e33c4f62b49d792 Mon Sep 17 00:00:00 2001
From: Lucas Ramirez <lucas.rami at proton.me>
Date: Tue, 12 Aug 2025 14:27:11 +0000
Subject: [PATCH 03/10] Address feedback + fix failing test
---
llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp | 32 ++++------
...ne-scheduler-rematerialization-scoring.mir | 59 ++++++++-----------
.../machine-scheduler-sink-trivial-remats.mir | 4 +-
...dgpu_generated_funcs.ll.generated.expected | 8 ++-
...pu_generated_funcs.ll.nogenerated.expected | 8 ++-
5 files changed, 49 insertions(+), 62 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index 04492cb4ff8c8..d133964758485 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -40,8 +40,6 @@
#include "llvm/MC/MCSchedule.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
-#include <deque>
#include <limits>
#include <string>
@@ -1121,16 +1119,16 @@ void PreRARematStage::printTargetRegions(bool PrintAll) const {
void PreRARematStage::RematReg::print(
const DenseMap<MachineInstr *, unsigned> &MIRegion) const {
- REMAT_DEBUG(dbgs() << " [" << MIRegion.at(DefMI) << "] " << *DefMI);
- REMAT_DEBUG(dbgs() << " -> used in [" << UseRegion << "] " << *UseMI);
+ dbgs() << REMAT_PREFIX << " [" << MIRegion.at(DefMI) << "] " << *DefMI;
+ dbgs() << REMAT_PREFIX << " -> used in [" << UseRegion << "] " << *UseMI;
const unsigned NumRegions = Live.size();
- REMAT_DEBUG(dbgs() << " Guaranteed RP reduction in:");
+ dbgs() << REMAT_PREFIX << " Guaranteed RP reduction in:";
for (unsigned I = 0; I < NumRegions; ++I) {
if (isBeneficialRegion(I))
dbgs() << " [" << I << "]";
}
dbgs() << '\n';
- REMAT_DEBUG(dbgs() << " Possible RP reduction in:");
+ dbgs() << REMAT_PREFIX << " Possible RP reduction in:";
for (unsigned I = 0; I < NumRegions; ++I) {
if (isMaybeBeneficialRegion(I))
dbgs() << " [" << I << "]";
@@ -1883,6 +1881,9 @@ bool PreRARematStage::collectRematRegs(ArrayRef<uint64_t> RegionFreq) {
   // Set of registers already marked for potential rematerialization; used for
   // remat chain checks.
DenseSet<Register> RematRegSet;
+ auto IsMORematable = [&RematRegSet](const MachineOperand &MO) -> bool {
+ return MO.isReg() && RematRegSet.contains(MO.getReg());
+ };
// Identify rematerializable instructions in the function.
for (unsigned I = 0, E = DAG.Regions.size(); I != E; ++I) {
@@ -1917,11 +1918,8 @@ bool PreRARematStage::collectRematRegs(ArrayRef<uint64_t> RegionFreq) {
// either rematerializing the candidates in careful ordering, or
// deferring the MBB RP walk until the entire chain has been
// rematerialized.
- MachineOperand &UseFirstMO = UseMI->getOperand(0);
- if ((UseFirstMO.isReg() && RematRegSet.contains(UseFirstMO.getReg())) ||
- llvm::any_of(DefMI.operands(), [&RematRegSet](MachineOperand &MO) {
- return MO.isReg() && RematRegSet.contains(MO.getReg());
- }))
+ const MachineOperand &UseMO = UseMI->getOperand(0);
+ if (IsMORematable(UseMO) || llvm::any_of(DefMI.operands(), IsMORematable))
continue;
     // Do not rematerialize an instruction if it uses registers that aren't
@@ -1965,9 +1963,7 @@ PreRARematStage::RematReg::RematReg(
Live |= LiveOut;
// Store the register's lane bitmask.
- unsigned SubReg = DefMI->getOperand(0).getSubReg();
- Mask = SubReg ? DAG.TRI->getSubRegIndexLaneMask(SubReg)
- : DAG.MRI.getMaxLaneMaskForVReg(Reg);
+ Mask = DAG.TRI->getSubRegIndexLaneMask(DefMI->getOperand(0).getSubReg());
}
MachineInstr *
@@ -2054,8 +2050,7 @@ MachineInstr *PreRARematStage::rematerialize(const RematReg &Remat,
const SIInstrInfo *TII = MF.getSubtarget<GCNSubtarget>().getInstrInfo();
MachineInstr &DefMI = *Remat.DefMI;
Register Reg = DefMI.getOperand(0).getReg();
- const TargetRegisterClass *RC = DAG.MRI.getRegClass(Reg);
- Register NewReg = DAG.MRI.createVirtualRegister(RC);
+ Register NewReg = DAG.MRI.cloneVirtualRegister(Reg);
// Rematerialize the register in the region where it is used.
MachineBasicBlock::iterator InsertPos = Remat.UseMI;
@@ -2121,15 +2116,14 @@ void PreRARematStage::rollback(const RollbackReg &Rollback) const {
unsigned DefRegion = MIRegion.at(Remat->DefMI);
MachineBasicBlock *MBB = RegionBB[DefRegion];
Register Reg = RematMI->getOperand(0).getReg();
- const TargetRegisterClass *RC = DAG.MRI.getRegClass(Reg);
- Register NewReg = DAG.MRI.createVirtualRegister(RC);
+ Register NewReg = DAG.MRI.cloneVirtualRegister(Reg);
// Re-rematerialize MI in its original region. Note that it may not be
// rematerialized exactly in the same position as originally within the
// region, but it should not matter much.
MachineBasicBlock::iterator InsertPos(DAG.Regions[DefRegion].second);
TII->reMaterialize(*MBB, InsertPos, NewReg, 0, *RematMI, *DAG.TRI);
- REMAT_DEBUG(dbgs() << "[" << DefRegion << "] Re-rematerialized as "
+ REMAT_DEBUG(dbgs() << '[' << DefRegion << "] Re-rematerialized as "
<< *std::prev(InsertPos));
Remat->UseMI->substituteRegister(Reg, NewReg, 0, *DAG.TRI);
DAG.deleteMI(Remat->UseRegion, RematMI);
diff --git a/llvm/test/CodeGen/AMDGPU/machine-scheduler-rematerialization-scoring.mir b/llvm/test/CodeGen/AMDGPU/machine-scheduler-rematerialization-scoring.mir
index 2fd9abace39a8..dc35cbc96db4c 100644
--- a/llvm/test/CodeGen/AMDGPU/machine-scheduler-rematerialization-scoring.mir
+++ b/llvm/test/CodeGen/AMDGPU/machine-scheduler-rematerialization-scoring.mir
@@ -28,17 +28,6 @@
# | 5 |
# +---+
---- |
- define void @favor_always_benef() {
- ret void
- }
- define void @favor_live_through_in_high_freq_region() {
- ret void
- }
- define void @use_only_region_possible() {
- ret void
- }
----
# Rematerializing %32 is always beneficial because the defining and using
# regions have the same frequency. It should be rematerialized first.
name: favor_always_benef
@@ -51,12 +40,12 @@ body: |
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: liveins: $vgpr0, $sgpr0_sgpr1
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: %mem_addr:sgpr_64(p4) = COPY $sgpr0_sgpr1
- ; CHECK-NEXT: %loop_if_bound:vgpr_32(s32) = COPY $vgpr0
- ; CHECK-NEXT: %mem_data:sreg_64_xexec = S_LOAD_DWORDX2_IMM %mem_addr(p4), 52, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4)
+ ; CHECK-NEXT: %mem_addr:sgpr_64 = COPY $sgpr0_sgpr1
+ ; CHECK-NEXT: %loop_if_bound:vgpr_32 = COPY $vgpr0
+ ; CHECK-NEXT: %mem_data:sreg_64_xexec = S_LOAD_DWORDX2_IMM %mem_addr, 52, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4)
; CHECK-NEXT: [[V_CVT_I32_F64_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 0, implicit $exec, implicit $mode
; CHECK-NEXT: [[V_CVT_I32_F64_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode
- ; CHECK-NEXT: %exec_loop_mask:sreg_64 = V_CMP_GT_U32_e64 %mem_data.sub0, %loop_if_bound(s32), implicit $exec
+ ; CHECK-NEXT: %exec_loop_mask:sreg_64 = V_CMP_GT_U32_e64 %mem_data.sub0, %loop_if_bound, implicit $exec
; CHECK-NEXT: %loop_counter:sreg_32 = COPY %mem_data.sub1
; CHECK-NEXT: [[V_CVT_I32_F64_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode
; CHECK-NEXT: [[V_CVT_I32_F64_e32_3:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode
@@ -126,10 +115,10 @@ body: |
bb.0:
liveins: $vgpr0, $sgpr0_sgpr1
- %mem_addr:sgpr_64(p4) = COPY $sgpr0_sgpr1
- %loop_if_bound:vgpr_32(s32) = COPY $vgpr0
- %mem_data:sreg_64_xexec = S_LOAD_DWORDX2_IMM %mem_addr(p4), 52, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4)
- %exec_loop_mask:sreg_64 = V_CMP_GT_U32_e64 %mem_data.sub0, killed %loop_if_bound(s32), implicit $exec
+ %mem_addr:sgpr_64 = COPY $sgpr0_sgpr1
+ %loop_if_bound:vgpr_32 = COPY $vgpr0
+ %mem_data:sreg_64_xexec = S_LOAD_DWORDX2_IMM %mem_addr, 52, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4)
+ %exec_loop_mask:sreg_64 = V_CMP_GT_U32_e64 %mem_data.sub0, killed %loop_if_bound, implicit $exec
%loop_counter:sreg_32 = COPY %mem_data.sub1
%0:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 0, implicit $exec, implicit $mode
@@ -217,12 +206,12 @@ body: |
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: liveins: $vgpr0, $sgpr0_sgpr1
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: %mem_addr:sgpr_64(p4) = COPY $sgpr0_sgpr1
- ; CHECK-NEXT: %loop_if_bound:vgpr_32(s32) = COPY $vgpr0
- ; CHECK-NEXT: %mem_data:sreg_64_xexec = S_LOAD_DWORDX2_IMM %mem_addr(p4), 52, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4)
+ ; CHECK-NEXT: %mem_addr:sgpr_64 = COPY $sgpr0_sgpr1
+ ; CHECK-NEXT: %loop_if_bound:vgpr_32 = COPY $vgpr0
+ ; CHECK-NEXT: %mem_data:sreg_64_xexec = S_LOAD_DWORDX2_IMM %mem_addr, 52, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4)
; CHECK-NEXT: [[V_CVT_I32_F64_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 0, implicit $exec, implicit $mode
; CHECK-NEXT: [[V_CVT_I32_F64_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode
- ; CHECK-NEXT: %exec_loop_mask:sreg_64 = V_CMP_GT_U32_e64 %mem_data.sub0, %loop_if_bound(s32), implicit $exec
+ ; CHECK-NEXT: %exec_loop_mask:sreg_64 = V_CMP_GT_U32_e64 %mem_data.sub0, %loop_if_bound, implicit $exec
; CHECK-NEXT: %loop_counter:sreg_32 = COPY %mem_data.sub1
; CHECK-NEXT: [[V_CVT_I32_F64_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode
; CHECK-NEXT: [[V_CVT_I32_F64_e32_3:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode
@@ -292,10 +281,10 @@ body: |
bb.0:
liveins: $vgpr0, $sgpr0_sgpr1
- %mem_addr:sgpr_64(p4) = COPY $sgpr0_sgpr1
- %loop_if_bound:vgpr_32(s32) = COPY $vgpr0
- %mem_data:sreg_64_xexec = S_LOAD_DWORDX2_IMM %mem_addr(p4), 52, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4)
- %exec_loop_mask:sreg_64 = V_CMP_GT_U32_e64 %mem_data.sub0, killed %loop_if_bound(s32), implicit $exec
+ %mem_addr:sgpr_64 = COPY $sgpr0_sgpr1
+ %loop_if_bound:vgpr_32 = COPY $vgpr0
+ %mem_data:sreg_64_xexec = S_LOAD_DWORDX2_IMM %mem_addr, 52, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4)
+ %exec_loop_mask:sreg_64 = V_CMP_GT_U32_e64 %mem_data.sub0, killed %loop_if_bound, implicit $exec
%loop_counter:sreg_32 = COPY %mem_data.sub1
%0:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 0, implicit $exec, implicit $mode
@@ -381,9 +370,9 @@ body: |
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: liveins: $vgpr0, $sgpr0_sgpr1
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: %mem_addr:sgpr_64(p4) = COPY $sgpr0_sgpr1
- ; CHECK-NEXT: %loop_if_bound:vgpr_32(s32) = COPY $vgpr0
- ; CHECK-NEXT: %mem_data:sreg_64_xexec = S_LOAD_DWORDX2_IMM %mem_addr(p4), 52, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4)
+ ; CHECK-NEXT: %mem_addr:sgpr_64 = COPY $sgpr0_sgpr1
+ ; CHECK-NEXT: %loop_if_bound:vgpr_32 = COPY $vgpr0
+ ; CHECK-NEXT: %mem_data:sreg_64_xexec = S_LOAD_DWORDX2_IMM %mem_addr, 52, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4)
; CHECK-NEXT: [[V_CVT_I32_F64_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 0, implicit $exec, implicit $mode
; CHECK-NEXT: [[V_CVT_I32_F64_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode
; CHECK-NEXT: [[V_CVT_I32_F64_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode
@@ -408,7 +397,7 @@ body: |
; CHECK-NEXT: [[V_CVT_I32_F64_e32_21:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 30, implicit $exec, implicit $mode, implicit-def $m0
; CHECK-NEXT: [[V_CVT_I32_F64_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 31, implicit $exec, implicit $mode, implicit-def $m0
; CHECK-NEXT: [[V_CVT_I32_F64_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 32, implicit $exec, implicit $mode, implicit-def $m0
- ; CHECK-NEXT: %exec_loop_mask:sreg_64 = V_CMP_GT_U32_e64 %mem_data.sub0, %loop_if_bound(s32), implicit $exec
+ ; CHECK-NEXT: %exec_loop_mask:sreg_64 = V_CMP_GT_U32_e64 %mem_data.sub0, %loop_if_bound, implicit $exec
; CHECK-NEXT: %loop_counter:sreg_32 = COPY %mem_data.sub1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
@@ -456,10 +445,10 @@ body: |
bb.0:
liveins: $vgpr0, $sgpr0_sgpr1
- %mem_addr:sgpr_64(p4) = COPY $sgpr0_sgpr1
- %loop_if_bound:vgpr_32(s32) = COPY $vgpr0
- %mem_data:sreg_64_xexec = S_LOAD_DWORDX2_IMM %mem_addr(p4), 52, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4)
- %exec_loop_mask:sreg_64 = V_CMP_GT_U32_e64 %mem_data.sub0, killed %loop_if_bound(s32), implicit $exec
+ %mem_addr:sgpr_64 = COPY $sgpr0_sgpr1
+ %loop_if_bound:vgpr_32 = COPY $vgpr0
+ %mem_data:sreg_64_xexec = S_LOAD_DWORDX2_IMM %mem_addr, 52, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4)
+ %exec_loop_mask:sreg_64 = V_CMP_GT_U32_e64 %mem_data.sub0, killed %loop_if_bound, implicit $exec
%loop_counter:sreg_32 = COPY %mem_data.sub1
%0:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 0, implicit $exec, implicit $mode
diff --git a/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats.mir b/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats.mir
index b6a13809a620b..584fd19eb178e 100644
--- a/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats.mir
+++ b/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats.mir
@@ -8695,7 +8695,6 @@ body: |
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: [[V_CVT_I32_F64_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 10, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
- ; GFX908-NEXT: dead [[DEF1:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
; GFX908-NEXT: [[V_CVT_I32_F64_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 11, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 12, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_3:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 13, implicit $exec, implicit $mode, implicit-def $m0
@@ -8737,8 +8736,9 @@ body: |
; GFX908-NEXT: [[V_CVT_I32_F64_e32_39:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 49, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_40:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 50, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_41:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 51, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_42:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 52, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: undef [[V_RCP_F32_e32_:%[0-9]+]].sub0:vreg_64_align2 = nnan ninf nsz arcp contract afn reassoc nofpexcept V_RCP_F32_e32 [[DEF]].sub0, implicit $mode, implicit $exec
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_42:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 52, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: dead [[DEF1:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.1:
; GFX908-NEXT: successors: %bb.2(0x80000000)
diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/amdgpu_generated_funcs.ll.generated.expected b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/amdgpu_generated_funcs.ll.generated.expected
index 429bee4195fa9..bf8f3655be118 100644
--- a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/amdgpu_generated_funcs.ll.generated.expected
+++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/amdgpu_generated_funcs.ll.generated.expected
@@ -83,9 +83,11 @@ attributes #0 = { noredzone nounwind ssp uwtable "frame-pointer"="all" }
; CHECK-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:8
; CHECK-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:12
; CHECK-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:16
-; CHECK-NEXT: s_mov_b64 s[4:5], 0
-; CHECK-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]
-; CHECK-NEXT: s_xor_b64 s[4:5], exec, s[6:7]
+; CHECK-NEXT: s_mov_b64 s[4:5], exec
+; CHECK-NEXT: s_mov_b64 s[6:7], 0
+; CHECK-NEXT: s_and_b64 s[6:7], s[4:5], s[6:7]
+; CHECK-NEXT: s_xor_b64 s[4:5], s[6:7], s[4:5]
+; CHECK-NEXT: s_mov_b64 exec, s[6:7]
; CHECK-NEXT: s_cbranch_execz .LBB0_2
; CHECK-NEXT: ; %bb.1:
; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:4
diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/amdgpu_generated_funcs.ll.nogenerated.expected b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/amdgpu_generated_funcs.ll.nogenerated.expected
index 842fd8836da7e..f558c88505e85 100644
--- a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/amdgpu_generated_funcs.ll.nogenerated.expected
+++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/amdgpu_generated_funcs.ll.nogenerated.expected
@@ -24,9 +24,11 @@ define dso_local i32 @check_boundaries() #0 {
; CHECK-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:8
; CHECK-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:12
; CHECK-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:16
-; CHECK-NEXT: s_mov_b64 s[4:5], 0
-; CHECK-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]
-; CHECK-NEXT: s_xor_b64 s[4:5], exec, s[6:7]
+; CHECK-NEXT: s_mov_b64 s[4:5], exec
+; CHECK-NEXT: s_mov_b64 s[6:7], 0
+; CHECK-NEXT: s_and_b64 s[6:7], s[4:5], s[6:7]
+; CHECK-NEXT: s_xor_b64 s[4:5], s[6:7], s[4:5]
+; CHECK-NEXT: s_mov_b64 exec, s[6:7]
; CHECK-NEXT: s_cbranch_execz .LBB0_2
; CHECK-NEXT: ; %bb.1:
; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:4
>From f826d93201aa2d5f568d8588de76f0fddb6ddb4a Mon Sep 17 00:00:00 2001
From: Lucas Ramirez <lucas.rami at proton.me>
Date: Wed, 13 Aug 2025 13:19:01 +0000
Subject: [PATCH 04/10] Remove REMAT_DEBUG and break ties in score
---
llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp | 14 +++++++-------
llvm/lib/Target/AMDGPU/GCNSchedStrategy.h | 10 ++++++++--
2 files changed, 15 insertions(+), 9 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index d133964758485..3a3ec682fe416 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -1105,16 +1105,16 @@ bool ClusteredLowOccStage::initGCNSchedStage() {
void PreRARematStage::printTargetRegions(bool PrintAll) const {
if (PrintAll) {
for (auto [I, Target] : enumerate(RPTargets))
- REMAT_DEBUG(dbgs() << " [" << I << "] " << Target << '\n');
+ dbgs() << REMAT_PREFIX << " [" << I << "] " << Target << '\n';
return;
}
if (TargetRegions.none()) {
- REMAT_DEBUG(dbgs() << "No target regions\n");
+ dbgs() << REMAT_PREFIX << "No target regions\n";
return;
}
- REMAT_DEBUG(dbgs() << "Target regions:\n");
+ dbgs() << REMAT_PREFIX << "Target regions:\n";
for (unsigned I : TargetRegions.set_bits())
- REMAT_DEBUG(dbgs() << " [" << I << "] " << RPTargets[I] << '\n');
+ dbgs() << REMAT_PREFIX << " [" << I << "] " << RPTargets[I] << '\n';
}
void PreRARematStage::RematReg::print(
@@ -1230,8 +1230,8 @@ bool PreRARematStage::initGCNSchedStage() {
}
unsetSatisifedRPTargets(RescheduleRegions);
+ LLVM_DEBUG(printTargetRegions());
#ifndef NDEBUG
- printTargetRegions();
unsigned RoundNum = 0;
#endif
@@ -1242,7 +1242,7 @@ bool PreRARematStage::initGCNSchedStage() {
// (Re-)Score and (re-)sort all remats in increasing score order.
for (ScoredRemat &Remat : ScoredRemats)
Remat.update(TargetRegions, RPTargets, RegionFreq, !TargetOcc);
- stable_sort(ScoredRemats);
+ sort(ScoredRemats);
REMAT_DEBUG({
dbgs() << "==== ROUND " << RoundNum << " ====\n";
@@ -1294,8 +1294,8 @@ bool PreRARematStage::initGCNSchedStage() {
unsetSatisifedRPTargets(Remat.Live);
}
+ LLVM_DEBUG(printTargetRegions());
#ifndef NDEBUG
- printTargetRegions();
++RoundNum;
#endif
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
index 8134a768a2543..8694218cdbb78 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
@@ -530,8 +530,14 @@ class PreRARematStage : public GCNSchedStage {
int getScore() const { return Score; }
- bool operator<(const ScoredRemat &O) const { return Score < O.Score; }
- bool operator==(const ScoredRemat &O) const { return Score == O.Score; }
+ bool operator<(const ScoredRemat &O) const {
+ // Break ties using pointer to rematerializable register. Since
+ // rematerializations are collected in instruction order, registers
+ // appearing earlier have a "higher score" than those appearing later.
+ if (Score == O.Score)
+ return Remat > O.Remat;
+ return Score < O.Score;
+ }
private:
/// Estimated save/restore latency costs for spilling a register to stack.
>From 39e11176824cf00ed1a87029838bcb7dd5e00617 Mon Sep 17 00:00:00 2001
From: Lucas Ramirez <lucas.rami at proton.me>
Date: Wed, 13 Aug 2025 15:43:11 +0000
Subject: [PATCH 05/10] Fix failing tests and roll back mask change
---
llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp | 4 +-
.../AMDGPU/dbg-value-ends-sched-region.mir | 34 ++--
...ne-scheduler-rematerialization-scoring.mir | 126 ++++++------
.../machine-scheduler-sink-trivial-remats.mir | 188 +++++++++---------
llvm/test/CodeGen/AMDGPU/mfma-loop.ll | 162 +++++++--------
...ssert-dead-def-subreg-use-other-subreg.mir | 6 +-
6 files changed, 261 insertions(+), 259 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index 3a3ec682fe416..48245268919b7 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -1963,7 +1963,9 @@ PreRARematStage::RematReg::RematReg(
Live |= LiveOut;
// Store the register's lane bitmask.
- Mask = DAG.TRI->getSubRegIndexLaneMask(DefMI->getOperand(0).getSubReg());
+ unsigned SubIdx = DefMI->getOperand(0).getSubReg();
+ Mask = SubIdx ? DAG.TRI->getSubRegIndexLaneMask(SubIdx)
+ : DAG.MRI.getMaxLaneMaskForVReg(Reg);
}
MachineInstr *
diff --git a/llvm/test/CodeGen/AMDGPU/dbg-value-ends-sched-region.mir b/llvm/test/CodeGen/AMDGPU/dbg-value-ends-sched-region.mir
index b734370de69a3..b38dc4d21c10c 100644
--- a/llvm/test/CodeGen/AMDGPU/dbg-value-ends-sched-region.mir
+++ b/llvm/test/CodeGen/AMDGPU/dbg-value-ends-sched-region.mir
@@ -50,39 +50,39 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: undef [[V_MOV_B32_e32_:%[0-9]+]].sub0:vreg_64 = V_MOV_B32_e32 0, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.2(0x80000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: undef [[V_ADD_CO_U32_e64_:%[0-9]+]].sub0:vreg_64, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[DEF4]].sub0, [[DEF6]].sub0, 0, implicit $exec
- ; CHECK-NEXT: dead undef [[V_ADD_CO_U32_e64_:%[0-9]+]].sub1:vreg_64, dead [[V_ADDC_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[DEF4]].sub1, [[DEF6]].sub1, [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
- ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[DEF1]], 0, 0, implicit $exec :: (load (s64), addrspace 1)
- ; CHECK-NEXT: dead [[COPY2:%[0-9]+]]:vreg_64 = COPY [[DEF]]
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[DEF3]]
- ; CHECK-NEXT: dead [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[DEF2]]
- ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[DEF5]].sub1
- ; CHECK-NEXT: undef [[V_MOV_B32_e32_:%[0-9]+]].sub0:vreg_64 = V_MOV_B32_e32 0, implicit $exec
+ ; CHECK-NEXT: undef [[V_ADD_CO_U32_e64_:%[0-9]+]].sub0:vreg_64, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[DEF3]].sub0, [[DEF5]].sub0, 0, implicit $exec
+ ; CHECK-NEXT: dead undef [[V_ADD_CO_U32_e64_:%[0-9]+]].sub1:vreg_64, dead [[V_ADDC_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[DEF3]].sub1, [[DEF5]].sub1, [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
+ ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[DEF]], 0, 0, implicit $exec :: (load (s64), addrspace 1)
+ ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: dead [[COPY2:%[0-9]+]]:vreg_64 = COPY [[DEF8]]
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[DEF2]]
+ ; CHECK-NEXT: dead [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[DEF1]]
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[DEF4]].sub1
; CHECK-NEXT: dead [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B32_e32_]].sub0
- ; CHECK-NEXT: dead [[V_CMP_GT_I32_e64_:%[0-9]+]]:sreg_64 = V_CMP_GT_I32_e64 4, [[DEF7]], implicit $exec
- ; CHECK-NEXT: GLOBAL_STORE_DWORDX2 [[COPY]], [[DEF8]], 288, 0, implicit $exec :: (store (s64), addrspace 1)
+ ; CHECK-NEXT: dead [[V_CMP_GT_I32_e64_:%[0-9]+]]:sreg_64 = V_CMP_GT_I32_e64 4, [[DEF6]], implicit $exec
+ ; CHECK-NEXT: GLOBAL_STORE_DWORDX2 [[COPY]], [[DEF7]], 288, 0, implicit $exec :: (store (s64), addrspace 1)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: successors: %bb.3(0x80000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = COPY [[COPY3]]
+ ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = COPY [[COPY3]]
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.4(0x40000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: undef [[DEF5:%[0-9]+]].sub1:vreg_64 = COPY [[COPY5]]
+ ; CHECK-NEXT: undef [[DEF4:%[0-9]+]].sub1:vreg_64 = COPY [[COPY5]]
; CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.4:
diff --git a/llvm/test/CodeGen/AMDGPU/machine-scheduler-rematerialization-scoring.mir b/llvm/test/CodeGen/AMDGPU/machine-scheduler-rematerialization-scoring.mir
index dc35cbc96db4c..37ea20a4683da 100644
--- a/llvm/test/CodeGen/AMDGPU/machine-scheduler-rematerialization-scoring.mir
+++ b/llvm/test/CodeGen/AMDGPU/machine-scheduler-rematerialization-scoring.mir
@@ -209,38 +209,38 @@ body: |
; CHECK-NEXT: %mem_addr:sgpr_64 = COPY $sgpr0_sgpr1
; CHECK-NEXT: %loop_if_bound:vgpr_32 = COPY $vgpr0
; CHECK-NEXT: %mem_data:sreg_64_xexec = S_LOAD_DWORDX2_IMM %mem_addr, 52, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4)
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 0, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode
; CHECK-NEXT: %exec_loop_mask:sreg_64 = V_CMP_GT_U32_e64 %mem_data.sub0, %loop_if_bound, implicit $exec
; CHECK-NEXT: %loop_counter:sreg_32 = COPY %mem_data.sub1
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_3:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_4:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 4, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_5:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 5, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_6:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 6, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_7:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 7, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_8:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 8, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_9:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 9, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_10:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 10, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_11:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 11, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_12:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 12, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_13:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 13, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_14:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 14, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_15:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 15, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_16:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 16, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_17:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 17, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_18:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 18, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_19:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 19, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_20:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 20, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_21:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 21, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 25, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 26, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_26:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 27, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 28, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 29, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 30, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_3:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 4, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_4:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 5, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_5:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 6, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_6:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 7, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_7:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 8, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_8:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 9, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_9:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 10, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_10:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 11, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_11:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 12, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_12:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 13, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_13:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 14, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_14:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 15, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_15:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 16, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_16:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 17, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_17:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 18, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_18:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 19, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_19:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 20, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_20:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 21, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_21:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 25, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 26, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 27, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_26:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 28, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 29, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 30, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 31, implicit $exec, implicit $mode
; CHECK-NEXT: [[V_CVT_I32_F64_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 32, implicit $exec, implicit $mode, implicit-def $m0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
@@ -255,8 +255,8 @@ body: |
; CHECK-NEXT: bb.2:
; CHECK-NEXT: successors: %bb.3(0x80000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 31, implicit $exec, implicit $mode
- ; CHECK-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_23]], implicit [[V_CVT_I32_F64_e32_24]], implicit [[V_CVT_I32_F64_e32_25]], implicit [[V_CVT_I32_F64_e32_26]], implicit [[V_CVT_I32_F64_e32_27]], implicit [[V_CVT_I32_F64_e32_28]], implicit [[V_CVT_I32_F64_e32_29]], implicit [[V_CVT_I32_F64_e32_31]]
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode
+ ; CHECK-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_31]], implicit [[V_CVT_I32_F64_e32_23]], implicit [[V_CVT_I32_F64_e32_24]], implicit [[V_CVT_I32_F64_e32_25]], implicit [[V_CVT_I32_F64_e32_26]], implicit [[V_CVT_I32_F64_e32_27]], implicit [[V_CVT_I32_F64_e32_28]], implicit [[V_CVT_I32_F64_e32_29]]
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: successors: %bb.4(0x7c000000), %bb.5(0x04000000)
@@ -269,10 +269,10 @@ body: |
; CHECK-NEXT: bb.4:
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_32:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode
- ; CHECK-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_]], implicit [[V_CVT_I32_F64_e32_1]], implicit [[V_CVT_I32_F64_e32_2]], implicit [[V_CVT_I32_F64_e32_3]], implicit [[V_CVT_I32_F64_e32_4]], implicit [[V_CVT_I32_F64_e32_5]], implicit [[V_CVT_I32_F64_e32_6]], implicit [[V_CVT_I32_F64_e32_7]]
- ; CHECK-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_8]], implicit [[V_CVT_I32_F64_e32_9]], implicit [[V_CVT_I32_F64_e32_10]], implicit [[V_CVT_I32_F64_e32_11]], implicit [[V_CVT_I32_F64_e32_12]], implicit [[V_CVT_I32_F64_e32_13]], implicit [[V_CVT_I32_F64_e32_14]], implicit [[V_CVT_I32_F64_e32_15]]
- ; CHECK-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_16]], implicit [[V_CVT_I32_F64_e32_17]], implicit [[V_CVT_I32_F64_e32_18]], implicit [[V_CVT_I32_F64_e32_19]], implicit [[V_CVT_I32_F64_e32_20]], implicit [[V_CVT_I32_F64_e32_21]], implicit [[V_CVT_I32_F64_e32_22]], implicit [[V_CVT_I32_F64_e32_32]]
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_32:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 0, implicit $exec, implicit $mode
+ ; CHECK-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_32]], implicit [[V_CVT_I32_F64_e32_]], implicit [[V_CVT_I32_F64_e32_1]], implicit [[V_CVT_I32_F64_e32_2]], implicit [[V_CVT_I32_F64_e32_3]], implicit [[V_CVT_I32_F64_e32_4]], implicit [[V_CVT_I32_F64_e32_5]], implicit [[V_CVT_I32_F64_e32_6]]
+ ; CHECK-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_7]], implicit [[V_CVT_I32_F64_e32_8]], implicit [[V_CVT_I32_F64_e32_9]], implicit [[V_CVT_I32_F64_e32_10]], implicit [[V_CVT_I32_F64_e32_11]], implicit [[V_CVT_I32_F64_e32_12]], implicit [[V_CVT_I32_F64_e32_13]], implicit [[V_CVT_I32_F64_e32_14]]
+ ; CHECK-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_15]], implicit [[V_CVT_I32_F64_e32_16]], implicit [[V_CVT_I32_F64_e32_17]], implicit [[V_CVT_I32_F64_e32_18]], implicit [[V_CVT_I32_F64_e32_19]], implicit [[V_CVT_I32_F64_e32_20]], implicit [[V_CVT_I32_F64_e32_21]], implicit [[V_CVT_I32_F64_e32_22]]
; CHECK-NEXT: S_BRANCH %bb.1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.5:
@@ -373,21 +373,21 @@ body: |
; CHECK-NEXT: %mem_addr:sgpr_64 = COPY $sgpr0_sgpr1
; CHECK-NEXT: %loop_if_bound:vgpr_32 = COPY $vgpr0
; CHECK-NEXT: %mem_data:sreg_64_xexec = S_LOAD_DWORDX2_IMM %mem_addr, 52, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4)
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 0, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_3:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_4:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 4, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_5:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 5, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_6:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 6, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_7:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 7, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_8:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 8, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_9:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 9, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_10:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 10, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_11:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 11, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_12:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 12, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_13:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 13, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_14:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 14, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 9, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 10, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 11, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_3:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 12, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_4:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 13, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_5:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 14, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_6:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 15, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_7:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 16, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_8:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 17, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_9:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 18, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_10:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 19, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_11:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 20, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_12:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 21, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_13:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_14:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode
; CHECK-NEXT: [[V_CVT_I32_F64_e32_15:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
; CHECK-NEXT: [[V_CVT_I32_F64_e32_16:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 25, implicit $exec, implicit $mode, implicit-def $m0
; CHECK-NEXT: [[V_CVT_I32_F64_e32_17:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 26, implicit $exec, implicit $mode, implicit-def $m0
@@ -425,18 +425,18 @@ body: |
; CHECK-NEXT: bb.4:
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_]], implicit [[V_CVT_I32_F64_e32_1]], implicit [[V_CVT_I32_F64_e32_2]], implicit [[V_CVT_I32_F64_e32_3]], implicit [[V_CVT_I32_F64_e32_4]], implicit [[V_CVT_I32_F64_e32_5]], implicit [[V_CVT_I32_F64_e32_6]], implicit [[V_CVT_I32_F64_e32_7]]
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 15, implicit $exec, implicit $mode
- ; CHECK-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_8]], implicit [[V_CVT_I32_F64_e32_9]], implicit [[V_CVT_I32_F64_e32_10]], implicit [[V_CVT_I32_F64_e32_11]], implicit [[V_CVT_I32_F64_e32_12]], implicit [[V_CVT_I32_F64_e32_13]], implicit [[V_CVT_I32_F64_e32_14]], implicit [[V_CVT_I32_F64_e32_24]]
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_26:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 21, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 20, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 19, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 18, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 17, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_32:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 16, implicit $exec, implicit $mode
- ; CHECK-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_32]], implicit [[V_CVT_I32_F64_e32_31]], implicit [[V_CVT_I32_F64_e32_30]], implicit [[V_CVT_I32_F64_e32_29]], implicit [[V_CVT_I32_F64_e32_28]], implicit [[V_CVT_I32_F64_e32_27]], implicit [[V_CVT_I32_F64_e32_26]], implicit [[V_CVT_I32_F64_e32_25]]
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 0, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_26:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 4, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 5, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 6, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 7, implicit $exec, implicit $mode
+ ; CHECK-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_24]], implicit [[V_CVT_I32_F64_e32_25]], implicit [[V_CVT_I32_F64_e32_26]], implicit [[V_CVT_I32_F64_e32_27]], implicit [[V_CVT_I32_F64_e32_28]], implicit [[V_CVT_I32_F64_e32_29]], implicit [[V_CVT_I32_F64_e32_30]], implicit [[V_CVT_I32_F64_e32_31]]
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_32:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 8, implicit $exec, implicit $mode
+ ; CHECK-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_32]], implicit [[V_CVT_I32_F64_e32_]], implicit [[V_CVT_I32_F64_e32_1]], implicit [[V_CVT_I32_F64_e32_2]], implicit [[V_CVT_I32_F64_e32_3]], implicit [[V_CVT_I32_F64_e32_4]], implicit [[V_CVT_I32_F64_e32_5]], implicit [[V_CVT_I32_F64_e32_6]]
+ ; CHECK-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_7]], implicit [[V_CVT_I32_F64_e32_8]], implicit [[V_CVT_I32_F64_e32_9]], implicit [[V_CVT_I32_F64_e32_10]], implicit [[V_CVT_I32_F64_e32_11]], implicit [[V_CVT_I32_F64_e32_12]], implicit [[V_CVT_I32_F64_e32_13]], implicit [[V_CVT_I32_F64_e32_14]]
; CHECK-NEXT: S_BRANCH %bb.1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.5:
diff --git a/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats.mir b/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats.mir
index 584fd19eb178e..0bf9e0b52207d 100644
--- a/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats.mir
+++ b/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats.mir
@@ -2044,87 +2044,105 @@ body: |
; GFX908-NEXT: [[V_CVT_I32_F64_e32_17:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 17, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_18:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 18, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_19:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 19, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
- ; GFX908-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 1
- ; GFX908-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sgpr_32 = S_MOV_B32 2
- ; GFX908-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sgpr_32 = S_MOV_B32 3
- ; GFX908-NEXT: [[S_MOV_B32_5:%[0-9]+]]:sgpr_32 = S_MOV_B32 4
- ; GFX908-NEXT: [[S_MOV_B32_6:%[0-9]+]]:sgpr_32 = S_MOV_B32 5
- ; GFX908-NEXT: [[S_MOV_B32_7:%[0-9]+]]:sgpr_32 = S_MOV_B32 6
- ; GFX908-NEXT: [[S_MOV_B32_8:%[0-9]+]]:sgpr_32 = S_MOV_B32 7
- ; GFX908-NEXT: [[S_MOV_B32_9:%[0-9]+]]:sgpr_32 = S_MOV_B32 8
- ; GFX908-NEXT: [[S_MOV_B32_10:%[0-9]+]]:sgpr_32 = S_MOV_B32 9
- ; GFX908-NEXT: [[S_MOV_B32_11:%[0-9]+]]:sgpr_32 = S_MOV_B32 10
- ; GFX908-NEXT: [[S_MOV_B32_12:%[0-9]+]]:sgpr_32 = S_MOV_B32 11
- ; GFX908-NEXT: [[S_MOV_B32_13:%[0-9]+]]:sgpr_32 = S_MOV_B32 12
- ; GFX908-NEXT: [[S_MOV_B32_14:%[0-9]+]]:sgpr_32 = S_MOV_B32 13
- ; GFX908-NEXT: [[S_MOV_B32_15:%[0-9]+]]:sgpr_32 = S_MOV_B32 14
- ; GFX908-NEXT: [[S_MOV_B32_16:%[0-9]+]]:sgpr_32 = S_MOV_B32 15
- ; GFX908-NEXT: [[S_MOV_B32_17:%[0-9]+]]:sgpr_32 = S_MOV_B32 16
- ; GFX908-NEXT: [[S_MOV_B32_18:%[0-9]+]]:sgpr_32 = S_MOV_B32 17
- ; GFX908-NEXT: [[S_MOV_B32_19:%[0-9]+]]:sgpr_32 = S_MOV_B32 18
- ; GFX908-NEXT: [[S_MOV_B32_20:%[0-9]+]]:sgpr_32 = S_MOV_B32 19
- ; GFX908-NEXT: [[S_MOV_B32_21:%[0-9]+]]:sgpr_32 = S_MOV_B32 20
- ; GFX908-NEXT: [[S_MOV_B32_22:%[0-9]+]]:sgpr_32 = S_MOV_B32 21
- ; GFX908-NEXT: [[S_MOV_B32_23:%[0-9]+]]:sgpr_32 = S_MOV_B32 22
- ; GFX908-NEXT: [[S_MOV_B32_24:%[0-9]+]]:sgpr_32 = S_MOV_B32 23
- ; GFX908-NEXT: [[S_MOV_B32_25:%[0-9]+]]:sgpr_32 = S_MOV_B32 24
- ; GFX908-NEXT: [[S_MOV_B32_26:%[0-9]+]]:sgpr_32 = S_MOV_B32 25
- ; GFX908-NEXT: [[S_MOV_B32_27:%[0-9]+]]:sgpr_32 = S_MOV_B32 26
- ; GFX908-NEXT: [[S_MOV_B32_28:%[0-9]+]]:sgpr_32 = S_MOV_B32 27
- ; GFX908-NEXT: [[S_MOV_B32_29:%[0-9]+]]:sgpr_32 = S_MOV_B32 28
- ; GFX908-NEXT: [[S_MOV_B32_30:%[0-9]+]]:sgpr_32 = S_MOV_B32 29
- ; GFX908-NEXT: [[S_MOV_B32_31:%[0-9]+]]:sgpr_32 = S_MOV_B32 30
- ; GFX908-NEXT: [[S_MOV_B32_32:%[0-9]+]]:sgpr_32 = S_MOV_B32 31
- ; GFX908-NEXT: [[S_MOV_B32_33:%[0-9]+]]:sgpr_32 = S_MOV_B32 32
- ; GFX908-NEXT: [[S_MOV_B32_34:%[0-9]+]]:sgpr_32 = S_MOV_B32 33
- ; GFX908-NEXT: [[S_MOV_B32_35:%[0-9]+]]:sgpr_32 = S_MOV_B32 34
- ; GFX908-NEXT: [[S_MOV_B32_36:%[0-9]+]]:sgpr_32 = S_MOV_B32 35
- ; GFX908-NEXT: [[S_MOV_B32_37:%[0-9]+]]:sgpr_32 = S_MOV_B32 36
- ; GFX908-NEXT: [[S_MOV_B32_38:%[0-9]+]]:sgpr_32 = S_MOV_B32 37
- ; GFX908-NEXT: [[S_MOV_B32_39:%[0-9]+]]:sgpr_32 = S_MOV_B32 38
- ; GFX908-NEXT: [[S_MOV_B32_40:%[0-9]+]]:sgpr_32 = S_MOV_B32 39
- ; GFX908-NEXT: [[S_MOV_B32_41:%[0-9]+]]:sgpr_32 = S_MOV_B32 40
- ; GFX908-NEXT: [[S_MOV_B32_42:%[0-9]+]]:sgpr_32 = S_MOV_B32 41
- ; GFX908-NEXT: [[S_MOV_B32_43:%[0-9]+]]:sgpr_32 = S_MOV_B32 42
- ; GFX908-NEXT: [[S_MOV_B32_44:%[0-9]+]]:sgpr_32 = S_MOV_B32 43
- ; GFX908-NEXT: [[S_MOV_B32_45:%[0-9]+]]:sgpr_32 = S_MOV_B32 44
- ; GFX908-NEXT: [[S_MOV_B32_46:%[0-9]+]]:sgpr_32 = S_MOV_B32 45
- ; GFX908-NEXT: [[S_MOV_B32_47:%[0-9]+]]:sgpr_32 = S_MOV_B32 46
- ; GFX908-NEXT: [[S_MOV_B32_48:%[0-9]+]]:sgpr_32 = S_MOV_B32 47
- ; GFX908-NEXT: [[S_MOV_B32_49:%[0-9]+]]:sgpr_32 = S_MOV_B32 48
- ; GFX908-NEXT: [[S_MOV_B32_50:%[0-9]+]]:sgpr_32 = S_MOV_B32 49
- ; GFX908-NEXT: [[S_MOV_B32_51:%[0-9]+]]:sgpr_32 = S_MOV_B32 50
- ; GFX908-NEXT: [[S_MOV_B32_52:%[0-9]+]]:sgpr_32 = S_MOV_B32 51
- ; GFX908-NEXT: [[S_MOV_B32_53:%[0-9]+]]:sgpr_32 = S_MOV_B32 52
- ; GFX908-NEXT: [[S_MOV_B32_54:%[0-9]+]]:sgpr_32 = S_MOV_B32 53
- ; GFX908-NEXT: [[S_MOV_B32_55:%[0-9]+]]:sgpr_32 = S_MOV_B32 54
- ; GFX908-NEXT: [[S_MOV_B32_56:%[0-9]+]]:sgpr_32 = S_MOV_B32 55
- ; GFX908-NEXT: [[S_MOV_B32_57:%[0-9]+]]:sgpr_32 = S_MOV_B32 56
- ; GFX908-NEXT: [[S_MOV_B32_58:%[0-9]+]]:sgpr_32 = S_MOV_B32 57
- ; GFX908-NEXT: [[S_MOV_B32_59:%[0-9]+]]:sgpr_32 = S_MOV_B32 58
- ; GFX908-NEXT: [[S_MOV_B32_60:%[0-9]+]]:sgpr_32 = S_MOV_B32 59
- ; GFX908-NEXT: [[S_MOV_B32_61:%[0-9]+]]:sgpr_32 = S_MOV_B32 60
- ; GFX908-NEXT: [[S_MOV_B32_62:%[0-9]+]]:sgpr_32 = S_MOV_B32 61
- ; GFX908-NEXT: [[S_MOV_B32_63:%[0-9]+]]:sgpr_32 = S_MOV_B32 62
- ; GFX908-NEXT: [[S_MOV_B32_64:%[0-9]+]]:sgpr_32 = S_MOV_B32 63
- ; GFX908-NEXT: [[S_MOV_B32_65:%[0-9]+]]:sgpr_32 = S_MOV_B32 64
- ; GFX908-NEXT: [[S_MOV_B32_66:%[0-9]+]]:sgpr_32 = S_MOV_B32 65
- ; GFX908-NEXT: [[S_MOV_B32_67:%[0-9]+]]:sgpr_32 = S_MOV_B32 66
- ; GFX908-NEXT: [[S_MOV_B32_68:%[0-9]+]]:sgpr_32 = S_MOV_B32 67
- ; GFX908-NEXT: [[S_MOV_B32_69:%[0-9]+]]:sgpr_32 = S_MOV_B32 68
- ; GFX908-NEXT: [[S_MOV_B32_70:%[0-9]+]]:sgpr_32 = S_MOV_B32 69
+ ; GFX908-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 12
+ ; GFX908-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 13
+ ; GFX908-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sgpr_32 = S_MOV_B32 14
+ ; GFX908-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sgpr_32 = S_MOV_B32 15
+ ; GFX908-NEXT: [[S_MOV_B32_5:%[0-9]+]]:sgpr_32 = S_MOV_B32 16
+ ; GFX908-NEXT: [[S_MOV_B32_6:%[0-9]+]]:sgpr_32 = S_MOV_B32 17
+ ; GFX908-NEXT: [[S_MOV_B32_7:%[0-9]+]]:sgpr_32 = S_MOV_B32 18
+ ; GFX908-NEXT: [[S_MOV_B32_8:%[0-9]+]]:sgpr_32 = S_MOV_B32 19
+ ; GFX908-NEXT: [[S_MOV_B32_9:%[0-9]+]]:sgpr_32 = S_MOV_B32 20
+ ; GFX908-NEXT: [[S_MOV_B32_10:%[0-9]+]]:sgpr_32 = S_MOV_B32 21
+ ; GFX908-NEXT: [[S_MOV_B32_11:%[0-9]+]]:sgpr_32 = S_MOV_B32 22
+ ; GFX908-NEXT: [[S_MOV_B32_12:%[0-9]+]]:sgpr_32 = S_MOV_B32 23
+ ; GFX908-NEXT: [[S_MOV_B32_13:%[0-9]+]]:sgpr_32 = S_MOV_B32 24
+ ; GFX908-NEXT: [[S_MOV_B32_14:%[0-9]+]]:sgpr_32 = S_MOV_B32 25
+ ; GFX908-NEXT: [[S_MOV_B32_15:%[0-9]+]]:sgpr_32 = S_MOV_B32 26
+ ; GFX908-NEXT: [[S_MOV_B32_16:%[0-9]+]]:sgpr_32 = S_MOV_B32 27
+ ; GFX908-NEXT: [[S_MOV_B32_17:%[0-9]+]]:sgpr_32 = S_MOV_B32 28
+ ; GFX908-NEXT: [[S_MOV_B32_18:%[0-9]+]]:sgpr_32 = S_MOV_B32 29
+ ; GFX908-NEXT: [[S_MOV_B32_19:%[0-9]+]]:sgpr_32 = S_MOV_B32 30
+ ; GFX908-NEXT: [[S_MOV_B32_20:%[0-9]+]]:sgpr_32 = S_MOV_B32 31
+ ; GFX908-NEXT: [[S_MOV_B32_21:%[0-9]+]]:sgpr_32 = S_MOV_B32 32
+ ; GFX908-NEXT: [[S_MOV_B32_22:%[0-9]+]]:sgpr_32 = S_MOV_B32 33
+ ; GFX908-NEXT: [[S_MOV_B32_23:%[0-9]+]]:sgpr_32 = S_MOV_B32 34
+ ; GFX908-NEXT: [[S_MOV_B32_24:%[0-9]+]]:sgpr_32 = S_MOV_B32 35
+ ; GFX908-NEXT: [[S_MOV_B32_25:%[0-9]+]]:sgpr_32 = S_MOV_B32 36
+ ; GFX908-NEXT: [[S_MOV_B32_26:%[0-9]+]]:sgpr_32 = S_MOV_B32 37
+ ; GFX908-NEXT: [[S_MOV_B32_27:%[0-9]+]]:sgpr_32 = S_MOV_B32 38
+ ; GFX908-NEXT: [[S_MOV_B32_28:%[0-9]+]]:sgpr_32 = S_MOV_B32 39
+ ; GFX908-NEXT: [[S_MOV_B32_29:%[0-9]+]]:sgpr_32 = S_MOV_B32 40
+ ; GFX908-NEXT: [[S_MOV_B32_30:%[0-9]+]]:sgpr_32 = S_MOV_B32 41
+ ; GFX908-NEXT: [[S_MOV_B32_31:%[0-9]+]]:sgpr_32 = S_MOV_B32 42
+ ; GFX908-NEXT: [[S_MOV_B32_32:%[0-9]+]]:sgpr_32 = S_MOV_B32 43
+ ; GFX908-NEXT: [[S_MOV_B32_33:%[0-9]+]]:sgpr_32 = S_MOV_B32 44
+ ; GFX908-NEXT: [[S_MOV_B32_34:%[0-9]+]]:sgpr_32 = S_MOV_B32 45
+ ; GFX908-NEXT: [[S_MOV_B32_35:%[0-9]+]]:sgpr_32 = S_MOV_B32 46
+ ; GFX908-NEXT: [[S_MOV_B32_36:%[0-9]+]]:sgpr_32 = S_MOV_B32 47
+ ; GFX908-NEXT: [[S_MOV_B32_37:%[0-9]+]]:sgpr_32 = S_MOV_B32 48
+ ; GFX908-NEXT: [[S_MOV_B32_38:%[0-9]+]]:sgpr_32 = S_MOV_B32 49
+ ; GFX908-NEXT: [[S_MOV_B32_39:%[0-9]+]]:sgpr_32 = S_MOV_B32 50
+ ; GFX908-NEXT: [[S_MOV_B32_40:%[0-9]+]]:sgpr_32 = S_MOV_B32 51
+ ; GFX908-NEXT: [[S_MOV_B32_41:%[0-9]+]]:sgpr_32 = S_MOV_B32 52
+ ; GFX908-NEXT: [[S_MOV_B32_42:%[0-9]+]]:sgpr_32 = S_MOV_B32 53
+ ; GFX908-NEXT: [[S_MOV_B32_43:%[0-9]+]]:sgpr_32 = S_MOV_B32 54
+ ; GFX908-NEXT: [[S_MOV_B32_44:%[0-9]+]]:sgpr_32 = S_MOV_B32 55
+ ; GFX908-NEXT: [[S_MOV_B32_45:%[0-9]+]]:sgpr_32 = S_MOV_B32 56
+ ; GFX908-NEXT: [[S_MOV_B32_46:%[0-9]+]]:sgpr_32 = S_MOV_B32 57
+ ; GFX908-NEXT: [[S_MOV_B32_47:%[0-9]+]]:sgpr_32 = S_MOV_B32 58
+ ; GFX908-NEXT: [[S_MOV_B32_48:%[0-9]+]]:sgpr_32 = S_MOV_B32 59
+ ; GFX908-NEXT: [[S_MOV_B32_49:%[0-9]+]]:sgpr_32 = S_MOV_B32 60
+ ; GFX908-NEXT: [[S_MOV_B32_50:%[0-9]+]]:sgpr_32 = S_MOV_B32 61
+ ; GFX908-NEXT: [[S_MOV_B32_51:%[0-9]+]]:sgpr_32 = S_MOV_B32 62
+ ; GFX908-NEXT: [[S_MOV_B32_52:%[0-9]+]]:sgpr_32 = S_MOV_B32 63
+ ; GFX908-NEXT: [[S_MOV_B32_53:%[0-9]+]]:sgpr_32 = S_MOV_B32 64
+ ; GFX908-NEXT: [[S_MOV_B32_54:%[0-9]+]]:sgpr_32 = S_MOV_B32 65
+ ; GFX908-NEXT: [[S_MOV_B32_55:%[0-9]+]]:sgpr_32 = S_MOV_B32 66
+ ; GFX908-NEXT: [[S_MOV_B32_56:%[0-9]+]]:sgpr_32 = S_MOV_B32 67
+ ; GFX908-NEXT: [[S_MOV_B32_57:%[0-9]+]]:sgpr_32 = S_MOV_B32 68
+ ; GFX908-NEXT: [[S_MOV_B32_58:%[0-9]+]]:sgpr_32 = S_MOV_B32 69
+ ; GFX908-NEXT: [[S_MOV_B32_59:%[0-9]+]]:sgpr_32 = S_MOV_B32 70
+ ; GFX908-NEXT: [[S_MOV_B32_60:%[0-9]+]]:sgpr_32 = S_MOV_B32 71
; GFX908-NEXT: [[V_CVT_I32_F64_e32_20:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 20, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[S_MOV_B32_61:%[0-9]+]]:sgpr_32 = S_MOV_B32 72
; GFX908-NEXT: [[V_CVT_I32_F64_e32_21:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 21, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[S_MOV_B32_62:%[0-9]+]]:sgpr_32 = S_MOV_B32 73
; GFX908-NEXT: [[V_CVT_I32_F64_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[S_MOV_B32_63:%[0-9]+]]:sgpr_32 = S_MOV_B32 74
; GFX908-NEXT: [[V_CVT_I32_F64_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode
- ; GFX908-NEXT: [[S_MOV_B32_71:%[0-9]+]]:sgpr_32 = S_MOV_B32 70
- ; GFX908-NEXT: [[S_MOV_B32_72:%[0-9]+]]:sgpr_32 = S_MOV_B32 71
- ; GFX908-NEXT: [[S_MOV_B32_73:%[0-9]+]]:sgpr_32 = S_MOV_B32 72
+ ; GFX908-NEXT: [[S_MOV_B32_64:%[0-9]+]]:sgpr_32 = S_MOV_B32 75
+ ; GFX908-NEXT: [[S_MOV_B32_65:%[0-9]+]]:sgpr_32 = S_MOV_B32 76
+ ; GFX908-NEXT: [[S_MOV_B32_66:%[0-9]+]]:sgpr_32 = S_MOV_B32 77
+ ; GFX908-NEXT: [[S_MOV_B32_67:%[0-9]+]]:sgpr_32 = S_MOV_B32 78
+ ; GFX908-NEXT: [[S_MOV_B32_68:%[0-9]+]]:sgpr_32 = S_MOV_B32 79
+ ; GFX908-NEXT: [[S_MOV_B32_69:%[0-9]+]]:sgpr_32 = S_MOV_B32 80
+ ; GFX908-NEXT: [[S_MOV_B32_70:%[0-9]+]]:sgpr_32 = S_MOV_B32 81
+ ; GFX908-NEXT: [[S_MOV_B32_71:%[0-9]+]]:sgpr_32 = S_MOV_B32 82
+ ; GFX908-NEXT: [[S_MOV_B32_72:%[0-9]+]]:sgpr_32 = S_MOV_B32 83
+ ; GFX908-NEXT: [[S_MOV_B32_73:%[0-9]+]]:sgpr_32 = S_MOV_B32 84
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.1:
; GFX908-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000)
; GFX908-NEXT: {{ $}}
+ ; GFX908-NEXT: [[S_MOV_B32_74:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
+ ; GFX908-NEXT: [[S_MOV_B32_75:%[0-9]+]]:sgpr_32 = S_MOV_B32 1
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_74]], implicit [[S_MOV_B32_75]]
+ ; GFX908-NEXT: [[S_MOV_B32_76:%[0-9]+]]:sgpr_32 = S_MOV_B32 2
+ ; GFX908-NEXT: [[S_MOV_B32_77:%[0-9]+]]:sgpr_32 = S_MOV_B32 3
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_76]], implicit [[S_MOV_B32_77]]
+ ; GFX908-NEXT: [[S_MOV_B32_78:%[0-9]+]]:sgpr_32 = S_MOV_B32 4
+ ; GFX908-NEXT: [[S_MOV_B32_79:%[0-9]+]]:sgpr_32 = S_MOV_B32 5
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_78]], implicit [[S_MOV_B32_79]]
+ ; GFX908-NEXT: [[S_MOV_B32_80:%[0-9]+]]:sgpr_32 = S_MOV_B32 6
+ ; GFX908-NEXT: [[S_MOV_B32_81:%[0-9]+]]:sgpr_32 = S_MOV_B32 7
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_80]], implicit [[S_MOV_B32_81]]
+ ; GFX908-NEXT: [[S_MOV_B32_82:%[0-9]+]]:sgpr_32 = S_MOV_B32 8
+ ; GFX908-NEXT: [[S_MOV_B32_83:%[0-9]+]]:sgpr_32 = S_MOV_B32 9
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_82]], implicit [[S_MOV_B32_83]]
+ ; GFX908-NEXT: [[S_MOV_B32_84:%[0-9]+]]:sgpr_32 = S_MOV_B32 10
+ ; GFX908-NEXT: [[S_MOV_B32_85:%[0-9]+]]:sgpr_32 = S_MOV_B32 11
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_84]], implicit [[S_MOV_B32_85]]
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_1]], implicit [[S_MOV_B32_2]]
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_3]], implicit [[S_MOV_B32_4]]
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_5]], implicit [[S_MOV_B32_6]]
@@ -2161,25 +2179,7 @@ body: |
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_67]], implicit [[S_MOV_B32_68]]
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_69]], implicit [[S_MOV_B32_70]]
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_71]], implicit [[S_MOV_B32_72]]
- ; GFX908-NEXT: [[S_MOV_B32_74:%[0-9]+]]:sgpr_32 = S_MOV_B32 73
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_73]], implicit [[S_MOV_B32_74]]
- ; GFX908-NEXT: [[S_MOV_B32_75:%[0-9]+]]:sgpr_32 = S_MOV_B32 75
- ; GFX908-NEXT: [[S_MOV_B32_76:%[0-9]+]]:sgpr_32 = S_MOV_B32 74
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_76]], implicit [[S_MOV_B32_75]]
- ; GFX908-NEXT: [[S_MOV_B32_77:%[0-9]+]]:sgpr_32 = S_MOV_B32 77
- ; GFX908-NEXT: [[S_MOV_B32_78:%[0-9]+]]:sgpr_32 = S_MOV_B32 76
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_78]], implicit [[S_MOV_B32_77]]
- ; GFX908-NEXT: [[S_MOV_B32_79:%[0-9]+]]:sgpr_32 = S_MOV_B32 79
- ; GFX908-NEXT: [[S_MOV_B32_80:%[0-9]+]]:sgpr_32 = S_MOV_B32 78
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_80]], implicit [[S_MOV_B32_79]]
- ; GFX908-NEXT: [[S_MOV_B32_81:%[0-9]+]]:sgpr_32 = S_MOV_B32 81
- ; GFX908-NEXT: [[S_MOV_B32_82:%[0-9]+]]:sgpr_32 = S_MOV_B32 80
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_82]], implicit [[S_MOV_B32_81]]
- ; GFX908-NEXT: [[S_MOV_B32_83:%[0-9]+]]:sgpr_32 = S_MOV_B32 83
- ; GFX908-NEXT: [[S_MOV_B32_84:%[0-9]+]]:sgpr_32 = S_MOV_B32 82
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_84]], implicit [[S_MOV_B32_83]]
- ; GFX908-NEXT: [[S_MOV_B32_85:%[0-9]+]]:sgpr_32 = S_MOV_B32 84
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_85]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_73]]
; GFX908-NEXT: [[COPY2:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec
; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY2]], [[V_CMP_GT_U32_e64_]], implicit-def dead $scc
; GFX908-NEXT: $exec = S_MOV_B64_term [[S_AND_B64_]]
@@ -8695,6 +8695,7 @@ body: |
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: [[V_CVT_I32_F64_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 10, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+ ; GFX908-NEXT: dead [[DEF1:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
; GFX908-NEXT: [[V_CVT_I32_F64_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 11, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 12, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_3:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 13, implicit $exec, implicit $mode, implicit-def $m0
@@ -8736,9 +8737,8 @@ body: |
; GFX908-NEXT: [[V_CVT_I32_F64_e32_39:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 49, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_40:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 50, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_41:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 51, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: undef [[V_RCP_F32_e32_:%[0-9]+]].sub0:vreg_64_align2 = nnan ninf nsz arcp contract afn reassoc nofpexcept V_RCP_F32_e32 [[DEF]].sub0, implicit $mode, implicit $exec
; GFX908-NEXT: [[V_CVT_I32_F64_e32_42:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 52, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: dead [[DEF1:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
+ ; GFX908-NEXT: undef [[V_RCP_F32_e32_:%[0-9]+]].sub0:vreg_64_align2 = nnan ninf nsz arcp contract afn reassoc nofpexcept V_RCP_F32_e32 [[DEF]].sub0, implicit $mode, implicit $exec
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.1:
; GFX908-NEXT: successors: %bb.2(0x80000000)
diff --git a/llvm/test/CodeGen/AMDGPU/mfma-loop.ll b/llvm/test/CodeGen/AMDGPU/mfma-loop.ll
index 0cef89867622e..6bc9b2fd0d316 100644
--- a/llvm/test/CodeGen/AMDGPU/mfma-loop.ll
+++ b/llvm/test/CodeGen/AMDGPU/mfma-loop.ll
@@ -43,12 +43,12 @@ define amdgpu_kernel void @test_mfma_loop_zeroinit(ptr addrspace(1) %arg) #0 {
; GFX908-NEXT: v_accvgpr_write_b32 a1, 0
; GFX908-NEXT: v_accvgpr_write_b32 a0, 0
; GFX908-NEXT: s_mov_b32 s0, 16
-; GFX908-NEXT: v_mov_b32_e32 v0, 1.0
-; GFX908-NEXT: v_mov_b32_e32 v1, 2.0
+; GFX908-NEXT: v_mov_b32_e32 v0, 2.0
+; GFX908-NEXT: v_mov_b32_e32 v1, 1.0
; GFX908-NEXT: .LBB0_1: ; %for.cond.preheader
; GFX908-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX908-NEXT: s_nop 1
-; GFX908-NEXT: v_mfma_f32_32x32x1f32 a[0:31], v0, v1, a[0:31]
+; GFX908-NEXT: v_mfma_f32_32x32x1f32 a[0:31], v1, v0, a[0:31]
; GFX908-NEXT: s_add_i32 s0, s0, -1
; GFX908-NEXT: s_cmp_lg_u32 s0, 0
; GFX908-NEXT: s_cbranch_scc1 .LBB0_1
@@ -134,12 +134,12 @@ define amdgpu_kernel void @test_mfma_loop_zeroinit(ptr addrspace(1) %arg) #0 {
; GFX90A-NEXT: v_accvgpr_write_b32 a1, 0
; GFX90A-NEXT: v_accvgpr_write_b32 a0, 0
; GFX90A-NEXT: s_mov_b32 s0, 16
-; GFX90A-NEXT: v_mov_b32_e32 v0, 1.0
-; GFX90A-NEXT: v_mov_b32_e32 v1, 2.0
+; GFX90A-NEXT: v_mov_b32_e32 v0, 2.0
+; GFX90A-NEXT: v_mov_b32_e32 v1, 1.0
; GFX90A-NEXT: .LBB0_1: ; %for.cond.preheader
; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX90A-NEXT: s_nop 1
-; GFX90A-NEXT: v_mfma_f32_32x32x1f32 a[0:31], v0, v1, a[0:31]
+; GFX90A-NEXT: v_mfma_f32_32x32x1f32 a[0:31], v1, v0, a[0:31]
; GFX90A-NEXT: s_add_i32 s0, s0, -1
; GFX90A-NEXT: s_cmp_lg_u32 s0, 0
; GFX90A-NEXT: s_cbranch_scc1 .LBB0_1
@@ -193,12 +193,12 @@ define amdgpu_kernel void @test_mfma_loop_zeroinit(ptr addrspace(1) %arg) #0 {
; GFX942-NEXT: v_accvgpr_write_b32 a1, 0
; GFX942-NEXT: v_accvgpr_write_b32 a0, 0
; GFX942-NEXT: s_mov_b32 s0, 16
-; GFX942-NEXT: v_mov_b32_e32 v0, 1.0
-; GFX942-NEXT: v_mov_b32_e32 v1, 2.0
+; GFX942-NEXT: v_mov_b32_e32 v0, 2.0
+; GFX942-NEXT: v_mov_b32_e32 v1, 1.0
; GFX942-NEXT: .LBB0_1: ; %for.cond.preheader
; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mfma_f32_32x32x1_2b_f32 a[0:31], v0, v1, a[0:31]
+; GFX942-NEXT: v_mfma_f32_32x32x1_2b_f32 a[0:31], v1, v0, a[0:31]
; GFX942-NEXT: s_add_i32 s0, s0, -1
; GFX942-NEXT: s_cmp_lg_u32 s0, 0
; GFX942-NEXT: s_cbranch_scc1 .LBB0_1
@@ -242,7 +242,7 @@ define amdgpu_kernel void @test_mfma_loop_unfoldable_splat(ptr addrspace(1) %arg
; GFX908: ; %bb.0: ; %entry
; GFX908-NEXT: v_mov_b32_e32 v0, 0x42f60000
; GFX908-NEXT: s_mov_b32 s0, 16
-; GFX908-NEXT: v_mov_b32_e32 v1, 2.0
+; GFX908-NEXT: v_mov_b32_e32 v1, 1.0
; GFX908-NEXT: v_accvgpr_write_b32 a31, v0
; GFX908-NEXT: v_accvgpr_write_b32 a30, v0
; GFX908-NEXT: v_accvgpr_write_b32 a29, v0
@@ -275,11 +275,11 @@ define amdgpu_kernel void @test_mfma_loop_unfoldable_splat(ptr addrspace(1) %arg
; GFX908-NEXT: v_accvgpr_write_b32 a2, v0
; GFX908-NEXT: v_accvgpr_write_b32 a1, v0
; GFX908-NEXT: v_accvgpr_write_b32 a0, v0
-; GFX908-NEXT: v_mov_b32_e32 v0, 1.0
+; GFX908-NEXT: v_mov_b32_e32 v0, 2.0
; GFX908-NEXT: .LBB1_1: ; %for.cond.preheader
; GFX908-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX908-NEXT: s_nop 1
-; GFX908-NEXT: v_mfma_f32_32x32x1f32 a[0:31], v0, v1, a[0:31]
+; GFX908-NEXT: v_mfma_f32_32x32x1f32 a[0:31], v1, v0, a[0:31]
; GFX908-NEXT: s_add_i32 s0, s0, -1
; GFX908-NEXT: s_cmp_lg_u32 s0, 0
; GFX908-NEXT: s_cbranch_scc1 .LBB1_1
@@ -366,12 +366,12 @@ define amdgpu_kernel void @test_mfma_loop_unfoldable_splat(ptr addrspace(1) %arg
; GFX90A-NEXT: v_accvgpr_write_b32 a1, v0
; GFX90A-NEXT: v_accvgpr_write_b32 a0, v0
; GFX90A-NEXT: s_mov_b32 s0, 16
-; GFX90A-NEXT: v_mov_b32_e32 v0, 1.0
-; GFX90A-NEXT: v_mov_b32_e32 v1, 2.0
+; GFX90A-NEXT: v_mov_b32_e32 v0, 2.0
+; GFX90A-NEXT: v_mov_b32_e32 v1, 1.0
; GFX90A-NEXT: .LBB1_1: ; %for.cond.preheader
; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX90A-NEXT: s_nop 1
-; GFX90A-NEXT: v_mfma_f32_32x32x1f32 a[0:31], v0, v1, a[0:31]
+; GFX90A-NEXT: v_mfma_f32_32x32x1f32 a[0:31], v1, v0, a[0:31]
; GFX90A-NEXT: s_add_i32 s0, s0, -1
; GFX90A-NEXT: s_cmp_lg_u32 s0, 0
; GFX90A-NEXT: s_cbranch_scc1 .LBB1_1
@@ -426,12 +426,12 @@ define amdgpu_kernel void @test_mfma_loop_unfoldable_splat(ptr addrspace(1) %arg
; GFX942-NEXT: v_accvgpr_write_b32 a1, v0
; GFX942-NEXT: v_accvgpr_write_b32 a0, v0
; GFX942-NEXT: s_mov_b32 s0, 16
-; GFX942-NEXT: v_mov_b32_e32 v0, 1.0
-; GFX942-NEXT: v_mov_b32_e32 v1, 2.0
+; GFX942-NEXT: v_mov_b32_e32 v0, 2.0
+; GFX942-NEXT: v_mov_b32_e32 v1, 1.0
; GFX942-NEXT: .LBB1_1: ; %for.cond.preheader
; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mfma_f32_32x32x1_2b_f32 a[0:31], v0, v1, a[0:31]
+; GFX942-NEXT: v_mfma_f32_32x32x1_2b_f32 a[0:31], v1, v0, a[0:31]
; GFX942-NEXT: s_add_i32 s0, s0, -1
; GFX942-NEXT: s_cmp_lg_u32 s0, 0
; GFX942-NEXT: s_cbranch_scc1 .LBB1_1
@@ -501,12 +501,12 @@ define amdgpu_kernel void @test_mfma_loop_non_splat(ptr addrspace(1) %arg) #0 {
; GFX908-NEXT: v_accvgpr_write_b32 a2, 0
; GFX908-NEXT: v_accvgpr_write_b32 a0, 0
; GFX908-NEXT: s_mov_b32 s0, 16
-; GFX908-NEXT: v_mov_b32_e32 v0, 2.0
-; GFX908-NEXT: v_mov_b32_e32 v1, 1.0
+; GFX908-NEXT: v_mov_b32_e32 v0, 1.0
+; GFX908-NEXT: v_mov_b32_e32 v1, 2.0
; GFX908-NEXT: .LBB2_1: ; %for.cond.preheader
; GFX908-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX908-NEXT: s_nop 1
-; GFX908-NEXT: v_mfma_f32_32x32x1f32 a[0:31], v1, v0, a[0:31]
+; GFX908-NEXT: v_mfma_f32_32x32x1f32 a[0:31], v0, v1, a[0:31]
; GFX908-NEXT: s_add_i32 s0, s0, -1
; GFX908-NEXT: s_cmp_lg_u32 s0, 0
; GFX908-NEXT: s_cbranch_scc1 .LBB2_1
@@ -592,12 +592,12 @@ define amdgpu_kernel void @test_mfma_loop_non_splat(ptr addrspace(1) %arg) #0 {
; GFX90A-NEXT: v_accvgpr_write_b32 a2, 0
; GFX90A-NEXT: v_accvgpr_write_b32 a0, 0
; GFX90A-NEXT: s_mov_b32 s0, 16
-; GFX90A-NEXT: v_mov_b32_e32 v0, 2.0
-; GFX90A-NEXT: v_mov_b32_e32 v1, 1.0
+; GFX90A-NEXT: v_mov_b32_e32 v0, 1.0
+; GFX90A-NEXT: v_mov_b32_e32 v1, 2.0
; GFX90A-NEXT: .LBB2_1: ; %for.cond.preheader
; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX90A-NEXT: s_nop 1
-; GFX90A-NEXT: v_mfma_f32_32x32x1f32 a[0:31], v1, v0, a[0:31]
+; GFX90A-NEXT: v_mfma_f32_32x32x1f32 a[0:31], v0, v1, a[0:31]
; GFX90A-NEXT: s_add_i32 s0, s0, -1
; GFX90A-NEXT: s_cmp_lg_u32 s0, 0
; GFX90A-NEXT: s_cbranch_scc1 .LBB2_1
@@ -651,12 +651,12 @@ define amdgpu_kernel void @test_mfma_loop_non_splat(ptr addrspace(1) %arg) #0 {
; GFX942-NEXT: v_accvgpr_write_b32 a2, 0
; GFX942-NEXT: v_accvgpr_write_b32 a0, 0
; GFX942-NEXT: s_mov_b32 s0, 16
-; GFX942-NEXT: v_mov_b32_e32 v0, 2.0
-; GFX942-NEXT: v_mov_b32_e32 v1, 1.0
+; GFX942-NEXT: v_mov_b32_e32 v0, 1.0
+; GFX942-NEXT: v_mov_b32_e32 v1, 2.0
; GFX942-NEXT: .LBB2_1: ; %for.cond.preheader
; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mfma_f32_32x32x1_2b_f32 a[0:31], v1, v0, a[0:31]
+; GFX942-NEXT: v_mfma_f32_32x32x1_2b_f32 a[0:31], v0, v1, a[0:31]
; GFX942-NEXT: s_add_i32 s0, s0, -1
; GFX942-NEXT: s_cmp_lg_u32 s0, 0
; GFX942-NEXT: s_cbranch_scc1 .LBB2_1
@@ -763,12 +763,12 @@ define amdgpu_kernel void @test_mfma_loop_unfoldable_seq(ptr addrspace(1) %arg)
; GFX908-NEXT: v_accvgpr_write_b32 a1, v1
; GFX908-NEXT: v_accvgpr_write_b32 a0, v2
; GFX908-NEXT: s_mov_b32 s0, 16
-; GFX908-NEXT: v_mov_b32_e32 v0, 1.0
-; GFX908-NEXT: v_mov_b32_e32 v1, 2.0
+; GFX908-NEXT: v_mov_b32_e32 v0, 2.0
+; GFX908-NEXT: v_mov_b32_e32 v1, 1.0
; GFX908-NEXT: .LBB3_1: ; %for.cond.preheader
; GFX908-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX908-NEXT: s_nop 1
-; GFX908-NEXT: v_mfma_f32_32x32x1f32 a[0:31], v0, v1, a[0:31]
+; GFX908-NEXT: v_mfma_f32_32x32x1f32 a[0:31], v1, v0, a[0:31]
; GFX908-NEXT: s_add_i32 s0, s0, -1
; GFX908-NEXT: s_cmp_lg_u32 s0, 0
; GFX908-NEXT: s_cbranch_scc1 .LBB3_1
@@ -886,12 +886,12 @@ define amdgpu_kernel void @test_mfma_loop_unfoldable_seq(ptr addrspace(1) %arg)
; GFX90A-NEXT: v_mov_b32_e32 v0, 0x42f60000
; GFX90A-NEXT: v_accvgpr_write_b32 a0, v0
; GFX90A-NEXT: s_mov_b32 s0, 16
-; GFX90A-NEXT: v_mov_b32_e32 v0, 1.0
-; GFX90A-NEXT: v_mov_b32_e32 v1, 2.0
+; GFX90A-NEXT: v_mov_b32_e32 v0, 2.0
+; GFX90A-NEXT: v_mov_b32_e32 v1, 1.0
; GFX90A-NEXT: .LBB3_1: ; %for.cond.preheader
; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX90A-NEXT: s_nop 1
-; GFX90A-NEXT: v_mfma_f32_32x32x1f32 a[0:31], v0, v1, a[0:31]
+; GFX90A-NEXT: v_mfma_f32_32x32x1f32 a[0:31], v1, v0, a[0:31]
; GFX90A-NEXT: s_add_i32 s0, s0, -1
; GFX90A-NEXT: s_cmp_lg_u32 s0, 0
; GFX90A-NEXT: s_cbranch_scc1 .LBB3_1
@@ -977,12 +977,12 @@ define amdgpu_kernel void @test_mfma_loop_unfoldable_seq(ptr addrspace(1) %arg)
; GFX942-NEXT: v_mov_b32_e32 v0, 0x42f60000
; GFX942-NEXT: v_accvgpr_write_b32 a0, v0
; GFX942-NEXT: s_mov_b32 s0, 16
-; GFX942-NEXT: v_mov_b32_e32 v0, 1.0
-; GFX942-NEXT: v_mov_b32_e32 v1, 2.0
+; GFX942-NEXT: v_mov_b32_e32 v0, 2.0
+; GFX942-NEXT: v_mov_b32_e32 v1, 1.0
; GFX942-NEXT: .LBB3_1: ; %for.cond.preheader
; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mfma_f32_32x32x1_2b_f32 a[0:31], v0, v1, a[0:31]
+; GFX942-NEXT: v_mfma_f32_32x32x1_2b_f32 a[0:31], v1, v0, a[0:31]
; GFX942-NEXT: s_add_i32 s0, s0, -1
; GFX942-NEXT: s_cmp_lg_u32 s0, 0
; GFX942-NEXT: s_cbranch_scc1 .LBB3_1
@@ -1052,12 +1052,12 @@ define amdgpu_kernel void @test_mfma_loop_vgpr_init(ptr addrspace(1) %arg) #0 {
; GFX908-NEXT: v_accvgpr_write_b32 a1, v0
; GFX908-NEXT: v_accvgpr_write_b32 a0, v0
; GFX908-NEXT: s_mov_b32 s0, 16
-; GFX908-NEXT: v_mov_b32_e32 v0, 1.0
-; GFX908-NEXT: v_mov_b32_e32 v1, 2.0
+; GFX908-NEXT: v_mov_b32_e32 v0, 2.0
+; GFX908-NEXT: v_mov_b32_e32 v1, 1.0
; GFX908-NEXT: .LBB4_1: ; %for.cond.preheader
; GFX908-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX908-NEXT: s_nop 1
-; GFX908-NEXT: v_mfma_f32_32x32x1f32 a[0:31], v0, v1, a[0:31]
+; GFX908-NEXT: v_mfma_f32_32x32x1f32 a[0:31], v1, v0, a[0:31]
; GFX908-NEXT: s_add_i32 s0, s0, -1
; GFX908-NEXT: s_cmp_lg_u32 s0, 0
; GFX908-NEXT: s_cbranch_scc1 .LBB4_1
@@ -1144,12 +1144,12 @@ define amdgpu_kernel void @test_mfma_loop_vgpr_init(ptr addrspace(1) %arg) #0 {
; GFX90A-NEXT: v_accvgpr_write_b32 a1, v0
; GFX90A-NEXT: v_accvgpr_write_b32 a0, v0
; GFX90A-NEXT: s_mov_b32 s0, 16
-; GFX90A-NEXT: v_mov_b32_e32 v0, 1.0
-; GFX90A-NEXT: v_mov_b32_e32 v1, 2.0
+; GFX90A-NEXT: v_mov_b32_e32 v0, 2.0
+; GFX90A-NEXT: v_mov_b32_e32 v1, 1.0
; GFX90A-NEXT: .LBB4_1: ; %for.cond.preheader
; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX90A-NEXT: s_nop 1
-; GFX90A-NEXT: v_mfma_f32_32x32x1f32 a[0:31], v0, v1, a[0:31]
+; GFX90A-NEXT: v_mfma_f32_32x32x1f32 a[0:31], v1, v0, a[0:31]
; GFX90A-NEXT: s_add_i32 s0, s0, -1
; GFX90A-NEXT: s_cmp_lg_u32 s0, 0
; GFX90A-NEXT: s_cbranch_scc1 .LBB4_1
@@ -1204,12 +1204,12 @@ define amdgpu_kernel void @test_mfma_loop_vgpr_init(ptr addrspace(1) %arg) #0 {
; GFX942-NEXT: v_accvgpr_write_b32 a1, v0
; GFX942-NEXT: v_accvgpr_write_b32 a0, v0
; GFX942-NEXT: s_mov_b32 s0, 16
-; GFX942-NEXT: v_mov_b32_e32 v0, 1.0
-; GFX942-NEXT: v_mov_b32_e32 v1, 2.0
+; GFX942-NEXT: v_mov_b32_e32 v0, 2.0
+; GFX942-NEXT: v_mov_b32_e32 v1, 1.0
; GFX942-NEXT: .LBB4_1: ; %for.cond.preheader
; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mfma_f32_32x32x1_2b_f32 a[0:31], v0, v1, a[0:31]
+; GFX942-NEXT: v_mfma_f32_32x32x1_2b_f32 a[0:31], v1, v0, a[0:31]
; GFX942-NEXT: s_add_i32 s0, s0, -1
; GFX942-NEXT: s_cmp_lg_u32 s0, 0
; GFX942-NEXT: s_cbranch_scc1 .LBB4_1
@@ -1282,7 +1282,7 @@ define amdgpu_kernel void @test_mfma_loop_sgpr_init(ptr addrspace(1) %arg, float
; GFX908-LABEL: test_mfma_loop_sgpr_init:
; GFX908: ; %bb.0: ; %entry
; GFX908-NEXT: s_load_dword s0, s[4:5], 0x2c
-; GFX908-NEXT: v_mov_b32_e32 v1, 2.0
+; GFX908-NEXT: v_mov_b32_e32 v1, 1.0
; GFX908-NEXT: s_waitcnt lgkmcnt(0)
; GFX908-NEXT: v_mov_b32_e32 v0, s0
; GFX908-NEXT: s_mov_b32 s0, 16
@@ -1319,11 +1319,11 @@ define amdgpu_kernel void @test_mfma_loop_sgpr_init(ptr addrspace(1) %arg, float
; GFX908-NEXT: v_accvgpr_write_b32 a2, v0
; GFX908-NEXT: v_accvgpr_write_b32 a1, v0
; GFX908-NEXT: v_accvgpr_write_b32 a0, v0
-; GFX908-NEXT: v_mov_b32_e32 v0, 1.0
+; GFX908-NEXT: v_mov_b32_e32 v0, 2.0
; GFX908-NEXT: .LBB5_1: ; %for.cond.preheader
; GFX908-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX908-NEXT: s_nop 1
-; GFX908-NEXT: v_mfma_f32_32x32x1f32 a[0:31], v0, v1, a[0:31]
+; GFX908-NEXT: v_mfma_f32_32x32x1f32 a[0:31], v1, v0, a[0:31]
; GFX908-NEXT: s_add_i32 s0, s0, -1
; GFX908-NEXT: s_cmp_lg_u32 s0, 0
; GFX908-NEXT: s_cbranch_scc1 .LBB5_1
@@ -1378,7 +1378,7 @@ define amdgpu_kernel void @test_mfma_loop_sgpr_init(ptr addrspace(1) %arg, float
; GFX90A: ; %bb.0: ; %entry
; GFX90A-NEXT: s_load_dword s1, s[4:5], 0x2c
; GFX90A-NEXT: s_mov_b32 s0, 16
-; GFX90A-NEXT: v_mov_b32_e32 v1, 2.0
+; GFX90A-NEXT: v_mov_b32_e32 v1, 1.0
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NEXT: v_mov_b32_e32 v0, s1
; GFX90A-NEXT: v_accvgpr_write_b32 a31, v0
@@ -1413,11 +1413,11 @@ define amdgpu_kernel void @test_mfma_loop_sgpr_init(ptr addrspace(1) %arg, float
; GFX90A-NEXT: v_accvgpr_write_b32 a2, v0
; GFX90A-NEXT: v_accvgpr_write_b32 a1, v0
; GFX90A-NEXT: v_accvgpr_write_b32 a0, v0
-; GFX90A-NEXT: v_mov_b32_e32 v0, 1.0
+; GFX90A-NEXT: v_mov_b32_e32 v0, 2.0
; GFX90A-NEXT: .LBB5_1: ; %for.cond.preheader
; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX90A-NEXT: s_nop 1
-; GFX90A-NEXT: v_mfma_f32_32x32x1f32 a[0:31], v0, v1, a[0:31]
+; GFX90A-NEXT: v_mfma_f32_32x32x1f32 a[0:31], v1, v0, a[0:31]
; GFX90A-NEXT: s_add_i32 s0, s0, -1
; GFX90A-NEXT: s_cmp_lg_u32 s0, 0
; GFX90A-NEXT: s_cbranch_scc1 .LBB5_1
@@ -1440,7 +1440,7 @@ define amdgpu_kernel void @test_mfma_loop_sgpr_init(ptr addrspace(1) %arg, float
; GFX942: ; %bb.0: ; %entry
; GFX942-NEXT: s_load_dword s1, s[4:5], 0x2c
; GFX942-NEXT: s_mov_b32 s0, 16
-; GFX942-NEXT: v_mov_b32_e32 v1, 2.0
+; GFX942-NEXT: v_mov_b32_e32 v1, 1.0
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-NEXT: v_mov_b32_e32 v0, s1
; GFX942-NEXT: v_accvgpr_write_b32 a31, v0
@@ -1475,11 +1475,11 @@ define amdgpu_kernel void @test_mfma_loop_sgpr_init(ptr addrspace(1) %arg, float
; GFX942-NEXT: v_accvgpr_write_b32 a2, v0
; GFX942-NEXT: v_accvgpr_write_b32 a1, v0
; GFX942-NEXT: v_accvgpr_write_b32 a0, v0
-; GFX942-NEXT: v_mov_b32_e32 v0, 1.0
+; GFX942-NEXT: v_mov_b32_e32 v0, 2.0
; GFX942-NEXT: .LBB5_1: ; %for.cond.preheader
; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mfma_f32_32x32x1_2b_f32 a[0:31], v0, v1, a[0:31]
+; GFX942-NEXT: v_mfma_f32_32x32x1_2b_f32 a[0:31], v1, v0, a[0:31]
; GFX942-NEXT: s_add_i32 s0, s0, -1
; GFX942-NEXT: s_cmp_lg_u32 s0, 0
; GFX942-NEXT: s_cbranch_scc1 .LBB5_1
@@ -1585,12 +1585,12 @@ define amdgpu_kernel void @test_mfma_loop_mixed_init(ptr addrspace(1) %arg, floa
; GFX908-NEXT: v_accvgpr_write_b32 a2, 0
; GFX908-NEXT: v_accvgpr_write_b32 a1, v0
; GFX908-NEXT: s_mov_b32 s0, 16
-; GFX908-NEXT: v_mov_b32_e32 v0, 1.0
-; GFX908-NEXT: v_mov_b32_e32 v1, 2.0
+; GFX908-NEXT: v_mov_b32_e32 v0, 2.0
+; GFX908-NEXT: v_mov_b32_e32 v1, 1.0
; GFX908-NEXT: .LBB6_1: ; %for.cond.preheader
; GFX908-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX908-NEXT: s_nop 1
-; GFX908-NEXT: v_mfma_f32_32x32x1f32 a[0:31], v0, v1, a[0:31]
+; GFX908-NEXT: v_mfma_f32_32x32x1f32 a[0:31], v1, v0, a[0:31]
; GFX908-NEXT: s_add_i32 s0, s0, -1
; GFX908-NEXT: s_cmp_lg_u32 s0, 0
; GFX908-NEXT: s_cbranch_scc1 .LBB6_1
@@ -1680,12 +1680,12 @@ define amdgpu_kernel void @test_mfma_loop_mixed_init(ptr addrspace(1) %arg, floa
; GFX90A-NEXT: v_accvgpr_write_b32 a2, 0
; GFX90A-NEXT: s_mov_b32 s0, 16
; GFX90A-NEXT: v_accvgpr_write_b32 a1, v0
-; GFX90A-NEXT: v_mov_b32_e32 v0, 1.0
-; GFX90A-NEXT: v_mov_b32_e32 v1, 2.0
+; GFX90A-NEXT: v_mov_b32_e32 v0, 2.0
+; GFX90A-NEXT: v_mov_b32_e32 v1, 1.0
; GFX90A-NEXT: .LBB6_1: ; %for.cond.preheader
; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX90A-NEXT: s_nop 1
-; GFX90A-NEXT: v_mfma_f32_32x32x1f32 a[0:31], v0, v1, a[0:31]
+; GFX90A-NEXT: v_mfma_f32_32x32x1f32 a[0:31], v1, v0, a[0:31]
; GFX90A-NEXT: s_add_i32 s0, s0, -1
; GFX90A-NEXT: s_cmp_lg_u32 s0, 0
; GFX90A-NEXT: s_cbranch_scc1 .LBB6_1
@@ -1743,12 +1743,12 @@ define amdgpu_kernel void @test_mfma_loop_mixed_init(ptr addrspace(1) %arg, floa
; GFX942-NEXT: v_accvgpr_write_b32 a2, 0
; GFX942-NEXT: s_mov_b32 s0, 16
; GFX942-NEXT: v_accvgpr_write_b32 a1, v0
-; GFX942-NEXT: v_mov_b32_e32 v0, 1.0
-; GFX942-NEXT: v_mov_b32_e32 v1, 2.0
+; GFX942-NEXT: v_mov_b32_e32 v0, 2.0
+; GFX942-NEXT: v_mov_b32_e32 v1, 1.0
; GFX942-NEXT: .LBB6_1: ; %for.cond.preheader
; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mfma_f32_32x32x1_2b_f32 a[0:31], v0, v1, a[0:31]
+; GFX942-NEXT: v_mfma_f32_32x32x1_2b_f32 a[0:31], v1, v0, a[0:31]
; GFX942-NEXT: s_add_i32 s0, s0, -1
; GFX942-NEXT: s_cmp_lg_u32 s0, 0
; GFX942-NEXT: s_cbranch_scc1 .LBB6_1
@@ -2565,12 +2565,12 @@ define <32 x float> @test_mfma_loop_zeroinit_ret_use() #0 {
; GFX908-NEXT: v_accvgpr_write_b32 a1, 0
; GFX908-NEXT: v_accvgpr_write_b32 a0, 0
; GFX908-NEXT: s_mov_b32 s4, 16
-; GFX908-NEXT: v_mov_b32_e32 v0, 1.0
-; GFX908-NEXT: v_mov_b32_e32 v1, 2.0
+; GFX908-NEXT: v_mov_b32_e32 v0, 2.0
+; GFX908-NEXT: v_mov_b32_e32 v1, 1.0
; GFX908-NEXT: .LBB10_1: ; %for.cond.preheader
; GFX908-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX908-NEXT: s_nop 1
-; GFX908-NEXT: v_mfma_f32_32x32x1f32 a[0:31], v0, v1, a[0:31]
+; GFX908-NEXT: v_mfma_f32_32x32x1f32 a[0:31], v1, v0, a[0:31]
; GFX908-NEXT: s_add_i32 s4, s4, -1
; GFX908-NEXT: s_cmp_lg_u32 s4, 0
; GFX908-NEXT: s_cbranch_scc1 .LBB10_1
@@ -2646,12 +2646,12 @@ define <32 x float> @test_mfma_loop_zeroinit_ret_use() #0 {
; GFX90A-NEXT: v_accvgpr_write_b32 a1, 0
; GFX90A-NEXT: v_accvgpr_write_b32 a0, 0
; GFX90A-NEXT: s_mov_b32 s4, 16
-; GFX90A-NEXT: v_mov_b32_e32 v0, 1.0
-; GFX90A-NEXT: v_mov_b32_e32 v1, 2.0
+; GFX90A-NEXT: v_mov_b32_e32 v0, 2.0
+; GFX90A-NEXT: v_mov_b32_e32 v1, 1.0
; GFX90A-NEXT: .LBB10_1: ; %for.cond.preheader
; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX90A-NEXT: s_nop 1
-; GFX90A-NEXT: v_mfma_f32_32x32x1f32 a[0:31], v0, v1, a[0:31]
+; GFX90A-NEXT: v_mfma_f32_32x32x1f32 a[0:31], v1, v0, a[0:31]
; GFX90A-NEXT: s_add_i32 s4, s4, -1
; GFX90A-NEXT: s_cmp_lg_u32 s4, 0
; GFX90A-NEXT: s_cbranch_scc1 .LBB10_1
@@ -2727,12 +2727,12 @@ define <32 x float> @test_mfma_loop_zeroinit_ret_use() #0 {
; GFX942-NEXT: v_accvgpr_write_b32 a1, 0
; GFX942-NEXT: v_accvgpr_write_b32 a0, 0
; GFX942-NEXT: s_mov_b32 s0, 16
-; GFX942-NEXT: v_mov_b32_e32 v0, 1.0
-; GFX942-NEXT: v_mov_b32_e32 v1, 2.0
+; GFX942-NEXT: v_mov_b32_e32 v0, 2.0
+; GFX942-NEXT: v_mov_b32_e32 v1, 1.0
; GFX942-NEXT: .LBB10_1: ; %for.cond.preheader
; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mfma_f32_32x32x1_2b_f32 a[0:31], v0, v1, a[0:31]
+; GFX942-NEXT: v_mfma_f32_32x32x1_2b_f32 a[0:31], v1, v0, a[0:31]
; GFX942-NEXT: s_add_i32 s0, s0, -1
; GFX942-NEXT: s_cmp_lg_u32 s0, 0
; GFX942-NEXT: s_cbranch_scc1 .LBB10_1
@@ -2823,12 +2823,12 @@ define <32 x float> @test_mfma_loop_non_splat_ret_use() #0 {
; GFX908-NEXT: v_accvgpr_write_b32 a2, 0
; GFX908-NEXT: v_accvgpr_write_b32 a0, 0
; GFX908-NEXT: s_mov_b32 s4, 16
-; GFX908-NEXT: v_mov_b32_e32 v0, 2.0
-; GFX908-NEXT: v_mov_b32_e32 v1, 1.0
+; GFX908-NEXT: v_mov_b32_e32 v0, 1.0
+; GFX908-NEXT: v_mov_b32_e32 v1, 2.0
; GFX908-NEXT: .LBB11_1: ; %for.cond.preheader
; GFX908-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX908-NEXT: s_nop 1
-; GFX908-NEXT: v_mfma_f32_32x32x1f32 a[0:31], v1, v0, a[0:31]
+; GFX908-NEXT: v_mfma_f32_32x32x1f32 a[0:31], v0, v1, a[0:31]
; GFX908-NEXT: s_add_i32 s4, s4, -1
; GFX908-NEXT: s_cmp_lg_u32 s4, 0
; GFX908-NEXT: s_cbranch_scc1 .LBB11_1
@@ -2904,12 +2904,12 @@ define <32 x float> @test_mfma_loop_non_splat_ret_use() #0 {
; GFX90A-NEXT: v_accvgpr_write_b32 a2, 0
; GFX90A-NEXT: v_accvgpr_write_b32 a0, 0
; GFX90A-NEXT: s_mov_b32 s4, 16
-; GFX90A-NEXT: v_mov_b32_e32 v0, 2.0
-; GFX90A-NEXT: v_mov_b32_e32 v1, 1.0
+; GFX90A-NEXT: v_mov_b32_e32 v0, 1.0
+; GFX90A-NEXT: v_mov_b32_e32 v1, 2.0
; GFX90A-NEXT: .LBB11_1: ; %for.cond.preheader
; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX90A-NEXT: s_nop 1
-; GFX90A-NEXT: v_mfma_f32_32x32x1f32 a[0:31], v1, v0, a[0:31]
+; GFX90A-NEXT: v_mfma_f32_32x32x1f32 a[0:31], v0, v1, a[0:31]
; GFX90A-NEXT: s_add_i32 s4, s4, -1
; GFX90A-NEXT: s_cmp_lg_u32 s4, 0
; GFX90A-NEXT: s_cbranch_scc1 .LBB11_1
@@ -2985,12 +2985,12 @@ define <32 x float> @test_mfma_loop_non_splat_ret_use() #0 {
; GFX942-NEXT: v_accvgpr_write_b32 a2, 0
; GFX942-NEXT: v_accvgpr_write_b32 a0, 0
; GFX942-NEXT: s_mov_b32 s0, 16
-; GFX942-NEXT: v_mov_b32_e32 v0, 2.0
-; GFX942-NEXT: v_mov_b32_e32 v1, 1.0
+; GFX942-NEXT: v_mov_b32_e32 v0, 1.0
+; GFX942-NEXT: v_mov_b32_e32 v1, 2.0
; GFX942-NEXT: .LBB11_1: ; %for.cond.preheader
; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mfma_f32_32x32x1_2b_f32 a[0:31], v1, v0, a[0:31]
+; GFX942-NEXT: v_mfma_f32_32x32x1_2b_f32 a[0:31], v0, v1, a[0:31]
; GFX942-NEXT: s_add_i32 s0, s0, -1
; GFX942-NEXT: s_cmp_lg_u32 s0, 0
; GFX942-NEXT: s_cbranch_scc1 .LBB11_1
diff --git a/llvm/test/CodeGen/AMDGPU/sched-assert-dead-def-subreg-use-other-subreg.mir b/llvm/test/CodeGen/AMDGPU/sched-assert-dead-def-subreg-use-other-subreg.mir
index 64a1752aef874..af882c06e1b4e 100644
--- a/llvm/test/CodeGen/AMDGPU/sched-assert-dead-def-subreg-use-other-subreg.mir
+++ b/llvm/test/CodeGen/AMDGPU/sched-assert-dead-def-subreg-use-other-subreg.mir
@@ -24,7 +24,7 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: undef [[V_MOV_B32_e32_:%[0-9]+]].sub3:vreg_512 = V_MOV_B32_e32 0, implicit $exec
; CHECK-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- ; CHECK-NEXT: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 0, [[V_MOV_B32_e32_1]], implicit $exec
+ ; CHECK-NEXT: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_512 = COPY [[V_MOV_B32_e32_]]
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
@@ -33,17 +33,17 @@ body: |
; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN [[V_MOV_B32_e32_]].sub3, undef %5:vgpr_32, $sgpr24_sgpr25_sgpr26_sgpr27, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32), align 8, addrspace 5)
; CHECK-NEXT: dead [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 undef %7:vgpr_32, 0, 0, implicit $exec
; CHECK-NEXT: dead [[DS_READ_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_READ_B64_gfx9 [[V_MOV_B32_e32_1]], 0, 0, implicit $exec
+ ; CHECK-NEXT: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 0, [[V_MOV_B32_e32_1]], implicit $exec
; CHECK-NEXT: dead [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128 = DS_READ_B128_gfx9 [[V_ADD_U32_e32_]], 0, 0, implicit $exec
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
; CHECK-NEXT: undef [[COPY2:%[0-9]+]].sub1:vreg_512 = COPY [[COPY]].sub1
; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, 1835018 /* regdef:VGPR_32 */, def dead [[COPY1]], 1835018 /* regdef:VGPR_32 */, def dead [[COPY]].sub1, 1835017 /* reguse:VGPR_32 */, [[COPY1]], 1835017 /* reguse:VGPR_32 */, [[COPY]].sub1
; CHECK-NEXT: [[COPY2:%[0-9]+]].sub0:vreg_512 = COPY [[COPY]].sub0
; CHECK-NEXT: [[COPY2:%[0-9]+]].sub3:vreg_512 = COPY [[COPY]].sub3
- ; CHECK-NEXT: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- ; CHECK-NEXT: dead [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 4, [[V_MOV_B32_e32_2]], implicit-def dead $vcc, implicit $exec
; CHECK-NEXT: [[COPY2:%[0-9]+]].sub2:vreg_512 = COPY undef [[V_MOV_B32_e32_1]]
; CHECK-NEXT: [[COPY2:%[0-9]+]].sub5:vreg_512 = COPY undef [[V_MOV_B32_e32_1]]
; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_512 = COPY [[COPY2]]
+ ; CHECK-NEXT: dead [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 4, [[V_MOV_B32_e32_2]], implicit-def dead $vcc, implicit $exec
; CHECK-NEXT: S_BRANCH %bb.1
bb.0:
liveins: $sgpr6_sgpr7
>From 6291135ddf50201c8ae196094ef13d7354782d44 Mon Sep 17 00:00:00 2001
From: Lucas Ramirez <lucas.rami at proton.me>
Date: Tue, 30 Sep 2025 14:41:37 +0000
Subject: [PATCH 06/10] Address more feedback
- Use SchedModel instead of instruction itinerary
- Don't use getNumConvertedRegs to get number of regs, use RC instead.
- Clarify some comments.
- Other minor changes.
---
llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp | 90 +++++++++++++--------
llvm/lib/Target/AMDGPU/GCNSchedStrategy.h | 43 +++++++---
2 files changed, 88 insertions(+), 45 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index 48245268919b7..da518bd784877 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -1225,7 +1225,7 @@ bool PreRARematStage::initGCNSchedStage() {
<< "] REMAT (always) | " << *Remat.DefMI);
rematerialize(Remat, RecomputeRP);
} else {
- ScoredRemats.emplace_back(&Remat, DAG.ST, *DAG.TII);
+ ScoredRemats.emplace_back(&Remat, DAG);
}
}
unsetSatisifedRPTargets(RescheduleRegions);
@@ -1286,9 +1286,12 @@ bool PreRARematStage::initGCNSchedStage() {
REMAT_DEBUG(dbgs() << "[" << MIRegion[Remat.DefMI] << "] REMAT *" << Score
<< "* | " << *Remat.DefMI);
MachineInstr *RematMI = rematerialize(Remat, RecomputeRP);
- // Every rematerialization done with the objective of increasing occupancy
- // increases latency. If we don't manage to increase occupancy, we want to
- // roll them back.
+ // Every rematerialization we do here is likely to move the instruction
+ // into a higher frequency region, increasing the total sum latency of the
+ // instruction itself. This is acceptable if we are eliminating a spill in
+ // the process, but when the goal is increasing occupancy we get nothing
+ // out of rematerialization if occupancy is not increased in the end; in
+ // such cases we want to roll back the rematerialization.
if (TargetOcc)
Rollbackable.push_back({RematMI, &Remat});
unsetSatisifedRPTargets(Remat.Live);
@@ -1315,7 +1318,7 @@ bool PreRARematStage::initGCNSchedStage() {
DAG.Pressure[I] = RP;
unsigned NewRegionOcc = RP.getOccupancy(ST, DynamicVGPRBlockSize);
AchievedOcc = std::min(AchievedOcc, NewRegionOcc);
- REMAT_DEBUG(dbgs() << "[" << I << "] Achieved occupancy " << NewRegionOcc
+ REMAT_DEBUG(dbgs() << '[' << I << "] Achieved occupancy " << NewRegionOcc
<< " (" << RPTargets[I] << ")\n");
}
@@ -1878,9 +1881,9 @@ bool PreRARematStage::collectRematRegs(ArrayRef<uint64_t> RegionFreq) {
// regions containing rematerializable instructions.
DAG.RegionLiveOuts.buildLiveRegMap();
- // Set of registers already marked for potential remterialization; used for
- // remat chains checks.
- DenseSet<Register> RematRegSet;
+ // Set of registers already marked for potential rematerialization; used to
+ // avoid rematerialization chains.
+ SmallSet<Register, 4> RematRegSet;
auto IsMORematable = [&RematRegSet](const MachineOperand &MO) -> bool {
return MO.isReg() && RematRegSet.contains(MO.getReg());
};
@@ -1979,15 +1982,45 @@ PreRARematStage::RematReg::insertMI(unsigned RegionIdx,
return NewMI;
}
+unsigned PreRARematStage::ScoredRemat::getNumRegs(
+ const GCNScheduleDAGMILive &DAG) const {
+ // FIXME: this doesn't account for the fact that the rematerialization may be
+ // for a subregister. In that case we will overestimate the number of
+ // registers involved. This is acceptable since this is purely used for the
+ // scoring heuristic, but we should find a way to compute the number of
+ // registers actually covered by the register/subregister pair.
+ Register Reg = Remat->DefMI->getOperand(0).getReg();
+ const TargetRegisterClass &RC = *DAG.MRI.getRegClass(Reg);
+ return divideCeil(DAG.TRI->getRegSizeInBits(RC), 32);
+}
+
+unsigned PreRARematStage::ScoredRemat::getLatencyGain(
+ const GCNScheduleDAGMILive &DAG) const {
+ if (hasUnknownLatencyGain())
+ return 0;
+
+ const TargetSchedModel &SchedModel = DAG.ST.getInstrInfo()->getSchedModel();
+
+ // Rematerializing the register to its using region changes the number of
+ // times we will execute it in total.
+ unsigned FreqDiff = Remat->UseFrequency - Remat->DefFrequency;
+ int RematLatDiff = FreqDiff * SchedModel.computeInstrLatency(Remat->DefMI);
+
+ // We assume that spilling the register means we have to insert a save in its
+ // defining region and a restore in its using region. Spill instruction
+ // opcodes do not have corresponding scheduling models so we cannot accurately
+ // estimate their latency. Since this is just meant as a heuristic, use the
+ // default high latency from the MC scheduling model.
+ int SpillLatDiff = SchedModel.getMCSchedModel()->DefaultHighLatency *
+ (Remat->DefFrequency + Remat->UseFrequency);
+
+ return SpillLatDiff - RematLatDiff;
+}
+
PreRARematStage::ScoredRemat::ScoredRemat(const RematReg *Remat,
- const GCNSubtarget &ST,
- const TargetInstrInfo &TII)
- : Remat(Remat) {
- const InstrItineraryData *Itin = ST.getInstrItineraryData();
- if (Remat->DefFrequency && Remat->UseFrequency) {
- InstrLatencyGain = Remat->DefFrequency - Remat->UseFrequency;
- *InstrLatencyGain *= TII.getInstrLatency(Itin, *Remat->DefMI);
- }
+ const GCNScheduleDAGMILive &DAG)
+ : Remat(Remat), NumRegs(getNumRegs(DAG)),
+ RematLatencyGainOverSpill(getLatencyGain(DAG)) {
resetScore();
}
@@ -2011,20 +2044,13 @@ void PreRARematStage::ScoredRemat::update(const BitVector &TargetRegions,
// we get by increasing occupancy and compare it to the latency hit each wave
// will be subjected to.
if (ReduceSpill) {
- // It may be better to let the register spill if it is defined by a very
- // high latency instruction. Try to estimate the latency gain induced by
- // rematerializing the register.
- //
- // If we don't know the rematerializations's latency gain we don't know
- // what to compare the spill latency against. We still consider the
- // rematerialization potentially beneficial in such cases because we don't
- // want to miss rematerialization opportunities and rematerializing is in
- // most cases cheaper than spilling. We still give a bonus to remats for
- // which we are able to do the calculation.
- if (InstrLatencyGain && *InstrLatencyGain < 0) {
- int SpillLatencyGain = SaveCost * Remat->DefFrequency;
- SpillLatencyGain += RestoreCost * Remat->UseFrequency;
- if (*InstrLatencyGain + SpillLatencyGain < 0)
+ // If we don't know the latency gain, we still consider the
+ // rematerialization potentially beneficial because we don't want to miss
+ // rematerialization opportunities and rematerializing is in most cases
+ // cheaper than spilling. We still give a bonus to remats for which we are
+ // able to do the calculation.
+ if (!hasUnknownLatencyGain()) {
+ if (RematLatencyGainOverSpill < 0)
return setUselessRemat();
setKnownLatencyGain();
}
@@ -2033,7 +2059,7 @@ void PreRARematStage::ScoredRemat::update(const BitVector &TargetRegions,
// The estimated RP reduction is proportional to the total frequency in target
// regions where the register is live.
Register Reg = Remat->DefMI->getOperand(0).getReg();
- unsigned RPScore = 0;
+ ScoreTy RPScore = 0;
for (unsigned I : TargetRegions.set_bits()) {
unsigned Freq = std::max(RegionFreq[I], static_cast<uint64_t>(1));
if (Remat->isBeneficialRegion(I))
@@ -2044,7 +2070,7 @@ void PreRARematStage::ScoredRemat::update(const BitVector &TargetRegions,
// The estimated RP reduction is directly proportional to the size of the
// rematerializable register.
- setRPScore(RPScore * SIRegisterInfo::getNumCoveredRegs(Remat->Mask));
+ setRPScore(RPScore * NumRegs);
}
MachineInstr *PreRARematStage::rematerialize(const RematReg &Remat,
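
As a rough illustration of the spill-versus-remat comparison in getLatencyGain above (this sketch is not part of the patch; every frequency and latency value below is invented for the example), the arithmetic works out as follows:

// Standalone sketch of the latency heuristic: compare the extra executions a
// rematerialized def incurs against the save/restore cost of spilling.
// All numbers are assumed values chosen only for illustration.
#include <cstdint>
#include <iostream>

int main() {
  uint64_t DefFrequency = 1;   // defining region runs once
  uint64_t UseFrequency = 16;  // using region (e.g. a loop body) runs 16 times
  int InstrLatency = 4;        // assumed latency of the rematerialized def
  int DefaultHighLatency = 10; // stand-in for the MC sched model's default

  // Rematerializing moves the def into the using region, so it executes
  // UseFrequency times instead of DefFrequency times.
  int RematLatDiff = int(UseFrequency - DefFrequency) * InstrLatency; // 60

  // Spilling is modeled as one save in the defining region plus one restore
  // in the using region, each at the default high latency.
  int SpillLatDiff = DefaultHighLatency * int(DefFrequency + UseFrequency); // 170

  // Positive result: rematerializing is estimated cheaper than spilling.
  std::cout << "gain over spill = " << SpillLatDiff - RematLatDiff << "\n"; // 110
  return 0;
}
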
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
index 8694218cdbb78..8d378b982b6a3 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
@@ -441,6 +441,19 @@ class ClusteredLowOccStage : public GCNSchedStage {
/// reducing spilling or increasing occupancy is possible, it tries to
/// rematerialize as few registers as possible to reduce potential negative
/// effects on function latency.
+///
+/// The stage only supports rematerializing registers that meet all of the
+/// following constraints.
+/// 1. The register is virtual and has a single defining instruction.
+/// 2. The single defining instruction is either deemed rematerializable by the
+/// target-independent logic, or if not, has no non-constant and
+/// non-ignorable physical register use.
+/// 3. The register has no virtual register use whose live range would be
+/// extended by the rematerialization.
+/// 4. The register has a single non-debug user in a different region from its
+/// defining region.
+/// 5. The register neither uses nor is used by another register that is also
+///    going to be rematerialized.
class PreRARematStage : public GCNSchedStage {
private:
/// Groups information about a rematerializable register.
@@ -520,8 +533,7 @@ class PreRARematStage : public GCNSchedStage {
/// This only initializes state-independent characteristics of \p Remat, not
/// the actual score.
- ScoredRemat(const RematReg *Remat, const GCNSubtarget &ST,
- const TargetInstrInfo &TII);
+ ScoredRemat(const RematReg *Remat, const GCNScheduleDAGMILive &DAG);
/// Updates the rematerialization's score w.r.t. the current \p RPTargets.
/// \p RegionFreq indicates the frequency of each region
@@ -540,19 +552,22 @@ class PreRARematStage : public GCNSchedStage {
}
private:
- /// Estimated save/restore latency costs for spilling a register to stack.
- /// FIXME: These numbers are very arbitrary. Need a good rationale for them,
- /// which I don't know where to get from.
- static constexpr int SaveCost = 100, RestoreCost = 100;
/// Per-region contribution weights to RP score depending on whether RP is
/// guaranteed or only likely to be reduced in the region. Only their
/// relative value w.r.t. one another matter.
static constexpr int WeightRP = 10, WeightRPMaybe = 5;
- /// Latency gain induced by rematerializing the instruction. Does not
- /// include estimated spilling cost of *not* rematerializing (save/restore
- /// to/from stack).
- std::optional<int> InstrLatencyGain = std::nullopt;
+ /// Number of 32-bit registers this rematerialization covers.
+ const unsigned NumRegs;
+ /// Latency gain induced by rematerializing the register over spilling its
+ /// defining instruction.
+ const int RematLatencyGainOverSpill;
+
+ /// Whether the latency gain of rematerializing over spilling cannot be
+ /// estimated; estimating it requires knowing defining/using region frequencies.
+ bool hasUnknownLatencyGain() const {
+ return !Remat->DefFrequency || !Remat->UseFrequency;
+ }
using ScoreTy = int32_t;
/// Overall rematerialization score. Scoring components are mapped to bit
@@ -568,9 +583,11 @@ class PreRARematStage : public GCNSchedStage {
void setKnownLatencyGain() { Score |= 1; }
- void setRPScore(unsigned RPScore) {
- Score |= static_cast<ScoreTy>(RPScore) << 1;
- }
+ void setRPScore(ScoreTy RPScore) { Score |= RPScore << 1; }
+
+ unsigned getNumRegs(const GCNScheduleDAGMILive &DAG) const;
+
+ unsigned getLatencyGain(const GCNScheduleDAGMILive &DAG) const;
};
/// Maps all MIs (except lone terminators, which are not part of any region)
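
For readers following this revision's scoring, the packed Score above (bit 0 for a known latency gain, the RP score in the higher bits) behaves like the small standalone sketch below; this is illustration only, and the packScore helper is invented here rather than taken from the patch.

// Sketch of this revision's bit-packed score: the RP score dominates and the
// known-latency-gain bit acts as a tie-breaker when RP scores are equal.
#include <cassert>
#include <cstdint>

using ScoreTy = int32_t;

static ScoreTy packScore(ScoreTy RPScore, bool KnownLatencyGain) {
  ScoreTy Score = RPScore << 1;
  if (KnownLatencyGain)
    Score |= 1;
  return Score;
}

int main() {
  // Equal RP scores: the remat with a known latency gain wins the tie.
  assert(packScore(8, /*KnownLatencyGain=*/true) > packScore(8, false));
  // A larger RP score always dominates the latency bit.
  assert(packScore(9, false) > packScore(8, true));
  return 0;
}
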
>From 846238b1fc8943f599aff1dc45055bc33b5e508b Mon Sep 17 00:00:00 2001
From: Lucas Ramirez <lucas.rami at proton.me>
Date: Tue, 30 Sep 2025 22:49:21 +0000
Subject: [PATCH 07/10] Rebase for new test + improve comment
---
llvm/lib/Target/AMDGPU/GCNSchedStrategy.h | 5 ++++-
llvm/test/CodeGen/AMDGPU/uniform-alignbit.ll | 6 +++---
2 files changed, 7 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
index 8d378b982b6a3..2cf8ec993092b 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
@@ -484,6 +484,9 @@ class PreRARematStage : public GCNSchedStage {
}
 /// Returns whether it is always beneficial to rematerialize this register.
+ /// These are rematerializations that never move instructions into higher
+ /// frequency regions and at least shorten live intervals, so they are
+ /// always useful irrespective of RP targets.
bool isAlwaysBeneficial() const {
// When the using region is executed a single time, we know
// rematerializing will be beneficial whatever the defining region's
@@ -555,7 +558,7 @@ class PreRARematStage : public GCNSchedStage {
/// Per-region contribution weights to RP score depending on whether RP is
/// guaranteed or only likely to be reduced in the region. Only their
/// relative value w.r.t. one another matter.
- static constexpr int WeightRP = 10, WeightRPMaybe = 5;
+ static constexpr int WeightRP = 2, WeightRPMaybe = 1;
/// Number of 32-bit registers this rematerialization covers.
const unsigned NumRegs;
diff --git a/llvm/test/CodeGen/AMDGPU/uniform-alignbit.ll b/llvm/test/CodeGen/AMDGPU/uniform-alignbit.ll
index fe8c90ee7b686..5de84eed410c6 100644
--- a/llvm/test/CodeGen/AMDGPU/uniform-alignbit.ll
+++ b/llvm/test/CodeGen/AMDGPU/uniform-alignbit.ll
@@ -11,11 +11,11 @@ define amdgpu_kernel void @uniform_build_vector(i64 %in, ptr addrspace(1) %out)
; GCN-NEXT: s_mov_b32 s6, s5
; GCN-NEXT: s_mov_b32 s7, s5
; GCN-NEXT: buffer_load_dwordx4 v[0:3], off, s[4:7], 0
-; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: v_mov_b32_e32 v1, 0
; GCN-NEXT: ; sched_barrier mask(0x00000000)
+; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_and_b32_e32 v0, 1, v0
; GCN-NEXT: v_cvt_f32_ubyte0_e32 v0, v0
+; GCN-NEXT: v_mov_b32_e32 v1, 0
; GCN-NEXT: global_store_dword v1, v0, s[2:3]
; GCN-NEXT: s_endpgm
entry:
@@ -35,4 +35,4 @@ entry:
declare void @llvm.amdgcn.sched.barrier(i32 immarg) #0
; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read)
-declare <4 x i32> @llvm.amdgcn.raw.buffer.load.v4i32(<4 x i32>, i32, i32, i32 immarg) #1
\ No newline at end of file
+declare <4 x i32> @llvm.amdgcn.raw.buffer.load.v4i32(<4 x i32>, i32, i32, i32 immarg) #1
>From 5137519c1789c6e8d490b1a9e39aa8a011f312c1 Mon Sep 17 00:00:00 2001
From: Lucas Ramirez <lucas.rami at proton.me>
Date: Tue, 7 Oct 2025 14:54:26 +0000
Subject: [PATCH 08/10] Refactor scoring system + Remove always benef/latency
calc
- Change components of scoring system.
- Remove rematerialization of always beneficial registers.
- Remove latency calculation in scoring.
- Other small changes.
---
llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp | 382 ++++-----
llvm/lib/Target/AMDGPU/GCNSchedStrategy.h | 184 ++--
.../GlobalISel/llvm.amdgcn.wqm.demote.ll | 20 +-
.../AMDGPU/GlobalISel/vni8-across-blocks.ll | 32 +-
llvm/test/CodeGen/AMDGPU/call-waitcnt.ll | 8 +-
.../AMDGPU/dbg-value-starts-sched-region.mir | 4 +-
.../AMDGPU/debug-value-scheduler-crash.mir | 58 +-
llvm/test/CodeGen/AMDGPU/ds_read2-gfx1250.ll | 6 +-
llvm/test/CodeGen/AMDGPU/ds_read2.ll | 12 +-
.../test/CodeGen/AMDGPU/dynamic_stackalloc.ll | 25 +-
.../CodeGen/AMDGPU/fix-sgpr-copies-wwm.ll | 15 +-
.../CodeGen/AMDGPU/llvm.amdgcn.reduce.add.ll | 16 +-
.../CodeGen/AMDGPU/llvm.amdgcn.reduce.and.ll | 12 +-
.../CodeGen/AMDGPU/llvm.amdgcn.reduce.max.ll | 12 +-
.../CodeGen/AMDGPU/llvm.amdgcn.reduce.min.ll | 12 +-
.../CodeGen/AMDGPU/llvm.amdgcn.reduce.or.ll | 12 +-
.../CodeGen/AMDGPU/llvm.amdgcn.reduce.sub.ll | 16 +-
.../CodeGen/AMDGPU/llvm.amdgcn.reduce.umax.ll | 12 +-
.../CodeGen/AMDGPU/llvm.amdgcn.reduce.umin.ll | 12 +-
.../CodeGen/AMDGPU/llvm.amdgcn.reduce.xor.ll | 12 +-
...ne-scheduler-rematerialization-scoring.mir | 70 +-
...ine-scheduler-sink-trivial-remats-attr.mir | 724 ++++++++--------
.../machine-scheduler-sink-trivial-remats.mir | 804 +++++++++---------
...ne-sink-temporal-divergence-swdev407790.ll | 17 +-
...ched-assert-onlydbg-value-empty-region.mir | 2 +-
llvm/test/CodeGen/AMDGPU/schedule-ilp.mir | 16 +-
.../AMDGPU/spill-empty-live-interval.mir | 2 +-
llvm/test/CodeGen/AMDGPU/uniform-alignbit.ll | 4 +-
.../test/CodeGen/AMDGPU/vni8-across-blocks.ll | 84 +-
llvm/test/CodeGen/AMDGPU/wave32.ll | 4 +-
30 files changed, 1292 insertions(+), 1297 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index da518bd784877..e669daebd2d37 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -1117,25 +1117,32 @@ void PreRARematStage::printTargetRegions(bool PrintAll) const {
dbgs() << REMAT_PREFIX << " [" << I << "] " << RPTargets[I] << '\n';
}
-void PreRARematStage::RematReg::print(
- const DenseMap<MachineInstr *, unsigned> &MIRegion) const {
- dbgs() << REMAT_PREFIX << " [" << MIRegion.at(DefMI) << "] " << *DefMI;
+void PreRARematStage::RematReg::print() const {
+ dbgs() << REMAT_PREFIX << " [" << DefRegion << "] " << *DefMI;
dbgs() << REMAT_PREFIX << " -> used in [" << UseRegion << "] " << *UseMI;
- const unsigned NumRegions = Live.size();
dbgs() << REMAT_PREFIX << " Guaranteed RP reduction in:";
- for (unsigned I = 0; I < NumRegions; ++I) {
- if (isBeneficialRegion(I))
+ for (unsigned I : Live.set_bits()) {
+ if (isUnusedLiveThrough(I))
dbgs() << " [" << I << "]";
}
dbgs() << '\n';
dbgs() << REMAT_PREFIX << " Possible RP reduction in:";
- for (unsigned I = 0; I < NumRegions; ++I) {
- if (isMaybeBeneficialRegion(I))
+ for (unsigned I : Live.set_bits()) {
+ if (!isUnusedLiveThrough(I))
dbgs() << " [" << I << "]";
}
dbgs() << '\n';
}
+void PreRARematStage::ScoredRemat::print() const {
+ ScoreTy ShiftScore = Score;
+ ScoreTy RegionImpact = ShiftScore & ((1 << RegionImpactWidth) - 1);
+ ShiftScore >>= RegionImpactWidth;
+ ScoreTy FreqDiff = ShiftScore & ((1 << FreqDiffWidth) - 1);
+ ShiftScore >>= FreqDiffWidth;
+ ScoreTy MaxFreq = ShiftScore;
+ dbgs() << '(' << MaxFreq << ", " << FreqDiff << ", " << RegionImpact << ')';
+}
#endif
bool PreRARematStage::initGCNSchedStage() {
@@ -1147,6 +1154,11 @@ bool PreRARematStage::initGCNSchedStage() {
if (!GCNSchedStage::initGCNSchedStage() || DAG.Regions.size() <= 1)
return false;
+ // Maps all MIs (except lone terminators, which are not part of any region) to
+ // their parent region. Non-lone terminators are considered part of the region
+ // they delimit.
+ DenseMap<MachineInstr *, unsigned> MIRegion(MF.getInstructionCount());
+
// Before performing any IR modification record the parent region of each MI
// and the parent MBB of each region.
const unsigned NumRegions = DAG.Regions.size();
@@ -1160,15 +1172,19 @@ bool PreRARematStage::initGCNSchedStage() {
RegionBB.push_back(ParentMBB);
}
- setObjective();
+ // Set an objective for the stage based on current RP in each region.
REMAT_DEBUG({
dbgs() << "Analyzing ";
MF.getFunction().printAsOperand(dbgs(), false);
dbgs() << ": ";
- if (TargetRegions.none()) {
- dbgs() << "no objective to achieve, occupancy is maximal at "
- << MFI.getMaxWavesPerEU() << '\n';
- } else if (TargetOcc) {
+ });
+ if (!setObjective()) {
+ LLVM_DEBUG(dbgs() << "no objective to achieve, occupancy is maximal at "
+ << MFI.getMaxWavesPerEU() << '\n');
+ return false;
+ }
+ LLVM_DEBUG({
+ if (TargetOcc) {
dbgs() << "increase occupancy from " << *TargetOcc - 1 << '\n';
} else {
dbgs() << "reduce spilling (minimum target occupancy is "
@@ -1183,12 +1199,14 @@ bool PreRARematStage::initGCNSchedStage() {
assert(DAG.MLI && "MLI not defined in DAG");
MachineBranchProbabilityInfo MBPI;
MachineBlockFrequencyInfo MBFI(MF, MBPI, *DAG.MLI);
- uint64_t EntryFreq = MBFI.getEntryFreq().getFrequency();
- if (EntryFreq) {
- for (const MachineBasicBlock *MBB : RegionBB)
- RegionFreq.push_back(MBFI.getBlockFreq(MBB).getFrequency() / EntryFreq);
- } else {
- RegionFreq.insert(RegionFreq.end(), RegionBB.size(), 0);
+ uint64_t MinFreq = MBFI.getEntryFreq().getFrequency(), MaxFreq = 0;
+ for (const MachineBasicBlock *MBB : RegionBB) {
+ uint64_t BlockFreq = MBFI.getBlockFreq(MBB).getFrequency();
+ RegionFreq.push_back(BlockFreq);
+ if (BlockFreq < MinFreq)
+ MinFreq = BlockFreq;
+ else if (BlockFreq > MaxFreq)
+ MaxFreq = BlockFreq;
}
REMAT_DEBUG({
dbgs() << "Region frequencies:\n";
@@ -1202,43 +1220,27 @@ bool PreRARematStage::initGCNSchedStage() {
}
});
- if (!collectRematRegs(RegionFreq)) {
+ if (!collectRematRegs(MIRegion, RegionFreq)) {
REMAT_DEBUG(dbgs() << "No rematerializable registers\n");
return false;
}
-
REMAT_DEBUG({
dbgs() << "Rematerializable registers:\n";
for (const RematReg &Remat : RematRegs)
- Remat.print(MIRegion);
+ Remat.print();
});
- // Start by rematerializing always beneficial registers. These should never
- // be rollbacked. All other rematerialization candidates get added to list of
- // rematerializations that will be scored.
- REMAT_DEBUG(dbgs() << "==== ALWAYS BENEFICIAL ====\n");
SmallVector<ScoredRemat> ScoredRemats;
+ for (const RematReg &Remat : RematRegs)
+ ScoredRemats.emplace_back(&Remat, MinFreq, MaxFreq, DAG);
BitVector RecomputeRP(NumRegions);
- for (const RematReg &Remat : RematRegs) {
- if (Remat.isAlwaysBeneficial()) {
- REMAT_DEBUG(dbgs() << "[" << MIRegion[Remat.DefMI]
- << "] REMAT (always) | " << *Remat.DefMI);
- rematerialize(Remat, RecomputeRP);
- } else {
- ScoredRemats.emplace_back(&Remat, DAG);
- }
- }
- unsetSatisifedRPTargets(RescheduleRegions);
- LLVM_DEBUG(printTargetRegions());
+ // Rematerialize registers in successive rounds until all RP targets are
+ // satisfied or until we run out of rematerialization candidates.
#ifndef NDEBUG
unsigned RoundNum = 0;
#endif
-
- // Rematerialize registers in successive rounds until all RP targets are
- // satisifed or until we run out of rematerialization candidates.
- while ((updateAndVerifyRPTargets(RecomputeRP) || TargetRegions.any()) &&
- !ScoredRemats.empty()) {
+ do {
// (Re-)Score and (re-)sort all remats in increasing score order.
for (ScoredRemat &Remat : ScoredRemats)
Remat.update(TargetRegions, RPTargets, RegionFreq, !TargetOcc);
@@ -1247,27 +1249,26 @@ bool PreRARematStage::initGCNSchedStage() {
REMAT_DEBUG({
dbgs() << "==== ROUND " << RoundNum << " ====\n";
for (const ScoredRemat &SRemat : ScoredRemats) {
- dbgs() << REMAT_PREFIX << "*" << SRemat.getScore() << "* | "
- << *SRemat.Remat->DefMI;
+ dbgs() << REMAT_PREFIX;
+ SRemat.print();
+ dbgs() << " | " << *SRemat.Remat->DefMI;
}
+ printTargetRegions();
});
RecomputeRP.reset();
int RematIdx = ScoredRemats.size() - 1;
- // Rematerialize registers in decreasing score order until we estimate that
- // all RP targets are satisfied or until rematerialization candidates are no
- // longer useful to decrease RP.
+ // Rematerialize registers in decreasing score order until we estimate
+ // that all RP targets are satisfied or until rematerialization candidates
+ // are no longer useful to decrease RP.
for (; RematIdx >= 0 && TargetRegions.any(); --RematIdx) {
const RematReg &Remat = *ScoredRemats[RematIdx].Remat;
- int Score = ScoredRemats[RematIdx].getScore();
-
- // Stop when scores become negative. Since scores monotonically decrease
- // as remats are performed, we know there is nothing useful left to do in
- // such cases.
- if (Score <= 0) {
- REMAT_DEBUG(dbgs() << "Stop remats on non-positive score | "
- << *Remat.DefMI);
+ // Stop on null score. Since scores monotonically decrease as we
+ // rematerialize, we know there is nothing useful left to do in such
+ // cases.
+ if (ScoredRemats[RematIdx].hasNullScore()) {
+ REMAT_DEBUG(dbgs() << "*** Stop on null score | " << *Remat.DefMI);
RematIdx = -1;
break;
}
@@ -1278,33 +1279,40 @@ bool PreRARematStage::initGCNSchedStage() {
// reality, in which case we interrupt this round and re-score. This also
// ensures that every rematerialization we perform is possibly impactful
// in at least one target region.
- if (!Remat.intersectWithTarget(TargetRegions)) {
- REMAT_DEBUG(dbgs() << "Stop round on stale score | " << *Remat.DefMI);
+ if (!Remat.maybeBeneficial(TargetRegions, RPTargets)) {
+ REMAT_DEBUG(dbgs() << "*** Stop round on stale score | "
+ << *Remat.DefMI);
break;
}
- REMAT_DEBUG(dbgs() << "[" << MIRegion[Remat.DefMI] << "] REMAT *" << Score
- << "* | " << *Remat.DefMI);
- MachineInstr *RematMI = rematerialize(Remat, RecomputeRP);
+ REMAT_DEBUG(dbgs() << "*** REMAT [" << Remat.DefRegion << " -> "
+ << Remat.UseRegion << "] | " << *Remat.DefMI);
// Every rematerialization we do here is likely to move the instruction
// into a higher frequency region, increasing the total sum latency of the
// instruction itself. This is acceptable if we are eliminating a spill in
// the process, but when the goal is increasing occupancy we get nothing
// out of rematerialization if occupancy is not increased in the end; in
// such cases we want to roll back the rematerialization.
- if (TargetOcc)
- Rollbackable.push_back({RematMI, &Remat});
+ RollbackInfo *Rollback =
+ TargetOcc ? &Rollbacks.emplace_back(&Remat) : nullptr;
+ rematerialize(Remat, RecomputeRP, Rollback);
unsetSatisifedRPTargets(Remat.Live);
}
- LLVM_DEBUG(printTargetRegions());
#ifndef NDEBUG
++RoundNum;
#endif
+ REMAT_DEBUG({
+ if (!TargetRegions.any())
+ dbgs() << "*** Stop round on all targets achieved\n";
+ else if (RematIdx == -1)
+ dbgs() << "*** Stop round on exhausted remat opportunities\n";
+ });
// Peel off registers we already rematerialized from the vector's tail.
ScoredRemats.truncate(RematIdx + 1);
- }
+ } while ((updateAndVerifyRPTargets(RecomputeRP) || TargetRegions.any()) &&
+ !ScoredRemats.empty());
if (RescheduleRegions.none())
return false;
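
The round structure in the hunks above can be pictured with a toy standalone model (not part of the patch; the pressure counter and per-candidate benefits are invented, and the stale-score re-check is elided for brevity):

// Toy model of the do/while remat rounds: sort candidates by increasing
// score, consume them from the tail while targets remain, stop early on a
// null score, then peel off the processed tail and start the next round.
#include <algorithm>
#include <cstdio>
#include <vector>

int main() {
  int PressureLeft = 5;                     // invented stand-in for RP targets
  std::vector<int> Benefits = {3, 0, 2, 1}; // invented per-candidate scores

  do {
    std::sort(Benefits.begin(), Benefits.end()); // increasing score order
    int Idx = static_cast<int>(Benefits.size()) - 1;
    for (; Idx >= 0 && PressureLeft > 0; --Idx) {
      if (Benefits[Idx] == 0) { // null score: nothing useful left at all
        Idx = -1;
        break;
      }
      std::printf("remat candidate with benefit %d\n", Benefits[Idx]);
      PressureLeft -= Benefits[Idx]; // "rematerialize"
    }
    Benefits.resize(Idx + 1); // peel off the processed tail
  } while (PressureLeft > 0 && !Benefits.empty());

  std::printf("pressure left: %d\n", PressureLeft);
  return 0;
}
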
@@ -1842,7 +1850,7 @@ void GCNSchedStage::revertScheduling() {
DAG.Regions[RegionIdx] = std::pair(DAG.RegionBegin, DAG.RegionEnd);
}
-void PreRARematStage::setObjective() {
+bool PreRARematStage::setObjective() {
const Function &F = MF.getFunction();
// Set up "spilling targets" for all regions.
@@ -1872,9 +1880,13 @@ void PreRARematStage::setObjective() {
TargetRegions.set(I);
}
}
+
+ return TargetRegions.any();
}
-bool PreRARematStage::collectRematRegs(ArrayRef<uint64_t> RegionFreq) {
+bool PreRARematStage::collectRematRegs(
+ const DenseMap<MachineInstr *, unsigned> &MIRegion,
+ ArrayRef<uint64_t> RegionFreq) {
assert(RegionFreq.size() == DAG.Regions.size());
// We need up-to-date live-out info. to query live-out register masks in
@@ -1884,14 +1896,14 @@ bool PreRARematStage::collectRematRegs(ArrayRef<uint64_t> RegionFreq) {
 // Set of registers already marked for potential rematerialization; used to
// avoid rematerialization chains.
SmallSet<Register, 4> RematRegSet;
- auto IsMORematable = [&RematRegSet](const MachineOperand &MO) -> bool {
+ auto IsMarkedForRemat = [&RematRegSet](const MachineOperand &MO) -> bool {
return MO.isReg() && RematRegSet.contains(MO.getReg());
};
// Identify rematerializable instructions in the function.
for (unsigned I = 0, E = DAG.Regions.size(); I != E; ++I) {
- auto Region = DAG.Regions[I];
- for (auto MI = Region.first; MI != Region.second; ++MI) {
+ RegionBoundaries Bounds = DAG.Regions[I];
+ for (auto MI = Bounds.first; MI != Bounds.second; ++MI) {
// The instruction must be rematerializable.
MachineInstr &DefMI = *MI;
if (!isReMaterializable(DefMI))
@@ -1922,7 +1934,8 @@ bool PreRARematStage::collectRematRegs(ArrayRef<uint64_t> RegionFreq) {
// deferring the MBB RP walk until the entire chain has been
// rematerialized.
const MachineOperand &UseMO = UseMI->getOperand(0);
- if (IsMORematable(UseMO) || llvm::any_of(DefMI.operands(), IsMORematable))
+ if (IsMarkedForRemat(UseMO) ||
+ llvm::any_of(DefMI.operands(), IsMarkedForRemat))
continue;
 // Do not rematerialize an instruction if it uses registers that aren't
@@ -1946,13 +1959,13 @@ PreRARematStage::RematReg::RematReg(
MachineInstr *DefMI, MachineInstr *UseMI, GCNScheduleDAGMILive &DAG,
const DenseMap<MachineInstr *, unsigned> &MIRegion,
ArrayRef<uint64_t> RegionFreq)
- : DefMI(DefMI), UseMI(UseMI), UseRegion(MIRegion.at(UseMI)),
- LiveIn(DAG.Regions.size()), LiveOut(DAG.Regions.size()),
- Live(DAG.Regions.size()), DefFrequency(RegionFreq[MIRegion.at(DefMI)]),
- UseFrequency(RegionFreq[MIRegion.at(UseMI)]) {
+ : DefMI(DefMI), UseMI(UseMI), LiveIn(DAG.Regions.size()),
+ LiveOut(DAG.Regions.size()), Live(DAG.Regions.size()),
+ DefRegion(MIRegion.at(DefMI)), UseRegion(MIRegion.at(UseMI)),
+ DefFrequency(RegionFreq[DefRegion]), UseFrequency(RegionFreq[UseRegion]) {
// Mark regions in which the rematerializable register is live.
- Register Reg = DefMI->getOperand(0).getReg();
+ Register Reg = getReg();
for (unsigned I = 0, E = DAG.Regions.size(); I != E; ++I) {
auto LiveInIt = DAG.LiveIns[I].find(Reg);
if (LiveInIt != DAG.LiveIns[I].end() && LiveInIt->second.any())
@@ -1971,17 +1984,32 @@ PreRARematStage::RematReg::RematReg(
: DAG.MRI.getMaxLaneMaskForVReg(Reg);
}
-MachineInstr *
-PreRARematStage::RematReg::insertMI(unsigned RegionIdx,
- MachineBasicBlock::iterator InsertPos,
- GCNScheduleDAGMILive &DAG) const {
- MachineInstr *NewMI = &*std::prev(InsertPos);
- DAG.updateRegionBoundaries(DAG.Regions[RegionIdx], InsertPos, NewMI);
- DAG.LIS->InsertMachineInstrInMaps(*NewMI);
- DAG.LIS->createAndComputeVirtRegInterval(NewMI->getOperand(0).getReg());
- return NewMI;
+bool PreRARematStage::RematReg::maybeBeneficial(
+ const BitVector &TargetRegions, ArrayRef<GCNRPTarget> RPTargets) const {
+ Register Reg = getReg();
+ for (unsigned I : TargetRegions.set_bits()) {
+ if (Live[I] && RPTargets[I].isSaveBeneficial(Reg))
+ return true;
+ }
+ return false;
+}
+
+void PreRARematStage::RematReg::insertMI(unsigned RegionIdx,
+ MachineInstr *RematMI,
+ GCNScheduleDAGMILive &DAG) const {
+ RegionBoundaries &Bounds = DAG.Regions[RegionIdx];
+ if (Bounds.first == std::next(MachineBasicBlock::iterator(RematMI)))
+ Bounds.first = RematMI;
+ DAG.LIS->InsertMachineInstrInMaps(*RematMI);
+ DAG.LIS->createAndComputeVirtRegInterval(RematMI->getOperand(0).getReg());
}
+PreRARematStage::ScoredRemat::ScoredRemat(const RematReg *Remat,
+ uint64_t MinFreq, uint64_t MaxFreq,
+ const GCNScheduleDAGMILive &DAG)
+ : Remat(Remat), NumRegs(getNumRegs(DAG)),
+ FreqDiff(getFreqDiff(MinFreq, MaxFreq)) {}
+
unsigned PreRARematStage::ScoredRemat::getNumRegs(
const GCNScheduleDAGMILive &DAG) const {
// FIXME: this doesn't account for the fact that the rematerialization may be
@@ -1989,92 +2017,61 @@ unsigned PreRARematStage::ScoredRemat::getNumRegs(
// registers involved. This is acceptable since this is purely used for the
// scoring heuristic, but we should find a way to compute the number of
// registers actually covered by the register/subregister pair.
- Register Reg = Remat->DefMI->getOperand(0).getReg();
- const TargetRegisterClass &RC = *DAG.MRI.getRegClass(Reg);
+ const TargetRegisterClass &RC = *DAG.MRI.getRegClass(Remat->getReg());
return divideCeil(DAG.TRI->getRegSizeInBits(RC), 32);
}
-unsigned PreRARematStage::ScoredRemat::getLatencyGain(
- const GCNScheduleDAGMILive &DAG) const {
- if (hasUnknownLatencyGain())
- return 0;
-
- const TargetSchedModel &SchedModel = DAG.ST.getInstrInfo()->getSchedModel();
-
- // Rematerializing the register to its using region changes the number of
- // times we will execute it in total.
- unsigned FreqDiff = Remat->UseFrequency - Remat->DefFrequency;
- int RematLatDiff = FreqDiff * SchedModel.computeInstrLatency(Remat->DefMI);
-
- // We assume that spilling the register means we have to insert a save in its
- // defining region and a restore in its using region. Spill instruction
- // opcodes do not have corresponding scheduling models so we cannot accurately
- // estimate their latency. Since this is just meant as a heuristic, use the
- // default high latency from the MC scheduling model.
- int SpillLatDiff = SchedModel.getMCSchedModel()->DefaultHighLatency *
- (Remat->DefFrequency + Remat->UseFrequency);
-
- return SpillLatDiff - RematLatDiff;
-}
-
-PreRARematStage::ScoredRemat::ScoredRemat(const RematReg *Remat,
- const GCNScheduleDAGMILive &DAG)
- : Remat(Remat), NumRegs(getNumRegs(DAG)),
- RematLatencyGainOverSpill(getLatencyGain(DAG)) {
- resetScore();
+uint64_t PreRARematStage::ScoredRemat::getFreqDiff(uint64_t MinFreq,
+ uint64_t MaxFreq) const {
+ uint64_t DefOrMin = Remat->DefFrequency ? Remat->DefFrequency : MinFreq;
+ uint64_t UseOrMax = Remat->UseFrequency ? Remat->UseFrequency : MaxFreq;
+ uint64_t MaxDiff = MaxFreq - MinFreq;
+ // This is equivalent to (2 * MaxDiff) / 2^FreqDiffWidth.
+ uint64_t RescaleDenom = MaxDiff >> (FreqDiffWidth - 1);
+ RescaleDenom = std::max(RescaleDenom, (uint64_t)1);
+ // The difference between defining and using frequency is in the range
+ // [-MaxDiff, MaxDiff]; shift it to [0, 2 * MaxDiff] to stay in the positive
+ // range, then rescale to [0, 2^FreqDiffWidth - 1].
+ return (MaxDiff + (DefOrMin - UseOrMax)) / RescaleDenom;
}
void PreRARematStage::ScoredRemat::update(const BitVector &TargetRegions,
ArrayRef<GCNRPTarget> RPTargets,
ArrayRef<uint64_t> RegionFreq,
bool ReduceSpill) {
- // Exit early if no target region intersects with the registers's live
- // regions.
- if (!Remat->intersectWithTarget(TargetRegions))
- return setUselessRemat();
- resetScore();
-
- // When the stage is trying to reduce spilling, we want to pick
- // rematerialization candidates that will be beneficial to latency. When it is
- // trying to increase occupancy, we are fine increasing latency to try to
- // reduce RP.
- // FIXME: In the increasing occupancy case, we should be able to incorporate
- // the latency loss induced by rematerializations into the final score. It
- // seems possible to very roughly estimate the overall kernel latency upside
- // we get by increasing occupancy and compare it to the latency hit each wave
- // will be subjected to.
- if (ReduceSpill) {
- // If we don't know the latency gain, we still consider the
- // rematerialization potentially beneficial because we don't want to miss
- // rematerialization opportunities and rematerializing is in most cases
- // cheaper than spilling. We still give a bonus to remats for which we are
- // able to do the calculation.
- if (!hasUnknownLatencyGain()) {
- if (RematLatencyGainOverSpill < 0)
- return setUselessRemat();
- setKnownLatencyGain();
- }
- }
+ setNullScore();
+ if (!Remat->maybeBeneficial(TargetRegions, RPTargets))
+ return;
- // The estimated RP reduction is proportional to the total frequency in target
- // regions where the register is live.
- Register Reg = Remat->DefMI->getOperand(0).getReg();
- ScoreTy RPScore = 0;
+ Register Reg = Remat->getReg();
+ uint64_t MaxFreq = 0;
+ ScoreTy NumBenefitingRegions = 0;
for (unsigned I : TargetRegions.set_bits()) {
- unsigned Freq = std::max(RegionFreq[I], static_cast<uint64_t>(1));
- if (Remat->isBeneficialRegion(I))
- Score += WeightRP * RPTargets[I].isSaveBeneficial(Reg) * Freq;
- else if (Remat->isMaybeBeneficialRegion(I))
- Score += WeightRPMaybe * RPTargets[I].isSaveBeneficial(Reg) * Freq;
- }
+ if (!Remat->Live[I] || !RPTargets[I].isSaveBeneficial(Reg))
+ continue;
+ bool UnusedLT = Remat->isUnusedLiveThrough(I);
+
+ // Regions in which RP is guaranteed to decrease have more weight.
+ NumBenefitingRegions += UnusedLT ? 2 : 1;
- // The estimated RP reduction is directly proportional to the size of the
- // rematerializable register.
- setRPScore(RPScore * NumRegs);
+ if (ReduceSpill) {
+ uint64_t Freq = RegionFreq[I];
+ if (!UnusedLT) {
+ // Apply a frequency penalty in regions in which we are not sure that RP
+ // will decrease.
+ Freq /= 2;
+ }
+ MaxFreq = std::max(MaxFreq, Freq);
+ }
+ }
+ setMaxFreqScore(MaxFreq);
+ setFreqDiffScore(FreqDiff);
+ setRegionImpactScore(NumBenefitingRegions * NumRegs);
}
-MachineInstr *PreRARematStage::rematerialize(const RematReg &Remat,
- BitVector &RecomputeRP) {
+void PreRARematStage::rematerialize(const RematReg &Remat,
+ BitVector &RecomputeRP,
+ RollbackInfo *Rollback) {
const SIInstrInfo *TII = MF.getSubtarget<GCNSubtarget>().getInstrInfo();
MachineInstr &DefMI = *Remat.DefMI;
Register Reg = DefMI.getOperand(0).getReg();
@@ -2084,8 +2081,11 @@ MachineInstr *PreRARematStage::rematerialize(const RematReg &Remat,
MachineBasicBlock::iterator InsertPos = Remat.UseMI;
TII->reMaterialize(*InsertPos->getParent(), InsertPos, NewReg, 0, DefMI,
*DAG.TRI);
+ MachineInstr *RematMI = &*std::prev(InsertPos);
Remat.UseMI->substituteRegister(Reg, NewReg, 0, *DAG.TRI);
- MachineInstr *NewMI = Remat.insertMI(Remat.UseRegion, InsertPos, DAG);
+ Remat.insertMI(Remat.UseRegion, RematMI, DAG);
+ if (Rollback)
+ Rollback->RematMI = RematMI;
// Remove the register from all regions where it is a live-in or live-out
// and adjust RP targets.
@@ -2121,41 +2121,39 @@ MachineInstr *PreRARematStage::rematerialize(const RematReg &Remat,
}
#endif
- // This save is exact in beneficial regions but optimistic in all other
- // regions where the register is live.
+ // This save is guaranteed in regions in which the register is live-through
+  // and unused, but optimistic in all other regions where the register is
+ // live.
RPTargets[I].saveReg(Reg, Remat.Mask, DAG.MRI);
DAG.LiveIns[I].erase(Reg);
DAG.RegionLiveOuts.getLiveRegsForRegionIdx(I).erase(Reg);
- if (!Remat.isBeneficialRegion(I))
+ if (!Remat.isUnusedLiveThrough(I))
RecomputeRP.set(I);
}
- DAG.deleteMI(MIRegion.at(&DefMI), &DefMI);
+ DAG.deleteMI(Remat.DefRegion, &DefMI);
RescheduleRegions |= Remat.Live;
- return NewMI;
}
-void PreRARematStage::rollback(const RollbackReg &Rollback) const {
+void PreRARematStage::rollback(const RollbackInfo &Rollback,
+ BitVector &RecomputeRP) const {
const SIInstrInfo *TII = MF.getSubtarget<GCNSubtarget>().getInstrInfo();
- auto &[RematMI, Remat] = Rollback;
-
- // Recreate the original MI from the first rematerialization. Any
- // rematerialization could do, this is just a simple way to do this.
- unsigned DefRegion = MIRegion.at(Remat->DefMI);
- MachineBasicBlock *MBB = RegionBB[DefRegion];
+ auto &[Remat, RematMI] = Rollback;
+ MachineBasicBlock *MBB = RegionBB[Remat->DefRegion];
Register Reg = RematMI->getOperand(0).getReg();
Register NewReg = DAG.MRI.cloneVirtualRegister(Reg);
// Re-rematerialize MI in its original region. Note that it may not be
// rematerialized exactly in the same position as originally within the
// region, but it should not matter much.
- MachineBasicBlock::iterator InsertPos(DAG.Regions[DefRegion].second);
+ MachineBasicBlock::iterator InsertPos(DAG.Regions[Remat->DefRegion].second);
TII->reMaterialize(*MBB, InsertPos, NewReg, 0, *RematMI, *DAG.TRI);
- REMAT_DEBUG(dbgs() << '[' << DefRegion << "] Re-rematerialized as "
- << *std::prev(InsertPos));
+ MachineInstr *ReRematMI = &*std::prev(InsertPos);
+ REMAT_DEBUG(dbgs() << '[' << Remat->DefRegion << "] Re-rematerialized as "
+ << *ReRematMI);
Remat->UseMI->substituteRegister(Reg, NewReg, 0, *DAG.TRI);
DAG.deleteMI(Remat->UseRegion, RematMI);
- Remat->insertMI(DefRegion, InsertPos, DAG);
+ Remat->insertMI(Remat->DefRegion, ReRematMI, DAG);
// Re-add the register as a live-in/live-out in all regions it used to be
// one in.
@@ -2164,6 +2162,7 @@ void PreRARematStage::rollback(const RollbackReg &Rollback) const {
DAG.LiveIns[I].insert(LiveReg);
for (unsigned I : Remat->LiveOut.set_bits())
DAG.RegionLiveOuts.getLiveRegsForRegionIdx(I).insert(LiveReg);
+ RecomputeRP |= Remat->Live;
}
void PreRARematStage::unsetSatisifedRPTargets(const BitVector &Regions) {
@@ -2224,41 +2223,20 @@ void PreRARematStage::finalizeGCNSchedStage() {
// Rollback, then recompute pressure in all affected regions.
REMAT_DEBUG(dbgs() << "==== ROLLBACK ====\n");
- BitVector ImpactedRegions(DAG.Regions.size());
- for (const RollbackReg &Rollback : Rollbackable) {
- rollback(Rollback);
- ImpactedRegions |= Rollback.second->Live;
- }
- for (unsigned I : ImpactedRegions.set_bits())
+ BitVector RecomputeRP(DAG.Regions.size());
+ for (const RollbackInfo &Rollback : Rollbacks)
+ rollback(Rollback, RecomputeRP);
+ for (unsigned I : RecomputeRP.set_bits())
DAG.Pressure[I] = DAG.getRealRegPressure(I);
GCNSchedStage::finalizeGCNSchedStage();
}
-void GCNScheduleDAGMILive::updateRegionBoundaries(
- RegionBoundaries &RegionBounds, MachineBasicBlock::iterator MI,
- MachineInstr *NewMI) {
- assert((!NewMI || NewMI != RegionBounds.second) &&
- "cannot remove at region end");
-
- if (RegionBounds.first == RegionBounds.second) {
- assert(NewMI && "cannot remove from an empty region");
- RegionBounds.first = NewMI;
- return;
- }
-
- // We only care for modifications at the beginning of a non-empty region since
- // the upper region boundary is exclusive.
- if (MI != RegionBounds.first)
- return;
- if (!NewMI)
- RegionBounds.first = std::next(MI); // Removal
- else
- RegionBounds.first = NewMI; // Insertion
-}
-
void GCNScheduleDAGMILive::deleteMI(unsigned RegionIdx, MachineInstr *MI) {
- updateRegionBoundaries(Regions[RegionIdx], MI, nullptr);
+  // It's not possible for the deleted instruction to be the upper region
+  // boundary since we don't delete region terminators.
+ if (Regions[RegionIdx].first == MI)
+ Regions[RegionIdx].first = std::next(MachineBasicBlock::iterator(MI));
LIS->removeInterval(MI->getOperand(0).getReg());
LIS->RemoveMachineInstrFromMaps(*MI);
MI->eraseFromParent();
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
index 2cf8ec993092b..3ddd9f91ca454 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
@@ -287,14 +287,6 @@ class GCNScheduleDAGMILive final : public ScheduleDAGMILive {
// Compute and cache live-ins and pressure for all regions in block.
void computeBlockPressure(unsigned RegionIdx, const MachineBasicBlock *MBB);
- /// If necessary, updates a region's boundaries following insertion ( \p NewMI
- /// != nullptr) or removal ( \p NewMI == nullptr) of a \p MI in the region.
- /// For an MI removal, this must be called before the MI is actually erased
- /// from its parent MBB.
- void updateRegionBoundaries(RegionBoundaries &RegionBounds,
- MachineBasicBlock::iterator MI,
- MachineInstr *NewMI);
-
void runSchedStages();
std::unique_ptr<GCNSchedStage> createSchedStage(GCNSchedStageID SchedStageID);
@@ -462,73 +454,49 @@ class PreRARematStage : public GCNSchedStage {
MachineInstr *DefMI;
/// Single user of the rematerializable register.
MachineInstr *UseMI;
- /// Using region.
- unsigned UseRegion;
/// Regions in which the register is live-in/live-out/live anywhere.
BitVector LiveIn, LiveOut, Live;
/// The rematerializable register's lane bitmask.
LaneBitmask Mask;
- /// Frequency of region defining/using the register. 0 when unknown.
- unsigned DefFrequency, UseFrequency;
+ /// Defining and using regions.
+ unsigned DefRegion, UseRegion;
+ /// Frequency of defining/using regions. 0 when unknown.
+ uint64_t DefFrequency, UseFrequency;
RematReg(MachineInstr *DefMI, MachineInstr *UseMI,
GCNScheduleDAGMILive &DAG,
const DenseMap<MachineInstr *, unsigned> &MIRegion,
ArrayRef<uint64_t> RegionFreq);
- /// Returns whether the regions at which the register is live intersects
- /// with the \p Target regions.
- bool intersectWithTarget(BitVector Target) const {
- Target &= Live;
- return Target.any();
- }
+ /// Returns the rematerializable register. Do not call after deleting the
+ /// original defining instruction.
+ Register getReg() const { return DefMI->getOperand(0).getReg(); }
- /// Returns whether is is always beneficial to rematerialize this register.
- /// These are rematerializations that never move instructions into higher
- /// frequency regions and at least shorten live intervals, so they are
- /// always useful irrespective of RP targets.
- bool isAlwaysBeneficial() const {
- // When the using region is executed a single time, we know
- // rematerializing will be beneficial whatever the defining region's
- // frequency.
- if (UseFrequency == 1)
- return true;
- // When there is uncertainty on the defining or using frequency, we err on
- // the conservative side and do not consider the rematerialization always
- // beneficial.
- if (!DefFrequency || !UseFrequency)
- return false;
- return UseFrequency <= DefFrequency;
- }
+ /// Determines whether this rematerialization may be beneficial in at least
+ /// one target region.
+ bool maybeBeneficial(const BitVector &TargetRegions,
+ ArrayRef<GCNRPTarget> RPTargets) const;
- /// Determines whether rematerializing the register is guaranteed to reduce
- /// pressure in the region.
- bool isBeneficialRegion(unsigned I) const {
+ /// Determines if the register is both unused and live-through in region \p
+ /// I. This guarantees that rematerializing it will reduce RP in the region.
+ bool isUnusedLiveThrough(unsigned I) const {
assert(I < Live.size() && "region index out of range");
return LiveIn[I] && LiveOut[I] && I != UseRegion;
}
- /// Determines whether rematerializing the register can but is not
- /// guaranteed to reduce pressure in the region.
- bool isMaybeBeneficialRegion(unsigned I) const {
- assert(I < Live.size() && "region index out of range");
- return Live[I] && !isBeneficialRegion(I);
- }
-
/// Updates internal structures following a MI rematerialization. Part of
/// the stage instead of the DAG because it makes assumptions that are
/// specific to the rematerialization process.
- MachineInstr *insertMI(unsigned RegionIdx,
- MachineBasicBlock::iterator InsertPos,
- GCNScheduleDAGMILive &DAG) const;
+ void insertMI(unsigned RegionIdx, MachineInstr *RematMI,
+ GCNScheduleDAGMILive &DAG) const;
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
- void print(const DenseMap<MachineInstr *, unsigned> &MIRegion) const;
+ void print() const;
#endif
};
/// A scored rematerializable register. Higher scores indicate more beneficial
- /// rematerializations. Non-positive scores indicate the rematerialization is
+  /// rematerializations. A null score indicates the rematerialization is
/// not helpful to reduce RP in target regions.
struct ScoredRemat {
/// The rematerializable register under consideration.
@@ -536,14 +504,16 @@ class PreRARematStage : public GCNSchedStage {
/// This only initializes state-independent characteristics of \p Remat, not
/// the actual score.
- ScoredRemat(const RematReg *Remat, const GCNScheduleDAGMILive &DAG);
+ ScoredRemat(const RematReg *Remat, uint64_t MinFreq, uint64_t MaxFreq,
+ const GCNScheduleDAGMILive &DAG);
/// Updates the rematerialization's score w.r.t. the current \p RPTargets.
/// \p RegionFreq indicates the frequency of each region
void update(const BitVector &TargetRegions, ArrayRef<GCNRPTarget> RPTargets,
ArrayRef<uint64_t> RegionFreq, bool ReduceSpill);
- int getScore() const { return Score; }
+ /// Returns whether the current score is null.
+ bool hasNullScore() const { return !Score; }
bool operator<(const ScoredRemat &O) const {
// Break ties using pointer to rematerializable register. Since
@@ -554,49 +524,68 @@ class PreRARematStage : public GCNSchedStage {
return Score < O.Score;
}
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ void print() const;
+#endif
+
private:
- /// Per-region contribution weights to RP score depending on whether RP is
- /// guaranteed or only likely to be reduced in the region. Only their
- /// relative value w.r.t. one another matter.
- static constexpr int WeightRP = 2, WeightRPMaybe = 1;
+ /// Bitwidths for score components.
+ static constexpr unsigned MaxFreqWidth = 32, FreqDiffWidth = 16,
+ RegionImpactWidth = 16;
/// Number of 32-bit registers this rematerialization covers.
const unsigned NumRegs;
- /// Latency gain induced by rematerializing the register over spilling its
- /// defining instruction.
- const int RematLatencyGainOverSpill;
-
- /// Whether we can estimate the latency gain of rematerialazing over
- /// spilling; this requires knowing defining/using region frequencies.
- bool hasUnknownLatencyGain() const {
- return !Remat->DefFrequency || !Remat->UseFrequency;
- }
+ /// Frequency difference between defining and using regions, normalized to
+ /// the maximum possible difference and rescaled to the representable range
+ /// in the score.
+ const uint64_t FreqDiff;
- using ScoreTy = int32_t;
+ using ScoreTy = uint64_t;
/// Overall rematerialization score. Scoring components are mapped to bit
/// ranges in the overall score.
///
- /// [31:1] : estimated RP reduction score
- /// [0] : known latency gain
- ScoreTy Score;
+ /// [63:32] : maximum frequency in benefiting target region (spilling only)
+ /// [31:16] : frequency difference between defining and using region
+ /// [15: 0] : number of benefiting regions times register size
+ ScoreTy Score = 0;
- void resetScore() { Score = 0; }
+ void setNullScore() { Score = 0; }
- void setUselessRemat() { Score = std::numeric_limits<ScoreTy>::min(); }
+ void setMaxFreqScore(ScoreTy MaxFreq) {
+ MaxFreq = std::min(
+ static_cast<ScoreTy>(std::numeric_limits<uint32_t>::max()), MaxFreq);
+ Score |= MaxFreq << (FreqDiffWidth + RegionImpactWidth);
+ }
- void setKnownLatencyGain() { Score |= 1; }
+ void setFreqDiffScore(ScoreTy FreqDiff) {
+ FreqDiff = std::min(
+ static_cast<ScoreTy>(std::numeric_limits<uint16_t>::max()), FreqDiff);
+ Score |= FreqDiff << RegionImpactWidth;
+ }
- void setRPScore(ScoreTy RPScore) { Score |= RPScore << 1; }
+ void setRegionImpactScore(ScoreTy RegionImpact) {
+ RegionImpact =
+ std::min(static_cast<ScoreTy>(std::numeric_limits<uint16_t>::max()),
+ RegionImpact);
+ Score |= RegionImpact;
+ }
unsigned getNumRegs(const GCNScheduleDAGMILive &DAG) const;
- unsigned getLatencyGain(const GCNScheduleDAGMILive &DAG) const;
+ uint64_t getFreqDiff(uint64_t MinFreq, uint64_t MaxFreq) const;
+ };
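
Packing the three components into a single 64-bit integer means an ordinary integer comparison ranks candidates by maximum benefiting frequency first, frequency difference second, and region impact last. Below is a minimal sketch of that packing under the widths used here; packScore is an illustrative helper, not part of the patch.

  #include <algorithm>
  #include <cstdint>

  constexpr unsigned FreqDiffWidth = 16, RegionImpactWidth = 16;

  uint64_t packScore(uint64_t MaxFreq, uint64_t FreqDiff, uint64_t RegionImpact) {
    // Clamp each component to its field width before shifting it into place.
    MaxFreq = std::min<uint64_t>(MaxFreq, UINT32_MAX);
    FreqDiff = std::min<uint64_t>(FreqDiff, UINT16_MAX);
    RegionImpact = std::min<uint64_t>(RegionImpact, UINT16_MAX);
    return (MaxFreq << (FreqDiffWidth + RegionImpactWidth)) |
           (FreqDiff << RegionImpactWidth) | RegionImpact;
  }

For instance, packScore(2, 0, 0xFFFF) compares greater than packScore(1, 0xFFFF, 0xFFFF): a hotter benefiting region outranks any combination of the lower-order components.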
+
+  /// Holds enough information to roll back a rematerialization decision after
+  /// re-scheduling.
+ struct RollbackInfo {
+ /// The rematerializable register under consideration.
+ const RematReg *Remat;
+ /// The rematerialized MI replacing the original defining MI.
+ MachineInstr *RematMI;
+
+ RollbackInfo(const RematReg *Remat) : Remat(Remat) {}
};
- /// Maps all MIs (except lone terminators, which are not part of any region)
- /// to their parent region. Non-lone terminators are considered part of the
- /// region they delimitate.
- DenseMap<MachineInstr *, unsigned> MIRegion;
/// Parent MBB to each region, in region order.
SmallVector<MachineBasicBlock *> RegionBB;
@@ -613,13 +602,9 @@ class PreRARematStage : public GCNSchedStage {
/// List of rematerializable registers.
SmallVector<RematReg, 16> RematRegs;
-
- using RollbackReg = std::pair<MachineInstr *, const RematReg *>;
/// List of rematerializations to rollback if rematerialization does not end
- /// up being beneficial. Each element pairs the MI created during
- /// rematerialization to the original rematerializable register.
- SmallVector<RollbackReg> Rollbackable;
-
+ /// up being beneficial.
+ SmallVector<RollbackInfo> Rollbacks;
/// After successful stage initialization, indicates which regions should be
/// rescheduled.
BitVector RescheduleRegions;
@@ -627,8 +612,8 @@ class PreRARematStage : public GCNSchedStage {
/// Determines the stage's objective (increasing occupancy or reducing
/// spilling, set in \ref TargetOcc). Defines \ref RPTargets in all regions to
/// achieve that objective and mark those that don't achieve it in \ref
- /// TargetRegions.
- void setObjective();
+ /// TargetRegions. Returns whether there is any target region.
+ bool setObjective();
/// Unsets target regions in \p Regions whose RP target has been reached.
void unsetSatisifedRPTargets(const BitVector &Regions);
@@ -639,20 +624,26 @@ class PreRARematStage : public GCNSchedStage {
bool updateAndVerifyRPTargets(const BitVector &Regions);
/// Collects all rematerializable registers and appends them to \ref
- /// RematRegs. \p RegionFreq contains the frequency of each region, 0
- /// indicating an unknown frequency. Returns whether any rematerializable
- /// register was found.
- bool collectRematRegs(ArrayRef<uint64_t> RegionFreq);
+ /// RematRegs. \p MIRegion maps MIs to their region and \p RegionFreq contains
+ /// the frequency of each region, 0 indicating an unknown frequency. Returns
+ /// whether any rematerializable register was found.
+ bool collectRematRegs(const DenseMap<MachineInstr *, unsigned> &MIRegion,
+ ArrayRef<uint64_t> RegionFreq);
/// Rematerializes \p Remat. This removes the rematerialized register from
/// live-in/out lists in the DAG and updates RP targets in all affected
/// regions, which are also marked in \ref RescheduleRegions. Regions in which
- /// RP savings are not guaranteed are set in \p RecomputeRP. Returns the newly
- /// created MI.
- MachineInstr *rematerialize(const RematReg &Remat, BitVector &RecomputeRP);
-
- /// Rollbacks rematerialization \p Rollback.
- void rollback(const RollbackReg &Rollback) const;
+ /// RP savings are not guaranteed are set in \p RecomputeRP. When \p Rollback
+  /// is non-null, fills it with the information required to roll back the
+  /// rematerialization post-rescheduling.
+ void rematerialize(const RematReg &Remat, BitVector &RecomputeRP,
+ RollbackInfo *Rollback);
+
+  /// Rolls back the rematerialization decision represented by \p Rollback. This
+  /// updates live-in/out lists in the DAG but does not update cached register
+  /// pressures. Regions in which RP may be impacted are marked in \p
+  /// RecomputeRP.
+ void rollback(const RollbackInfo &Rollback, BitVector &RecomputeRP) const;
/// Whether the MI is rematerializable
bool isReMaterializable(const MachineInstr &MI);
@@ -678,7 +669,6 @@ class PreRARematStage : public GCNSchedStage {
const unsigned NumRegions = DAG.Regions.size();
RPTargets.reserve(NumRegions);
RegionBB.reserve(NumRegions);
- MIRegion.reserve(MF.getInstructionCount());
}
};
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.wqm.demote.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.wqm.demote.ll
index 5e079c94a6381..8a53c862371cf 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.wqm.demote.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.wqm.demote.ll
@@ -885,6 +885,7 @@ define amdgpu_ps void @wqm_deriv_loop(<2 x float> %input, float %arg, i32 %index
; SI-NEXT: s_mov_b64 s[0:1], exec
; SI-NEXT: s_wqm_b64 exec, exec
; SI-NEXT: v_cvt_i32_f32_e32 v0, v0
+; SI-NEXT: s_mov_b32 s4, 0
; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
; SI-NEXT: s_and_saveexec_b64 s[2:3], vcc
; SI-NEXT: s_xor_b64 s[2:3], exec, s[2:3]
@@ -893,11 +894,10 @@ define amdgpu_ps void @wqm_deriv_loop(<2 x float> %input, float %arg, i32 %index
; SI-NEXT: s_andn2_b64 s[0:1], s[0:1], exec
; SI-NEXT: s_cbranch_scc0 .LBB7_9
; SI-NEXT: ; %bb.2: ; %.demote0
-; SI-NEXT: s_wqm_b64 s[4:5], s[0:1]
-; SI-NEXT: s_and_b64 exec, exec, s[4:5]
+; SI-NEXT: s_wqm_b64 s[6:7], s[0:1]
+; SI-NEXT: s_and_b64 exec, exec, s[6:7]
; SI-NEXT: .LBB7_3: ; %.continue0.preheader
; SI-NEXT: s_or_b64 exec, exec, s[2:3]
-; SI-NEXT: s_mov_b32 s4, 0
; SI-NEXT: s_mov_b64 s[2:3], 0
; SI-NEXT: v_mov_b32_e32 v0, s4
; SI-NEXT: s_branch .LBB7_5
@@ -951,6 +951,7 @@ define amdgpu_ps void @wqm_deriv_loop(<2 x float> %input, float %arg, i32 %index
; GFX9-NEXT: s_mov_b64 s[0:1], exec
; GFX9-NEXT: s_wqm_b64 exec, exec
; GFX9-NEXT: v_cvt_i32_f32_e32 v0, v0
+; GFX9-NEXT: s_mov_b32 s4, 0
; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
; GFX9-NEXT: s_and_saveexec_b64 s[2:3], vcc
; GFX9-NEXT: s_xor_b64 s[2:3], exec, s[2:3]
@@ -959,11 +960,10 @@ define amdgpu_ps void @wqm_deriv_loop(<2 x float> %input, float %arg, i32 %index
; GFX9-NEXT: s_andn2_b64 s[0:1], s[0:1], exec
; GFX9-NEXT: s_cbranch_scc0 .LBB7_9
; GFX9-NEXT: ; %bb.2: ; %.demote0
-; GFX9-NEXT: s_wqm_b64 s[4:5], s[0:1]
-; GFX9-NEXT: s_and_b64 exec, exec, s[4:5]
+; GFX9-NEXT: s_wqm_b64 s[6:7], s[0:1]
+; GFX9-NEXT: s_and_b64 exec, exec, s[6:7]
; GFX9-NEXT: .LBB7_3: ; %.continue0.preheader
; GFX9-NEXT: s_or_b64 exec, exec, s[2:3]
-; GFX9-NEXT: s_mov_b32 s4, 0
; GFX9-NEXT: s_mov_b64 s[2:3], 0
; GFX9-NEXT: v_mov_b32_e32 v0, s4
; GFX9-NEXT: s_branch .LBB7_5
@@ -1080,6 +1080,7 @@ define amdgpu_ps void @wqm_deriv_loop(<2 x float> %input, float %arg, i32 %index
; GFX10-64-NEXT: s_mov_b64 s[0:1], exec
; GFX10-64-NEXT: s_wqm_b64 exec, exec
; GFX10-64-NEXT: v_cvt_i32_f32_e32 v0, v0
+; GFX10-64-NEXT: s_mov_b32 s4, 0
; GFX10-64-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
; GFX10-64-NEXT: s_and_saveexec_b64 s[2:3], vcc
; GFX10-64-NEXT: s_xor_b64 s[2:3], exec, s[2:3]
@@ -1088,12 +1089,11 @@ define amdgpu_ps void @wqm_deriv_loop(<2 x float> %input, float %arg, i32 %index
; GFX10-64-NEXT: s_andn2_b64 s[0:1], s[0:1], exec
; GFX10-64-NEXT: s_cbranch_scc0 .LBB7_9
; GFX10-64-NEXT: ; %bb.2: ; %.demote0
-; GFX10-64-NEXT: s_wqm_b64 s[4:5], s[0:1]
-; GFX10-64-NEXT: s_and_b64 exec, exec, s[4:5]
+; GFX10-64-NEXT: s_wqm_b64 s[6:7], s[0:1]
+; GFX10-64-NEXT: s_and_b64 exec, exec, s[6:7]
; GFX10-64-NEXT: .LBB7_3: ; %.continue0.preheader
; GFX10-64-NEXT: s_or_b64 exec, exec, s[2:3]
-; GFX10-64-NEXT: s_mov_b32 s2, 0
-; GFX10-64-NEXT: v_mov_b32_e32 v0, s2
+; GFX10-64-NEXT: v_mov_b32_e32 v0, s4
; GFX10-64-NEXT: s_mov_b64 s[2:3], 0
; GFX10-64-NEXT: s_branch .LBB7_5
; GFX10-64-NEXT: .LBB7_4: ; %.continue1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/vni8-across-blocks.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/vni8-across-blocks.ll
index f1942130fc972..b33b8a7d8cd72 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/vni8-across-blocks.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/vni8-across-blocks.ll
@@ -6,34 +6,34 @@ define amdgpu_kernel void @v3i8_liveout(ptr addrspace(1) %src1, ptr addrspace(1)
; GFX906: ; %bb.0: ; %entry
; GFX906-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX906-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
-; GFX906-NEXT: v_lshlrev_b32_e32 v1, 2, v0
+; GFX906-NEXT: v_lshlrev_b32_e32 v2, 2, v0
; GFX906-NEXT: v_mov_b32_e32 v3, 8
-; GFX906-NEXT: v_mov_b32_e32 v4, 16
+; GFX906-NEXT: v_mov_b32_e32 v5, 16
; GFX906-NEXT: s_waitcnt lgkmcnt(0)
-; GFX906-NEXT: global_load_dword v2, v1, s[0:1]
+; GFX906-NEXT: global_load_dword v4, v2, s[0:1]
+; GFX906-NEXT: v_mov_b32_e32 v1, 0xff
; GFX906-NEXT: v_cmp_gt_u32_e32 vcc, 15, v0
; GFX906-NEXT: s_waitcnt vmcnt(0)
-; GFX906-NEXT: v_and_b32_e32 v5, 0xff, v2
-; GFX906-NEXT: v_lshlrev_b32_sdwa v6, v3, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
-; GFX906-NEXT: v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
-; GFX906-NEXT: v_or3_b32 v2, v5, v6, v2
+; GFX906-NEXT: v_and_b32_e32 v6, 0xff, v4
+; GFX906-NEXT: v_lshlrev_b32_sdwa v7, v3, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
+; GFX906-NEXT: v_lshlrev_b32_sdwa v4, v5, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
+; GFX906-NEXT: v_or3_b32 v4, v6, v7, v4
; GFX906-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX906-NEXT: s_cbranch_execz .LBB0_2
; GFX906-NEXT: ; %bb.1: ; %bb.1
-; GFX906-NEXT: global_load_dword v0, v1, s[2:3]
+; GFX906-NEXT: global_load_dword v0, v2, s[2:3]
; GFX906-NEXT: s_waitcnt vmcnt(0)
-; GFX906-NEXT: v_and_b32_e32 v1, 0xff, v0
-; GFX906-NEXT: v_lshlrev_b32_sdwa v2, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
-; GFX906-NEXT: v_lshlrev_b32_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
-; GFX906-NEXT: v_or3_b32 v2, v1, v2, v0
+; GFX906-NEXT: v_and_b32_e32 v2, 0xff, v0
+; GFX906-NEXT: v_lshlrev_b32_sdwa v3, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
+; GFX906-NEXT: v_lshlrev_b32_sdwa v0, v5, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
+; GFX906-NEXT: v_or3_b32 v4, v2, v3, v0
; GFX906-NEXT: .LBB0_2: ; %bb.2
; GFX906-NEXT: s_or_b64 exec, exec, s[0:1]
-; GFX906-NEXT: v_lshrrev_b32_e32 v0, 8, v2
+; GFX906-NEXT: v_lshrrev_b32_e32 v0, 8, v4
; GFX906-NEXT: v_and_b32_e32 v0, 0xff, v0
; GFX906-NEXT: v_lshlrev_b16_e32 v0, 8, v0
-; GFX906-NEXT: v_mov_b32_e32 v1, 0xff
-; GFX906-NEXT: v_or_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
-; GFX906-NEXT: v_and_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX906-NEXT: v_or_b32_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+; GFX906-NEXT: v_and_b32_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX906-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX906-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX906-NEXT: v_lshl_or_b32 v0, v1, 16, v0
diff --git a/llvm/test/CodeGen/AMDGPU/call-waitcnt.ll b/llvm/test/CodeGen/AMDGPU/call-waitcnt.ll
index ed06594dad361..675acd0eedfc5 100644
--- a/llvm/test/CodeGen/AMDGPU/call-waitcnt.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-waitcnt.ll
@@ -65,9 +65,9 @@ define amdgpu_kernel void @call_no_wait_after_call(ptr addrspace(1) %ptr, i32) #
; GCN-NEXT: s_mov_b64 s[6:7], s[4:5]
; GCN-NEXT: v_mov_b32_e32 v0, 0
; GCN-NEXT: s_mov_b32 s32, 0
+; GCN-NEXT: v_mov_b32_e32 v40, 0
; GCN-NEXT: s_swappc_b64 s[30:31], s[8:9]
-; GCN-NEXT: v_mov_b32_e32 v0, 0
-; GCN-NEXT: global_store_dword v0, v0, s[34:35]
+; GCN-NEXT: global_store_dword v40, v40, s[34:35]
; GCN-NEXT: s_endpgm
call void @func(i32 0)
store i32 0, ptr addrspace(1) %ptr
@@ -88,9 +88,9 @@ define amdgpu_kernel void @call_no_wait_after_call_return_val(ptr addrspace(1) %
; GCN-NEXT: s_mov_b64 s[6:7], s[4:5]
; GCN-NEXT: v_mov_b32_e32 v0, 0
; GCN-NEXT: s_mov_b32 s32, 0
+; GCN-NEXT: v_mov_b32_e32 v40, 0
; GCN-NEXT: s_swappc_b64 s[30:31], s[8:9]
-; GCN-NEXT: v_mov_b32_e32 v1, 0
-; GCN-NEXT: global_store_dword v1, v0, s[34:35]
+; GCN-NEXT: global_store_dword v40, v0, s[34:35]
; GCN-NEXT: s_endpgm
%rv = call i32 @func.return(i32 0)
store i32 %rv, ptr addrspace(1) %ptr
diff --git a/llvm/test/CodeGen/AMDGPU/dbg-value-starts-sched-region.mir b/llvm/test/CodeGen/AMDGPU/dbg-value-starts-sched-region.mir
index 4a03416f56cc9..0785fe31d63b4 100644
--- a/llvm/test/CodeGen/AMDGPU/dbg-value-starts-sched-region.mir
+++ b/llvm/test/CodeGen/AMDGPU/dbg-value-starts-sched-region.mir
@@ -10,10 +10,10 @@ tracksRegLiveness: true
body: |
bb.0:
; CHECK-LABEL: name: sched
- ; CHECK: S_NOP 0
+ ; CHECK: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: S_NOP 0
; CHECK-NEXT: SCHED_BARRIER 0
; CHECK-NEXT: DBG_VALUE
- ; CHECK-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: dead [[COPY:%[0-9]+]]:sgpr_32 = COPY [[DEF]]
; CHECK-NEXT: S_NOP 0
; CHECK-NEXT: S_ENDPGM 0
diff --git a/llvm/test/CodeGen/AMDGPU/debug-value-scheduler-crash.mir b/llvm/test/CodeGen/AMDGPU/debug-value-scheduler-crash.mir
index 5444c2b3eda01..156979d6d06a5 100644
--- a/llvm/test/CodeGen/AMDGPU/debug-value-scheduler-crash.mir
+++ b/llvm/test/CodeGen/AMDGPU/debug-value-scheduler-crash.mir
@@ -28,6 +28,14 @@ body: |
; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[V_MUL_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F32_e32 1082130432, [[DEF]], implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.2(0x80000000)
@@ -43,42 +51,34 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B32_e32_]]
- ; CHECK-NEXT: [[V_MUL_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F32_e32 [[DEF1]], [[DEF1]], implicit $mode, implicit $exec
- ; CHECK-NEXT: [[V_MUL_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F32_e32 [[DEF1]], [[DEF1]], implicit $mode, implicit $exec
- ; CHECK-NEXT: [[V_MUL_F32_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F32_e32 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_]], implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MUL_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F32_e32 [[DEF6]], [[DEF6]], implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MUL_F32_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F32_e32 [[DEF6]], [[DEF6]], implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MUL_F32_e32_3:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F32_e32 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_]], implicit $mode, implicit $exec
; CHECK-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1092616192, implicit $exec
- ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[V_ADD_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e32 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_]], implicit $mode, implicit $exec
- ; CHECK-NEXT: [[V_MUL_F32_e32_3:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F32_e32 [[DEF1]], [[DEF1]], implicit $mode, implicit $exec
- ; CHECK-NEXT: dead [[V_MUL_F32_e32_4:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F32_e32 [[V_MUL_F32_e32_3]], [[DEF4]], implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MUL_F32_e32_4:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F32_e32 [[DEF6]], [[DEF6]], implicit $mode, implicit $exec
+ ; CHECK-NEXT: dead [[V_MUL_F32_e32_5:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F32_e32 [[V_MUL_F32_e32_4]], [[DEF12]], implicit $mode, implicit $exec
; CHECK-NEXT: dead [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_MAC_F32_e32 [[V_ADD_F32_e32_]], [[COPY]], [[V_MOV_B32_e32_1]], implicit $mode, implicit $exec
- ; CHECK-NEXT: [[DEF5:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF13:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
; CHECK-NEXT: $sgpr4 = IMPLICIT_DEF
- ; CHECK-NEXT: $vgpr0 = COPY [[DEF3]]
+ ; CHECK-NEXT: $vgpr0 = COPY [[DEF10]]
; CHECK-NEXT: $vgpr0 = COPY [[V_MOV_B32_e32_]]
- ; CHECK-NEXT: $vgpr1 = COPY [[DEF1]]
- ; CHECK-NEXT: $vgpr0 = COPY [[V_MUL_F32_e32_]]
- ; CHECK-NEXT: $vgpr1 = COPY [[V_MUL_F32_e32_1]]
- ; CHECK-NEXT: $vgpr2 = COPY [[V_MUL_F32_e32_2]]
- ; CHECK-NEXT: dead $sgpr30_sgpr31 = SI_CALL [[DEF5]], @foo, csr_amdgpu, implicit undef $sgpr0_sgpr1_sgpr2_sgpr3, implicit killed $sgpr4, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit-def $vgpr0
- ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: [[V_MUL_F32_e32_5:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F32_e32 1082130432, [[DEF]], implicit $mode, implicit $exec
- ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: [[V_ADD_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e32 [[V_MUL_F32_e32_5]], [[DEF6]], implicit $mode, implicit $exec
- ; CHECK-NEXT: [[V_ADD_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_MAC_F32_e32 [[DEF8]], [[DEF7]], [[V_ADD_F32_e32_1]], implicit $mode, implicit $exec
- ; CHECK-NEXT: dead [[V_MAD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAD_F32_e64 0, [[V_ADD_F32_e32_1]], 0, [[DEF9]], 0, [[DEF]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: dead [[V_MAD_F32_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_MAD_F32_e64 0, [[V_ADD_F32_e32_1]], 0, [[DEF11]], 0, [[DEF10]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: dead [[V_MAD_F32_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_MAD_F32_e64 0, [[V_ADD_F32_e32_1]], 0, [[DEF13]], 0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: $vgpr1 = COPY [[DEF6]]
+ ; CHECK-NEXT: $vgpr0 = COPY [[V_MUL_F32_e32_1]]
+ ; CHECK-NEXT: $vgpr1 = COPY [[V_MUL_F32_e32_2]]
+ ; CHECK-NEXT: $vgpr2 = COPY [[V_MUL_F32_e32_3]]
+ ; CHECK-NEXT: dead $sgpr30_sgpr31 = SI_CALL [[DEF13]], @foo, csr_amdgpu, implicit undef $sgpr0_sgpr1_sgpr2_sgpr3, implicit killed $sgpr4, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit-def $vgpr0
+ ; CHECK-NEXT: [[V_ADD_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e32 [[V_MUL_F32_e32_]], [[DEF7]], implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_ADD_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_MAC_F32_e32 [[DEF11]], [[DEF8]], [[V_ADD_F32_e32_1]], implicit $mode, implicit $exec
+ ; CHECK-NEXT: dead [[V_MAD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAD_F32_e64 0, [[V_ADD_F32_e32_1]], 0, [[DEF3]], 0, [[DEF]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: dead [[V_MAD_F32_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_MAD_F32_e64 0, [[V_ADD_F32_e32_1]], 0, [[DEF4]], 0, [[DEF1]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: dead [[V_MAD_F32_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_MAD_F32_e64 0, [[V_ADD_F32_e32_1]], 0, [[DEF5]], 0, [[DEF2]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
- ; CHECK-NEXT: GLOBAL_STORE_DWORD [[DEF14]], [[DEF2]], 0, 0, implicit $exec
+ ; CHECK-NEXT: GLOBAL_STORE_DWORD [[DEF14]], [[DEF9]], 0, 0, implicit $exec
; CHECK-NEXT: S_ENDPGM 0
bb.0:
successors: %bb.1
diff --git a/llvm/test/CodeGen/AMDGPU/ds_read2-gfx1250.ll b/llvm/test/CodeGen/AMDGPU/ds_read2-gfx1250.ll
index 0e64b12ba3716..23d2b18f5311b 100644
--- a/llvm/test/CodeGen/AMDGPU/ds_read2-gfx1250.ll
+++ b/llvm/test/CodeGen/AMDGPU/ds_read2-gfx1250.ll
@@ -817,7 +817,7 @@ define amdgpu_kernel void @ds_read_call_read(ptr addrspace(1) %out, ptr addrspac
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_load_b96 s[36:38], s[4:5], 0x0
; GFX1250-NEXT: v_and_b32_e32 v1, 0x3ff, v0
-; GFX1250-NEXT: v_mov_b32_e32 v31, v0
+; GFX1250-NEXT: v_dual_mov_b32 v42, 0 :: v_dual_mov_b32 v31, v0
; GFX1250-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX1250-NEXT: s_add_nc_u64 s[8:9], s[4:5], 12
; GFX1250-NEXT: s_mov_b64 s[12:13], void_func_void at abs64
@@ -831,8 +831,8 @@ define amdgpu_kernel void @ds_read_call_read(ptr addrspace(1) %out, ptr addrspac
; GFX1250-NEXT: s_swap_pc_i64 s[30:31], s[12:13]
; GFX1250-NEXT: ds_load_b32 v0, v40 offset:4
; GFX1250-NEXT: s_wait_dscnt 0x0
-; GFX1250-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_add_nc_u32 v0, v41, v0
-; GFX1250-NEXT: global_store_b32 v1, v0, s[36:37]
+; GFX1250-NEXT: v_add_nc_u32_e32 v0, v41, v0
+; GFX1250-NEXT: global_store_b32 v42, v0, s[36:37]
; GFX1250-NEXT: s_endpgm
%x = call i32 @llvm.amdgcn.workitem.id.x()
%arrayidx0 = getelementptr i32, ptr addrspace(3) %arg, i32 %x
diff --git a/llvm/test/CodeGen/AMDGPU/ds_read2.ll b/llvm/test/CodeGen/AMDGPU/ds_read2.ll
index a89cd9ceff154..9f1b55ea3b1ef 100644
--- a/llvm/test/CodeGen/AMDGPU/ds_read2.ll
+++ b/llvm/test/CodeGen/AMDGPU/ds_read2.ll
@@ -1384,8 +1384,8 @@ define amdgpu_kernel void @ds_read_call_read(ptr addrspace(1) %out, ptr addrspac
; GFX9-NEXT: s_add_u32 s8, s4, 12
; GFX9-NEXT: s_addc_u32 s9, s5, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: v_lshl_add_u32 v40, v0, 2, s6
-; GFX9-NEXT: ds_read_b32 v41, v40
+; GFX9-NEXT: v_lshl_add_u32 v41, v0, 2, s6
+; GFX9-NEXT: ds_read_b32 v42, v41
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3]
@@ -1395,12 +1395,12 @@ define amdgpu_kernel void @ds_read_call_read(ptr addrspace(1) %out, ptr addrspac
; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2
; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-NEXT: s_mov_b32 s32, 0
+; GFX9-NEXT: v_mov_b32_e32 v40, 0
; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
-; GFX9-NEXT: ds_read_b32 v0, v40 offset:4
-; GFX9-NEXT: v_mov_b32_e32 v1, 0
+; GFX9-NEXT: ds_read_b32 v0, v41 offset:4
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: v_add_u32_e32 v0, v41, v0
-; GFX9-NEXT: global_store_dword v1, v0, s[34:35]
+; GFX9-NEXT: v_add_u32_e32 v0, v42, v0
+; GFX9-NEXT: global_store_dword v40, v0, s[34:35]
; GFX9-NEXT: s_endpgm
%x = call i32 @llvm.amdgcn.workitem.id.x()
%arrayidx0 = getelementptr i32, ptr addrspace(3) %arg, i32 %x
diff --git a/llvm/test/CodeGen/AMDGPU/dynamic_stackalloc.ll b/llvm/test/CodeGen/AMDGPU/dynamic_stackalloc.ll
index fda27243625cf..c5db7a33f70e0 100644
--- a/llvm/test/CodeGen/AMDGPU/dynamic_stackalloc.ll
+++ b/llvm/test/CodeGen/AMDGPU/dynamic_stackalloc.ll
@@ -1894,6 +1894,7 @@ define void @test_dynamic_stackalloc_device_multiple_allocas(i32 %n, i32 %m) {
; GFX9-SDAG-NEXT: .LBB14_6: ; %bb.1
; GFX9-SDAG-NEXT: s_or_b64 exec, exec, s[4:5]
; GFX9-SDAG-NEXT: v_lshl_add_u32 v0, v0, 2, 15
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, 2
; GFX9-SDAG-NEXT: v_and_b32_e32 v0, -16, v0
; GFX9-SDAG-NEXT: s_mov_b64 s[4:5], exec
; GFX9-SDAG-NEXT: .LBB14_7: ; =>This Inner Loop Header: Depth=1
@@ -1911,8 +1912,7 @@ define void @test_dynamic_stackalloc_device_multiple_allocas(i32 %n, i32 %m) {
; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, 1
; GFX9-SDAG-NEXT: buffer_store_dword v0, off, s[0:3], s33
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, 2
-; GFX9-SDAG-NEXT: buffer_store_dword v0, off, s[0:3], s4
+; GFX9-SDAG-NEXT: buffer_store_dword v1, off, s[0:3], s4
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX9-SDAG-NEXT: s_mov_b32 s32, s34
; GFX9-SDAG-NEXT: s_mov_b32 s34, s14
@@ -2059,30 +2059,31 @@ define void @test_dynamic_stackalloc_device_multiple_allocas(i32 %n, i32 %m) {
; GFX11-SDAG-NEXT: v_readfirstlane_b32 s32, v1
; GFX11-SDAG-NEXT: .LBB14_6: ; %bb.1
; GFX11-SDAG-NEXT: s_or_b32 exec_lo, exec_lo, s1
-; GFX11-SDAG-NEXT: v_lshl_add_u32 v0, v0, 2, 15
+; GFX11-SDAG-NEXT: v_lshl_add_u32 v1, v0, 2, 15
+; GFX11-SDAG-NEXT: v_mov_b32_e32 v0, 2
; GFX11-SDAG-NEXT: s_mov_b32 s1, exec_lo
-; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SDAG-NEXT: v_and_b32_e32 v0, -16, v0
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX11-SDAG-NEXT: v_and_b32_e32 v1, -16, v1
; GFX11-SDAG-NEXT: .LBB14_7: ; =>This Inner Loop Header: Depth=1
; GFX11-SDAG-NEXT: s_ctz_i32_b32 s2, s1
; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
-; GFX11-SDAG-NEXT: v_readlane_b32 s3, v0, s2
+; GFX11-SDAG-NEXT: v_readlane_b32 s3, v1, s2
; GFX11-SDAG-NEXT: s_bitset0_b32 s1, s2
; GFX11-SDAG-NEXT: s_max_u32 s0, s0, s3
; GFX11-SDAG-NEXT: s_cmp_lg_u32 s1, 0
; GFX11-SDAG-NEXT: s_cbranch_scc1 .LBB14_7
; GFX11-SDAG-NEXT: ; %bb.8:
; GFX11-SDAG-NEXT: s_mov_b32 s1, s32
-; GFX11-SDAG-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_mov_b32 v2, 2
-; GFX11-SDAG-NEXT: v_lshl_add_u32 v0, s0, 5, s1
-; GFX11-SDAG-NEXT: scratch_store_b32 off, v1, s33 dlc
+; GFX11-SDAG-NEXT: v_mov_b32_e32 v2, 1
+; GFX11-SDAG-NEXT: v_lshl_add_u32 v1, s0, 5, s1
+; GFX11-SDAG-NEXT: scratch_store_b32 off, v2, s33 dlc
; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX11-SDAG-NEXT: scratch_store_b32 off, v2, s1 dlc
+; GFX11-SDAG-NEXT: scratch_store_b32 off, v0, s1 dlc
; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX11-SDAG-NEXT: s_mov_b32 s33, s7
-; GFX11-SDAG-NEXT: v_readfirstlane_b32 s32, v0
+; GFX11-SDAG-NEXT: v_readfirstlane_b32 s32, v1
; GFX11-SDAG-NEXT: s_mov_b32 s32, s34
; GFX11-SDAG-NEXT: s_mov_b32 s34, s8
+; GFX11-SDAG-NEXT: s_mov_b32 s33, s7
; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-GISEL-LABEL: test_dynamic_stackalloc_device_multiple_allocas:
diff --git a/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-wwm.ll b/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-wwm.ll
index 0f5f9a54c11be..db32135939a5d 100644
--- a/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-wwm.ll
+++ b/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-wwm.ll
@@ -5,15 +5,14 @@ define amdgpu_gs i32 @main() {
; CHECK-LABEL: main:
; CHECK: ; %bb.0: ; %bb
; CHECK-NEXT: s_bitcmp1_b32 0, 0
-; CHECK-NEXT: s_cselect_b32 s0, -1, 0
-; CHECK-NEXT: s_or_saveexec_b32 s1, -1
-; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
+; CHECK-NEXT: s_mov_b32 s0, 0
+; CHECK-NEXT: s_cselect_b32 s1, -1, 0
+; CHECK-NEXT: s_or_saveexec_b32 s2, -1
+; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s1
; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; CHECK-NEXT: v_readfirstlane_b32 s0, v0
-; CHECK-NEXT: s_mov_b32 exec_lo, s1
-; CHECK-NEXT: s_mov_b32 s1, 0
-; CHECK-NEXT: s_wait_alu 0xfffe
-; CHECK-NEXT: s_or_b32 s0, s1, s0
+; CHECK-NEXT: v_readfirstlane_b32 s1, v0
+; CHECK-NEXT: s_mov_b32 exec_lo, s2
+; CHECK-NEXT: s_or_b32 s0, s0, s1
; CHECK-NEXT: s_wait_alu 0xfffe
; CHECK-NEXT: s_bitcmp1_b32 s0, 0
; CHECK-NEXT: s_cselect_b32 s0, -1, 0
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.add.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.add.ll
index 8efcc55a58ef6..a7ebf458d2591 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.add.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.add.ll
@@ -244,6 +244,7 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX9DAGISEL-LABEL: divergent_value:
; GFX9DAGISEL: ; %bb.0: ; %entry
; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX9DAGISEL-NEXT: s_mov_b32 s4, 0
; GFX9DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
@@ -255,7 +256,6 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX9DAGISEL-NEXT: ; %bb.2:
; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, s4
-; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX9DAGISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX9DAGISEL-NEXT: s_endpgm
@@ -282,6 +282,7 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1064DAGISEL-LABEL: divergent_value:
; GFX1064DAGISEL: ; %bb.0: ; %entry
; GFX1064DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX1064DAGISEL-NEXT: s_mov_b32 s4, 0
; GFX1064DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
@@ -293,7 +294,6 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX1064DAGISEL-NEXT: ; %bb.2:
; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, s4
-; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064DAGISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX1064DAGISEL-NEXT: s_endpgm
@@ -320,6 +320,7 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1032DAGISEL-LABEL: divergent_value:
; GFX1032DAGISEL: ; %bb.0: ; %entry
; GFX1032DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1032DAGISEL-NEXT: s_mov_b32 s3, exec_lo
; GFX1032DAGISEL-NEXT: s_mov_b32 s2, 0
; GFX1032DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
@@ -331,7 +332,6 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX1032DAGISEL-NEXT: ; %bb.2:
; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, s2
-; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032DAGISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX1032DAGISEL-NEXT: s_endpgm
@@ -358,6 +358,7 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1164DAGISEL-LABEL: divergent_value:
; GFX1164DAGISEL: ; %bb.0: ; %entry
; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1164DAGISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX1164DAGISEL-NEXT: s_mov_b32 s4, 0
@@ -371,7 +372,6 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX1164DAGISEL-NEXT: ; %bb.2:
; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, s4
-; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164DAGISEL-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX1164DAGISEL-NEXT: s_endpgm
@@ -400,7 +400,7 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1132DAGISEL-LABEL: divergent_value:
; GFX1132DAGISEL: ; %bb.0: ; %entry
; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132DAGISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
+; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0x3ff, v0
; GFX1132DAGISEL-NEXT: s_mov_b32 s3, exec_lo
; GFX1132DAGISEL-NEXT: s_mov_b32 s2, 0
; GFX1132DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
@@ -412,7 +412,7 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1132DAGISEL-NEXT: s_cmp_lg_u32 s3, 0
; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX1132DAGISEL-NEXT: ; %bb.2:
-; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 0
+; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, s2
; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132DAGISEL-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX1132DAGISEL-NEXT: s_endpgm
@@ -440,7 +440,7 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX12DAGISEL-LABEL: divergent_value:
; GFX12DAGISEL: ; %bb.0: ; %entry
; GFX12DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX12DAGISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
+; GFX12DAGISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0x3ff, v0
; GFX12DAGISEL-NEXT: s_mov_b32 s3, exec_lo
; GFX12DAGISEL-NEXT: s_mov_b32 s2, 0
; GFX12DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
@@ -452,7 +452,7 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX12DAGISEL-NEXT: s_cmp_lg_u32 s3, 0
; GFX12DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX12DAGISEL-NEXT: ; %bb.2:
-; GFX12DAGISEL-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 0
+; GFX12DAGISEL-NEXT: v_mov_b32_e32 v0, s2
; GFX12DAGISEL-NEXT: s_wait_kmcnt 0x0
; GFX12DAGISEL-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX12DAGISEL-NEXT: s_endpgm
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.and.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.and.ll
index ec27806c2da85..f39dd867f9580 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.and.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.and.ll
@@ -168,6 +168,7 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX9DAGISEL-LABEL: divergent_value:
; GFX9DAGISEL: ; %bb.0: ; %entry
; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX9DAGISEL-NEXT: s_mov_b32 s4, -1
; GFX9DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
@@ -179,7 +180,6 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX9DAGISEL-NEXT: ; %bb.2:
; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, s4
-; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX9DAGISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX9DAGISEL-NEXT: s_endpgm
@@ -206,6 +206,7 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1064DAGISEL-LABEL: divergent_value:
; GFX1064DAGISEL: ; %bb.0: ; %entry
; GFX1064DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX1064DAGISEL-NEXT: s_mov_b32 s4, -1
; GFX1064DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
@@ -217,7 +218,6 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX1064DAGISEL-NEXT: ; %bb.2:
; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, s4
-; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064DAGISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX1064DAGISEL-NEXT: s_endpgm
@@ -244,6 +244,7 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1032DAGISEL-LABEL: divergent_value:
; GFX1032DAGISEL: ; %bb.0: ; %entry
; GFX1032DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1032DAGISEL-NEXT: s_mov_b32 s3, exec_lo
; GFX1032DAGISEL-NEXT: s_mov_b32 s2, -1
; GFX1032DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
@@ -255,7 +256,6 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX1032DAGISEL-NEXT: ; %bb.2:
; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, s2
-; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032DAGISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX1032DAGISEL-NEXT: s_endpgm
@@ -282,6 +282,7 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1164DAGISEL-LABEL: divergent_value:
; GFX1164DAGISEL: ; %bb.0: ; %entry
; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1164DAGISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX1164DAGISEL-NEXT: s_mov_b32 s4, -1
@@ -295,7 +296,6 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX1164DAGISEL-NEXT: ; %bb.2:
; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, s4
-; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164DAGISEL-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX1164DAGISEL-NEXT: s_endpgm
@@ -324,7 +324,7 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1132DAGISEL-LABEL: divergent_value:
; GFX1132DAGISEL: ; %bb.0: ; %entry
; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132DAGISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
+; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0x3ff, v0
; GFX1132DAGISEL-NEXT: s_mov_b32 s3, exec_lo
; GFX1132DAGISEL-NEXT: s_mov_b32 s2, -1
; GFX1132DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
@@ -336,7 +336,7 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1132DAGISEL-NEXT: s_cmp_lg_u32 s3, 0
; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX1132DAGISEL-NEXT: ; %bb.2:
-; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 0
+; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, s2
; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132DAGISEL-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX1132DAGISEL-NEXT: s_endpgm
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.max.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.max.ll
index 3c84348ab7064..6f299ab8bb9cf 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.max.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.max.ll
@@ -168,6 +168,7 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX9DAGISEL-LABEL: divergent_value:
; GFX9DAGISEL: ; %bb.0: ; %entry
; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX9DAGISEL-NEXT: s_brev_b32 s4, 1
; GFX9DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
@@ -179,7 +180,6 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX9DAGISEL-NEXT: ; %bb.2:
; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, s4
-; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX9DAGISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX9DAGISEL-NEXT: s_endpgm
@@ -206,6 +206,7 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1064DAGISEL-LABEL: divergent_value:
; GFX1064DAGISEL: ; %bb.0: ; %entry
; GFX1064DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX1064DAGISEL-NEXT: s_brev_b32 s4, 1
; GFX1064DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
@@ -217,7 +218,6 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX1064DAGISEL-NEXT: ; %bb.2:
; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, s4
-; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064DAGISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX1064DAGISEL-NEXT: s_endpgm
@@ -244,6 +244,7 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1032DAGISEL-LABEL: divergent_value:
; GFX1032DAGISEL: ; %bb.0: ; %entry
; GFX1032DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1032DAGISEL-NEXT: s_mov_b32 s3, exec_lo
; GFX1032DAGISEL-NEXT: s_brev_b32 s2, 1
; GFX1032DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
@@ -255,7 +256,6 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX1032DAGISEL-NEXT: ; %bb.2:
; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, s2
-; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032DAGISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX1032DAGISEL-NEXT: s_endpgm
@@ -282,6 +282,7 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1164DAGISEL-LABEL: divergent_value:
; GFX1164DAGISEL: ; %bb.0: ; %entry
; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1164DAGISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX1164DAGISEL-NEXT: s_brev_b32 s4, 1
@@ -295,7 +296,6 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX1164DAGISEL-NEXT: ; %bb.2:
; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, s4
-; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164DAGISEL-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX1164DAGISEL-NEXT: s_endpgm
@@ -324,7 +324,7 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1132DAGISEL-LABEL: divergent_value:
; GFX1132DAGISEL: ; %bb.0: ; %entry
; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132DAGISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
+; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0x3ff, v0
; GFX1132DAGISEL-NEXT: s_mov_b32 s3, exec_lo
; GFX1132DAGISEL-NEXT: s_brev_b32 s2, 1
; GFX1132DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
@@ -336,7 +336,7 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1132DAGISEL-NEXT: s_cmp_lg_u32 s3, 0
; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX1132DAGISEL-NEXT: ; %bb.2:
-; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 0
+; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, s2
; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132DAGISEL-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX1132DAGISEL-NEXT: s_endpgm
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.min.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.min.ll
index 29a558c1ee169..3c4cbc74aedc1 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.min.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.min.ll
@@ -168,6 +168,7 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX9DAGISEL-LABEL: divergent_value:
; GFX9DAGISEL: ; %bb.0: ; %entry
; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX9DAGISEL-NEXT: s_brev_b32 s4, -2
; GFX9DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
@@ -179,7 +180,6 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX9DAGISEL-NEXT: ; %bb.2:
; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, s4
-; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX9DAGISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX9DAGISEL-NEXT: s_endpgm
@@ -206,6 +206,7 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1064DAGISEL-LABEL: divergent_value:
; GFX1064DAGISEL: ; %bb.0: ; %entry
; GFX1064DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX1064DAGISEL-NEXT: s_brev_b32 s4, -2
; GFX1064DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
@@ -217,7 +218,6 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX1064DAGISEL-NEXT: ; %bb.2:
; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, s4
-; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064DAGISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX1064DAGISEL-NEXT: s_endpgm
@@ -244,6 +244,7 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1032DAGISEL-LABEL: divergent_value:
; GFX1032DAGISEL: ; %bb.0: ; %entry
; GFX1032DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1032DAGISEL-NEXT: s_mov_b32 s3, exec_lo
; GFX1032DAGISEL-NEXT: s_brev_b32 s2, -2
; GFX1032DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
@@ -255,7 +256,6 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX1032DAGISEL-NEXT: ; %bb.2:
; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, s2
-; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032DAGISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX1032DAGISEL-NEXT: s_endpgm
@@ -282,6 +282,7 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1164DAGISEL-LABEL: divergent_value:
; GFX1164DAGISEL: ; %bb.0: ; %entry
; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1164DAGISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX1164DAGISEL-NEXT: s_brev_b32 s4, -2
@@ -295,7 +296,6 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX1164DAGISEL-NEXT: ; %bb.2:
; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, s4
-; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164DAGISEL-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX1164DAGISEL-NEXT: s_endpgm
@@ -324,7 +324,7 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1132DAGISEL-LABEL: divergent_value:
; GFX1132DAGISEL: ; %bb.0: ; %entry
; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132DAGISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
+; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0x3ff, v0
; GFX1132DAGISEL-NEXT: s_mov_b32 s3, exec_lo
; GFX1132DAGISEL-NEXT: s_brev_b32 s2, -2
; GFX1132DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
@@ -336,7 +336,7 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1132DAGISEL-NEXT: s_cmp_lg_u32 s3, 0
; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX1132DAGISEL-NEXT: ; %bb.2:
-; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 0
+; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, s2
; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132DAGISEL-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX1132DAGISEL-NEXT: s_endpgm
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.or.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.or.ll
index 2a3119ea97ff9..d6ccf7ce2831d 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.or.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.or.ll
@@ -168,6 +168,7 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX9DAGISEL-LABEL: divergent_value:
; GFX9DAGISEL: ; %bb.0: ; %entry
; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX9DAGISEL-NEXT: s_mov_b32 s4, 0
; GFX9DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
@@ -179,7 +180,6 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX9DAGISEL-NEXT: ; %bb.2:
; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, s4
-; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX9DAGISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX9DAGISEL-NEXT: s_endpgm
@@ -206,6 +206,7 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1064DAGISEL-LABEL: divergent_value:
; GFX1064DAGISEL: ; %bb.0: ; %entry
; GFX1064DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX1064DAGISEL-NEXT: s_mov_b32 s4, 0
; GFX1064DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
@@ -217,7 +218,6 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX1064DAGISEL-NEXT: ; %bb.2:
; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, s4
-; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064DAGISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX1064DAGISEL-NEXT: s_endpgm
@@ -244,6 +244,7 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1032DAGISEL-LABEL: divergent_value:
; GFX1032DAGISEL: ; %bb.0: ; %entry
; GFX1032DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1032DAGISEL-NEXT: s_mov_b32 s3, exec_lo
; GFX1032DAGISEL-NEXT: s_mov_b32 s2, 0
; GFX1032DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
@@ -255,7 +256,6 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX1032DAGISEL-NEXT: ; %bb.2:
; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, s2
-; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032DAGISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX1032DAGISEL-NEXT: s_endpgm
@@ -282,6 +282,7 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1164DAGISEL-LABEL: divergent_value:
; GFX1164DAGISEL: ; %bb.0: ; %entry
; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1164DAGISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX1164DAGISEL-NEXT: s_mov_b32 s4, 0
@@ -295,7 +296,6 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX1164DAGISEL-NEXT: ; %bb.2:
; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, s4
-; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164DAGISEL-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX1164DAGISEL-NEXT: s_endpgm
@@ -324,7 +324,7 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1132DAGISEL-LABEL: divergent_value:
; GFX1132DAGISEL: ; %bb.0: ; %entry
; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132DAGISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
+; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0x3ff, v0
; GFX1132DAGISEL-NEXT: s_mov_b32 s3, exec_lo
; GFX1132DAGISEL-NEXT: s_mov_b32 s2, 0
; GFX1132DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
@@ -336,7 +336,7 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1132DAGISEL-NEXT: s_cmp_lg_u32 s3, 0
; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX1132DAGISEL-NEXT: ; %bb.2:
-; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 0
+; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, s2
; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132DAGISEL-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX1132DAGISEL-NEXT: s_endpgm
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.sub.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.sub.ll
index 5f1bcec126938..fab269ea8cfb9 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.sub.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.sub.ll
@@ -259,6 +259,7 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX9DAGISEL-LABEL: divergent_value:
; GFX9DAGISEL: ; %bb.0: ; %entry
; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX9DAGISEL-NEXT: s_mov_b32 s4, 0
; GFX9DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
@@ -270,7 +271,6 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX9DAGISEL-NEXT: ; %bb.2:
; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, s4
-; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX9DAGISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX9DAGISEL-NEXT: s_endpgm
@@ -297,6 +297,7 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1064DAGISEL-LABEL: divergent_value:
; GFX1064DAGISEL: ; %bb.0: ; %entry
; GFX1064DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX1064DAGISEL-NEXT: s_mov_b32 s4, 0
; GFX1064DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
@@ -308,7 +309,6 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX1064DAGISEL-NEXT: ; %bb.2:
; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, s4
-; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064DAGISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX1064DAGISEL-NEXT: s_endpgm
@@ -335,6 +335,7 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1032DAGISEL-LABEL: divergent_value:
; GFX1032DAGISEL: ; %bb.0: ; %entry
; GFX1032DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1032DAGISEL-NEXT: s_mov_b32 s3, exec_lo
; GFX1032DAGISEL-NEXT: s_mov_b32 s2, 0
; GFX1032DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
@@ -346,7 +347,6 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX1032DAGISEL-NEXT: ; %bb.2:
; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, s2
-; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032DAGISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX1032DAGISEL-NEXT: s_endpgm
@@ -373,6 +373,7 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1164DAGISEL-LABEL: divergent_value:
; GFX1164DAGISEL: ; %bb.0: ; %entry
; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1164DAGISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX1164DAGISEL-NEXT: s_mov_b32 s4, 0
@@ -386,7 +387,6 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX1164DAGISEL-NEXT: ; %bb.2:
; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, s4
-; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164DAGISEL-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX1164DAGISEL-NEXT: s_endpgm
@@ -415,7 +415,7 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1132DAGISEL-LABEL: divergent_value:
; GFX1132DAGISEL: ; %bb.0: ; %entry
; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132DAGISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
+; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0x3ff, v0
; GFX1132DAGISEL-NEXT: s_mov_b32 s3, exec_lo
; GFX1132DAGISEL-NEXT: s_mov_b32 s2, 0
; GFX1132DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
@@ -427,7 +427,7 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1132DAGISEL-NEXT: s_cmp_lg_u32 s3, 0
; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX1132DAGISEL-NEXT: ; %bb.2:
-; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 0
+; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, s2
; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132DAGISEL-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX1132DAGISEL-NEXT: s_endpgm
@@ -455,7 +455,7 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX12DAGISEL-LABEL: divergent_value:
; GFX12DAGISEL: ; %bb.0: ; %entry
; GFX12DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX12DAGISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
+; GFX12DAGISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0x3ff, v0
; GFX12DAGISEL-NEXT: s_mov_b32 s3, exec_lo
; GFX12DAGISEL-NEXT: s_mov_b32 s2, 0
; GFX12DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
@@ -467,7 +467,7 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX12DAGISEL-NEXT: s_cmp_lg_u32 s3, 0
; GFX12DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX12DAGISEL-NEXT: ; %bb.2:
-; GFX12DAGISEL-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 0
+; GFX12DAGISEL-NEXT: v_mov_b32_e32 v0, s2
; GFX12DAGISEL-NEXT: s_wait_kmcnt 0x0
; GFX12DAGISEL-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX12DAGISEL-NEXT: s_endpgm
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.umax.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.umax.ll
index 791f1b59fe83e..54c8e2e248f57 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.umax.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.umax.ll
@@ -169,6 +169,7 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out, i32 %in) {
; GFX9DAGISEL-LABEL: divergent_value:
; GFX9DAGISEL: ; %bb.0: ; %entry
; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX9DAGISEL-NEXT: s_mov_b32 s4, 0
; GFX9DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
@@ -180,7 +181,6 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out, i32 %in) {
; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX9DAGISEL-NEXT: ; %bb.2:
; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, s4
-; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX9DAGISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX9DAGISEL-NEXT: s_endpgm
@@ -207,6 +207,7 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out, i32 %in) {
; GFX1064DAGISEL-LABEL: divergent_value:
; GFX1064DAGISEL: ; %bb.0: ; %entry
; GFX1064DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX1064DAGISEL-NEXT: s_mov_b32 s4, 0
; GFX1064DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
@@ -218,7 +219,6 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out, i32 %in) {
; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX1064DAGISEL-NEXT: ; %bb.2:
; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, s4
-; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064DAGISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX1064DAGISEL-NEXT: s_endpgm
@@ -245,6 +245,7 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out, i32 %in) {
; GFX1032DAGISEL-LABEL: divergent_value:
; GFX1032DAGISEL: ; %bb.0: ; %entry
; GFX1032DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1032DAGISEL-NEXT: s_mov_b32 s3, exec_lo
; GFX1032DAGISEL-NEXT: s_mov_b32 s2, 0
; GFX1032DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
@@ -256,7 +257,6 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out, i32 %in) {
; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX1032DAGISEL-NEXT: ; %bb.2:
; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, s2
-; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032DAGISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX1032DAGISEL-NEXT: s_endpgm
@@ -283,6 +283,7 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out, i32 %in) {
; GFX1164DAGISEL-LABEL: divergent_value:
; GFX1164DAGISEL: ; %bb.0: ; %entry
; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1164DAGISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX1164DAGISEL-NEXT: s_mov_b32 s4, 0
@@ -296,7 +297,6 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out, i32 %in) {
; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX1164DAGISEL-NEXT: ; %bb.2:
; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, s4
-; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164DAGISEL-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX1164DAGISEL-NEXT: s_endpgm
@@ -325,7 +325,7 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out, i32 %in) {
; GFX1132DAGISEL-LABEL: divergent_value:
; GFX1132DAGISEL: ; %bb.0: ; %entry
; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132DAGISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
+; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0x3ff, v0
; GFX1132DAGISEL-NEXT: s_mov_b32 s3, exec_lo
; GFX1132DAGISEL-NEXT: s_mov_b32 s2, 0
; GFX1132DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
@@ -337,7 +337,7 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out, i32 %in) {
; GFX1132DAGISEL-NEXT: s_cmp_lg_u32 s3, 0
; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX1132DAGISEL-NEXT: ; %bb.2:
-; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 0
+; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, s2
; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132DAGISEL-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX1132DAGISEL-NEXT: s_endpgm
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.umin.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.umin.ll
index 25ba296a76524..502ef84449751 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.umin.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.umin.ll
@@ -169,6 +169,7 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX9DAGISEL-LABEL: divergent_value:
; GFX9DAGISEL: ; %bb.0: ; %entry
; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX9DAGISEL-NEXT: s_mov_b32 s4, -1
; GFX9DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
@@ -180,7 +181,6 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX9DAGISEL-NEXT: ; %bb.2:
; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, s4
-; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX9DAGISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX9DAGISEL-NEXT: s_endpgm
@@ -207,6 +207,7 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1064DAGISEL-LABEL: divergent_value:
; GFX1064DAGISEL: ; %bb.0: ; %entry
; GFX1064DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX1064DAGISEL-NEXT: s_mov_b32 s4, -1
; GFX1064DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
@@ -218,7 +219,6 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX1064DAGISEL-NEXT: ; %bb.2:
; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, s4
-; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064DAGISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX1064DAGISEL-NEXT: s_endpgm
@@ -245,6 +245,7 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1032DAGISEL-LABEL: divergent_value:
; GFX1032DAGISEL: ; %bb.0: ; %entry
; GFX1032DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1032DAGISEL-NEXT: s_mov_b32 s3, exec_lo
; GFX1032DAGISEL-NEXT: s_mov_b32 s2, -1
; GFX1032DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
@@ -256,7 +257,6 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX1032DAGISEL-NEXT: ; %bb.2:
; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, s2
-; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032DAGISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX1032DAGISEL-NEXT: s_endpgm
@@ -283,6 +283,7 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1164DAGISEL-LABEL: divergent_value:
; GFX1164DAGISEL: ; %bb.0: ; %entry
; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1164DAGISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX1164DAGISEL-NEXT: s_mov_b32 s4, -1
@@ -296,7 +297,6 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX1164DAGISEL-NEXT: ; %bb.2:
; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, s4
-; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164DAGISEL-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX1164DAGISEL-NEXT: s_endpgm
@@ -325,7 +325,7 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1132DAGISEL-LABEL: divergent_value:
; GFX1132DAGISEL: ; %bb.0: ; %entry
; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132DAGISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
+; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0x3ff, v0
; GFX1132DAGISEL-NEXT: s_mov_b32 s3, exec_lo
; GFX1132DAGISEL-NEXT: s_mov_b32 s2, -1
; GFX1132DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
@@ -337,7 +337,7 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1132DAGISEL-NEXT: s_cmp_lg_u32 s3, 0
; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX1132DAGISEL-NEXT: ; %bb.2:
-; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 0
+; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, s2
; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132DAGISEL-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX1132DAGISEL-NEXT: s_endpgm
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.xor.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.xor.ll
index 23f46ab185f98..d5f1750c268ab 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.xor.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.xor.ll
@@ -244,6 +244,7 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX9DAGISEL-LABEL: divergent_value:
; GFX9DAGISEL: ; %bb.0: ; %entry
; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX9DAGISEL-NEXT: s_mov_b32 s4, 0
; GFX9DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
@@ -255,7 +256,6 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX9DAGISEL-NEXT: ; %bb.2:
; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, s4
-; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX9DAGISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX9DAGISEL-NEXT: s_endpgm
@@ -282,6 +282,7 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1064DAGISEL-LABEL: divergent_value:
; GFX1064DAGISEL: ; %bb.0: ; %entry
; GFX1064DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX1064DAGISEL-NEXT: s_mov_b32 s4, 0
; GFX1064DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
@@ -293,7 +294,6 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX1064DAGISEL-NEXT: ; %bb.2:
; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, s4
-; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064DAGISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX1064DAGISEL-NEXT: s_endpgm
@@ -320,6 +320,7 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1032DAGISEL-LABEL: divergent_value:
; GFX1032DAGISEL: ; %bb.0: ; %entry
; GFX1032DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1032DAGISEL-NEXT: s_mov_b32 s3, exec_lo
; GFX1032DAGISEL-NEXT: s_mov_b32 s2, 0
; GFX1032DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
@@ -331,7 +332,6 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX1032DAGISEL-NEXT: ; %bb.2:
; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, s2
-; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032DAGISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX1032DAGISEL-NEXT: s_endpgm
@@ -358,6 +358,7 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1164DAGISEL-LABEL: divergent_value:
; GFX1164DAGISEL: ; %bb.0: ; %entry
; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1164DAGISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX1164DAGISEL-NEXT: s_mov_b32 s4, 0
@@ -371,7 +372,6 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX1164DAGISEL-NEXT: ; %bb.2:
; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, s4
-; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164DAGISEL-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX1164DAGISEL-NEXT: s_endpgm
@@ -400,7 +400,7 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1132DAGISEL-LABEL: divergent_value:
; GFX1132DAGISEL: ; %bb.0: ; %entry
; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132DAGISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
+; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0x3ff, v0
; GFX1132DAGISEL-NEXT: s_mov_b32 s3, exec_lo
; GFX1132DAGISEL-NEXT: s_mov_b32 s2, 0
; GFX1132DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
@@ -412,7 +412,7 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1132DAGISEL-NEXT: s_cmp_lg_u32 s3, 0
; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX1132DAGISEL-NEXT: ; %bb.2:
-; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 0
+; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, s2
; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132DAGISEL-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX1132DAGISEL-NEXT: s_endpgm
diff --git a/llvm/test/CodeGen/AMDGPU/machine-scheduler-rematerialization-scoring.mir b/llvm/test/CodeGen/AMDGPU/machine-scheduler-rematerialization-scoring.mir
index 37ea20a4683da..e3ebc4e9789c4 100644
--- a/llvm/test/CodeGen/AMDGPU/machine-scheduler-rematerialization-scoring.mir
+++ b/llvm/test/CodeGen/AMDGPU/machine-scheduler-rematerialization-scoring.mir
@@ -28,14 +28,37 @@
# | 5 |
# +---+
-# Rematerializing %32 is always beneficial because the defining and using
-# regions have the same frequency. It should be rematerialized first.
-name: favor_always_benef
+--- |
+ define void @small_num_sgprs_as_spill() "amdgpu-num-sgpr"="85" {
+ ret void
+ }
+ define void @small_num_vgprs_as_spill() "amdgpu-num-vgpr"="14" {
+ ret void
+ }
+ define void @dont_remat_waves_per_eu() "amdgpu-flat-work-group-size"="1024,1024" "amdgpu-waves-per-eu"="7,7" {
+ ret void
+ }
+ define void @dont_remat_at_max_occ() "amdgpu-waves-per-eu"="8,8" {
+ ret void
+ }
+ define void @reduce_arch_and_acc_vgrp_spill() "amdgpu-waves-per-eu"="8,8" {
+ ret void
+ }
+ define void @reduce_spill_archvgpr_above_addressable_limit() "amdgpu-flat-work-group-size"="1,64" "amdgpu-waves-per-eu"="1,2" {
+ ret void
+ }
+ define void @reduce_spill_agpr_above_addressable_limit() "amdgpu-flat-work-group-size"="1,64" "amdgpu-waves-per-eu"="1,2" {
+ ret void
+ }
+---
+# %32's defining and using regions have identical frequencies, so it is the
+# best register to rematerialize.
+name: favor_same_frequency
tracksRegLiveness: true
machineFunctionInfo:
isEntryFunction: true
body: |
- ; CHECK-LABEL: name: favor_always_benef
+ ; CHECK-LABEL: name: favor_same_frequency
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: liveins: $vgpr0, $sgpr0_sgpr1
@@ -193,15 +216,14 @@ body: |
S_ENDPGM 0
...
---
-# Rematerializing registers used in bb.2 is more beneficial than rematerializing
-# registers in bb.4 since they are live-through in higher frequency regions
-# (bb.4), which contribute more to the score.
-name: favor_live_through_in_high_freq_region
+# bb.2's frequency is lower than bb.4's, so it is preferable to
+# rematerialize registers in bb.2 instead of bb.4.
+name: favor_lower_frequency
tracksRegLiveness: true
machineFunctionInfo:
isEntryFunction: true
body: |
- ; CHECK-LABEL: name: favor_live_through_in_high_freq_region
+ ; CHECK-LABEL: name: favor_lower_frequency
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: liveins: $vgpr0, $sgpr0_sgpr1
@@ -211,8 +233,6 @@ body: |
; CHECK-NEXT: %mem_data:sreg_64_xexec = S_LOAD_DWORDX2_IMM %mem_addr, 52, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4)
; CHECK-NEXT: [[V_CVT_I32_F64_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode
; CHECK-NEXT: [[V_CVT_I32_F64_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode
- ; CHECK-NEXT: %exec_loop_mask:sreg_64 = V_CMP_GT_U32_e64 %mem_data.sub0, %loop_if_bound, implicit $exec
- ; CHECK-NEXT: %loop_counter:sreg_32 = COPY %mem_data.sub1
; CHECK-NEXT: [[V_CVT_I32_F64_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode
; CHECK-NEXT: [[V_CVT_I32_F64_e32_3:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 4, implicit $exec, implicit $mode
; CHECK-NEXT: [[V_CVT_I32_F64_e32_4:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 5, implicit $exec, implicit $mode
@@ -234,14 +254,9 @@ body: |
; CHECK-NEXT: [[V_CVT_I32_F64_e32_20:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 21, implicit $exec, implicit $mode
; CHECK-NEXT: [[V_CVT_I32_F64_e32_21:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode
; CHECK-NEXT: [[V_CVT_I32_F64_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 25, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 26, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 27, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_26:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 28, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 29, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 30, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 31, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 32, implicit $exec, implicit $mode, implicit-def $m0
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 32, implicit $exec, implicit $mode, implicit-def $m0
+ ; CHECK-NEXT: %exec_loop_mask:sreg_64 = V_CMP_GT_U32_e64 %mem_data.sub0, %loop_if_bound, implicit $exec
+ ; CHECK-NEXT: %loop_counter:sreg_32 = COPY %mem_data.sub1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000)
@@ -255,8 +270,15 @@ body: |
; CHECK-NEXT: bb.2:
; CHECK-NEXT: successors: %bb.3(0x80000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode
- ; CHECK-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_31]], implicit [[V_CVT_I32_F64_e32_23]], implicit [[V_CVT_I32_F64_e32_24]], implicit [[V_CVT_I32_F64_e32_25]], implicit [[V_CVT_I32_F64_e32_26]], implicit [[V_CVT_I32_F64_e32_27]], implicit [[V_CVT_I32_F64_e32_28]], implicit [[V_CVT_I32_F64_e32_29]]
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 25, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_26:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 26, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 27, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 28, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 29, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 30, implicit $exec, implicit $mode
+ ; CHECK-NEXT: [[V_CVT_I32_F64_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 31, implicit $exec, implicit $mode
+ ; CHECK-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_24]], implicit [[V_CVT_I32_F64_e32_25]], implicit [[V_CVT_I32_F64_e32_26]], implicit [[V_CVT_I32_F64_e32_27]], implicit [[V_CVT_I32_F64_e32_28]], implicit [[V_CVT_I32_F64_e32_29]], implicit [[V_CVT_I32_F64_e32_30]], implicit [[V_CVT_I32_F64_e32_31]]
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: successors: %bb.4(0x7c000000), %bb.5(0x04000000)
@@ -276,7 +298,7 @@ body: |
; CHECK-NEXT: S_BRANCH %bb.1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.5:
- ; CHECK-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_30]]
+ ; CHECK-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_23]]
; CHECK-NEXT: S_ENDPGM 0
bb.0:
liveins: $vgpr0, $sgpr0_sgpr1
@@ -360,12 +382,12 @@ body: |
...
---
# Rematerializing registers used in bb.4 is the only option.
-name: use_only_region_possible
+name: remat_in_only_possible_region
tracksRegLiveness: true
machineFunctionInfo:
isEntryFunction: true
body: |
- ; CHECK-LABEL: name: use_only_region_possible
+ ; CHECK-LABEL: name: remat_in_only_possible_region
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: liveins: $vgpr0, $sgpr0_sgpr1
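
For readers skimming the scoring test above: the comments rely on the intuition that a rematerialization candidate is most attractive when its defining and using regions run equally often, and that among the remaining candidates a colder using region is preferred. The following is a hedged sketch of that intuition only, not the formula implemented by this patch; RematCandidate and rematScore are hypothetical names, and the real scoring weighs more factors than block frequency.

// Hypothetical illustration of the frequency intuition in the test comments.
// Not the PR's actual scoring code.
#include <cstdint>

struct RematCandidate {
  uint64_t DefRegionFreq; // Block frequency of the region defining the value.
  uint64_t UseRegionFreq; // Block frequency of the region the value would be
                          // rematerialized into.
};

// Higher score == more attractive candidate. Equal frequencies give 1.0
// (cf. favor_same_frequency); rematerializing into a region hotter than the
// defining one scores below 1.0 (cf. favor_lower_frequency).
static double rematScore(const RematCandidate &C) {
  if (C.UseRegionFreq == 0)
    return 0.0;
  return static_cast<double>(C.DefRegionFreq) /
         static_cast<double>(C.UseRegionFreq);
}

Under this reading, the favor_same_frequency candidate scores 1.0 while anything re-executed inside a hotter loop scores lower, and in favor_lower_frequency the registers used in the colder bb.2 outscore those used in bb.4, which is the ordering the CHECK lines above encode.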
diff --git a/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats-attr.mir b/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats-attr.mir
index 7f14afce3ff96..1daa709ab6439 100644
--- a/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats-attr.mir
+++ b/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats-attr.mir
@@ -37,206 +37,208 @@ body: |
; GFX908: bb.0:
; GFX908-NEXT: successors: %bb.1(0x80000000)
; GFX908-NEXT: {{ $}}
+ ; GFX908-NEXT: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 6
+ ; GFX908-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 7
+ ; GFX908-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 8
+ ; GFX908-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sgpr_32 = S_MOV_B32 9
+ ; GFX908-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sgpr_32 = S_MOV_B32 10
+ ; GFX908-NEXT: [[S_MOV_B32_5:%[0-9]+]]:sgpr_32 = S_MOV_B32 11
+ ; GFX908-NEXT: [[S_MOV_B32_6:%[0-9]+]]:sgpr_32 = S_MOV_B32 12
+ ; GFX908-NEXT: [[S_MOV_B32_7:%[0-9]+]]:sgpr_32 = S_MOV_B32 13
+ ; GFX908-NEXT: [[S_MOV_B32_8:%[0-9]+]]:sgpr_32 = S_MOV_B32 14
+ ; GFX908-NEXT: [[S_MOV_B32_9:%[0-9]+]]:sgpr_32 = S_MOV_B32 15
+ ; GFX908-NEXT: [[S_MOV_B32_10:%[0-9]+]]:sgpr_32 = S_MOV_B32 16
+ ; GFX908-NEXT: [[S_MOV_B32_11:%[0-9]+]]:sgpr_32 = S_MOV_B32 17
+ ; GFX908-NEXT: [[S_MOV_B32_12:%[0-9]+]]:sgpr_32 = S_MOV_B32 18
+ ; GFX908-NEXT: [[S_MOV_B32_13:%[0-9]+]]:sgpr_32 = S_MOV_B32 19
+ ; GFX908-NEXT: [[S_MOV_B32_14:%[0-9]+]]:sgpr_32 = S_MOV_B32 20
+ ; GFX908-NEXT: [[S_MOV_B32_15:%[0-9]+]]:sgpr_32 = S_MOV_B32 21
+ ; GFX908-NEXT: [[S_MOV_B32_16:%[0-9]+]]:sgpr_32 = S_MOV_B32 22
+ ; GFX908-NEXT: [[S_MOV_B32_17:%[0-9]+]]:sgpr_32 = S_MOV_B32 23
+ ; GFX908-NEXT: [[S_MOV_B32_18:%[0-9]+]]:sgpr_32 = S_MOV_B32 24
+ ; GFX908-NEXT: [[S_MOV_B32_19:%[0-9]+]]:sgpr_32 = S_MOV_B32 25
+ ; GFX908-NEXT: [[S_MOV_B32_20:%[0-9]+]]:sgpr_32 = S_MOV_B32 26
+ ; GFX908-NEXT: [[S_MOV_B32_21:%[0-9]+]]:sgpr_32 = S_MOV_B32 27
+ ; GFX908-NEXT: [[S_MOV_B32_22:%[0-9]+]]:sgpr_32 = S_MOV_B32 28
+ ; GFX908-NEXT: [[S_MOV_B32_23:%[0-9]+]]:sgpr_32 = S_MOV_B32 29
+ ; GFX908-NEXT: [[S_MOV_B32_24:%[0-9]+]]:sgpr_32 = S_MOV_B32 30
+ ; GFX908-NEXT: [[S_MOV_B32_25:%[0-9]+]]:sgpr_32 = S_MOV_B32 31
+ ; GFX908-NEXT: [[S_MOV_B32_26:%[0-9]+]]:sgpr_32 = S_MOV_B32 32
+ ; GFX908-NEXT: [[S_MOV_B32_27:%[0-9]+]]:sgpr_32 = S_MOV_B32 33
+ ; GFX908-NEXT: [[S_MOV_B32_28:%[0-9]+]]:sgpr_32 = S_MOV_B32 34
+ ; GFX908-NEXT: [[S_MOV_B32_29:%[0-9]+]]:sgpr_32 = S_MOV_B32 35
+ ; GFX908-NEXT: [[S_MOV_B32_30:%[0-9]+]]:sgpr_32 = S_MOV_B32 36
+ ; GFX908-NEXT: [[S_MOV_B32_31:%[0-9]+]]:sgpr_32 = S_MOV_B32 37
+ ; GFX908-NEXT: [[S_MOV_B32_32:%[0-9]+]]:sgpr_32 = S_MOV_B32 38
+ ; GFX908-NEXT: [[S_MOV_B32_33:%[0-9]+]]:sgpr_32 = S_MOV_B32 39
+ ; GFX908-NEXT: [[S_MOV_B32_34:%[0-9]+]]:sgpr_32 = S_MOV_B32 40
+ ; GFX908-NEXT: [[S_MOV_B32_35:%[0-9]+]]:sgpr_32 = S_MOV_B32 41
+ ; GFX908-NEXT: [[S_MOV_B32_36:%[0-9]+]]:sgpr_32 = S_MOV_B32 42
+ ; GFX908-NEXT: [[S_MOV_B32_37:%[0-9]+]]:sgpr_32 = S_MOV_B32 43
+ ; GFX908-NEXT: [[S_MOV_B32_38:%[0-9]+]]:sgpr_32 = S_MOV_B32 44
+ ; GFX908-NEXT: [[S_MOV_B32_39:%[0-9]+]]:sgpr_32 = S_MOV_B32 45
+ ; GFX908-NEXT: [[S_MOV_B32_40:%[0-9]+]]:sgpr_32 = S_MOV_B32 46
+ ; GFX908-NEXT: [[S_MOV_B32_41:%[0-9]+]]:sgpr_32 = S_MOV_B32 47
+ ; GFX908-NEXT: [[S_MOV_B32_42:%[0-9]+]]:sgpr_32 = S_MOV_B32 48
+ ; GFX908-NEXT: [[S_MOV_B32_43:%[0-9]+]]:sgpr_32 = S_MOV_B32 49
+ ; GFX908-NEXT: [[S_MOV_B32_44:%[0-9]+]]:sgpr_32 = S_MOV_B32 50
+ ; GFX908-NEXT: [[S_MOV_B32_45:%[0-9]+]]:sgpr_32 = S_MOV_B32 51
+ ; GFX908-NEXT: [[S_MOV_B32_46:%[0-9]+]]:sgpr_32 = S_MOV_B32 52
+ ; GFX908-NEXT: [[S_MOV_B32_47:%[0-9]+]]:sgpr_32 = S_MOV_B32 53
+ ; GFX908-NEXT: [[S_MOV_B32_48:%[0-9]+]]:sgpr_32 = S_MOV_B32 54
+ ; GFX908-NEXT: [[S_MOV_B32_49:%[0-9]+]]:sgpr_32 = S_MOV_B32 55
+ ; GFX908-NEXT: [[S_MOV_B32_50:%[0-9]+]]:sgpr_32 = S_MOV_B32 56
+ ; GFX908-NEXT: [[S_MOV_B32_51:%[0-9]+]]:sgpr_32 = S_MOV_B32 57
+ ; GFX908-NEXT: [[S_MOV_B32_52:%[0-9]+]]:sgpr_32 = S_MOV_B32 58
+ ; GFX908-NEXT: [[S_MOV_B32_53:%[0-9]+]]:sgpr_32 = S_MOV_B32 59
+ ; GFX908-NEXT: [[S_MOV_B32_54:%[0-9]+]]:sgpr_32 = S_MOV_B32 60
+ ; GFX908-NEXT: [[S_MOV_B32_55:%[0-9]+]]:sgpr_32 = S_MOV_B32 61
+ ; GFX908-NEXT: [[S_MOV_B32_56:%[0-9]+]]:sgpr_32 = S_MOV_B32 62
+ ; GFX908-NEXT: [[S_MOV_B32_57:%[0-9]+]]:sgpr_32 = S_MOV_B32 63
+ ; GFX908-NEXT: [[S_MOV_B32_58:%[0-9]+]]:sgpr_32 = S_MOV_B32 64
+ ; GFX908-NEXT: [[S_MOV_B32_59:%[0-9]+]]:sgpr_32 = S_MOV_B32 65
+ ; GFX908-NEXT: [[S_MOV_B32_60:%[0-9]+]]:sgpr_32 = S_MOV_B32 66
+ ; GFX908-NEXT: [[S_MOV_B32_61:%[0-9]+]]:sgpr_32 = S_MOV_B32 67
+ ; GFX908-NEXT: [[S_MOV_B32_62:%[0-9]+]]:sgpr_32 = S_MOV_B32 68
+ ; GFX908-NEXT: [[S_MOV_B32_63:%[0-9]+]]:sgpr_32 = S_MOV_B32 69
+ ; GFX908-NEXT: [[S_MOV_B32_64:%[0-9]+]]:sgpr_32 = S_MOV_B32 70
+ ; GFX908-NEXT: [[S_MOV_B32_65:%[0-9]+]]:sgpr_32 = S_MOV_B32 71
+ ; GFX908-NEXT: [[S_MOV_B32_66:%[0-9]+]]:sgpr_32 = S_MOV_B32 72
+ ; GFX908-NEXT: [[S_MOV_B32_67:%[0-9]+]]:sgpr_32 = S_MOV_B32 73
+ ; GFX908-NEXT: [[S_MOV_B32_68:%[0-9]+]]:sgpr_32 = S_MOV_B32 74
+ ; GFX908-NEXT: [[S_MOV_B32_69:%[0-9]+]]:sgpr_32 = S_MOV_B32 75
+ ; GFX908-NEXT: [[S_MOV_B32_70:%[0-9]+]]:sgpr_32 = S_MOV_B32 76
+ ; GFX908-NEXT: [[S_MOV_B32_71:%[0-9]+]]:sgpr_32 = S_MOV_B32 77
+ ; GFX908-NEXT: [[S_MOV_B32_72:%[0-9]+]]:sgpr_32 = S_MOV_B32 78
+ ; GFX908-NEXT: [[S_MOV_B32_73:%[0-9]+]]:sgpr_32 = S_MOV_B32 79
+ ; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.1:
- ; GFX908-NEXT: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
- ; GFX908-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 1
- ; GFX908-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 2
- ; GFX908-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sgpr_32 = S_MOV_B32 3
- ; GFX908-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sgpr_32 = S_MOV_B32 4
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_]], implicit [[S_MOV_B32_1]], implicit [[S_MOV_B32_2]], implicit [[S_MOV_B32_3]], implicit [[S_MOV_B32_4]]
- ; GFX908-NEXT: [[S_MOV_B32_5:%[0-9]+]]:sgpr_32 = S_MOV_B32 5
- ; GFX908-NEXT: [[S_MOV_B32_6:%[0-9]+]]:sgpr_32 = S_MOV_B32 6
- ; GFX908-NEXT: [[S_MOV_B32_7:%[0-9]+]]:sgpr_32 = S_MOV_B32 7
- ; GFX908-NEXT: [[S_MOV_B32_8:%[0-9]+]]:sgpr_32 = S_MOV_B32 8
- ; GFX908-NEXT: [[S_MOV_B32_9:%[0-9]+]]:sgpr_32 = S_MOV_B32 9
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_5]], implicit [[S_MOV_B32_6]], implicit [[S_MOV_B32_7]], implicit [[S_MOV_B32_8]], implicit [[S_MOV_B32_9]]
- ; GFX908-NEXT: [[S_MOV_B32_10:%[0-9]+]]:sgpr_32 = S_MOV_B32 10
- ; GFX908-NEXT: [[S_MOV_B32_11:%[0-9]+]]:sgpr_32 = S_MOV_B32 11
- ; GFX908-NEXT: [[S_MOV_B32_12:%[0-9]+]]:sgpr_32 = S_MOV_B32 12
- ; GFX908-NEXT: [[S_MOV_B32_13:%[0-9]+]]:sgpr_32 = S_MOV_B32 13
- ; GFX908-NEXT: [[S_MOV_B32_14:%[0-9]+]]:sgpr_32 = S_MOV_B32 14
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_10]], implicit [[S_MOV_B32_11]], implicit [[S_MOV_B32_12]], implicit [[S_MOV_B32_13]], implicit [[S_MOV_B32_14]]
- ; GFX908-NEXT: [[S_MOV_B32_15:%[0-9]+]]:sgpr_32 = S_MOV_B32 15
- ; GFX908-NEXT: [[S_MOV_B32_16:%[0-9]+]]:sgpr_32 = S_MOV_B32 16
- ; GFX908-NEXT: [[S_MOV_B32_17:%[0-9]+]]:sgpr_32 = S_MOV_B32 17
- ; GFX908-NEXT: [[S_MOV_B32_18:%[0-9]+]]:sgpr_32 = S_MOV_B32 18
- ; GFX908-NEXT: [[S_MOV_B32_19:%[0-9]+]]:sgpr_32 = S_MOV_B32 19
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_15]], implicit [[S_MOV_B32_16]], implicit [[S_MOV_B32_17]], implicit [[S_MOV_B32_18]], implicit [[S_MOV_B32_19]]
- ; GFX908-NEXT: [[S_MOV_B32_20:%[0-9]+]]:sgpr_32 = S_MOV_B32 20
- ; GFX908-NEXT: [[S_MOV_B32_21:%[0-9]+]]:sgpr_32 = S_MOV_B32 21
- ; GFX908-NEXT: [[S_MOV_B32_22:%[0-9]+]]:sgpr_32 = S_MOV_B32 22
- ; GFX908-NEXT: [[S_MOV_B32_23:%[0-9]+]]:sgpr_32 = S_MOV_B32 23
- ; GFX908-NEXT: [[S_MOV_B32_24:%[0-9]+]]:sgpr_32 = S_MOV_B32 24
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_20]], implicit [[S_MOV_B32_21]], implicit [[S_MOV_B32_22]], implicit [[S_MOV_B32_23]], implicit [[S_MOV_B32_24]]
- ; GFX908-NEXT: [[S_MOV_B32_25:%[0-9]+]]:sgpr_32 = S_MOV_B32 25
- ; GFX908-NEXT: [[S_MOV_B32_26:%[0-9]+]]:sgpr_32 = S_MOV_B32 26
- ; GFX908-NEXT: [[S_MOV_B32_27:%[0-9]+]]:sgpr_32 = S_MOV_B32 27
- ; GFX908-NEXT: [[S_MOV_B32_28:%[0-9]+]]:sgpr_32 = S_MOV_B32 28
- ; GFX908-NEXT: [[S_MOV_B32_29:%[0-9]+]]:sgpr_32 = S_MOV_B32 29
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_25]], implicit [[S_MOV_B32_26]], implicit [[S_MOV_B32_27]], implicit [[S_MOV_B32_28]], implicit [[S_MOV_B32_29]]
- ; GFX908-NEXT: [[S_MOV_B32_30:%[0-9]+]]:sgpr_32 = S_MOV_B32 30
- ; GFX908-NEXT: [[S_MOV_B32_31:%[0-9]+]]:sgpr_32 = S_MOV_B32 31
- ; GFX908-NEXT: [[S_MOV_B32_32:%[0-9]+]]:sgpr_32 = S_MOV_B32 32
- ; GFX908-NEXT: [[S_MOV_B32_33:%[0-9]+]]:sgpr_32 = S_MOV_B32 33
- ; GFX908-NEXT: [[S_MOV_B32_34:%[0-9]+]]:sgpr_32 = S_MOV_B32 34
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_30]], implicit [[S_MOV_B32_31]], implicit [[S_MOV_B32_32]], implicit [[S_MOV_B32_33]], implicit [[S_MOV_B32_34]]
- ; GFX908-NEXT: [[S_MOV_B32_35:%[0-9]+]]:sgpr_32 = S_MOV_B32 35
- ; GFX908-NEXT: [[S_MOV_B32_36:%[0-9]+]]:sgpr_32 = S_MOV_B32 36
- ; GFX908-NEXT: [[S_MOV_B32_37:%[0-9]+]]:sgpr_32 = S_MOV_B32 37
- ; GFX908-NEXT: [[S_MOV_B32_38:%[0-9]+]]:sgpr_32 = S_MOV_B32 38
- ; GFX908-NEXT: [[S_MOV_B32_39:%[0-9]+]]:sgpr_32 = S_MOV_B32 39
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_35]], implicit [[S_MOV_B32_36]], implicit [[S_MOV_B32_37]], implicit [[S_MOV_B32_38]], implicit [[S_MOV_B32_39]]
- ; GFX908-NEXT: [[S_MOV_B32_40:%[0-9]+]]:sgpr_32 = S_MOV_B32 40
- ; GFX908-NEXT: [[S_MOV_B32_41:%[0-9]+]]:sgpr_32 = S_MOV_B32 41
- ; GFX908-NEXT: [[S_MOV_B32_42:%[0-9]+]]:sgpr_32 = S_MOV_B32 42
- ; GFX908-NEXT: [[S_MOV_B32_43:%[0-9]+]]:sgpr_32 = S_MOV_B32 43
- ; GFX908-NEXT: [[S_MOV_B32_44:%[0-9]+]]:sgpr_32 = S_MOV_B32 44
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_40]], implicit [[S_MOV_B32_41]], implicit [[S_MOV_B32_42]], implicit [[S_MOV_B32_43]], implicit [[S_MOV_B32_44]]
- ; GFX908-NEXT: [[S_MOV_B32_45:%[0-9]+]]:sgpr_32 = S_MOV_B32 45
- ; GFX908-NEXT: [[S_MOV_B32_46:%[0-9]+]]:sgpr_32 = S_MOV_B32 46
- ; GFX908-NEXT: [[S_MOV_B32_47:%[0-9]+]]:sgpr_32 = S_MOV_B32 47
- ; GFX908-NEXT: [[S_MOV_B32_48:%[0-9]+]]:sgpr_32 = S_MOV_B32 48
- ; GFX908-NEXT: [[S_MOV_B32_49:%[0-9]+]]:sgpr_32 = S_MOV_B32 49
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_45]], implicit [[S_MOV_B32_46]], implicit [[S_MOV_B32_47]], implicit [[S_MOV_B32_48]], implicit [[S_MOV_B32_49]]
- ; GFX908-NEXT: [[S_MOV_B32_50:%[0-9]+]]:sgpr_32 = S_MOV_B32 50
- ; GFX908-NEXT: [[S_MOV_B32_51:%[0-9]+]]:sgpr_32 = S_MOV_B32 51
- ; GFX908-NEXT: [[S_MOV_B32_52:%[0-9]+]]:sgpr_32 = S_MOV_B32 52
- ; GFX908-NEXT: [[S_MOV_B32_53:%[0-9]+]]:sgpr_32 = S_MOV_B32 53
- ; GFX908-NEXT: [[S_MOV_B32_54:%[0-9]+]]:sgpr_32 = S_MOV_B32 54
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_50]], implicit [[S_MOV_B32_51]], implicit [[S_MOV_B32_52]], implicit [[S_MOV_B32_53]], implicit [[S_MOV_B32_54]]
- ; GFX908-NEXT: [[S_MOV_B32_55:%[0-9]+]]:sgpr_32 = S_MOV_B32 55
- ; GFX908-NEXT: [[S_MOV_B32_56:%[0-9]+]]:sgpr_32 = S_MOV_B32 56
- ; GFX908-NEXT: [[S_MOV_B32_57:%[0-9]+]]:sgpr_32 = S_MOV_B32 57
- ; GFX908-NEXT: [[S_MOV_B32_58:%[0-9]+]]:sgpr_32 = S_MOV_B32 58
- ; GFX908-NEXT: [[S_MOV_B32_59:%[0-9]+]]:sgpr_32 = S_MOV_B32 59
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_55]], implicit [[S_MOV_B32_56]], implicit [[S_MOV_B32_57]], implicit [[S_MOV_B32_58]], implicit [[S_MOV_B32_59]]
- ; GFX908-NEXT: [[S_MOV_B32_60:%[0-9]+]]:sgpr_32 = S_MOV_B32 60
- ; GFX908-NEXT: [[S_MOV_B32_61:%[0-9]+]]:sgpr_32 = S_MOV_B32 61
- ; GFX908-NEXT: [[S_MOV_B32_62:%[0-9]+]]:sgpr_32 = S_MOV_B32 62
- ; GFX908-NEXT: [[S_MOV_B32_63:%[0-9]+]]:sgpr_32 = S_MOV_B32 63
- ; GFX908-NEXT: [[S_MOV_B32_64:%[0-9]+]]:sgpr_32 = S_MOV_B32 64
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_60]], implicit [[S_MOV_B32_61]], implicit [[S_MOV_B32_62]], implicit [[S_MOV_B32_63]], implicit [[S_MOV_B32_64]]
- ; GFX908-NEXT: [[S_MOV_B32_65:%[0-9]+]]:sgpr_32 = S_MOV_B32 65
- ; GFX908-NEXT: [[S_MOV_B32_66:%[0-9]+]]:sgpr_32 = S_MOV_B32 66
- ; GFX908-NEXT: [[S_MOV_B32_67:%[0-9]+]]:sgpr_32 = S_MOV_B32 67
- ; GFX908-NEXT: [[S_MOV_B32_68:%[0-9]+]]:sgpr_32 = S_MOV_B32 68
- ; GFX908-NEXT: [[S_MOV_B32_69:%[0-9]+]]:sgpr_32 = S_MOV_B32 69
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_65]], implicit [[S_MOV_B32_66]], implicit [[S_MOV_B32_67]], implicit [[S_MOV_B32_68]], implicit [[S_MOV_B32_69]]
- ; GFX908-NEXT: [[S_MOV_B32_70:%[0-9]+]]:sgpr_32 = S_MOV_B32 70
- ; GFX908-NEXT: [[S_MOV_B32_71:%[0-9]+]]:sgpr_32 = S_MOV_B32 71
- ; GFX908-NEXT: [[S_MOV_B32_72:%[0-9]+]]:sgpr_32 = S_MOV_B32 72
- ; GFX908-NEXT: [[S_MOV_B32_73:%[0-9]+]]:sgpr_32 = S_MOV_B32 73
- ; GFX908-NEXT: [[S_MOV_B32_74:%[0-9]+]]:sgpr_32 = S_MOV_B32 74
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_70]], implicit [[S_MOV_B32_71]], implicit [[S_MOV_B32_72]], implicit [[S_MOV_B32_73]], implicit [[S_MOV_B32_74]]
- ; GFX908-NEXT: [[S_MOV_B32_75:%[0-9]+]]:sgpr_32 = S_MOV_B32 75
- ; GFX908-NEXT: [[S_MOV_B32_76:%[0-9]+]]:sgpr_32 = S_MOV_B32 76
- ; GFX908-NEXT: [[S_MOV_B32_77:%[0-9]+]]:sgpr_32 = S_MOV_B32 77
- ; GFX908-NEXT: [[S_MOV_B32_78:%[0-9]+]]:sgpr_32 = S_MOV_B32 78
- ; GFX908-NEXT: [[S_MOV_B32_79:%[0-9]+]]:sgpr_32 = S_MOV_B32 79
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_75]], implicit [[S_MOV_B32_76]], implicit [[S_MOV_B32_77]], implicit [[S_MOV_B32_78]], implicit [[S_MOV_B32_79]]
+ ; GFX908-NEXT: [[S_MOV_B32_74:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
+ ; GFX908-NEXT: [[S_MOV_B32_75:%[0-9]+]]:sgpr_32 = S_MOV_B32 1
+ ; GFX908-NEXT: [[S_MOV_B32_76:%[0-9]+]]:sgpr_32 = S_MOV_B32 2
+ ; GFX908-NEXT: [[S_MOV_B32_77:%[0-9]+]]:sgpr_32 = S_MOV_B32 3
+ ; GFX908-NEXT: [[S_MOV_B32_78:%[0-9]+]]:sgpr_32 = S_MOV_B32 4
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_74]], implicit [[S_MOV_B32_75]], implicit [[S_MOV_B32_76]], implicit [[S_MOV_B32_77]], implicit [[S_MOV_B32_78]]
+ ; GFX908-NEXT: [[S_MOV_B32_79:%[0-9]+]]:sgpr_32 = S_MOV_B32 5
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_79]], implicit [[S_MOV_B32_]], implicit [[S_MOV_B32_1]], implicit [[S_MOV_B32_2]], implicit [[S_MOV_B32_3]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_4]], implicit [[S_MOV_B32_5]], implicit [[S_MOV_B32_6]], implicit [[S_MOV_B32_7]], implicit [[S_MOV_B32_8]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_9]], implicit [[S_MOV_B32_10]], implicit [[S_MOV_B32_11]], implicit [[S_MOV_B32_12]], implicit [[S_MOV_B32_13]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_14]], implicit [[S_MOV_B32_15]], implicit [[S_MOV_B32_16]], implicit [[S_MOV_B32_17]], implicit [[S_MOV_B32_18]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_19]], implicit [[S_MOV_B32_20]], implicit [[S_MOV_B32_21]], implicit [[S_MOV_B32_22]], implicit [[S_MOV_B32_23]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_24]], implicit [[S_MOV_B32_25]], implicit [[S_MOV_B32_26]], implicit [[S_MOV_B32_27]], implicit [[S_MOV_B32_28]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_29]], implicit [[S_MOV_B32_30]], implicit [[S_MOV_B32_31]], implicit [[S_MOV_B32_32]], implicit [[S_MOV_B32_33]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_34]], implicit [[S_MOV_B32_35]], implicit [[S_MOV_B32_36]], implicit [[S_MOV_B32_37]], implicit [[S_MOV_B32_38]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_39]], implicit [[S_MOV_B32_40]], implicit [[S_MOV_B32_41]], implicit [[S_MOV_B32_42]], implicit [[S_MOV_B32_43]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_44]], implicit [[S_MOV_B32_45]], implicit [[S_MOV_B32_46]], implicit [[S_MOV_B32_47]], implicit [[S_MOV_B32_48]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_49]], implicit [[S_MOV_B32_50]], implicit [[S_MOV_B32_51]], implicit [[S_MOV_B32_52]], implicit [[S_MOV_B32_53]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_54]], implicit [[S_MOV_B32_55]], implicit [[S_MOV_B32_56]], implicit [[S_MOV_B32_57]], implicit [[S_MOV_B32_58]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_59]], implicit [[S_MOV_B32_60]], implicit [[S_MOV_B32_61]], implicit [[S_MOV_B32_62]], implicit [[S_MOV_B32_63]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_64]], implicit [[S_MOV_B32_65]], implicit [[S_MOV_B32_66]], implicit [[S_MOV_B32_67]], implicit [[S_MOV_B32_68]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_69]], implicit [[S_MOV_B32_70]], implicit [[S_MOV_B32_71]], implicit [[S_MOV_B32_72]], implicit [[S_MOV_B32_73]]
; GFX908-NEXT: S_ENDPGM 0
;
; GFX90A-LABEL: name: small_num_sgprs_as_spill
; GFX90A: bb.0:
; GFX90A-NEXT: successors: %bb.1(0x80000000)
; GFX90A-NEXT: {{ $}}
+ ; GFX90A-NEXT: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 6
+ ; GFX90A-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 7
+ ; GFX90A-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 8
+ ; GFX90A-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sgpr_32 = S_MOV_B32 9
+ ; GFX90A-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sgpr_32 = S_MOV_B32 10
+ ; GFX90A-NEXT: [[S_MOV_B32_5:%[0-9]+]]:sgpr_32 = S_MOV_B32 11
+ ; GFX90A-NEXT: [[S_MOV_B32_6:%[0-9]+]]:sgpr_32 = S_MOV_B32 12
+ ; GFX90A-NEXT: [[S_MOV_B32_7:%[0-9]+]]:sgpr_32 = S_MOV_B32 13
+ ; GFX90A-NEXT: [[S_MOV_B32_8:%[0-9]+]]:sgpr_32 = S_MOV_B32 14
+ ; GFX90A-NEXT: [[S_MOV_B32_9:%[0-9]+]]:sgpr_32 = S_MOV_B32 15
+ ; GFX90A-NEXT: [[S_MOV_B32_10:%[0-9]+]]:sgpr_32 = S_MOV_B32 16
+ ; GFX90A-NEXT: [[S_MOV_B32_11:%[0-9]+]]:sgpr_32 = S_MOV_B32 17
+ ; GFX90A-NEXT: [[S_MOV_B32_12:%[0-9]+]]:sgpr_32 = S_MOV_B32 18
+ ; GFX90A-NEXT: [[S_MOV_B32_13:%[0-9]+]]:sgpr_32 = S_MOV_B32 19
+ ; GFX90A-NEXT: [[S_MOV_B32_14:%[0-9]+]]:sgpr_32 = S_MOV_B32 20
+ ; GFX90A-NEXT: [[S_MOV_B32_15:%[0-9]+]]:sgpr_32 = S_MOV_B32 21
+ ; GFX90A-NEXT: [[S_MOV_B32_16:%[0-9]+]]:sgpr_32 = S_MOV_B32 22
+ ; GFX90A-NEXT: [[S_MOV_B32_17:%[0-9]+]]:sgpr_32 = S_MOV_B32 23
+ ; GFX90A-NEXT: [[S_MOV_B32_18:%[0-9]+]]:sgpr_32 = S_MOV_B32 24
+ ; GFX90A-NEXT: [[S_MOV_B32_19:%[0-9]+]]:sgpr_32 = S_MOV_B32 25
+ ; GFX90A-NEXT: [[S_MOV_B32_20:%[0-9]+]]:sgpr_32 = S_MOV_B32 26
+ ; GFX90A-NEXT: [[S_MOV_B32_21:%[0-9]+]]:sgpr_32 = S_MOV_B32 27
+ ; GFX90A-NEXT: [[S_MOV_B32_22:%[0-9]+]]:sgpr_32 = S_MOV_B32 28
+ ; GFX90A-NEXT: [[S_MOV_B32_23:%[0-9]+]]:sgpr_32 = S_MOV_B32 29
+ ; GFX90A-NEXT: [[S_MOV_B32_24:%[0-9]+]]:sgpr_32 = S_MOV_B32 30
+ ; GFX90A-NEXT: [[S_MOV_B32_25:%[0-9]+]]:sgpr_32 = S_MOV_B32 31
+ ; GFX90A-NEXT: [[S_MOV_B32_26:%[0-9]+]]:sgpr_32 = S_MOV_B32 32
+ ; GFX90A-NEXT: [[S_MOV_B32_27:%[0-9]+]]:sgpr_32 = S_MOV_B32 33
+ ; GFX90A-NEXT: [[S_MOV_B32_28:%[0-9]+]]:sgpr_32 = S_MOV_B32 34
+ ; GFX90A-NEXT: [[S_MOV_B32_29:%[0-9]+]]:sgpr_32 = S_MOV_B32 35
+ ; GFX90A-NEXT: [[S_MOV_B32_30:%[0-9]+]]:sgpr_32 = S_MOV_B32 36
+ ; GFX90A-NEXT: [[S_MOV_B32_31:%[0-9]+]]:sgpr_32 = S_MOV_B32 37
+ ; GFX90A-NEXT: [[S_MOV_B32_32:%[0-9]+]]:sgpr_32 = S_MOV_B32 38
+ ; GFX90A-NEXT: [[S_MOV_B32_33:%[0-9]+]]:sgpr_32 = S_MOV_B32 39
+ ; GFX90A-NEXT: [[S_MOV_B32_34:%[0-9]+]]:sgpr_32 = S_MOV_B32 40
+ ; GFX90A-NEXT: [[S_MOV_B32_35:%[0-9]+]]:sgpr_32 = S_MOV_B32 41
+ ; GFX90A-NEXT: [[S_MOV_B32_36:%[0-9]+]]:sgpr_32 = S_MOV_B32 42
+ ; GFX90A-NEXT: [[S_MOV_B32_37:%[0-9]+]]:sgpr_32 = S_MOV_B32 43
+ ; GFX90A-NEXT: [[S_MOV_B32_38:%[0-9]+]]:sgpr_32 = S_MOV_B32 44
+ ; GFX90A-NEXT: [[S_MOV_B32_39:%[0-9]+]]:sgpr_32 = S_MOV_B32 45
+ ; GFX90A-NEXT: [[S_MOV_B32_40:%[0-9]+]]:sgpr_32 = S_MOV_B32 46
+ ; GFX90A-NEXT: [[S_MOV_B32_41:%[0-9]+]]:sgpr_32 = S_MOV_B32 47
+ ; GFX90A-NEXT: [[S_MOV_B32_42:%[0-9]+]]:sgpr_32 = S_MOV_B32 48
+ ; GFX90A-NEXT: [[S_MOV_B32_43:%[0-9]+]]:sgpr_32 = S_MOV_B32 49
+ ; GFX90A-NEXT: [[S_MOV_B32_44:%[0-9]+]]:sgpr_32 = S_MOV_B32 50
+ ; GFX90A-NEXT: [[S_MOV_B32_45:%[0-9]+]]:sgpr_32 = S_MOV_B32 51
+ ; GFX90A-NEXT: [[S_MOV_B32_46:%[0-9]+]]:sgpr_32 = S_MOV_B32 52
+ ; GFX90A-NEXT: [[S_MOV_B32_47:%[0-9]+]]:sgpr_32 = S_MOV_B32 53
+ ; GFX90A-NEXT: [[S_MOV_B32_48:%[0-9]+]]:sgpr_32 = S_MOV_B32 54
+ ; GFX90A-NEXT: [[S_MOV_B32_49:%[0-9]+]]:sgpr_32 = S_MOV_B32 55
+ ; GFX90A-NEXT: [[S_MOV_B32_50:%[0-9]+]]:sgpr_32 = S_MOV_B32 56
+ ; GFX90A-NEXT: [[S_MOV_B32_51:%[0-9]+]]:sgpr_32 = S_MOV_B32 57
+ ; GFX90A-NEXT: [[S_MOV_B32_52:%[0-9]+]]:sgpr_32 = S_MOV_B32 58
+ ; GFX90A-NEXT: [[S_MOV_B32_53:%[0-9]+]]:sgpr_32 = S_MOV_B32 59
+ ; GFX90A-NEXT: [[S_MOV_B32_54:%[0-9]+]]:sgpr_32 = S_MOV_B32 60
+ ; GFX90A-NEXT: [[S_MOV_B32_55:%[0-9]+]]:sgpr_32 = S_MOV_B32 61
+ ; GFX90A-NEXT: [[S_MOV_B32_56:%[0-9]+]]:sgpr_32 = S_MOV_B32 62
+ ; GFX90A-NEXT: [[S_MOV_B32_57:%[0-9]+]]:sgpr_32 = S_MOV_B32 63
+ ; GFX90A-NEXT: [[S_MOV_B32_58:%[0-9]+]]:sgpr_32 = S_MOV_B32 64
+ ; GFX90A-NEXT: [[S_MOV_B32_59:%[0-9]+]]:sgpr_32 = S_MOV_B32 65
+ ; GFX90A-NEXT: [[S_MOV_B32_60:%[0-9]+]]:sgpr_32 = S_MOV_B32 66
+ ; GFX90A-NEXT: [[S_MOV_B32_61:%[0-9]+]]:sgpr_32 = S_MOV_B32 67
+ ; GFX90A-NEXT: [[S_MOV_B32_62:%[0-9]+]]:sgpr_32 = S_MOV_B32 68
+ ; GFX90A-NEXT: [[S_MOV_B32_63:%[0-9]+]]:sgpr_32 = S_MOV_B32 69
+ ; GFX90A-NEXT: [[S_MOV_B32_64:%[0-9]+]]:sgpr_32 = S_MOV_B32 70
+ ; GFX90A-NEXT: [[S_MOV_B32_65:%[0-9]+]]:sgpr_32 = S_MOV_B32 71
+ ; GFX90A-NEXT: [[S_MOV_B32_66:%[0-9]+]]:sgpr_32 = S_MOV_B32 72
+ ; GFX90A-NEXT: [[S_MOV_B32_67:%[0-9]+]]:sgpr_32 = S_MOV_B32 73
+ ; GFX90A-NEXT: [[S_MOV_B32_68:%[0-9]+]]:sgpr_32 = S_MOV_B32 74
+ ; GFX90A-NEXT: [[S_MOV_B32_69:%[0-9]+]]:sgpr_32 = S_MOV_B32 75
+ ; GFX90A-NEXT: [[S_MOV_B32_70:%[0-9]+]]:sgpr_32 = S_MOV_B32 76
+ ; GFX90A-NEXT: [[S_MOV_B32_71:%[0-9]+]]:sgpr_32 = S_MOV_B32 77
+ ; GFX90A-NEXT: [[S_MOV_B32_72:%[0-9]+]]:sgpr_32 = S_MOV_B32 78
+ ; GFX90A-NEXT: [[S_MOV_B32_73:%[0-9]+]]:sgpr_32 = S_MOV_B32 79
+ ; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.1:
- ; GFX90A-NEXT: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
- ; GFX90A-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 1
- ; GFX90A-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 2
- ; GFX90A-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sgpr_32 = S_MOV_B32 3
- ; GFX90A-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sgpr_32 = S_MOV_B32 4
- ; GFX90A-NEXT: S_NOP 0, implicit [[S_MOV_B32_]], implicit [[S_MOV_B32_1]], implicit [[S_MOV_B32_2]], implicit [[S_MOV_B32_3]], implicit [[S_MOV_B32_4]]
- ; GFX90A-NEXT: [[S_MOV_B32_5:%[0-9]+]]:sgpr_32 = S_MOV_B32 5
- ; GFX90A-NEXT: [[S_MOV_B32_6:%[0-9]+]]:sgpr_32 = S_MOV_B32 6
- ; GFX90A-NEXT: [[S_MOV_B32_7:%[0-9]+]]:sgpr_32 = S_MOV_B32 7
- ; GFX90A-NEXT: [[S_MOV_B32_8:%[0-9]+]]:sgpr_32 = S_MOV_B32 8
- ; GFX90A-NEXT: [[S_MOV_B32_9:%[0-9]+]]:sgpr_32 = S_MOV_B32 9
- ; GFX90A-NEXT: S_NOP 0, implicit [[S_MOV_B32_5]], implicit [[S_MOV_B32_6]], implicit [[S_MOV_B32_7]], implicit [[S_MOV_B32_8]], implicit [[S_MOV_B32_9]]
- ; GFX90A-NEXT: [[S_MOV_B32_10:%[0-9]+]]:sgpr_32 = S_MOV_B32 10
- ; GFX90A-NEXT: [[S_MOV_B32_11:%[0-9]+]]:sgpr_32 = S_MOV_B32 11
- ; GFX90A-NEXT: [[S_MOV_B32_12:%[0-9]+]]:sgpr_32 = S_MOV_B32 12
- ; GFX90A-NEXT: [[S_MOV_B32_13:%[0-9]+]]:sgpr_32 = S_MOV_B32 13
- ; GFX90A-NEXT: [[S_MOV_B32_14:%[0-9]+]]:sgpr_32 = S_MOV_B32 14
- ; GFX90A-NEXT: S_NOP 0, implicit [[S_MOV_B32_10]], implicit [[S_MOV_B32_11]], implicit [[S_MOV_B32_12]], implicit [[S_MOV_B32_13]], implicit [[S_MOV_B32_14]]
- ; GFX90A-NEXT: [[S_MOV_B32_15:%[0-9]+]]:sgpr_32 = S_MOV_B32 15
- ; GFX90A-NEXT: [[S_MOV_B32_16:%[0-9]+]]:sgpr_32 = S_MOV_B32 16
- ; GFX90A-NEXT: [[S_MOV_B32_17:%[0-9]+]]:sgpr_32 = S_MOV_B32 17
- ; GFX90A-NEXT: [[S_MOV_B32_18:%[0-9]+]]:sgpr_32 = S_MOV_B32 18
- ; GFX90A-NEXT: [[S_MOV_B32_19:%[0-9]+]]:sgpr_32 = S_MOV_B32 19
- ; GFX90A-NEXT: S_NOP 0, implicit [[S_MOV_B32_15]], implicit [[S_MOV_B32_16]], implicit [[S_MOV_B32_17]], implicit [[S_MOV_B32_18]], implicit [[S_MOV_B32_19]]
- ; GFX90A-NEXT: [[S_MOV_B32_20:%[0-9]+]]:sgpr_32 = S_MOV_B32 20
- ; GFX90A-NEXT: [[S_MOV_B32_21:%[0-9]+]]:sgpr_32 = S_MOV_B32 21
- ; GFX90A-NEXT: [[S_MOV_B32_22:%[0-9]+]]:sgpr_32 = S_MOV_B32 22
- ; GFX90A-NEXT: [[S_MOV_B32_23:%[0-9]+]]:sgpr_32 = S_MOV_B32 23
- ; GFX90A-NEXT: [[S_MOV_B32_24:%[0-9]+]]:sgpr_32 = S_MOV_B32 24
- ; GFX90A-NEXT: S_NOP 0, implicit [[S_MOV_B32_20]], implicit [[S_MOV_B32_21]], implicit [[S_MOV_B32_22]], implicit [[S_MOV_B32_23]], implicit [[S_MOV_B32_24]]
- ; GFX90A-NEXT: [[S_MOV_B32_25:%[0-9]+]]:sgpr_32 = S_MOV_B32 25
- ; GFX90A-NEXT: [[S_MOV_B32_26:%[0-9]+]]:sgpr_32 = S_MOV_B32 26
- ; GFX90A-NEXT: [[S_MOV_B32_27:%[0-9]+]]:sgpr_32 = S_MOV_B32 27
- ; GFX90A-NEXT: [[S_MOV_B32_28:%[0-9]+]]:sgpr_32 = S_MOV_B32 28
- ; GFX90A-NEXT: [[S_MOV_B32_29:%[0-9]+]]:sgpr_32 = S_MOV_B32 29
- ; GFX90A-NEXT: S_NOP 0, implicit [[S_MOV_B32_25]], implicit [[S_MOV_B32_26]], implicit [[S_MOV_B32_27]], implicit [[S_MOV_B32_28]], implicit [[S_MOV_B32_29]]
- ; GFX90A-NEXT: [[S_MOV_B32_30:%[0-9]+]]:sgpr_32 = S_MOV_B32 30
- ; GFX90A-NEXT: [[S_MOV_B32_31:%[0-9]+]]:sgpr_32 = S_MOV_B32 31
- ; GFX90A-NEXT: [[S_MOV_B32_32:%[0-9]+]]:sgpr_32 = S_MOV_B32 32
- ; GFX90A-NEXT: [[S_MOV_B32_33:%[0-9]+]]:sgpr_32 = S_MOV_B32 33
- ; GFX90A-NEXT: [[S_MOV_B32_34:%[0-9]+]]:sgpr_32 = S_MOV_B32 34
- ; GFX90A-NEXT: S_NOP 0, implicit [[S_MOV_B32_30]], implicit [[S_MOV_B32_31]], implicit [[S_MOV_B32_32]], implicit [[S_MOV_B32_33]], implicit [[S_MOV_B32_34]]
- ; GFX90A-NEXT: [[S_MOV_B32_35:%[0-9]+]]:sgpr_32 = S_MOV_B32 35
- ; GFX90A-NEXT: [[S_MOV_B32_36:%[0-9]+]]:sgpr_32 = S_MOV_B32 36
- ; GFX90A-NEXT: [[S_MOV_B32_37:%[0-9]+]]:sgpr_32 = S_MOV_B32 37
- ; GFX90A-NEXT: [[S_MOV_B32_38:%[0-9]+]]:sgpr_32 = S_MOV_B32 38
- ; GFX90A-NEXT: [[S_MOV_B32_39:%[0-9]+]]:sgpr_32 = S_MOV_B32 39
- ; GFX90A-NEXT: S_NOP 0, implicit [[S_MOV_B32_35]], implicit [[S_MOV_B32_36]], implicit [[S_MOV_B32_37]], implicit [[S_MOV_B32_38]], implicit [[S_MOV_B32_39]]
- ; GFX90A-NEXT: [[S_MOV_B32_40:%[0-9]+]]:sgpr_32 = S_MOV_B32 40
- ; GFX90A-NEXT: [[S_MOV_B32_41:%[0-9]+]]:sgpr_32 = S_MOV_B32 41
- ; GFX90A-NEXT: [[S_MOV_B32_42:%[0-9]+]]:sgpr_32 = S_MOV_B32 42
- ; GFX90A-NEXT: [[S_MOV_B32_43:%[0-9]+]]:sgpr_32 = S_MOV_B32 43
- ; GFX90A-NEXT: [[S_MOV_B32_44:%[0-9]+]]:sgpr_32 = S_MOV_B32 44
- ; GFX90A-NEXT: S_NOP 0, implicit [[S_MOV_B32_40]], implicit [[S_MOV_B32_41]], implicit [[S_MOV_B32_42]], implicit [[S_MOV_B32_43]], implicit [[S_MOV_B32_44]]
- ; GFX90A-NEXT: [[S_MOV_B32_45:%[0-9]+]]:sgpr_32 = S_MOV_B32 45
- ; GFX90A-NEXT: [[S_MOV_B32_46:%[0-9]+]]:sgpr_32 = S_MOV_B32 46
- ; GFX90A-NEXT: [[S_MOV_B32_47:%[0-9]+]]:sgpr_32 = S_MOV_B32 47
- ; GFX90A-NEXT: [[S_MOV_B32_48:%[0-9]+]]:sgpr_32 = S_MOV_B32 48
- ; GFX90A-NEXT: [[S_MOV_B32_49:%[0-9]+]]:sgpr_32 = S_MOV_B32 49
- ; GFX90A-NEXT: S_NOP 0, implicit [[S_MOV_B32_45]], implicit [[S_MOV_B32_46]], implicit [[S_MOV_B32_47]], implicit [[S_MOV_B32_48]], implicit [[S_MOV_B32_49]]
- ; GFX90A-NEXT: [[S_MOV_B32_50:%[0-9]+]]:sgpr_32 = S_MOV_B32 50
- ; GFX90A-NEXT: [[S_MOV_B32_51:%[0-9]+]]:sgpr_32 = S_MOV_B32 51
- ; GFX90A-NEXT: [[S_MOV_B32_52:%[0-9]+]]:sgpr_32 = S_MOV_B32 52
- ; GFX90A-NEXT: [[S_MOV_B32_53:%[0-9]+]]:sgpr_32 = S_MOV_B32 53
- ; GFX90A-NEXT: [[S_MOV_B32_54:%[0-9]+]]:sgpr_32 = S_MOV_B32 54
- ; GFX90A-NEXT: S_NOP 0, implicit [[S_MOV_B32_50]], implicit [[S_MOV_B32_51]], implicit [[S_MOV_B32_52]], implicit [[S_MOV_B32_53]], implicit [[S_MOV_B32_54]]
- ; GFX90A-NEXT: [[S_MOV_B32_55:%[0-9]+]]:sgpr_32 = S_MOV_B32 55
- ; GFX90A-NEXT: [[S_MOV_B32_56:%[0-9]+]]:sgpr_32 = S_MOV_B32 56
- ; GFX90A-NEXT: [[S_MOV_B32_57:%[0-9]+]]:sgpr_32 = S_MOV_B32 57
- ; GFX90A-NEXT: [[S_MOV_B32_58:%[0-9]+]]:sgpr_32 = S_MOV_B32 58
- ; GFX90A-NEXT: [[S_MOV_B32_59:%[0-9]+]]:sgpr_32 = S_MOV_B32 59
- ; GFX90A-NEXT: S_NOP 0, implicit [[S_MOV_B32_55]], implicit [[S_MOV_B32_56]], implicit [[S_MOV_B32_57]], implicit [[S_MOV_B32_58]], implicit [[S_MOV_B32_59]]
- ; GFX90A-NEXT: [[S_MOV_B32_60:%[0-9]+]]:sgpr_32 = S_MOV_B32 60
- ; GFX90A-NEXT: [[S_MOV_B32_61:%[0-9]+]]:sgpr_32 = S_MOV_B32 61
- ; GFX90A-NEXT: [[S_MOV_B32_62:%[0-9]+]]:sgpr_32 = S_MOV_B32 62
- ; GFX90A-NEXT: [[S_MOV_B32_63:%[0-9]+]]:sgpr_32 = S_MOV_B32 63
- ; GFX90A-NEXT: [[S_MOV_B32_64:%[0-9]+]]:sgpr_32 = S_MOV_B32 64
- ; GFX90A-NEXT: S_NOP 0, implicit [[S_MOV_B32_60]], implicit [[S_MOV_B32_61]], implicit [[S_MOV_B32_62]], implicit [[S_MOV_B32_63]], implicit [[S_MOV_B32_64]]
- ; GFX90A-NEXT: [[S_MOV_B32_65:%[0-9]+]]:sgpr_32 = S_MOV_B32 65
- ; GFX90A-NEXT: [[S_MOV_B32_66:%[0-9]+]]:sgpr_32 = S_MOV_B32 66
- ; GFX90A-NEXT: [[S_MOV_B32_67:%[0-9]+]]:sgpr_32 = S_MOV_B32 67
- ; GFX90A-NEXT: [[S_MOV_B32_68:%[0-9]+]]:sgpr_32 = S_MOV_B32 68
- ; GFX90A-NEXT: [[S_MOV_B32_69:%[0-9]+]]:sgpr_32 = S_MOV_B32 69
- ; GFX90A-NEXT: S_NOP 0, implicit [[S_MOV_B32_65]], implicit [[S_MOV_B32_66]], implicit [[S_MOV_B32_67]], implicit [[S_MOV_B32_68]], implicit [[S_MOV_B32_69]]
- ; GFX90A-NEXT: [[S_MOV_B32_70:%[0-9]+]]:sgpr_32 = S_MOV_B32 70
- ; GFX90A-NEXT: [[S_MOV_B32_71:%[0-9]+]]:sgpr_32 = S_MOV_B32 71
- ; GFX90A-NEXT: [[S_MOV_B32_72:%[0-9]+]]:sgpr_32 = S_MOV_B32 72
- ; GFX90A-NEXT: [[S_MOV_B32_73:%[0-9]+]]:sgpr_32 = S_MOV_B32 73
- ; GFX90A-NEXT: [[S_MOV_B32_74:%[0-9]+]]:sgpr_32 = S_MOV_B32 74
- ; GFX90A-NEXT: S_NOP 0, implicit [[S_MOV_B32_70]], implicit [[S_MOV_B32_71]], implicit [[S_MOV_B32_72]], implicit [[S_MOV_B32_73]], implicit [[S_MOV_B32_74]]
- ; GFX90A-NEXT: [[S_MOV_B32_75:%[0-9]+]]:sgpr_32 = S_MOV_B32 75
- ; GFX90A-NEXT: [[S_MOV_B32_76:%[0-9]+]]:sgpr_32 = S_MOV_B32 76
- ; GFX90A-NEXT: [[S_MOV_B32_77:%[0-9]+]]:sgpr_32 = S_MOV_B32 77
- ; GFX90A-NEXT: [[S_MOV_B32_78:%[0-9]+]]:sgpr_32 = S_MOV_B32 78
- ; GFX90A-NEXT: [[S_MOV_B32_79:%[0-9]+]]:sgpr_32 = S_MOV_B32 79
- ; GFX90A-NEXT: S_NOP 0, implicit [[S_MOV_B32_75]], implicit [[S_MOV_B32_76]], implicit [[S_MOV_B32_77]], implicit [[S_MOV_B32_78]], implicit [[S_MOV_B32_79]]
+ ; GFX90A-NEXT: [[S_MOV_B32_74:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
+ ; GFX90A-NEXT: [[S_MOV_B32_75:%[0-9]+]]:sgpr_32 = S_MOV_B32 1
+ ; GFX90A-NEXT: [[S_MOV_B32_76:%[0-9]+]]:sgpr_32 = S_MOV_B32 2
+ ; GFX90A-NEXT: [[S_MOV_B32_77:%[0-9]+]]:sgpr_32 = S_MOV_B32 3
+ ; GFX90A-NEXT: [[S_MOV_B32_78:%[0-9]+]]:sgpr_32 = S_MOV_B32 4
+ ; GFX90A-NEXT: S_NOP 0, implicit [[S_MOV_B32_74]], implicit [[S_MOV_B32_75]], implicit [[S_MOV_B32_76]], implicit [[S_MOV_B32_77]], implicit [[S_MOV_B32_78]]
+ ; GFX90A-NEXT: [[S_MOV_B32_79:%[0-9]+]]:sgpr_32 = S_MOV_B32 5
+ ; GFX90A-NEXT: S_NOP 0, implicit [[S_MOV_B32_79]], implicit [[S_MOV_B32_]], implicit [[S_MOV_B32_1]], implicit [[S_MOV_B32_2]], implicit [[S_MOV_B32_3]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[S_MOV_B32_4]], implicit [[S_MOV_B32_5]], implicit [[S_MOV_B32_6]], implicit [[S_MOV_B32_7]], implicit [[S_MOV_B32_8]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[S_MOV_B32_9]], implicit [[S_MOV_B32_10]], implicit [[S_MOV_B32_11]], implicit [[S_MOV_B32_12]], implicit [[S_MOV_B32_13]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[S_MOV_B32_14]], implicit [[S_MOV_B32_15]], implicit [[S_MOV_B32_16]], implicit [[S_MOV_B32_17]], implicit [[S_MOV_B32_18]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[S_MOV_B32_19]], implicit [[S_MOV_B32_20]], implicit [[S_MOV_B32_21]], implicit [[S_MOV_B32_22]], implicit [[S_MOV_B32_23]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[S_MOV_B32_24]], implicit [[S_MOV_B32_25]], implicit [[S_MOV_B32_26]], implicit [[S_MOV_B32_27]], implicit [[S_MOV_B32_28]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[S_MOV_B32_29]], implicit [[S_MOV_B32_30]], implicit [[S_MOV_B32_31]], implicit [[S_MOV_B32_32]], implicit [[S_MOV_B32_33]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[S_MOV_B32_34]], implicit [[S_MOV_B32_35]], implicit [[S_MOV_B32_36]], implicit [[S_MOV_B32_37]], implicit [[S_MOV_B32_38]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[S_MOV_B32_39]], implicit [[S_MOV_B32_40]], implicit [[S_MOV_B32_41]], implicit [[S_MOV_B32_42]], implicit [[S_MOV_B32_43]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[S_MOV_B32_44]], implicit [[S_MOV_B32_45]], implicit [[S_MOV_B32_46]], implicit [[S_MOV_B32_47]], implicit [[S_MOV_B32_48]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[S_MOV_B32_49]], implicit [[S_MOV_B32_50]], implicit [[S_MOV_B32_51]], implicit [[S_MOV_B32_52]], implicit [[S_MOV_B32_53]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[S_MOV_B32_54]], implicit [[S_MOV_B32_55]], implicit [[S_MOV_B32_56]], implicit [[S_MOV_B32_57]], implicit [[S_MOV_B32_58]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[S_MOV_B32_59]], implicit [[S_MOV_B32_60]], implicit [[S_MOV_B32_61]], implicit [[S_MOV_B32_62]], implicit [[S_MOV_B32_63]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[S_MOV_B32_64]], implicit [[S_MOV_B32_65]], implicit [[S_MOV_B32_66]], implicit [[S_MOV_B32_67]], implicit [[S_MOV_B32_68]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[S_MOV_B32_69]], implicit [[S_MOV_B32_70]], implicit [[S_MOV_B32_71]], implicit [[S_MOV_B32_72]], implicit [[S_MOV_B32_73]]
; GFX90A-NEXT: S_ENDPGM 0
bb.0:
successors: %bb.1
@@ -371,15 +373,15 @@ body: |
; GFX908-NEXT: [[V_CVT_I32_F64_e32_10:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 10, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_11:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 11, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_12:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 12, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_13:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 15, implicit $exec, implicit $mode
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.1:
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_]], implicit [[V_CVT_I32_F64_e32_1]], implicit [[V_CVT_I32_F64_e32_2]], implicit [[V_CVT_I32_F64_e32_3]], implicit [[V_CVT_I32_F64_e32_4]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_5]], implicit [[V_CVT_I32_F64_e32_6]], implicit [[V_CVT_I32_F64_e32_7]], implicit [[V_CVT_I32_F64_e32_8]], implicit [[V_CVT_I32_F64_e32_9]]
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_13:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 13, implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_14:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 14, implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_15:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 15, implicit $exec, implicit $mode
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_10]], implicit [[V_CVT_I32_F64_e32_11]], implicit [[V_CVT_I32_F64_e32_12]], implicit [[V_CVT_I32_F64_e32_13]], implicit [[V_CVT_I32_F64_e32_14]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_15]]
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_14:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 13, implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_15:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 14, implicit $exec, implicit $mode
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_10]], implicit [[V_CVT_I32_F64_e32_11]], implicit [[V_CVT_I32_F64_e32_12]], implicit [[V_CVT_I32_F64_e32_14]], implicit [[V_CVT_I32_F64_e32_15]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_13]]
; GFX908-NEXT: S_ENDPGM 0
;
; GFX90A-LABEL: name: small_num_vgprs_as_spill
@@ -399,14 +401,14 @@ body: |
; GFX90A-NEXT: [[V_CVT_I32_F64_e32_10:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 10, implicit $exec, implicit $mode, implicit-def $m0
; GFX90A-NEXT: [[V_CVT_I32_F64_e32_11:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 11, implicit $exec, implicit $mode, implicit-def $m0
; GFX90A-NEXT: [[V_CVT_I32_F64_e32_12:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 12, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_13:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 13, implicit $exec, implicit $mode
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_14:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 14, implicit $exec, implicit $mode
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_15:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 15, implicit $exec, implicit $mode
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.1:
; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_]], implicit [[V_CVT_I32_F64_e32_1]], implicit [[V_CVT_I32_F64_e32_2]], implicit [[V_CVT_I32_F64_e32_3]], implicit [[V_CVT_I32_F64_e32_4]]
; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_5]], implicit [[V_CVT_I32_F64_e32_6]], implicit [[V_CVT_I32_F64_e32_7]], implicit [[V_CVT_I32_F64_e32_8]], implicit [[V_CVT_I32_F64_e32_9]]
- ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_13:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 13, implicit $exec, implicit $mode
- ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_14:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 14, implicit $exec, implicit $mode
; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_10]], implicit [[V_CVT_I32_F64_e32_11]], implicit [[V_CVT_I32_F64_e32_12]], implicit [[V_CVT_I32_F64_e32_13]], implicit [[V_CVT_I32_F64_e32_14]]
- ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_15:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 15, implicit $exec, implicit $mode
; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_15]]
; GFX90A-NEXT: S_ENDPGM 0
bb.0:
@@ -473,12 +475,6 @@ body: |
; GFX908-NEXT: [[V_CVT_I32_F64_e32_21:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 21, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: {{ $}}
- ; GFX908-NEXT: bb.1:
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_]], implicit [[V_CVT_I32_F64_e32_1]], implicit [[V_CVT_I32_F64_e32_2]], implicit [[V_CVT_I32_F64_e32_3]], implicit [[V_CVT_I32_F64_e32_4]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_5]], implicit [[V_CVT_I32_F64_e32_6]], implicit [[V_CVT_I32_F64_e32_7]], implicit [[V_CVT_I32_F64_e32_8]], implicit [[V_CVT_I32_F64_e32_9]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_10]], implicit [[V_CVT_I32_F64_e32_11]], implicit [[V_CVT_I32_F64_e32_12]], implicit [[V_CVT_I32_F64_e32_13]], implicit [[V_CVT_I32_F64_e32_14]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_15]], implicit [[V_CVT_I32_F64_e32_16]], implicit [[V_CVT_I32_F64_e32_17]], implicit [[V_CVT_I32_F64_e32_18]], implicit [[V_CVT_I32_F64_e32_19]]
; GFX908-NEXT: [[V_CVT_I32_F64_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode
; GFX908-NEXT: [[V_CVT_I32_F64_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 25, implicit $exec, implicit $mode
; GFX908-NEXT: [[V_CVT_I32_F64_e32_26:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 26, implicit $exec, implicit $mode
@@ -491,6 +487,12 @@ body: |
; GFX908-NEXT: [[V_CVT_I32_F64_e32_33:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 33, implicit $exec, implicit $mode
; GFX908-NEXT: [[V_CVT_I32_F64_e32_34:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 34, implicit $exec, implicit $mode
; GFX908-NEXT: [[V_CVT_I32_F64_e32_35:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 35, implicit $exec, implicit $mode
+ ; GFX908-NEXT: {{ $}}
+ ; GFX908-NEXT: bb.1:
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_]], implicit [[V_CVT_I32_F64_e32_1]], implicit [[V_CVT_I32_F64_e32_2]], implicit [[V_CVT_I32_F64_e32_3]], implicit [[V_CVT_I32_F64_e32_4]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_5]], implicit [[V_CVT_I32_F64_e32_6]], implicit [[V_CVT_I32_F64_e32_7]], implicit [[V_CVT_I32_F64_e32_8]], implicit [[V_CVT_I32_F64_e32_9]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_10]], implicit [[V_CVT_I32_F64_e32_11]], implicit [[V_CVT_I32_F64_e32_12]], implicit [[V_CVT_I32_F64_e32_13]], implicit [[V_CVT_I32_F64_e32_14]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_15]], implicit [[V_CVT_I32_F64_e32_16]], implicit [[V_CVT_I32_F64_e32_17]], implicit [[V_CVT_I32_F64_e32_18]], implicit [[V_CVT_I32_F64_e32_19]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_20]], implicit [[V_CVT_I32_F64_e32_21]], implicit [[V_CVT_I32_F64_e32_22]], implicit [[V_CVT_I32_F64_e32_23]], implicit [[V_CVT_I32_F64_e32_24]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_25]], implicit [[V_CVT_I32_F64_e32_26]], implicit [[V_CVT_I32_F64_e32_27]], implicit [[V_CVT_I32_F64_e32_28]], implicit [[V_CVT_I32_F64_e32_29]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_30]], implicit [[V_CVT_I32_F64_e32_31]], implicit [[V_CVT_I32_F64_e32_32]], implicit [[V_CVT_I32_F64_e32_33]], implicit [[V_CVT_I32_F64_e32_34]]
@@ -525,27 +527,27 @@ body: |
; GFX90A-NEXT: [[V_CVT_I32_F64_e32_21:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 21, implicit $exec, implicit $mode, implicit-def $m0
; GFX90A-NEXT: [[V_CVT_I32_F64_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode, implicit-def $m0
; GFX90A-NEXT: [[V_CVT_I32_F64_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: bb.1:
- ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_]], implicit [[V_CVT_I32_F64_e32_1]], implicit [[V_CVT_I32_F64_e32_2]], implicit [[V_CVT_I32_F64_e32_3]], implicit [[V_CVT_I32_F64_e32_4]]
- ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_5]], implicit [[V_CVT_I32_F64_e32_6]], implicit [[V_CVT_I32_F64_e32_7]], implicit [[V_CVT_I32_F64_e32_8]], implicit [[V_CVT_I32_F64_e32_9]]
- ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_10]], implicit [[V_CVT_I32_F64_e32_11]], implicit [[V_CVT_I32_F64_e32_12]], implicit [[V_CVT_I32_F64_e32_13]], implicit [[V_CVT_I32_F64_e32_14]]
- ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_15]], implicit [[V_CVT_I32_F64_e32_16]], implicit [[V_CVT_I32_F64_e32_17]], implicit [[V_CVT_I32_F64_e32_18]], implicit [[V_CVT_I32_F64_e32_19]]
; GFX90A-NEXT: [[V_CVT_I32_F64_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode
- ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_20]], implicit [[V_CVT_I32_F64_e32_21]], implicit [[V_CVT_I32_F64_e32_22]], implicit [[V_CVT_I32_F64_e32_23]], implicit [[V_CVT_I32_F64_e32_24]]
; GFX90A-NEXT: [[V_CVT_I32_F64_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 25, implicit $exec, implicit $mode
; GFX90A-NEXT: [[V_CVT_I32_F64_e32_26:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 26, implicit $exec, implicit $mode
; GFX90A-NEXT: [[V_CVT_I32_F64_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 27, implicit $exec, implicit $mode
; GFX90A-NEXT: [[V_CVT_I32_F64_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 28, implicit $exec, implicit $mode
; GFX90A-NEXT: [[V_CVT_I32_F64_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 29, implicit $exec, implicit $mode
- ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_25]], implicit [[V_CVT_I32_F64_e32_26]], implicit [[V_CVT_I32_F64_e32_27]], implicit [[V_CVT_I32_F64_e32_28]], implicit [[V_CVT_I32_F64_e32_29]]
; GFX90A-NEXT: [[V_CVT_I32_F64_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 30, implicit $exec, implicit $mode
; GFX90A-NEXT: [[V_CVT_I32_F64_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 31, implicit $exec, implicit $mode
; GFX90A-NEXT: [[V_CVT_I32_F64_e32_32:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 32, implicit $exec, implicit $mode
; GFX90A-NEXT: [[V_CVT_I32_F64_e32_33:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 33, implicit $exec, implicit $mode
; GFX90A-NEXT: [[V_CVT_I32_F64_e32_34:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 34, implicit $exec, implicit $mode
- ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_30]], implicit [[V_CVT_I32_F64_e32_31]], implicit [[V_CVT_I32_F64_e32_32]], implicit [[V_CVT_I32_F64_e32_33]], implicit [[V_CVT_I32_F64_e32_34]]
; GFX90A-NEXT: [[V_CVT_I32_F64_e32_35:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 35, implicit $exec, implicit $mode
+ ; GFX90A-NEXT: {{ $}}
+ ; GFX90A-NEXT: bb.1:
+ ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_]], implicit [[V_CVT_I32_F64_e32_1]], implicit [[V_CVT_I32_F64_e32_2]], implicit [[V_CVT_I32_F64_e32_3]], implicit [[V_CVT_I32_F64_e32_4]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_5]], implicit [[V_CVT_I32_F64_e32_6]], implicit [[V_CVT_I32_F64_e32_7]], implicit [[V_CVT_I32_F64_e32_8]], implicit [[V_CVT_I32_F64_e32_9]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_10]], implicit [[V_CVT_I32_F64_e32_11]], implicit [[V_CVT_I32_F64_e32_12]], implicit [[V_CVT_I32_F64_e32_13]], implicit [[V_CVT_I32_F64_e32_14]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_15]], implicit [[V_CVT_I32_F64_e32_16]], implicit [[V_CVT_I32_F64_e32_17]], implicit [[V_CVT_I32_F64_e32_18]], implicit [[V_CVT_I32_F64_e32_19]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_20]], implicit [[V_CVT_I32_F64_e32_21]], implicit [[V_CVT_I32_F64_e32_22]], implicit [[V_CVT_I32_F64_e32_23]], implicit [[V_CVT_I32_F64_e32_24]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_25]], implicit [[V_CVT_I32_F64_e32_26]], implicit [[V_CVT_I32_F64_e32_27]], implicit [[V_CVT_I32_F64_e32_28]], implicit [[V_CVT_I32_F64_e32_29]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_30]], implicit [[V_CVT_I32_F64_e32_31]], implicit [[V_CVT_I32_F64_e32_32]], implicit [[V_CVT_I32_F64_e32_33]], implicit [[V_CVT_I32_F64_e32_34]]
; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_35]]
; GFX90A-NEXT: S_ENDPGM 0
bb.0:
@@ -637,11 +639,6 @@ body: |
; GFX908-NEXT: [[V_CVT_I32_F64_e32_21:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 21, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: {{ $}}
- ; GFX908-NEXT: bb.1:
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_]], implicit [[V_CVT_I32_F64_e32_1]], implicit [[V_CVT_I32_F64_e32_2]], implicit [[V_CVT_I32_F64_e32_3]], implicit [[V_CVT_I32_F64_e32_4]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_5]], implicit [[V_CVT_I32_F64_e32_6]], implicit [[V_CVT_I32_F64_e32_7]], implicit [[V_CVT_I32_F64_e32_8]], implicit [[V_CVT_I32_F64_e32_9]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_10]], implicit [[V_CVT_I32_F64_e32_11]], implicit [[V_CVT_I32_F64_e32_12]], implicit [[V_CVT_I32_F64_e32_13]], implicit [[V_CVT_I32_F64_e32_14]]
; GFX908-NEXT: [[V_CVT_I32_F64_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode
; GFX908-NEXT: [[V_CVT_I32_F64_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 25, implicit $exec, implicit $mode
; GFX908-NEXT: [[V_CVT_I32_F64_e32_26:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 26, implicit $exec, implicit $mode
@@ -650,6 +647,11 @@ body: |
; GFX908-NEXT: [[V_CVT_I32_F64_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 29, implicit $exec, implicit $mode
; GFX908-NEXT: [[V_CVT_I32_F64_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 30, implicit $exec, implicit $mode
; GFX908-NEXT: [[V_CVT_I32_F64_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 31, implicit $exec, implicit $mode
+ ; GFX908-NEXT: {{ $}}
+ ; GFX908-NEXT: bb.1:
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_]], implicit [[V_CVT_I32_F64_e32_1]], implicit [[V_CVT_I32_F64_e32_2]], implicit [[V_CVT_I32_F64_e32_3]], implicit [[V_CVT_I32_F64_e32_4]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_5]], implicit [[V_CVT_I32_F64_e32_6]], implicit [[V_CVT_I32_F64_e32_7]], implicit [[V_CVT_I32_F64_e32_8]], implicit [[V_CVT_I32_F64_e32_9]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_10]], implicit [[V_CVT_I32_F64_e32_11]], implicit [[V_CVT_I32_F64_e32_12]], implicit [[V_CVT_I32_F64_e32_13]], implicit [[V_CVT_I32_F64_e32_14]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_15]], implicit [[V_CVT_I32_F64_e32_16]], implicit [[V_CVT_I32_F64_e32_17]], implicit [[V_CVT_I32_F64_e32_18]], implicit [[V_CVT_I32_F64_e32_19]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_20]], implicit [[V_CVT_I32_F64_e32_21]], implicit [[V_CVT_I32_F64_e32_22]], implicit [[V_CVT_I32_F64_e32_23]], implicit [[V_CVT_I32_F64_e32_24]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_25]], implicit [[V_CVT_I32_F64_e32_26]], implicit [[V_CVT_I32_F64_e32_27]], implicit [[V_CVT_I32_F64_e32_28]], implicit [[V_CVT_I32_F64_e32_29]]
@@ -684,22 +686,22 @@ body: |
; GFX90A-NEXT: [[V_CVT_I32_F64_e32_21:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 21, implicit $exec, implicit $mode, implicit-def $m0
; GFX90A-NEXT: [[V_CVT_I32_F64_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode, implicit-def $m0
; GFX90A-NEXT: [[V_CVT_I32_F64_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: bb.1:
- ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_]], implicit [[V_CVT_I32_F64_e32_1]], implicit [[V_CVT_I32_F64_e32_2]], implicit [[V_CVT_I32_F64_e32_3]], implicit [[V_CVT_I32_F64_e32_4]]
- ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_5]], implicit [[V_CVT_I32_F64_e32_6]], implicit [[V_CVT_I32_F64_e32_7]], implicit [[V_CVT_I32_F64_e32_8]], implicit [[V_CVT_I32_F64_e32_9]]
- ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_10]], implicit [[V_CVT_I32_F64_e32_11]], implicit [[V_CVT_I32_F64_e32_12]], implicit [[V_CVT_I32_F64_e32_13]], implicit [[V_CVT_I32_F64_e32_14]]
- ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_15]], implicit [[V_CVT_I32_F64_e32_16]], implicit [[V_CVT_I32_F64_e32_17]], implicit [[V_CVT_I32_F64_e32_18]], implicit [[V_CVT_I32_F64_e32_19]]
; GFX90A-NEXT: [[V_CVT_I32_F64_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode
- ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_20]], implicit [[V_CVT_I32_F64_e32_21]], implicit [[V_CVT_I32_F64_e32_22]], implicit [[V_CVT_I32_F64_e32_23]], implicit [[V_CVT_I32_F64_e32_24]]
; GFX90A-NEXT: [[V_CVT_I32_F64_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 25, implicit $exec, implicit $mode
; GFX90A-NEXT: [[V_CVT_I32_F64_e32_26:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 26, implicit $exec, implicit $mode
; GFX90A-NEXT: [[V_CVT_I32_F64_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 27, implicit $exec, implicit $mode
; GFX90A-NEXT: [[V_CVT_I32_F64_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 28, implicit $exec, implicit $mode
; GFX90A-NEXT: [[V_CVT_I32_F64_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 29, implicit $exec, implicit $mode
- ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_25]], implicit [[V_CVT_I32_F64_e32_26]], implicit [[V_CVT_I32_F64_e32_27]], implicit [[V_CVT_I32_F64_e32_28]], implicit [[V_CVT_I32_F64_e32_29]]
; GFX90A-NEXT: [[V_CVT_I32_F64_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 30, implicit $exec, implicit $mode
; GFX90A-NEXT: [[V_CVT_I32_F64_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 31, implicit $exec, implicit $mode
+ ; GFX90A-NEXT: {{ $}}
+ ; GFX90A-NEXT: bb.1:
+ ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_]], implicit [[V_CVT_I32_F64_e32_1]], implicit [[V_CVT_I32_F64_e32_2]], implicit [[V_CVT_I32_F64_e32_3]], implicit [[V_CVT_I32_F64_e32_4]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_5]], implicit [[V_CVT_I32_F64_e32_6]], implicit [[V_CVT_I32_F64_e32_7]], implicit [[V_CVT_I32_F64_e32_8]], implicit [[V_CVT_I32_F64_e32_9]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_10]], implicit [[V_CVT_I32_F64_e32_11]], implicit [[V_CVT_I32_F64_e32_12]], implicit [[V_CVT_I32_F64_e32_13]], implicit [[V_CVT_I32_F64_e32_14]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_15]], implicit [[V_CVT_I32_F64_e32_16]], implicit [[V_CVT_I32_F64_e32_17]], implicit [[V_CVT_I32_F64_e32_18]], implicit [[V_CVT_I32_F64_e32_19]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_20]], implicit [[V_CVT_I32_F64_e32_21]], implicit [[V_CVT_I32_F64_e32_22]], implicit [[V_CVT_I32_F64_e32_23]], implicit [[V_CVT_I32_F64_e32_24]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_25]], implicit [[V_CVT_I32_F64_e32_26]], implicit [[V_CVT_I32_F64_e32_27]], implicit [[V_CVT_I32_F64_e32_28]], implicit [[V_CVT_I32_F64_e32_29]]
; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_30]], implicit [[V_CVT_I32_F64_e32_31]]
; GFX90A-NEXT: S_ENDPGM 0
bb.0:
@@ -765,6 +767,35 @@ body: |
; GFX908-NEXT: successors: %bb.1(0x80000000)
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: [[V_CVT_I32_F64_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 0, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[DEF:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF1:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF2:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF3:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF4:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF5:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF6:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF7:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF8:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF9:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF10:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF11:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF12:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF13:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF14:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF15:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF16:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF17:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF18:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF19:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF20:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF21:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF22:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF23:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF24:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF25:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF26:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF27:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF28:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[V_CVT_I32_F64_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_3:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode, implicit-def $m0
@@ -791,61 +822,32 @@ body: |
; GFX908-NEXT: [[V_CVT_I32_F64_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 25, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_26:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 26, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 28, implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 29, implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 30, implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 31, implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 64, implicit $exec, implicit $mode
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.1:
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_]], implicit [[V_CVT_I32_F64_e32_1]], implicit [[V_CVT_I32_F64_e32_2]], implicit [[V_CVT_I32_F64_e32_3]], implicit [[V_CVT_I32_F64_e32_4]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_5]], implicit [[V_CVT_I32_F64_e32_6]], implicit [[V_CVT_I32_F64_e32_7]], implicit [[V_CVT_I32_F64_e32_8]], implicit [[V_CVT_I32_F64_e32_9]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_10]], implicit [[V_CVT_I32_F64_e32_11]], implicit [[V_CVT_I32_F64_e32_12]], implicit [[V_CVT_I32_F64_e32_13]], implicit [[V_CVT_I32_F64_e32_14]]
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 27, implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 28, implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 29, implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 30, implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 31, implicit $exec, implicit $mode
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_15]], implicit [[V_CVT_I32_F64_e32_16]], implicit [[V_CVT_I32_F64_e32_17]], implicit [[V_CVT_I32_F64_e32_18]], implicit [[V_CVT_I32_F64_e32_19]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_20]], implicit [[V_CVT_I32_F64_e32_21]], implicit [[V_CVT_I32_F64_e32_22]], implicit [[V_CVT_I32_F64_e32_23]], implicit [[V_CVT_I32_F64_e32_24]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_25]], implicit [[V_CVT_I32_F64_e32_26]], implicit [[V_CVT_I32_F64_e32_27]], implicit [[V_CVT_I32_F64_e32_28]], implicit [[V_CVT_I32_F64_e32_29]]
- ; GFX908-NEXT: [[DEF:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF1:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF2:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_30]], implicit [[V_CVT_I32_F64_e32_31]], implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]]
- ; GFX908-NEXT: [[DEF3:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF4:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF5:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF6:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF7:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: S_NOP 0, implicit [[DEF3]], implicit [[DEF4]], implicit [[DEF5]], implicit [[DEF6]], implicit [[DEF7]]
- ; GFX908-NEXT: [[DEF8:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF9:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF10:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF11:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF12:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: S_NOP 0, implicit [[DEF8]], implicit [[DEF9]], implicit [[DEF10]], implicit [[DEF11]], implicit [[DEF12]]
- ; GFX908-NEXT: [[DEF13:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF14:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF15:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF16:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF17:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_32:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 64, implicit $exec, implicit $mode
- ; GFX908-NEXT: S_NOP 0, implicit [[DEF13]], implicit [[DEF14]], implicit [[DEF15]], implicit [[DEF16]], implicit [[DEF17]]
- ; GFX908-NEXT: [[DEF18:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF19:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF20:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF21:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF22:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: S_NOP 0, implicit [[DEF18]], implicit [[DEF19]], implicit [[DEF20]], implicit [[DEF21]], implicit [[DEF22]]
- ; GFX908-NEXT: [[DEF23:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF24:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF25:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF26:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF27:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: S_NOP 0, implicit [[DEF23]], implicit [[DEF24]], implicit [[DEF25]], implicit [[DEF26]], implicit [[DEF27]]
- ; GFX908-NEXT: [[DEF28:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_32:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 27, implicit $exec, implicit $mode
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_25]], implicit [[V_CVT_I32_F64_e32_26]], implicit [[V_CVT_I32_F64_e32_32]], implicit [[V_CVT_I32_F64_e32_27]], implicit [[V_CVT_I32_F64_e32_28]]
; GFX908-NEXT: [[DEF29:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF30:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF31:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: S_NOP 0, implicit [[DEF28]], implicit [[DEF29]], implicit [[DEF30]], implicit [[DEF31]], implicit [[V_CVT_I32_F64_e32_32]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_29]], implicit [[V_CVT_I32_F64_e32_30]], implicit [[DEF29]], implicit [[DEF30]], implicit [[DEF31]]
; GFX908-NEXT: [[DEF32:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: S_NOP 0, implicit [[DEF32]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF32]], implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF4]], implicit [[DEF5]], implicit [[DEF6]], implicit [[DEF7]], implicit [[DEF8]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF9]], implicit [[DEF10]], implicit [[DEF11]], implicit [[DEF12]], implicit [[DEF13]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF14]], implicit [[DEF15]], implicit [[DEF16]], implicit [[DEF17]], implicit [[DEF18]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF19]], implicit [[DEF20]], implicit [[DEF21]], implicit [[DEF22]], implicit [[DEF23]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF24]], implicit [[DEF25]], implicit [[DEF26]], implicit [[DEF27]], implicit [[V_CVT_I32_F64_e32_31]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF28]]
; GFX908-NEXT: S_ENDPGM 0
;
; GFX90A-LABEL: name: reduce_arch_and_acc_vgrp_spill
@@ -879,61 +881,61 @@ body: |
; GFX90A-NEXT: [[V_CVT_I32_F64_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
; GFX90A-NEXT: [[V_CVT_I32_F64_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 25, implicit $exec, implicit $mode, implicit-def $m0
; GFX90A-NEXT: [[V_CVT_I32_F64_e32_26:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 26, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: bb.1:
- ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_]], implicit [[V_CVT_I32_F64_e32_1]], implicit [[V_CVT_I32_F64_e32_2]], implicit [[V_CVT_I32_F64_e32_3]], implicit [[V_CVT_I32_F64_e32_4]]
- ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_5]], implicit [[V_CVT_I32_F64_e32_6]], implicit [[V_CVT_I32_F64_e32_7]], implicit [[V_CVT_I32_F64_e32_8]], implicit [[V_CVT_I32_F64_e32_9]]
- ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_10]], implicit [[V_CVT_I32_F64_e32_11]], implicit [[V_CVT_I32_F64_e32_12]], implicit [[V_CVT_I32_F64_e32_13]], implicit [[V_CVT_I32_F64_e32_14]]
- ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_15]], implicit [[V_CVT_I32_F64_e32_16]], implicit [[V_CVT_I32_F64_e32_17]], implicit [[V_CVT_I32_F64_e32_18]], implicit [[V_CVT_I32_F64_e32_19]]
- ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_20]], implicit [[V_CVT_I32_F64_e32_21]], implicit [[V_CVT_I32_F64_e32_22]], implicit [[V_CVT_I32_F64_e32_23]], implicit [[V_CVT_I32_F64_e32_24]]
- ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 27, implicit $exec, implicit $mode
- ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 28, implicit $exec, implicit $mode
- ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 29, implicit $exec, implicit $mode
- ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_25]], implicit [[V_CVT_I32_F64_e32_26]], implicit [[V_CVT_I32_F64_e32_27]], implicit [[V_CVT_I32_F64_e32_28]], implicit [[V_CVT_I32_F64_e32_29]]
; GFX90A-NEXT: [[DEF:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF1:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF2:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 30, implicit $exec, implicit $mode
- ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 31, implicit $exec, implicit $mode
- ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_30]], implicit [[V_CVT_I32_F64_e32_31]], implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]]
; GFX90A-NEXT: [[DEF3:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF4:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF5:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF6:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF7:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX90A-NEXT: S_NOP 0, implicit [[DEF3]], implicit [[DEF4]], implicit [[DEF5]], implicit [[DEF6]], implicit [[DEF7]]
; GFX90A-NEXT: [[DEF8:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF9:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF10:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF11:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF12:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX90A-NEXT: S_NOP 0, implicit [[DEF8]], implicit [[DEF9]], implicit [[DEF10]], implicit [[DEF11]], implicit [[DEF12]]
; GFX90A-NEXT: [[DEF13:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF14:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF15:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF16:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF17:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX90A-NEXT: S_NOP 0, implicit [[DEF13]], implicit [[DEF14]], implicit [[DEF15]], implicit [[DEF16]], implicit [[DEF17]]
; GFX90A-NEXT: [[DEF18:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF19:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF20:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF21:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF22:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX90A-NEXT: S_NOP 0, implicit [[DEF18]], implicit [[DEF19]], implicit [[DEF20]], implicit [[DEF21]], implicit [[DEF22]]
; GFX90A-NEXT: [[DEF23:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF24:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF25:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF26:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF27:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX90A-NEXT: S_NOP 0, implicit [[DEF23]], implicit [[DEF24]], implicit [[DEF25]], implicit [[DEF26]], implicit [[DEF27]]
; GFX90A-NEXT: [[DEF28:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 28, implicit $exec, implicit $mode
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 29, implicit $exec, implicit $mode
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 30, implicit $exec, implicit $mode
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 31, implicit $exec, implicit $mode
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 64, implicit $exec, implicit $mode
+ ; GFX90A-NEXT: {{ $}}
+ ; GFX90A-NEXT: bb.1:
+ ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_]], implicit [[V_CVT_I32_F64_e32_1]], implicit [[V_CVT_I32_F64_e32_2]], implicit [[V_CVT_I32_F64_e32_3]], implicit [[V_CVT_I32_F64_e32_4]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_5]], implicit [[V_CVT_I32_F64_e32_6]], implicit [[V_CVT_I32_F64_e32_7]], implicit [[V_CVT_I32_F64_e32_8]], implicit [[V_CVT_I32_F64_e32_9]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_10]], implicit [[V_CVT_I32_F64_e32_11]], implicit [[V_CVT_I32_F64_e32_12]], implicit [[V_CVT_I32_F64_e32_13]], implicit [[V_CVT_I32_F64_e32_14]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_15]], implicit [[V_CVT_I32_F64_e32_16]], implicit [[V_CVT_I32_F64_e32_17]], implicit [[V_CVT_I32_F64_e32_18]], implicit [[V_CVT_I32_F64_e32_19]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_20]], implicit [[V_CVT_I32_F64_e32_21]], implicit [[V_CVT_I32_F64_e32_22]], implicit [[V_CVT_I32_F64_e32_23]], implicit [[V_CVT_I32_F64_e32_24]]
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_32:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 27, implicit $exec, implicit $mode
+ ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_25]], implicit [[V_CVT_I32_F64_e32_26]], implicit [[V_CVT_I32_F64_e32_32]], implicit [[V_CVT_I32_F64_e32_27]], implicit [[V_CVT_I32_F64_e32_28]]
; GFX90A-NEXT: [[DEF29:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF30:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF31:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_32:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 64, implicit $exec, implicit $mode
- ; GFX90A-NEXT: S_NOP 0, implicit [[DEF28]], implicit [[DEF29]], implicit [[DEF30]], implicit [[DEF31]], implicit [[V_CVT_I32_F64_e32_32]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_29]], implicit [[V_CVT_I32_F64_e32_30]], implicit [[DEF29]], implicit [[DEF30]], implicit [[DEF31]]
; GFX90A-NEXT: [[DEF32:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX90A-NEXT: S_NOP 0, implicit [[DEF32]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF32]], implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF4]], implicit [[DEF5]], implicit [[DEF6]], implicit [[DEF7]], implicit [[DEF8]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF9]], implicit [[DEF10]], implicit [[DEF11]], implicit [[DEF12]], implicit [[DEF13]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF14]], implicit [[DEF15]], implicit [[DEF16]], implicit [[DEF17]], implicit [[DEF18]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF19]], implicit [[DEF20]], implicit [[DEF21]], implicit [[DEF22]], implicit [[DEF23]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF24]], implicit [[DEF25]], implicit [[DEF26]], implicit [[DEF27]], implicit [[V_CVT_I32_F64_e32_31]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF28]]
; GFX90A-NEXT: S_ENDPGM 0
bb.0:
successors: %bb.1
@@ -1039,6 +1041,7 @@ body: |
; GFX908-NEXT: successors: %bb.1(0x80000000)
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: [[V_CVT_I32_F64_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 0, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[DEF:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[V_CVT_I32_F64_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_3:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode, implicit-def $m0
@@ -1293,6 +1296,7 @@ body: |
; GFX908-NEXT: [[V_CVT_I32_F64_e32_252:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 252, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_253:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 253, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_254:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 254, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_255:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 256, implicit $exec, implicit $mode
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.1:
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_]], implicit [[V_CVT_I32_F64_e32_1]], implicit [[V_CVT_I32_F64_e32_2]], implicit [[V_CVT_I32_F64_e32_3]], implicit [[V_CVT_I32_F64_e32_4]], implicit [[V_CVT_I32_F64_e32_5]], implicit [[V_CVT_I32_F64_e32_6]], implicit [[V_CVT_I32_F64_e32_7]], implicit [[V_CVT_I32_F64_e32_8]], implicit [[V_CVT_I32_F64_e32_9]]
@@ -1320,10 +1324,8 @@ body: |
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_220]], implicit [[V_CVT_I32_F64_e32_221]], implicit [[V_CVT_I32_F64_e32_222]], implicit [[V_CVT_I32_F64_e32_223]], implicit [[V_CVT_I32_F64_e32_224]], implicit [[V_CVT_I32_F64_e32_225]], implicit [[V_CVT_I32_F64_e32_226]], implicit [[V_CVT_I32_F64_e32_227]], implicit [[V_CVT_I32_F64_e32_228]], implicit [[V_CVT_I32_F64_e32_229]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_230]], implicit [[V_CVT_I32_F64_e32_231]], implicit [[V_CVT_I32_F64_e32_232]], implicit [[V_CVT_I32_F64_e32_233]], implicit [[V_CVT_I32_F64_e32_234]], implicit [[V_CVT_I32_F64_e32_235]], implicit [[V_CVT_I32_F64_e32_236]], implicit [[V_CVT_I32_F64_e32_237]], implicit [[V_CVT_I32_F64_e32_238]], implicit [[V_CVT_I32_F64_e32_239]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_240]], implicit [[V_CVT_I32_F64_e32_241]], implicit [[V_CVT_I32_F64_e32_242]], implicit [[V_CVT_I32_F64_e32_243]], implicit [[V_CVT_I32_F64_e32_244]], implicit [[V_CVT_I32_F64_e32_245]], implicit [[V_CVT_I32_F64_e32_246]], implicit [[V_CVT_I32_F64_e32_247]], implicit [[V_CVT_I32_F64_e32_248]], implicit [[V_CVT_I32_F64_e32_249]]
- ; GFX908-NEXT: [[DEF:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_255:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 255, implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_256:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 256, implicit $exec, implicit $mode
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_250]], implicit [[V_CVT_I32_F64_e32_251]], implicit [[V_CVT_I32_F64_e32_252]], implicit [[V_CVT_I32_F64_e32_253]], implicit [[V_CVT_I32_F64_e32_254]], implicit [[V_CVT_I32_F64_e32_255]], implicit [[V_CVT_I32_F64_e32_256]], implicit [[DEF]]
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_256:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 255, implicit $exec, implicit $mode
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_250]], implicit [[V_CVT_I32_F64_e32_251]], implicit [[V_CVT_I32_F64_e32_252]], implicit [[V_CVT_I32_F64_e32_253]], implicit [[V_CVT_I32_F64_e32_254]], implicit [[V_CVT_I32_F64_e32_256]], implicit [[V_CVT_I32_F64_e32_255]], implicit [[DEF]]
; GFX908-NEXT: S_ENDPGM 0
;
; GFX90A-LABEL: name: reduce_spill_archvgpr_above_addressable_limit
@@ -1331,6 +1333,7 @@ body: |
; GFX90A-NEXT: successors: %bb.1(0x80000000)
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: [[V_CVT_I32_F64_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 0, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[DEF:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[V_CVT_I32_F64_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode, implicit-def $m0
; GFX90A-NEXT: [[V_CVT_I32_F64_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode, implicit-def $m0
; GFX90A-NEXT: [[V_CVT_I32_F64_e32_3:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode, implicit-def $m0
@@ -1585,6 +1588,7 @@ body: |
; GFX90A-NEXT: [[V_CVT_I32_F64_e32_252:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 252, implicit $exec, implicit $mode, implicit-def $m0
; GFX90A-NEXT: [[V_CVT_I32_F64_e32_253:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 253, implicit $exec, implicit $mode, implicit-def $m0
; GFX90A-NEXT: [[V_CVT_I32_F64_e32_254:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 254, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_255:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 256, implicit $exec, implicit $mode
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.1:
; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_]], implicit [[V_CVT_I32_F64_e32_1]], implicit [[V_CVT_I32_F64_e32_2]], implicit [[V_CVT_I32_F64_e32_3]], implicit [[V_CVT_I32_F64_e32_4]], implicit [[V_CVT_I32_F64_e32_5]], implicit [[V_CVT_I32_F64_e32_6]], implicit [[V_CVT_I32_F64_e32_7]], implicit [[V_CVT_I32_F64_e32_8]], implicit [[V_CVT_I32_F64_e32_9]]
@@ -1612,10 +1616,8 @@ body: |
; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_220]], implicit [[V_CVT_I32_F64_e32_221]], implicit [[V_CVT_I32_F64_e32_222]], implicit [[V_CVT_I32_F64_e32_223]], implicit [[V_CVT_I32_F64_e32_224]], implicit [[V_CVT_I32_F64_e32_225]], implicit [[V_CVT_I32_F64_e32_226]], implicit [[V_CVT_I32_F64_e32_227]], implicit [[V_CVT_I32_F64_e32_228]], implicit [[V_CVT_I32_F64_e32_229]]
; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_230]], implicit [[V_CVT_I32_F64_e32_231]], implicit [[V_CVT_I32_F64_e32_232]], implicit [[V_CVT_I32_F64_e32_233]], implicit [[V_CVT_I32_F64_e32_234]], implicit [[V_CVT_I32_F64_e32_235]], implicit [[V_CVT_I32_F64_e32_236]], implicit [[V_CVT_I32_F64_e32_237]], implicit [[V_CVT_I32_F64_e32_238]], implicit [[V_CVT_I32_F64_e32_239]]
; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_240]], implicit [[V_CVT_I32_F64_e32_241]], implicit [[V_CVT_I32_F64_e32_242]], implicit [[V_CVT_I32_F64_e32_243]], implicit [[V_CVT_I32_F64_e32_244]], implicit [[V_CVT_I32_F64_e32_245]], implicit [[V_CVT_I32_F64_e32_246]], implicit [[V_CVT_I32_F64_e32_247]], implicit [[V_CVT_I32_F64_e32_248]], implicit [[V_CVT_I32_F64_e32_249]]
- ; GFX90A-NEXT: [[DEF:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_255:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 255, implicit $exec, implicit $mode
- ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_256:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 256, implicit $exec, implicit $mode
- ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_250]], implicit [[V_CVT_I32_F64_e32_251]], implicit [[V_CVT_I32_F64_e32_252]], implicit [[V_CVT_I32_F64_e32_253]], implicit [[V_CVT_I32_F64_e32_254]], implicit [[V_CVT_I32_F64_e32_255]], implicit [[V_CVT_I32_F64_e32_256]], implicit [[DEF]]
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_256:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 255, implicit $exec, implicit $mode
+ ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_250]], implicit [[V_CVT_I32_F64_e32_251]], implicit [[V_CVT_I32_F64_e32_252]], implicit [[V_CVT_I32_F64_e32_253]], implicit [[V_CVT_I32_F64_e32_254]], implicit [[V_CVT_I32_F64_e32_256]], implicit [[V_CVT_I32_F64_e32_255]], implicit [[DEF]]
; GFX90A-NEXT: S_ENDPGM 0
bb.0:
successors: %bb.1
@@ -1924,7 +1926,8 @@ body: |
; GFX908: bb.0:
; GFX908-NEXT: successors: %bb.1(0x80000000)
; GFX908-NEXT: {{ $}}
- ; GFX908-NEXT: bb.1:
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 257, implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 258, implicit $exec, implicit $mode
; GFX908-NEXT: [[DEF:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF1:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF2:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
@@ -1935,7 +1938,6 @@ body: |
; GFX908-NEXT: [[DEF7:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF8:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF9:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: S_NOP 0, implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]], implicit [[DEF5]], implicit [[DEF6]], implicit [[DEF7]], implicit [[DEF8]], implicit [[DEF9]]
; GFX908-NEXT: [[DEF10:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF11:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF12:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
@@ -1946,7 +1948,6 @@ body: |
; GFX908-NEXT: [[DEF17:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF18:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF19:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: S_NOP 0, implicit [[DEF10]], implicit [[DEF11]], implicit [[DEF12]], implicit [[DEF13]], implicit [[DEF14]], implicit [[DEF15]], implicit [[DEF16]], implicit [[DEF17]], implicit [[DEF18]], implicit [[DEF19]]
; GFX908-NEXT: [[DEF20:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF21:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF22:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
@@ -1957,7 +1958,6 @@ body: |
; GFX908-NEXT: [[DEF27:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF28:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF29:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: S_NOP 0, implicit [[DEF20]], implicit [[DEF21]], implicit [[DEF22]], implicit [[DEF23]], implicit [[DEF24]], implicit [[DEF25]], implicit [[DEF26]], implicit [[DEF27]], implicit [[DEF28]], implicit [[DEF29]]
; GFX908-NEXT: [[DEF30:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF31:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF32:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
@@ -1968,7 +1968,6 @@ body: |
; GFX908-NEXT: [[DEF37:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF38:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF39:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: S_NOP 0, implicit [[DEF30]], implicit [[DEF31]], implicit [[DEF32]], implicit [[DEF33]], implicit [[DEF34]], implicit [[DEF35]], implicit [[DEF36]], implicit [[DEF37]], implicit [[DEF38]], implicit [[DEF39]]
; GFX908-NEXT: [[DEF40:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF41:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF42:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
@@ -1979,7 +1978,6 @@ body: |
; GFX908-NEXT: [[DEF47:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF48:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF49:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: S_NOP 0, implicit [[DEF40]], implicit [[DEF41]], implicit [[DEF42]], implicit [[DEF43]], implicit [[DEF44]], implicit [[DEF45]], implicit [[DEF46]], implicit [[DEF47]], implicit [[DEF48]], implicit [[DEF49]]
; GFX908-NEXT: [[DEF50:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF51:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF52:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
@@ -1990,7 +1988,6 @@ body: |
; GFX908-NEXT: [[DEF57:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF58:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF59:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: S_NOP 0, implicit [[DEF50]], implicit [[DEF51]], implicit [[DEF52]], implicit [[DEF53]], implicit [[DEF54]], implicit [[DEF55]], implicit [[DEF56]], implicit [[DEF57]], implicit [[DEF58]], implicit [[DEF59]]
; GFX908-NEXT: [[DEF60:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF61:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF62:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
@@ -2001,7 +1998,6 @@ body: |
; GFX908-NEXT: [[DEF67:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF68:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF69:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: S_NOP 0, implicit [[DEF60]], implicit [[DEF61]], implicit [[DEF62]], implicit [[DEF63]], implicit [[DEF64]], implicit [[DEF65]], implicit [[DEF66]], implicit [[DEF67]], implicit [[DEF68]], implicit [[DEF69]]
; GFX908-NEXT: [[DEF70:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF71:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF72:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
@@ -2012,7 +2008,6 @@ body: |
; GFX908-NEXT: [[DEF77:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF78:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF79:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: S_NOP 0, implicit [[DEF70]], implicit [[DEF71]], implicit [[DEF72]], implicit [[DEF73]], implicit [[DEF74]], implicit [[DEF75]], implicit [[DEF76]], implicit [[DEF77]], implicit [[DEF78]], implicit [[DEF79]]
; GFX908-NEXT: [[DEF80:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF81:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF82:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
@@ -2023,7 +2018,6 @@ body: |
; GFX908-NEXT: [[DEF87:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF88:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF89:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: S_NOP 0, implicit [[DEF80]], implicit [[DEF81]], implicit [[DEF82]], implicit [[DEF83]], implicit [[DEF84]], implicit [[DEF85]], implicit [[DEF86]], implicit [[DEF87]], implicit [[DEF88]], implicit [[DEF89]]
; GFX908-NEXT: [[DEF90:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF91:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF92:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
@@ -2034,7 +2028,6 @@ body: |
; GFX908-NEXT: [[DEF97:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF98:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF99:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: S_NOP 0, implicit [[DEF90]], implicit [[DEF91]], implicit [[DEF92]], implicit [[DEF93]], implicit [[DEF94]], implicit [[DEF95]], implicit [[DEF96]], implicit [[DEF97]], implicit [[DEF98]], implicit [[DEF99]]
; GFX908-NEXT: [[DEF100:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF101:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF102:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
@@ -2045,7 +2038,6 @@ body: |
; GFX908-NEXT: [[DEF107:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF108:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF109:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: S_NOP 0, implicit [[DEF100]], implicit [[DEF101]], implicit [[DEF102]], implicit [[DEF103]], implicit [[DEF104]], implicit [[DEF105]], implicit [[DEF106]], implicit [[DEF107]], implicit [[DEF108]], implicit [[DEF109]]
; GFX908-NEXT: [[DEF110:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF111:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF112:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
@@ -2056,7 +2048,6 @@ body: |
; GFX908-NEXT: [[DEF117:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF118:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF119:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: S_NOP 0, implicit [[DEF110]], implicit [[DEF111]], implicit [[DEF112]], implicit [[DEF113]], implicit [[DEF114]], implicit [[DEF115]], implicit [[DEF116]], implicit [[DEF117]], implicit [[DEF118]], implicit [[DEF119]]
; GFX908-NEXT: [[DEF120:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF121:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF122:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
@@ -2067,7 +2058,6 @@ body: |
; GFX908-NEXT: [[DEF127:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF128:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF129:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: S_NOP 0, implicit [[DEF120]], implicit [[DEF121]], implicit [[DEF122]], implicit [[DEF123]], implicit [[DEF124]], implicit [[DEF125]], implicit [[DEF126]], implicit [[DEF127]], implicit [[DEF128]], implicit [[DEF129]]
; GFX908-NEXT: [[DEF130:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF131:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF132:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
@@ -2078,7 +2068,6 @@ body: |
; GFX908-NEXT: [[DEF137:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF138:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF139:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: S_NOP 0, implicit [[DEF130]], implicit [[DEF131]], implicit [[DEF132]], implicit [[DEF133]], implicit [[DEF134]], implicit [[DEF135]], implicit [[DEF136]], implicit [[DEF137]], implicit [[DEF138]], implicit [[DEF139]]
; GFX908-NEXT: [[DEF140:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF141:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF142:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
@@ -2089,7 +2078,6 @@ body: |
; GFX908-NEXT: [[DEF147:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF148:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF149:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: S_NOP 0, implicit [[DEF140]], implicit [[DEF141]], implicit [[DEF142]], implicit [[DEF143]], implicit [[DEF144]], implicit [[DEF145]], implicit [[DEF146]], implicit [[DEF147]], implicit [[DEF148]], implicit [[DEF149]]
; GFX908-NEXT: [[DEF150:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF151:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF152:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
@@ -2100,7 +2088,6 @@ body: |
; GFX908-NEXT: [[DEF157:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF158:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF159:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: S_NOP 0, implicit [[DEF150]], implicit [[DEF151]], implicit [[DEF152]], implicit [[DEF153]], implicit [[DEF154]], implicit [[DEF155]], implicit [[DEF156]], implicit [[DEF157]], implicit [[DEF158]], implicit [[DEF159]]
; GFX908-NEXT: [[DEF160:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF161:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF162:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
@@ -2111,7 +2098,6 @@ body: |
; GFX908-NEXT: [[DEF167:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF168:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF169:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: S_NOP 0, implicit [[DEF160]], implicit [[DEF161]], implicit [[DEF162]], implicit [[DEF163]], implicit [[DEF164]], implicit [[DEF165]], implicit [[DEF166]], implicit [[DEF167]], implicit [[DEF168]], implicit [[DEF169]]
; GFX908-NEXT: [[DEF170:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF171:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF172:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
@@ -2122,7 +2108,6 @@ body: |
; GFX908-NEXT: [[DEF177:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF178:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF179:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: S_NOP 0, implicit [[DEF170]], implicit [[DEF171]], implicit [[DEF172]], implicit [[DEF173]], implicit [[DEF174]], implicit [[DEF175]], implicit [[DEF176]], implicit [[DEF177]], implicit [[DEF178]], implicit [[DEF179]]
; GFX908-NEXT: [[DEF180:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF181:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF182:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
@@ -2133,7 +2118,6 @@ body: |
; GFX908-NEXT: [[DEF187:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF188:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF189:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: S_NOP 0, implicit [[DEF180]], implicit [[DEF181]], implicit [[DEF182]], implicit [[DEF183]], implicit [[DEF184]], implicit [[DEF185]], implicit [[DEF186]], implicit [[DEF187]], implicit [[DEF188]], implicit [[DEF189]]
; GFX908-NEXT: [[DEF190:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF191:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF192:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
@@ -2144,7 +2128,6 @@ body: |
; GFX908-NEXT: [[DEF197:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF198:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF199:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: S_NOP 0, implicit [[DEF190]], implicit [[DEF191]], implicit [[DEF192]], implicit [[DEF193]], implicit [[DEF194]], implicit [[DEF195]], implicit [[DEF196]], implicit [[DEF197]], implicit [[DEF198]], implicit [[DEF199]]
; GFX908-NEXT: [[DEF200:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF201:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF202:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
@@ -2155,7 +2138,6 @@ body: |
; GFX908-NEXT: [[DEF207:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF208:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF209:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: S_NOP 0, implicit [[DEF200]], implicit [[DEF201]], implicit [[DEF202]], implicit [[DEF203]], implicit [[DEF204]], implicit [[DEF205]], implicit [[DEF206]], implicit [[DEF207]], implicit [[DEF208]], implicit [[DEF209]]
; GFX908-NEXT: [[DEF210:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF211:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF212:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
@@ -2166,7 +2148,6 @@ body: |
; GFX908-NEXT: [[DEF217:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF218:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF219:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: S_NOP 0, implicit [[DEF210]], implicit [[DEF211]], implicit [[DEF212]], implicit [[DEF213]], implicit [[DEF214]], implicit [[DEF215]], implicit [[DEF216]], implicit [[DEF217]], implicit [[DEF218]], implicit [[DEF219]]
; GFX908-NEXT: [[DEF220:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF221:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF222:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
@@ -2177,9 +2158,6 @@ body: |
; GFX908-NEXT: [[DEF227:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF228:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF229:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 257, implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 258, implicit $exec, implicit $mode
- ; GFX908-NEXT: S_NOP 0, implicit [[DEF220]], implicit [[DEF221]], implicit [[DEF222]], implicit [[DEF223]], implicit [[DEF224]], implicit [[DEF225]], implicit [[DEF226]], implicit [[DEF227]], implicit [[DEF228]], implicit [[DEF229]]
; GFX908-NEXT: [[DEF230:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF231:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF232:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
@@ -2190,33 +2168,57 @@ body: |
; GFX908-NEXT: [[DEF237:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF238:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF239:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: S_NOP 0, implicit [[DEF230]], implicit [[DEF231]], implicit [[DEF232]], implicit [[DEF233]], implicit [[DEF234]], implicit [[DEF235]], implicit [[DEF236]], implicit [[DEF237]], implicit [[DEF238]], implicit [[DEF239]]
; GFX908-NEXT: [[DEF240:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF241:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF242:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF243:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF244:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF245:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: {{ $}}
+ ; GFX908-NEXT: bb.1:
; GFX908-NEXT: [[DEF246:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF247:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF248:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF249:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: S_NOP 0, implicit [[DEF240]], implicit [[DEF241]], implicit [[DEF242]], implicit [[DEF243]], implicit [[DEF244]], implicit [[DEF245]], implicit [[DEF246]], implicit [[DEF247]], implicit [[DEF248]], implicit [[DEF249]]
; GFX908-NEXT: [[DEF250:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF251:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF252:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF253:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF254:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF255:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF246]], implicit [[DEF247]], implicit [[DEF248]], implicit [[DEF249]], implicit [[DEF250]], implicit [[DEF251]], implicit [[DEF252]], implicit [[DEF253]], implicit [[DEF254]], implicit [[DEF255]]
; GFX908-NEXT: [[DEF256:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: S_NOP 0, implicit [[DEF250]], implicit [[DEF251]], implicit [[DEF252]], implicit [[DEF253]], implicit [[DEF254]], implicit [[DEF255]], implicit [[DEF256]], implicit [[V_CVT_I32_F64_e32_]], implicit [[V_CVT_I32_F64_e32_1]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF256]], implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]], implicit [[DEF5]], implicit [[DEF6]], implicit [[DEF7]], implicit [[DEF8]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF9]], implicit [[DEF10]], implicit [[DEF11]], implicit [[DEF12]], implicit [[DEF13]], implicit [[DEF14]], implicit [[DEF15]], implicit [[DEF16]], implicit [[DEF17]], implicit [[DEF18]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF19]], implicit [[DEF20]], implicit [[DEF21]], implicit [[DEF22]], implicit [[DEF23]], implicit [[DEF24]], implicit [[DEF25]], implicit [[DEF26]], implicit [[DEF27]], implicit [[DEF28]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF29]], implicit [[DEF30]], implicit [[DEF31]], implicit [[DEF32]], implicit [[DEF33]], implicit [[DEF34]], implicit [[DEF35]], implicit [[DEF36]], implicit [[DEF37]], implicit [[DEF38]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF39]], implicit [[DEF40]], implicit [[DEF41]], implicit [[DEF42]], implicit [[DEF43]], implicit [[DEF44]], implicit [[DEF45]], implicit [[DEF46]], implicit [[DEF47]], implicit [[DEF48]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF49]], implicit [[DEF50]], implicit [[DEF51]], implicit [[DEF52]], implicit [[DEF53]], implicit [[DEF54]], implicit [[DEF55]], implicit [[DEF56]], implicit [[DEF57]], implicit [[DEF58]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF59]], implicit [[DEF60]], implicit [[DEF61]], implicit [[DEF62]], implicit [[DEF63]], implicit [[DEF64]], implicit [[DEF65]], implicit [[DEF66]], implicit [[DEF67]], implicit [[DEF68]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF69]], implicit [[DEF70]], implicit [[DEF71]], implicit [[DEF72]], implicit [[DEF73]], implicit [[DEF74]], implicit [[DEF75]], implicit [[DEF76]], implicit [[DEF77]], implicit [[DEF78]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF79]], implicit [[DEF80]], implicit [[DEF81]], implicit [[DEF82]], implicit [[DEF83]], implicit [[DEF84]], implicit [[DEF85]], implicit [[DEF86]], implicit [[DEF87]], implicit [[DEF88]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF89]], implicit [[DEF90]], implicit [[DEF91]], implicit [[DEF92]], implicit [[DEF93]], implicit [[DEF94]], implicit [[DEF95]], implicit [[DEF96]], implicit [[DEF97]], implicit [[DEF98]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF99]], implicit [[DEF100]], implicit [[DEF101]], implicit [[DEF102]], implicit [[DEF103]], implicit [[DEF104]], implicit [[DEF105]], implicit [[DEF106]], implicit [[DEF107]], implicit [[DEF108]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF109]], implicit [[DEF110]], implicit [[DEF111]], implicit [[DEF112]], implicit [[DEF113]], implicit [[DEF114]], implicit [[DEF115]], implicit [[DEF116]], implicit [[DEF117]], implicit [[DEF118]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF119]], implicit [[DEF120]], implicit [[DEF121]], implicit [[DEF122]], implicit [[DEF123]], implicit [[DEF124]], implicit [[DEF125]], implicit [[DEF126]], implicit [[DEF127]], implicit [[DEF128]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF129]], implicit [[DEF130]], implicit [[DEF131]], implicit [[DEF132]], implicit [[DEF133]], implicit [[DEF134]], implicit [[DEF135]], implicit [[DEF136]], implicit [[DEF137]], implicit [[DEF138]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF139]], implicit [[DEF140]], implicit [[DEF141]], implicit [[DEF142]], implicit [[DEF143]], implicit [[DEF144]], implicit [[DEF145]], implicit [[DEF146]], implicit [[DEF147]], implicit [[DEF148]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF149]], implicit [[DEF150]], implicit [[DEF151]], implicit [[DEF152]], implicit [[DEF153]], implicit [[DEF154]], implicit [[DEF155]], implicit [[DEF156]], implicit [[DEF157]], implicit [[DEF158]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF159]], implicit [[DEF160]], implicit [[DEF161]], implicit [[DEF162]], implicit [[DEF163]], implicit [[DEF164]], implicit [[DEF165]], implicit [[DEF166]], implicit [[DEF167]], implicit [[DEF168]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF169]], implicit [[DEF170]], implicit [[DEF171]], implicit [[DEF172]], implicit [[DEF173]], implicit [[DEF174]], implicit [[DEF175]], implicit [[DEF176]], implicit [[DEF177]], implicit [[DEF178]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF179]], implicit [[DEF180]], implicit [[DEF181]], implicit [[DEF182]], implicit [[DEF183]], implicit [[DEF184]], implicit [[DEF185]], implicit [[DEF186]], implicit [[DEF187]], implicit [[DEF188]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF189]], implicit [[DEF190]], implicit [[DEF191]], implicit [[DEF192]], implicit [[DEF193]], implicit [[DEF194]], implicit [[DEF195]], implicit [[DEF196]], implicit [[DEF197]], implicit [[DEF198]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF199]], implicit [[DEF200]], implicit [[DEF201]], implicit [[DEF202]], implicit [[DEF203]], implicit [[DEF204]], implicit [[DEF205]], implicit [[DEF206]], implicit [[DEF207]], implicit [[DEF208]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF209]], implicit [[DEF210]], implicit [[DEF211]], implicit [[DEF212]], implicit [[DEF213]], implicit [[DEF214]], implicit [[DEF215]], implicit [[DEF216]], implicit [[DEF217]], implicit [[DEF218]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF219]], implicit [[DEF220]], implicit [[DEF221]], implicit [[DEF222]], implicit [[DEF223]], implicit [[DEF224]], implicit [[DEF225]], implicit [[DEF226]], implicit [[DEF227]], implicit [[DEF228]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF229]], implicit [[DEF230]], implicit [[DEF231]], implicit [[DEF232]], implicit [[DEF233]], implicit [[DEF234]], implicit [[DEF235]], implicit [[DEF236]], implicit [[DEF237]], implicit [[DEF238]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF239]], implicit [[DEF240]], implicit [[DEF241]], implicit [[DEF242]], implicit [[DEF243]], implicit [[DEF244]], implicit [[DEF245]], implicit [[V_CVT_I32_F64_e32_]], implicit [[V_CVT_I32_F64_e32_1]]
; GFX908-NEXT: S_ENDPGM 0
;
; GFX90A-LABEL: name: reduce_spill_agpr_above_addressable_limit
; GFX90A: bb.0:
; GFX90A-NEXT: successors: %bb.1(0x80000000)
; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: bb.1:
; GFX90A-NEXT: [[DEF:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF1:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF2:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
@@ -2227,7 +2229,6 @@ body: |
; GFX90A-NEXT: [[DEF7:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF8:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF9:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX90A-NEXT: S_NOP 0, implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]], implicit [[DEF5]], implicit [[DEF6]], implicit [[DEF7]], implicit [[DEF8]], implicit [[DEF9]]
; GFX90A-NEXT: [[DEF10:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF11:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF12:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
@@ -2238,7 +2239,6 @@ body: |
; GFX90A-NEXT: [[DEF17:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF18:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF19:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX90A-NEXT: S_NOP 0, implicit [[DEF10]], implicit [[DEF11]], implicit [[DEF12]], implicit [[DEF13]], implicit [[DEF14]], implicit [[DEF15]], implicit [[DEF16]], implicit [[DEF17]], implicit [[DEF18]], implicit [[DEF19]]
; GFX90A-NEXT: [[DEF20:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF21:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF22:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
@@ -2249,7 +2249,6 @@ body: |
; GFX90A-NEXT: [[DEF27:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF28:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF29:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX90A-NEXT: S_NOP 0, implicit [[DEF20]], implicit [[DEF21]], implicit [[DEF22]], implicit [[DEF23]], implicit [[DEF24]], implicit [[DEF25]], implicit [[DEF26]], implicit [[DEF27]], implicit [[DEF28]], implicit [[DEF29]]
; GFX90A-NEXT: [[DEF30:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF31:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF32:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
@@ -2260,7 +2259,6 @@ body: |
; GFX90A-NEXT: [[DEF37:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF38:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF39:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX90A-NEXT: S_NOP 0, implicit [[DEF30]], implicit [[DEF31]], implicit [[DEF32]], implicit [[DEF33]], implicit [[DEF34]], implicit [[DEF35]], implicit [[DEF36]], implicit [[DEF37]], implicit [[DEF38]], implicit [[DEF39]]
; GFX90A-NEXT: [[DEF40:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF41:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF42:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
@@ -2271,7 +2269,6 @@ body: |
; GFX90A-NEXT: [[DEF47:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF48:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF49:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX90A-NEXT: S_NOP 0, implicit [[DEF40]], implicit [[DEF41]], implicit [[DEF42]], implicit [[DEF43]], implicit [[DEF44]], implicit [[DEF45]], implicit [[DEF46]], implicit [[DEF47]], implicit [[DEF48]], implicit [[DEF49]]
; GFX90A-NEXT: [[DEF50:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF51:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF52:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
@@ -2282,7 +2279,6 @@ body: |
; GFX90A-NEXT: [[DEF57:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF58:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF59:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX90A-NEXT: S_NOP 0, implicit [[DEF50]], implicit [[DEF51]], implicit [[DEF52]], implicit [[DEF53]], implicit [[DEF54]], implicit [[DEF55]], implicit [[DEF56]], implicit [[DEF57]], implicit [[DEF58]], implicit [[DEF59]]
; GFX90A-NEXT: [[DEF60:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF61:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF62:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
@@ -2293,7 +2289,6 @@ body: |
; GFX90A-NEXT: [[DEF67:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF68:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF69:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX90A-NEXT: S_NOP 0, implicit [[DEF60]], implicit [[DEF61]], implicit [[DEF62]], implicit [[DEF63]], implicit [[DEF64]], implicit [[DEF65]], implicit [[DEF66]], implicit [[DEF67]], implicit [[DEF68]], implicit [[DEF69]]
; GFX90A-NEXT: [[DEF70:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF71:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF72:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
@@ -2304,7 +2299,6 @@ body: |
; GFX90A-NEXT: [[DEF77:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF78:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF79:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX90A-NEXT: S_NOP 0, implicit [[DEF70]], implicit [[DEF71]], implicit [[DEF72]], implicit [[DEF73]], implicit [[DEF74]], implicit [[DEF75]], implicit [[DEF76]], implicit [[DEF77]], implicit [[DEF78]], implicit [[DEF79]]
; GFX90A-NEXT: [[DEF80:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF81:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF82:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
@@ -2315,7 +2309,6 @@ body: |
; GFX90A-NEXT: [[DEF87:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF88:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF89:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX90A-NEXT: S_NOP 0, implicit [[DEF80]], implicit [[DEF81]], implicit [[DEF82]], implicit [[DEF83]], implicit [[DEF84]], implicit [[DEF85]], implicit [[DEF86]], implicit [[DEF87]], implicit [[DEF88]], implicit [[DEF89]]
; GFX90A-NEXT: [[DEF90:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF91:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF92:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
@@ -2326,7 +2319,6 @@ body: |
; GFX90A-NEXT: [[DEF97:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF98:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF99:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX90A-NEXT: S_NOP 0, implicit [[DEF90]], implicit [[DEF91]], implicit [[DEF92]], implicit [[DEF93]], implicit [[DEF94]], implicit [[DEF95]], implicit [[DEF96]], implicit [[DEF97]], implicit [[DEF98]], implicit [[DEF99]]
; GFX90A-NEXT: [[DEF100:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF101:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF102:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
@@ -2337,7 +2329,6 @@ body: |
; GFX90A-NEXT: [[DEF107:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF108:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF109:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX90A-NEXT: S_NOP 0, implicit [[DEF100]], implicit [[DEF101]], implicit [[DEF102]], implicit [[DEF103]], implicit [[DEF104]], implicit [[DEF105]], implicit [[DEF106]], implicit [[DEF107]], implicit [[DEF108]], implicit [[DEF109]]
; GFX90A-NEXT: [[DEF110:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF111:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF112:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
@@ -2348,7 +2339,6 @@ body: |
; GFX90A-NEXT: [[DEF117:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF118:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF119:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX90A-NEXT: S_NOP 0, implicit [[DEF110]], implicit [[DEF111]], implicit [[DEF112]], implicit [[DEF113]], implicit [[DEF114]], implicit [[DEF115]], implicit [[DEF116]], implicit [[DEF117]], implicit [[DEF118]], implicit [[DEF119]]
; GFX90A-NEXT: [[DEF120:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF121:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF122:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
@@ -2359,7 +2349,6 @@ body: |
; GFX90A-NEXT: [[DEF127:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF128:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF129:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX90A-NEXT: S_NOP 0, implicit [[DEF120]], implicit [[DEF121]], implicit [[DEF122]], implicit [[DEF123]], implicit [[DEF124]], implicit [[DEF125]], implicit [[DEF126]], implicit [[DEF127]], implicit [[DEF128]], implicit [[DEF129]]
; GFX90A-NEXT: [[DEF130:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF131:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF132:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
@@ -2370,7 +2359,6 @@ body: |
; GFX90A-NEXT: [[DEF137:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF138:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF139:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX90A-NEXT: S_NOP 0, implicit [[DEF130]], implicit [[DEF131]], implicit [[DEF132]], implicit [[DEF133]], implicit [[DEF134]], implicit [[DEF135]], implicit [[DEF136]], implicit [[DEF137]], implicit [[DEF138]], implicit [[DEF139]]
; GFX90A-NEXT: [[DEF140:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF141:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF142:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
@@ -2381,7 +2369,6 @@ body: |
; GFX90A-NEXT: [[DEF147:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF148:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF149:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX90A-NEXT: S_NOP 0, implicit [[DEF140]], implicit [[DEF141]], implicit [[DEF142]], implicit [[DEF143]], implicit [[DEF144]], implicit [[DEF145]], implicit [[DEF146]], implicit [[DEF147]], implicit [[DEF148]], implicit [[DEF149]]
; GFX90A-NEXT: [[DEF150:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF151:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF152:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
@@ -2392,7 +2379,6 @@ body: |
; GFX90A-NEXT: [[DEF157:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF158:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF159:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX90A-NEXT: S_NOP 0, implicit [[DEF150]], implicit [[DEF151]], implicit [[DEF152]], implicit [[DEF153]], implicit [[DEF154]], implicit [[DEF155]], implicit [[DEF156]], implicit [[DEF157]], implicit [[DEF158]], implicit [[DEF159]]
; GFX90A-NEXT: [[DEF160:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF161:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF162:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
@@ -2403,7 +2389,6 @@ body: |
; GFX90A-NEXT: [[DEF167:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF168:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF169:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX90A-NEXT: S_NOP 0, implicit [[DEF160]], implicit [[DEF161]], implicit [[DEF162]], implicit [[DEF163]], implicit [[DEF164]], implicit [[DEF165]], implicit [[DEF166]], implicit [[DEF167]], implicit [[DEF168]], implicit [[DEF169]]
; GFX90A-NEXT: [[DEF170:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF171:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF172:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
@@ -2414,7 +2399,6 @@ body: |
; GFX90A-NEXT: [[DEF177:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF178:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF179:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX90A-NEXT: S_NOP 0, implicit [[DEF170]], implicit [[DEF171]], implicit [[DEF172]], implicit [[DEF173]], implicit [[DEF174]], implicit [[DEF175]], implicit [[DEF176]], implicit [[DEF177]], implicit [[DEF178]], implicit [[DEF179]]
; GFX90A-NEXT: [[DEF180:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF181:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF182:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
@@ -2425,7 +2409,6 @@ body: |
; GFX90A-NEXT: [[DEF187:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF188:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF189:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX90A-NEXT: S_NOP 0, implicit [[DEF180]], implicit [[DEF181]], implicit [[DEF182]], implicit [[DEF183]], implicit [[DEF184]], implicit [[DEF185]], implicit [[DEF186]], implicit [[DEF187]], implicit [[DEF188]], implicit [[DEF189]]
; GFX90A-NEXT: [[DEF190:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF191:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF192:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
@@ -2436,7 +2419,6 @@ body: |
; GFX90A-NEXT: [[DEF197:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF198:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF199:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX90A-NEXT: S_NOP 0, implicit [[DEF190]], implicit [[DEF191]], implicit [[DEF192]], implicit [[DEF193]], implicit [[DEF194]], implicit [[DEF195]], implicit [[DEF196]], implicit [[DEF197]], implicit [[DEF198]], implicit [[DEF199]]
; GFX90A-NEXT: [[DEF200:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF201:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF202:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
@@ -2447,7 +2429,6 @@ body: |
; GFX90A-NEXT: [[DEF207:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF208:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF209:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX90A-NEXT: S_NOP 0, implicit [[DEF200]], implicit [[DEF201]], implicit [[DEF202]], implicit [[DEF203]], implicit [[DEF204]], implicit [[DEF205]], implicit [[DEF206]], implicit [[DEF207]], implicit [[DEF208]], implicit [[DEF209]]
; GFX90A-NEXT: [[DEF210:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF211:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF212:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
@@ -2458,7 +2439,6 @@ body: |
; GFX90A-NEXT: [[DEF217:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF218:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF219:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX90A-NEXT: S_NOP 0, implicit [[DEF210]], implicit [[DEF211]], implicit [[DEF212]], implicit [[DEF213]], implicit [[DEF214]], implicit [[DEF215]], implicit [[DEF216]], implicit [[DEF217]], implicit [[DEF218]], implicit [[DEF219]]
; GFX90A-NEXT: [[DEF220:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF221:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF222:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
@@ -2469,7 +2449,6 @@ body: |
; GFX90A-NEXT: [[DEF227:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF228:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF229:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX90A-NEXT: S_NOP 0, implicit [[DEF220]], implicit [[DEF221]], implicit [[DEF222]], implicit [[DEF223]], implicit [[DEF224]], implicit [[DEF225]], implicit [[DEF226]], implicit [[DEF227]], implicit [[DEF228]], implicit [[DEF229]]
; GFX90A-NEXT: [[DEF230:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF231:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF232:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
@@ -2480,28 +2459,53 @@ body: |
; GFX90A-NEXT: [[DEF237:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF238:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF239:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX90A-NEXT: S_NOP 0, implicit [[DEF230]], implicit [[DEF231]], implicit [[DEF232]], implicit [[DEF233]], implicit [[DEF234]], implicit [[DEF235]], implicit [[DEF236]], implicit [[DEF237]], implicit [[DEF238]], implicit [[DEF239]]
; GFX90A-NEXT: [[DEF240:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF241:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF242:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF243:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF244:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF245:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 257, implicit $exec, implicit $mode
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 258, implicit $exec, implicit $mode
+ ; GFX90A-NEXT: {{ $}}
+ ; GFX90A-NEXT: bb.1:
; GFX90A-NEXT: [[DEF246:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF247:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF248:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF249:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX90A-NEXT: S_NOP 0, implicit [[DEF240]], implicit [[DEF241]], implicit [[DEF242]], implicit [[DEF243]], implicit [[DEF244]], implicit [[DEF245]], implicit [[DEF246]], implicit [[DEF247]], implicit [[DEF248]], implicit [[DEF249]]
; GFX90A-NEXT: [[DEF250:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF251:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF252:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF253:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF254:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF255:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF246]], implicit [[DEF247]], implicit [[DEF248]], implicit [[DEF249]], implicit [[DEF250]], implicit [[DEF251]], implicit [[DEF252]], implicit [[DEF253]], implicit [[DEF254]], implicit [[DEF255]]
; GFX90A-NEXT: [[DEF256:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 257, implicit $exec, implicit $mode
- ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 258, implicit $exec, implicit $mode
- ; GFX90A-NEXT: S_NOP 0, implicit [[DEF250]], implicit [[DEF251]], implicit [[DEF252]], implicit [[DEF253]], implicit [[DEF254]], implicit [[DEF255]], implicit [[DEF256]], implicit [[V_CVT_I32_F64_e32_]], implicit [[V_CVT_I32_F64_e32_1]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF256]], implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]], implicit [[DEF5]], implicit [[DEF6]], implicit [[DEF7]], implicit [[DEF8]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF9]], implicit [[DEF10]], implicit [[DEF11]], implicit [[DEF12]], implicit [[DEF13]], implicit [[DEF14]], implicit [[DEF15]], implicit [[DEF16]], implicit [[DEF17]], implicit [[DEF18]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF19]], implicit [[DEF20]], implicit [[DEF21]], implicit [[DEF22]], implicit [[DEF23]], implicit [[DEF24]], implicit [[DEF25]], implicit [[DEF26]], implicit [[DEF27]], implicit [[DEF28]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF29]], implicit [[DEF30]], implicit [[DEF31]], implicit [[DEF32]], implicit [[DEF33]], implicit [[DEF34]], implicit [[DEF35]], implicit [[DEF36]], implicit [[DEF37]], implicit [[DEF38]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF39]], implicit [[DEF40]], implicit [[DEF41]], implicit [[DEF42]], implicit [[DEF43]], implicit [[DEF44]], implicit [[DEF45]], implicit [[DEF46]], implicit [[DEF47]], implicit [[DEF48]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF49]], implicit [[DEF50]], implicit [[DEF51]], implicit [[DEF52]], implicit [[DEF53]], implicit [[DEF54]], implicit [[DEF55]], implicit [[DEF56]], implicit [[DEF57]], implicit [[DEF58]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF59]], implicit [[DEF60]], implicit [[DEF61]], implicit [[DEF62]], implicit [[DEF63]], implicit [[DEF64]], implicit [[DEF65]], implicit [[DEF66]], implicit [[DEF67]], implicit [[DEF68]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF69]], implicit [[DEF70]], implicit [[DEF71]], implicit [[DEF72]], implicit [[DEF73]], implicit [[DEF74]], implicit [[DEF75]], implicit [[DEF76]], implicit [[DEF77]], implicit [[DEF78]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF79]], implicit [[DEF80]], implicit [[DEF81]], implicit [[DEF82]], implicit [[DEF83]], implicit [[DEF84]], implicit [[DEF85]], implicit [[DEF86]], implicit [[DEF87]], implicit [[DEF88]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF89]], implicit [[DEF90]], implicit [[DEF91]], implicit [[DEF92]], implicit [[DEF93]], implicit [[DEF94]], implicit [[DEF95]], implicit [[DEF96]], implicit [[DEF97]], implicit [[DEF98]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF99]], implicit [[DEF100]], implicit [[DEF101]], implicit [[DEF102]], implicit [[DEF103]], implicit [[DEF104]], implicit [[DEF105]], implicit [[DEF106]], implicit [[DEF107]], implicit [[DEF108]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF109]], implicit [[DEF110]], implicit [[DEF111]], implicit [[DEF112]], implicit [[DEF113]], implicit [[DEF114]], implicit [[DEF115]], implicit [[DEF116]], implicit [[DEF117]], implicit [[DEF118]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF119]], implicit [[DEF120]], implicit [[DEF121]], implicit [[DEF122]], implicit [[DEF123]], implicit [[DEF124]], implicit [[DEF125]], implicit [[DEF126]], implicit [[DEF127]], implicit [[DEF128]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF129]], implicit [[DEF130]], implicit [[DEF131]], implicit [[DEF132]], implicit [[DEF133]], implicit [[DEF134]], implicit [[DEF135]], implicit [[DEF136]], implicit [[DEF137]], implicit [[DEF138]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF139]], implicit [[DEF140]], implicit [[DEF141]], implicit [[DEF142]], implicit [[DEF143]], implicit [[DEF144]], implicit [[DEF145]], implicit [[DEF146]], implicit [[DEF147]], implicit [[DEF148]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF149]], implicit [[DEF150]], implicit [[DEF151]], implicit [[DEF152]], implicit [[DEF153]], implicit [[DEF154]], implicit [[DEF155]], implicit [[DEF156]], implicit [[DEF157]], implicit [[DEF158]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF159]], implicit [[DEF160]], implicit [[DEF161]], implicit [[DEF162]], implicit [[DEF163]], implicit [[DEF164]], implicit [[DEF165]], implicit [[DEF166]], implicit [[DEF167]], implicit [[DEF168]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF169]], implicit [[DEF170]], implicit [[DEF171]], implicit [[DEF172]], implicit [[DEF173]], implicit [[DEF174]], implicit [[DEF175]], implicit [[DEF176]], implicit [[DEF177]], implicit [[DEF178]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF179]], implicit [[DEF180]], implicit [[DEF181]], implicit [[DEF182]], implicit [[DEF183]], implicit [[DEF184]], implicit [[DEF185]], implicit [[DEF186]], implicit [[DEF187]], implicit [[DEF188]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF189]], implicit [[DEF190]], implicit [[DEF191]], implicit [[DEF192]], implicit [[DEF193]], implicit [[DEF194]], implicit [[DEF195]], implicit [[DEF196]], implicit [[DEF197]], implicit [[DEF198]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF199]], implicit [[DEF200]], implicit [[DEF201]], implicit [[DEF202]], implicit [[DEF203]], implicit [[DEF204]], implicit [[DEF205]], implicit [[DEF206]], implicit [[DEF207]], implicit [[DEF208]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF209]], implicit [[DEF210]], implicit [[DEF211]], implicit [[DEF212]], implicit [[DEF213]], implicit [[DEF214]], implicit [[DEF215]], implicit [[DEF216]], implicit [[DEF217]], implicit [[DEF218]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF219]], implicit [[DEF220]], implicit [[DEF221]], implicit [[DEF222]], implicit [[DEF223]], implicit [[DEF224]], implicit [[DEF225]], implicit [[DEF226]], implicit [[DEF227]], implicit [[DEF228]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF229]], implicit [[DEF230]], implicit [[DEF231]], implicit [[DEF232]], implicit [[DEF233]], implicit [[DEF234]], implicit [[DEF235]], implicit [[DEF236]], implicit [[DEF237]], implicit [[DEF238]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF239]], implicit [[DEF240]], implicit [[DEF241]], implicit [[DEF242]], implicit [[DEF243]], implicit [[DEF244]], implicit [[DEF245]], implicit [[V_CVT_I32_F64_e32_]], implicit [[V_CVT_I32_F64_e32_1]]
; GFX90A-NEXT: S_ENDPGM 0
bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats.mir b/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats.mir
index 0bf9e0b52207d..6a910e322c8cf 100644
--- a/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats.mir
+++ b/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats.mir
@@ -35,6 +35,7 @@ body: |
; GFX908-NEXT: [[V_CVT_I32_F64_e32_20:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 20, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_21:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 21, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.1:
; GFX908-NEXT: successors: %bb.2(0x80000000)
@@ -50,7 +51,6 @@ body: |
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_10]], implicit [[V_CVT_I32_F64_e32_11]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_12]], implicit [[V_CVT_I32_F64_e32_13]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_14]], implicit [[V_CVT_I32_F64_e32_15]]
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_16]], implicit [[V_CVT_I32_F64_e32_17]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_18]], implicit [[V_CVT_I32_F64_e32_19]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_20]], implicit [[V_CVT_I32_F64_e32_21]]
@@ -351,15 +351,15 @@ body: |
; GFX908-NEXT: [[V_CVT_I32_F64_e32_19:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 19, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_20:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 20, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_21:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 21, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.1:
; GFX908-NEXT: successors: %bb.2(0x80000000)
; GFX908-NEXT: {{ $}}
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_22]]
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_23]], implicit [[V_CVT_I32_F64_e32_24]]
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_23]]
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_24]], implicit [[V_CVT_I32_F64_e32_22]]
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.2:
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_]], implicit [[V_CVT_I32_F64_e32_1]]
@@ -589,29 +589,29 @@ body: |
; GFX908-NEXT: [[V_CVT_I32_F64_e32_19:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 19, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_20:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 20, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_21:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 21, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.1:
; GFX908-NEXT: successors: %bb.2(0x80000000)
; GFX908-NEXT: {{ $}}
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_22]]
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_24]], implicit [[V_CVT_I32_F64_e32_23]]
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_23]]
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_22]], implicit [[V_CVT_I32_F64_e32_24]]
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.2:
; GFX908-NEXT: successors: %bb.3(0x80000000)
; GFX908-NEXT: {{ $}}
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 26, implicit $exec, implicit $mode
; GFX908-NEXT: S_NOP 0
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.3:
; GFX908-NEXT: successors: %bb.4(0x80000000)
; GFX908-NEXT: {{ $}}
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 27, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_25]]
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_26:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 25, implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 26, implicit $exec, implicit $mode
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_26]], implicit [[V_CVT_I32_F64_e32_27]]
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_26:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 27, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_26]]
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 25, implicit $exec, implicit $mode
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_27]], implicit [[V_CVT_I32_F64_e32_25]]
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.4:
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_]], implicit [[V_CVT_I32_F64_e32_1]]
@@ -1095,14 +1095,95 @@ body: |
; GFX908-NEXT: [[V_CVT_I32_F64_e32_17:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 17, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_18:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 18, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_19:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 19, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
+ ; GFX908-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 1
+ ; GFX908-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sgpr_32 = S_MOV_B32 2
+ ; GFX908-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sgpr_32 = S_MOV_B32 3
+ ; GFX908-NEXT: [[S_MOV_B32_5:%[0-9]+]]:sgpr_32 = S_MOV_B32 4
+ ; GFX908-NEXT: [[S_MOV_B32_6:%[0-9]+]]:sgpr_32 = S_MOV_B32 5
+ ; GFX908-NEXT: [[S_MOV_B32_7:%[0-9]+]]:sgpr_32 = S_MOV_B32 6
+ ; GFX908-NEXT: [[S_MOV_B32_8:%[0-9]+]]:sgpr_32 = S_MOV_B32 7
+ ; GFX908-NEXT: [[S_MOV_B32_9:%[0-9]+]]:sgpr_32 = S_MOV_B32 8
+ ; GFX908-NEXT: [[S_MOV_B32_10:%[0-9]+]]:sgpr_32 = S_MOV_B32 9
+ ; GFX908-NEXT: [[S_MOV_B32_11:%[0-9]+]]:sgpr_32 = S_MOV_B32 10
+ ; GFX908-NEXT: [[S_MOV_B32_12:%[0-9]+]]:sgpr_32 = S_MOV_B32 11
+ ; GFX908-NEXT: [[S_MOV_B32_13:%[0-9]+]]:sgpr_32 = S_MOV_B32 12
+ ; GFX908-NEXT: [[S_MOV_B32_14:%[0-9]+]]:sgpr_32 = S_MOV_B32 13
+ ; GFX908-NEXT: [[S_MOV_B32_15:%[0-9]+]]:sgpr_32 = S_MOV_B32 14
+ ; GFX908-NEXT: [[S_MOV_B32_16:%[0-9]+]]:sgpr_32 = S_MOV_B32 15
+ ; GFX908-NEXT: [[S_MOV_B32_17:%[0-9]+]]:sgpr_32 = S_MOV_B32 16
+ ; GFX908-NEXT: [[S_MOV_B32_18:%[0-9]+]]:sgpr_32 = S_MOV_B32 17
+ ; GFX908-NEXT: [[S_MOV_B32_19:%[0-9]+]]:sgpr_32 = S_MOV_B32 18
+ ; GFX908-NEXT: [[S_MOV_B32_20:%[0-9]+]]:sgpr_32 = S_MOV_B32 19
+ ; GFX908-NEXT: [[S_MOV_B32_21:%[0-9]+]]:sgpr_32 = S_MOV_B32 20
+ ; GFX908-NEXT: [[S_MOV_B32_22:%[0-9]+]]:sgpr_32 = S_MOV_B32 21
+ ; GFX908-NEXT: [[S_MOV_B32_23:%[0-9]+]]:sgpr_32 = S_MOV_B32 22
+ ; GFX908-NEXT: [[S_MOV_B32_24:%[0-9]+]]:sgpr_32 = S_MOV_B32 23
+ ; GFX908-NEXT: [[S_MOV_B32_25:%[0-9]+]]:sgpr_32 = S_MOV_B32 24
+ ; GFX908-NEXT: [[S_MOV_B32_26:%[0-9]+]]:sgpr_32 = S_MOV_B32 25
+ ; GFX908-NEXT: [[S_MOV_B32_27:%[0-9]+]]:sgpr_32 = S_MOV_B32 26
+ ; GFX908-NEXT: [[S_MOV_B32_28:%[0-9]+]]:sgpr_32 = S_MOV_B32 27
+ ; GFX908-NEXT: [[S_MOV_B32_29:%[0-9]+]]:sgpr_32 = S_MOV_B32 28
+ ; GFX908-NEXT: [[S_MOV_B32_30:%[0-9]+]]:sgpr_32 = S_MOV_B32 29
+ ; GFX908-NEXT: [[S_MOV_B32_31:%[0-9]+]]:sgpr_32 = S_MOV_B32 30
+ ; GFX908-NEXT: [[S_MOV_B32_32:%[0-9]+]]:sgpr_32 = S_MOV_B32 31
+ ; GFX908-NEXT: [[S_MOV_B32_33:%[0-9]+]]:sgpr_32 = S_MOV_B32 32
+ ; GFX908-NEXT: [[S_MOV_B32_34:%[0-9]+]]:sgpr_32 = S_MOV_B32 33
+ ; GFX908-NEXT: [[S_MOV_B32_35:%[0-9]+]]:sgpr_32 = S_MOV_B32 34
+ ; GFX908-NEXT: [[S_MOV_B32_36:%[0-9]+]]:sgpr_32 = S_MOV_B32 35
+ ; GFX908-NEXT: [[S_MOV_B32_37:%[0-9]+]]:sgpr_32 = S_MOV_B32 36
+ ; GFX908-NEXT: [[S_MOV_B32_38:%[0-9]+]]:sgpr_32 = S_MOV_B32 37
+ ; GFX908-NEXT: [[S_MOV_B32_39:%[0-9]+]]:sgpr_32 = S_MOV_B32 38
+ ; GFX908-NEXT: [[S_MOV_B32_40:%[0-9]+]]:sgpr_32 = S_MOV_B32 39
+ ; GFX908-NEXT: [[S_MOV_B32_41:%[0-9]+]]:sgpr_32 = S_MOV_B32 40
+ ; GFX908-NEXT: [[S_MOV_B32_42:%[0-9]+]]:sgpr_32 = S_MOV_B32 41
+ ; GFX908-NEXT: [[S_MOV_B32_43:%[0-9]+]]:sgpr_32 = S_MOV_B32 42
+ ; GFX908-NEXT: [[S_MOV_B32_44:%[0-9]+]]:sgpr_32 = S_MOV_B32 43
+ ; GFX908-NEXT: [[S_MOV_B32_45:%[0-9]+]]:sgpr_32 = S_MOV_B32 44
+ ; GFX908-NEXT: [[S_MOV_B32_46:%[0-9]+]]:sgpr_32 = S_MOV_B32 45
+ ; GFX908-NEXT: [[S_MOV_B32_47:%[0-9]+]]:sgpr_32 = S_MOV_B32 46
+ ; GFX908-NEXT: [[S_MOV_B32_48:%[0-9]+]]:sgpr_32 = S_MOV_B32 47
+ ; GFX908-NEXT: [[S_MOV_B32_49:%[0-9]+]]:sgpr_32 = S_MOV_B32 48
+ ; GFX908-NEXT: [[S_MOV_B32_50:%[0-9]+]]:sgpr_32 = S_MOV_B32 49
+ ; GFX908-NEXT: [[S_MOV_B32_51:%[0-9]+]]:sgpr_32 = S_MOV_B32 50
+ ; GFX908-NEXT: [[S_MOV_B32_52:%[0-9]+]]:sgpr_32 = S_MOV_B32 51
+ ; GFX908-NEXT: [[S_MOV_B32_53:%[0-9]+]]:sgpr_32 = S_MOV_B32 52
+ ; GFX908-NEXT: [[S_MOV_B32_54:%[0-9]+]]:sgpr_32 = S_MOV_B32 53
+ ; GFX908-NEXT: [[S_MOV_B32_55:%[0-9]+]]:sgpr_32 = S_MOV_B32 54
+ ; GFX908-NEXT: [[S_MOV_B32_56:%[0-9]+]]:sgpr_32 = S_MOV_B32 55
+ ; GFX908-NEXT: [[S_MOV_B32_57:%[0-9]+]]:sgpr_32 = S_MOV_B32 56
+ ; GFX908-NEXT: [[S_MOV_B32_58:%[0-9]+]]:sgpr_32 = S_MOV_B32 57
+ ; GFX908-NEXT: [[S_MOV_B32_59:%[0-9]+]]:sgpr_32 = S_MOV_B32 58
+ ; GFX908-NEXT: [[S_MOV_B32_60:%[0-9]+]]:sgpr_32 = S_MOV_B32 59
+ ; GFX908-NEXT: [[S_MOV_B32_61:%[0-9]+]]:sgpr_32 = S_MOV_B32 60
+ ; GFX908-NEXT: [[S_MOV_B32_62:%[0-9]+]]:sgpr_32 = S_MOV_B32 61
+ ; GFX908-NEXT: [[S_MOV_B32_63:%[0-9]+]]:sgpr_32 = S_MOV_B32 62
+ ; GFX908-NEXT: [[S_MOV_B32_64:%[0-9]+]]:sgpr_32 = S_MOV_B32 63
+ ; GFX908-NEXT: [[S_MOV_B32_65:%[0-9]+]]:sgpr_32 = S_MOV_B32 64
+ ; GFX908-NEXT: [[S_MOV_B32_66:%[0-9]+]]:sgpr_32 = S_MOV_B32 65
+ ; GFX908-NEXT: [[S_MOV_B32_67:%[0-9]+]]:sgpr_32 = S_MOV_B32 66
+ ; GFX908-NEXT: [[S_MOV_B32_68:%[0-9]+]]:sgpr_32 = S_MOV_B32 67
+ ; GFX908-NEXT: [[S_MOV_B32_69:%[0-9]+]]:sgpr_32 = S_MOV_B32 68
+ ; GFX908-NEXT: [[S_MOV_B32_70:%[0-9]+]]:sgpr_32 = S_MOV_B32 69
+ ; GFX908-NEXT: [[S_MOV_B32_71:%[0-9]+]]:sgpr_32 = S_MOV_B32 70
+ ; GFX908-NEXT: [[S_MOV_B32_72:%[0-9]+]]:sgpr_32 = S_MOV_B32 71
; GFX908-NEXT: [[V_CVT_I32_F64_e32_20:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 20, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[S_MOV_B32_73:%[0-9]+]]:sgpr_32 = S_MOV_B32 72
; GFX908-NEXT: [[V_CVT_I32_F64_e32_21:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 21, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[S_MOV_B32_74:%[0-9]+]]:sgpr_32 = S_MOV_B32 73
; GFX908-NEXT: [[V_CVT_I32_F64_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[S_MOV_B32_75:%[0-9]+]]:sgpr_32 = S_MOV_B32 74
; GFX908-NEXT: [[V_CVT_I32_F64_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[S_MOV_B32_76:%[0-9]+]]:sgpr_32 = S_MOV_B32 75
; GFX908-NEXT: [[V_CVT_I32_F64_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[S_MOV_B32_77:%[0-9]+]]:sgpr_32 = S_MOV_B32 76
; GFX908-NEXT: [[V_CVT_I32_F64_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 25, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[S_MOV_B32_78:%[0-9]+]]:sgpr_32 = S_MOV_B32 77
; GFX908-NEXT: [[V_CVT_I32_F64_e32_26:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 26, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[S_MOV_B32_79:%[0-9]+]]:sgpr_32 = S_MOV_B32 78
; GFX908-NEXT: [[V_CVT_I32_F64_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 27, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[S_MOV_B32_80:%[0-9]+]]:sgpr_32 = S_MOV_B32 79
+ ; GFX908-NEXT: [[S_MOV_B32_81:%[0-9]+]]:sgpr_32 = S_MOV_B32 80
; GFX908-NEXT: [[V_CVT_I32_F64_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 28, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 29, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 30, implicit $exec, implicit $mode, implicit-def $m0
@@ -1156,127 +1237,46 @@ body: |
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_28]], implicit [[V_CVT_I32_F64_e32_29]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_30]], implicit [[V_CVT_I32_F64_e32_31]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_32]]
- ; GFX908-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
- ; GFX908-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 1
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_1]], implicit [[S_MOV_B32_2]]
- ; GFX908-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sgpr_32 = S_MOV_B32 2
- ; GFX908-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sgpr_32 = S_MOV_B32 3
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_3]], implicit [[S_MOV_B32_4]]
- ; GFX908-NEXT: [[S_MOV_B32_5:%[0-9]+]]:sgpr_32 = S_MOV_B32 4
- ; GFX908-NEXT: [[S_MOV_B32_6:%[0-9]+]]:sgpr_32 = S_MOV_B32 5
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_5]], implicit [[S_MOV_B32_6]]
- ; GFX908-NEXT: [[S_MOV_B32_7:%[0-9]+]]:sgpr_32 = S_MOV_B32 6
- ; GFX908-NEXT: [[S_MOV_B32_8:%[0-9]+]]:sgpr_32 = S_MOV_B32 7
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_7]], implicit [[S_MOV_B32_8]]
- ; GFX908-NEXT: [[S_MOV_B32_9:%[0-9]+]]:sgpr_32 = S_MOV_B32 8
- ; GFX908-NEXT: [[S_MOV_B32_10:%[0-9]+]]:sgpr_32 = S_MOV_B32 9
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_9]], implicit [[S_MOV_B32_10]]
- ; GFX908-NEXT: [[S_MOV_B32_11:%[0-9]+]]:sgpr_32 = S_MOV_B32 10
- ; GFX908-NEXT: [[S_MOV_B32_12:%[0-9]+]]:sgpr_32 = S_MOV_B32 11
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_11]], implicit [[S_MOV_B32_12]]
- ; GFX908-NEXT: [[S_MOV_B32_13:%[0-9]+]]:sgpr_32 = S_MOV_B32 12
- ; GFX908-NEXT: [[S_MOV_B32_14:%[0-9]+]]:sgpr_32 = S_MOV_B32 13
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_13]], implicit [[S_MOV_B32_14]]
- ; GFX908-NEXT: [[S_MOV_B32_15:%[0-9]+]]:sgpr_32 = S_MOV_B32 14
- ; GFX908-NEXT: [[S_MOV_B32_16:%[0-9]+]]:sgpr_32 = S_MOV_B32 15
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_15]], implicit [[S_MOV_B32_16]]
- ; GFX908-NEXT: [[S_MOV_B32_17:%[0-9]+]]:sgpr_32 = S_MOV_B32 16
- ; GFX908-NEXT: [[S_MOV_B32_18:%[0-9]+]]:sgpr_32 = S_MOV_B32 17
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_17]], implicit [[S_MOV_B32_18]]
- ; GFX908-NEXT: [[S_MOV_B32_19:%[0-9]+]]:sgpr_32 = S_MOV_B32 18
- ; GFX908-NEXT: [[S_MOV_B32_20:%[0-9]+]]:sgpr_32 = S_MOV_B32 19
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_19]], implicit [[S_MOV_B32_20]]
- ; GFX908-NEXT: [[S_MOV_B32_21:%[0-9]+]]:sgpr_32 = S_MOV_B32 20
- ; GFX908-NEXT: [[S_MOV_B32_22:%[0-9]+]]:sgpr_32 = S_MOV_B32 21
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_21]], implicit [[S_MOV_B32_22]]
- ; GFX908-NEXT: [[S_MOV_B32_23:%[0-9]+]]:sgpr_32 = S_MOV_B32 22
- ; GFX908-NEXT: [[S_MOV_B32_24:%[0-9]+]]:sgpr_32 = S_MOV_B32 23
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_23]], implicit [[S_MOV_B32_24]]
- ; GFX908-NEXT: [[S_MOV_B32_25:%[0-9]+]]:sgpr_32 = S_MOV_B32 24
- ; GFX908-NEXT: [[S_MOV_B32_26:%[0-9]+]]:sgpr_32 = S_MOV_B32 25
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_25]], implicit [[S_MOV_B32_26]]
- ; GFX908-NEXT: [[S_MOV_B32_27:%[0-9]+]]:sgpr_32 = S_MOV_B32 26
- ; GFX908-NEXT: [[S_MOV_B32_28:%[0-9]+]]:sgpr_32 = S_MOV_B32 27
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_27]], implicit [[S_MOV_B32_28]]
- ; GFX908-NEXT: [[S_MOV_B32_29:%[0-9]+]]:sgpr_32 = S_MOV_B32 28
- ; GFX908-NEXT: [[S_MOV_B32_30:%[0-9]+]]:sgpr_32 = S_MOV_B32 29
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_29]], implicit [[S_MOV_B32_30]]
- ; GFX908-NEXT: [[S_MOV_B32_31:%[0-9]+]]:sgpr_32 = S_MOV_B32 30
- ; GFX908-NEXT: [[S_MOV_B32_32:%[0-9]+]]:sgpr_32 = S_MOV_B32 31
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_31]], implicit [[S_MOV_B32_32]]
- ; GFX908-NEXT: [[S_MOV_B32_33:%[0-9]+]]:sgpr_32 = S_MOV_B32 32
- ; GFX908-NEXT: [[S_MOV_B32_34:%[0-9]+]]:sgpr_32 = S_MOV_B32 33
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_33]], implicit [[S_MOV_B32_34]]
- ; GFX908-NEXT: [[S_MOV_B32_35:%[0-9]+]]:sgpr_32 = S_MOV_B32 34
- ; GFX908-NEXT: [[S_MOV_B32_36:%[0-9]+]]:sgpr_32 = S_MOV_B32 35
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_35]], implicit [[S_MOV_B32_36]]
- ; GFX908-NEXT: [[S_MOV_B32_37:%[0-9]+]]:sgpr_32 = S_MOV_B32 36
- ; GFX908-NEXT: [[S_MOV_B32_38:%[0-9]+]]:sgpr_32 = S_MOV_B32 37
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_37]], implicit [[S_MOV_B32_38]]
- ; GFX908-NEXT: [[S_MOV_B32_39:%[0-9]+]]:sgpr_32 = S_MOV_B32 38
- ; GFX908-NEXT: [[S_MOV_B32_40:%[0-9]+]]:sgpr_32 = S_MOV_B32 39
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_39]], implicit [[S_MOV_B32_40]]
- ; GFX908-NEXT: [[S_MOV_B32_41:%[0-9]+]]:sgpr_32 = S_MOV_B32 40
- ; GFX908-NEXT: [[S_MOV_B32_42:%[0-9]+]]:sgpr_32 = S_MOV_B32 41
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_41]], implicit [[S_MOV_B32_42]]
- ; GFX908-NEXT: [[S_MOV_B32_43:%[0-9]+]]:sgpr_32 = S_MOV_B32 42
- ; GFX908-NEXT: [[S_MOV_B32_44:%[0-9]+]]:sgpr_32 = S_MOV_B32 43
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_43]], implicit [[S_MOV_B32_44]]
- ; GFX908-NEXT: [[S_MOV_B32_45:%[0-9]+]]:sgpr_32 = S_MOV_B32 44
- ; GFX908-NEXT: [[S_MOV_B32_46:%[0-9]+]]:sgpr_32 = S_MOV_B32 45
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_45]], implicit [[S_MOV_B32_46]]
- ; GFX908-NEXT: [[S_MOV_B32_47:%[0-9]+]]:sgpr_32 = S_MOV_B32 46
- ; GFX908-NEXT: [[S_MOV_B32_48:%[0-9]+]]:sgpr_32 = S_MOV_B32 47
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_47]], implicit [[S_MOV_B32_48]]
- ; GFX908-NEXT: [[S_MOV_B32_49:%[0-9]+]]:sgpr_32 = S_MOV_B32 48
- ; GFX908-NEXT: [[S_MOV_B32_50:%[0-9]+]]:sgpr_32 = S_MOV_B32 49
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_49]], implicit [[S_MOV_B32_50]]
- ; GFX908-NEXT: [[S_MOV_B32_51:%[0-9]+]]:sgpr_32 = S_MOV_B32 50
- ; GFX908-NEXT: [[S_MOV_B32_52:%[0-9]+]]:sgpr_32 = S_MOV_B32 51
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_51]], implicit [[S_MOV_B32_52]]
- ; GFX908-NEXT: [[S_MOV_B32_53:%[0-9]+]]:sgpr_32 = S_MOV_B32 52
- ; GFX908-NEXT: [[S_MOV_B32_54:%[0-9]+]]:sgpr_32 = S_MOV_B32 53
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_53]], implicit [[S_MOV_B32_54]]
- ; GFX908-NEXT: [[S_MOV_B32_55:%[0-9]+]]:sgpr_32 = S_MOV_B32 54
- ; GFX908-NEXT: [[S_MOV_B32_56:%[0-9]+]]:sgpr_32 = S_MOV_B32 55
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_55]], implicit [[S_MOV_B32_56]]
- ; GFX908-NEXT: [[S_MOV_B32_57:%[0-9]+]]:sgpr_32 = S_MOV_B32 56
- ; GFX908-NEXT: [[S_MOV_B32_58:%[0-9]+]]:sgpr_32 = S_MOV_B32 57
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_57]], implicit [[S_MOV_B32_58]]
- ; GFX908-NEXT: [[S_MOV_B32_59:%[0-9]+]]:sgpr_32 = S_MOV_B32 58
- ; GFX908-NEXT: [[S_MOV_B32_60:%[0-9]+]]:sgpr_32 = S_MOV_B32 59
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_59]], implicit [[S_MOV_B32_60]]
- ; GFX908-NEXT: [[S_MOV_B32_61:%[0-9]+]]:sgpr_32 = S_MOV_B32 60
- ; GFX908-NEXT: [[S_MOV_B32_62:%[0-9]+]]:sgpr_32 = S_MOV_B32 61
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_61]], implicit [[S_MOV_B32_62]]
- ; GFX908-NEXT: [[S_MOV_B32_63:%[0-9]+]]:sgpr_32 = S_MOV_B32 62
- ; GFX908-NEXT: [[S_MOV_B32_64:%[0-9]+]]:sgpr_32 = S_MOV_B32 63
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_63]], implicit [[S_MOV_B32_64]]
- ; GFX908-NEXT: [[S_MOV_B32_65:%[0-9]+]]:sgpr_32 = S_MOV_B32 64
- ; GFX908-NEXT: [[S_MOV_B32_66:%[0-9]+]]:sgpr_32 = S_MOV_B32 65
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_65]], implicit [[S_MOV_B32_66]]
- ; GFX908-NEXT: [[S_MOV_B32_67:%[0-9]+]]:sgpr_32 = S_MOV_B32 66
- ; GFX908-NEXT: [[S_MOV_B32_68:%[0-9]+]]:sgpr_32 = S_MOV_B32 67
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_67]], implicit [[S_MOV_B32_68]]
- ; GFX908-NEXT: [[S_MOV_B32_69:%[0-9]+]]:sgpr_32 = S_MOV_B32 68
- ; GFX908-NEXT: [[S_MOV_B32_70:%[0-9]+]]:sgpr_32 = S_MOV_B32 69
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_69]], implicit [[S_MOV_B32_70]]
- ; GFX908-NEXT: [[S_MOV_B32_71:%[0-9]+]]:sgpr_32 = S_MOV_B32 70
- ; GFX908-NEXT: [[S_MOV_B32_72:%[0-9]+]]:sgpr_32 = S_MOV_B32 71
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_71]], implicit [[S_MOV_B32_72]]
- ; GFX908-NEXT: [[S_MOV_B32_73:%[0-9]+]]:sgpr_32 = S_MOV_B32 72
- ; GFX908-NEXT: [[S_MOV_B32_74:%[0-9]+]]:sgpr_32 = S_MOV_B32 73
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_73]], implicit [[S_MOV_B32_74]]
- ; GFX908-NEXT: [[S_MOV_B32_75:%[0-9]+]]:sgpr_32 = S_MOV_B32 74
- ; GFX908-NEXT: [[S_MOV_B32_76:%[0-9]+]]:sgpr_32 = S_MOV_B32 75
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_75]], implicit [[S_MOV_B32_76]]
- ; GFX908-NEXT: [[S_MOV_B32_77:%[0-9]+]]:sgpr_32 = S_MOV_B32 76
- ; GFX908-NEXT: [[S_MOV_B32_78:%[0-9]+]]:sgpr_32 = S_MOV_B32 77
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_77]], implicit [[S_MOV_B32_78]]
- ; GFX908-NEXT: [[S_MOV_B32_79:%[0-9]+]]:sgpr_32 = S_MOV_B32 78
- ; GFX908-NEXT: [[S_MOV_B32_80:%[0-9]+]]:sgpr_32 = S_MOV_B32 79
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_79]], implicit [[S_MOV_B32_80]]
- ; GFX908-NEXT: [[S_MOV_B32_81:%[0-9]+]]:sgpr_32 = S_MOV_B32 80
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_81]]
; GFX908-NEXT: S_ENDPGM 0
bb.0:
@@ -1525,17 +1525,17 @@ body: |
; GFX908-NEXT: [[V_CVT_I32_F64_e32_20:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 20, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_21:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 21, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.1:
; GFX908-NEXT: successors: %bb.2(0x80000000)
; GFX908-NEXT: {{ $}}
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 25, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_23]], implicit [[V_CVT_I32_F64_e32_24]]
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 25, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_24]], implicit [[V_CVT_I32_F64_e32_25]]
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.2:
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_25]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_23]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_]], implicit [[V_CVT_I32_F64_e32_1]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_2]], implicit [[V_CVT_I32_F64_e32_3]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_4]], implicit [[V_CVT_I32_F64_e32_5]]
@@ -1638,7 +1638,80 @@ body: |
; GFX908-NEXT: [[V_CVT_I32_F64_e32_17:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 17, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_18:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 18, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_19:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 19, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 8
+ ; GFX908-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 9
+ ; GFX908-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sgpr_32 = S_MOV_B32 10
+ ; GFX908-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sgpr_32 = S_MOV_B32 11
+ ; GFX908-NEXT: [[S_MOV_B32_5:%[0-9]+]]:sgpr_32 = S_MOV_B32 12
+ ; GFX908-NEXT: [[S_MOV_B32_6:%[0-9]+]]:sgpr_32 = S_MOV_B32 13
+ ; GFX908-NEXT: [[S_MOV_B32_7:%[0-9]+]]:sgpr_32 = S_MOV_B32 14
+ ; GFX908-NEXT: [[S_MOV_B32_8:%[0-9]+]]:sgpr_32 = S_MOV_B32 15
+ ; GFX908-NEXT: [[S_MOV_B32_9:%[0-9]+]]:sgpr_32 = S_MOV_B32 16
+ ; GFX908-NEXT: [[S_MOV_B32_10:%[0-9]+]]:sgpr_32 = S_MOV_B32 17
+ ; GFX908-NEXT: [[S_MOV_B32_11:%[0-9]+]]:sgpr_32 = S_MOV_B32 18
+ ; GFX908-NEXT: [[S_MOV_B32_12:%[0-9]+]]:sgpr_32 = S_MOV_B32 19
+ ; GFX908-NEXT: [[S_MOV_B32_13:%[0-9]+]]:sgpr_32 = S_MOV_B32 20
+ ; GFX908-NEXT: [[S_MOV_B32_14:%[0-9]+]]:sgpr_32 = S_MOV_B32 21
+ ; GFX908-NEXT: [[S_MOV_B32_15:%[0-9]+]]:sgpr_32 = S_MOV_B32 22
+ ; GFX908-NEXT: [[S_MOV_B32_16:%[0-9]+]]:sgpr_32 = S_MOV_B32 23
+ ; GFX908-NEXT: [[S_MOV_B32_17:%[0-9]+]]:sgpr_32 = S_MOV_B32 24
+ ; GFX908-NEXT: [[S_MOV_B32_18:%[0-9]+]]:sgpr_32 = S_MOV_B32 25
+ ; GFX908-NEXT: [[S_MOV_B32_19:%[0-9]+]]:sgpr_32 = S_MOV_B32 26
+ ; GFX908-NEXT: [[S_MOV_B32_20:%[0-9]+]]:sgpr_32 = S_MOV_B32 27
+ ; GFX908-NEXT: [[S_MOV_B32_21:%[0-9]+]]:sgpr_32 = S_MOV_B32 28
+ ; GFX908-NEXT: [[S_MOV_B32_22:%[0-9]+]]:sgpr_32 = S_MOV_B32 29
+ ; GFX908-NEXT: [[S_MOV_B32_23:%[0-9]+]]:sgpr_32 = S_MOV_B32 30
+ ; GFX908-NEXT: [[S_MOV_B32_24:%[0-9]+]]:sgpr_32 = S_MOV_B32 31
+ ; GFX908-NEXT: [[S_MOV_B32_25:%[0-9]+]]:sgpr_32 = S_MOV_B32 32
+ ; GFX908-NEXT: [[S_MOV_B32_26:%[0-9]+]]:sgpr_32 = S_MOV_B32 33
+ ; GFX908-NEXT: [[S_MOV_B32_27:%[0-9]+]]:sgpr_32 = S_MOV_B32 34
+ ; GFX908-NEXT: [[S_MOV_B32_28:%[0-9]+]]:sgpr_32 = S_MOV_B32 35
+ ; GFX908-NEXT: [[S_MOV_B32_29:%[0-9]+]]:sgpr_32 = S_MOV_B32 36
+ ; GFX908-NEXT: [[S_MOV_B32_30:%[0-9]+]]:sgpr_32 = S_MOV_B32 37
+ ; GFX908-NEXT: [[S_MOV_B32_31:%[0-9]+]]:sgpr_32 = S_MOV_B32 38
+ ; GFX908-NEXT: [[S_MOV_B32_32:%[0-9]+]]:sgpr_32 = S_MOV_B32 39
+ ; GFX908-NEXT: [[S_MOV_B32_33:%[0-9]+]]:sgpr_32 = S_MOV_B32 40
+ ; GFX908-NEXT: [[S_MOV_B32_34:%[0-9]+]]:sgpr_32 = S_MOV_B32 41
+ ; GFX908-NEXT: [[S_MOV_B32_35:%[0-9]+]]:sgpr_32 = S_MOV_B32 42
+ ; GFX908-NEXT: [[S_MOV_B32_36:%[0-9]+]]:sgpr_32 = S_MOV_B32 43
+ ; GFX908-NEXT: [[S_MOV_B32_37:%[0-9]+]]:sgpr_32 = S_MOV_B32 44
+ ; GFX908-NEXT: [[S_MOV_B32_38:%[0-9]+]]:sgpr_32 = S_MOV_B32 45
+ ; GFX908-NEXT: [[S_MOV_B32_39:%[0-9]+]]:sgpr_32 = S_MOV_B32 46
+ ; GFX908-NEXT: [[S_MOV_B32_40:%[0-9]+]]:sgpr_32 = S_MOV_B32 47
+ ; GFX908-NEXT: [[S_MOV_B32_41:%[0-9]+]]:sgpr_32 = S_MOV_B32 48
+ ; GFX908-NEXT: [[S_MOV_B32_42:%[0-9]+]]:sgpr_32 = S_MOV_B32 49
+ ; GFX908-NEXT: [[S_MOV_B32_43:%[0-9]+]]:sgpr_32 = S_MOV_B32 50
+ ; GFX908-NEXT: [[S_MOV_B32_44:%[0-9]+]]:sgpr_32 = S_MOV_B32 51
+ ; GFX908-NEXT: [[S_MOV_B32_45:%[0-9]+]]:sgpr_32 = S_MOV_B32 52
+ ; GFX908-NEXT: [[S_MOV_B32_46:%[0-9]+]]:sgpr_32 = S_MOV_B32 53
+ ; GFX908-NEXT: [[S_MOV_B32_47:%[0-9]+]]:sgpr_32 = S_MOV_B32 54
+ ; GFX908-NEXT: [[S_MOV_B32_48:%[0-9]+]]:sgpr_32 = S_MOV_B32 55
+ ; GFX908-NEXT: [[S_MOV_B32_49:%[0-9]+]]:sgpr_32 = S_MOV_B32 56
+ ; GFX908-NEXT: [[S_MOV_B32_50:%[0-9]+]]:sgpr_32 = S_MOV_B32 57
+ ; GFX908-NEXT: [[S_MOV_B32_51:%[0-9]+]]:sgpr_32 = S_MOV_B32 58
+ ; GFX908-NEXT: [[S_MOV_B32_52:%[0-9]+]]:sgpr_32 = S_MOV_B32 59
+ ; GFX908-NEXT: [[S_MOV_B32_53:%[0-9]+]]:sgpr_32 = S_MOV_B32 60
+ ; GFX908-NEXT: [[S_MOV_B32_54:%[0-9]+]]:sgpr_32 = S_MOV_B32 61
+ ; GFX908-NEXT: [[S_MOV_B32_55:%[0-9]+]]:sgpr_32 = S_MOV_B32 62
+ ; GFX908-NEXT: [[S_MOV_B32_56:%[0-9]+]]:sgpr_32 = S_MOV_B32 63
+ ; GFX908-NEXT: [[S_MOV_B32_57:%[0-9]+]]:sgpr_32 = S_MOV_B32 64
+ ; GFX908-NEXT: [[S_MOV_B32_58:%[0-9]+]]:sgpr_32 = S_MOV_B32 65
+ ; GFX908-NEXT: [[S_MOV_B32_59:%[0-9]+]]:sgpr_32 = S_MOV_B32 66
+ ; GFX908-NEXT: [[S_MOV_B32_60:%[0-9]+]]:sgpr_32 = S_MOV_B32 67
+ ; GFX908-NEXT: [[S_MOV_B32_61:%[0-9]+]]:sgpr_32 = S_MOV_B32 68
+ ; GFX908-NEXT: [[S_MOV_B32_62:%[0-9]+]]:sgpr_32 = S_MOV_B32 69
+ ; GFX908-NEXT: [[S_MOV_B32_63:%[0-9]+]]:sgpr_32 = S_MOV_B32 70
+ ; GFX908-NEXT: [[S_MOV_B32_64:%[0-9]+]]:sgpr_32 = S_MOV_B32 71
+ ; GFX908-NEXT: [[S_MOV_B32_65:%[0-9]+]]:sgpr_32 = S_MOV_B32 72
+ ; GFX908-NEXT: [[S_MOV_B32_66:%[0-9]+]]:sgpr_32 = S_MOV_B32 73
+ ; GFX908-NEXT: [[S_MOV_B32_67:%[0-9]+]]:sgpr_32 = S_MOV_B32 74
+ ; GFX908-NEXT: [[S_MOV_B32_68:%[0-9]+]]:sgpr_32 = S_MOV_B32 75
+ ; GFX908-NEXT: [[S_MOV_B32_69:%[0-9]+]]:sgpr_32 = S_MOV_B32 76
+ ; GFX908-NEXT: [[S_MOV_B32_70:%[0-9]+]]:sgpr_32 = S_MOV_B32 77
+ ; GFX908-NEXT: [[S_MOV_B32_71:%[0-9]+]]:sgpr_32 = S_MOV_B32 78
+ ; GFX908-NEXT: [[S_MOV_B32_72:%[0-9]+]]:sgpr_32 = S_MOV_B32 79
; GFX908-NEXT: [[V_CVT_I32_F64_e32_20:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 20, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[S_MOV_B32_73:%[0-9]+]]:sgpr_32 = S_MOV_B32 80
; GFX908-NEXT: [[V_CVT_I32_F64_e32_21:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 21, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: {{ $}}
@@ -1685,128 +1758,55 @@ body: |
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_9]], implicit [[V_CVT_I32_F64_e32_19]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_20]], implicit [[V_CVT_I32_F64_e32_21]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_22]]
- ; GFX908-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
- ; GFX908-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 1
+ ; GFX908-NEXT: [[S_MOV_B32_74:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
+ ; GFX908-NEXT: [[S_MOV_B32_75:%[0-9]+]]:sgpr_32 = S_MOV_B32 1
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_74]], implicit [[S_MOV_B32_75]]
+ ; GFX908-NEXT: [[S_MOV_B32_76:%[0-9]+]]:sgpr_32 = S_MOV_B32 2
+ ; GFX908-NEXT: [[S_MOV_B32_77:%[0-9]+]]:sgpr_32 = S_MOV_B32 3
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_76]], implicit [[S_MOV_B32_77]]
+ ; GFX908-NEXT: [[S_MOV_B32_78:%[0-9]+]]:sgpr_32 = S_MOV_B32 4
+ ; GFX908-NEXT: [[S_MOV_B32_79:%[0-9]+]]:sgpr_32 = S_MOV_B32 5
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_78]], implicit [[S_MOV_B32_79]]
+ ; GFX908-NEXT: [[S_MOV_B32_80:%[0-9]+]]:sgpr_32 = S_MOV_B32 6
+ ; GFX908-NEXT: [[S_MOV_B32_81:%[0-9]+]]:sgpr_32 = S_MOV_B32 7
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_80]], implicit [[S_MOV_B32_81]]
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_1]], implicit [[S_MOV_B32_2]]
- ; GFX908-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sgpr_32 = S_MOV_B32 2
- ; GFX908-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sgpr_32 = S_MOV_B32 3
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_3]], implicit [[S_MOV_B32_4]]
- ; GFX908-NEXT: [[S_MOV_B32_5:%[0-9]+]]:sgpr_32 = S_MOV_B32 4
- ; GFX908-NEXT: [[S_MOV_B32_6:%[0-9]+]]:sgpr_32 = S_MOV_B32 5
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_5]], implicit [[S_MOV_B32_6]]
- ; GFX908-NEXT: [[S_MOV_B32_7:%[0-9]+]]:sgpr_32 = S_MOV_B32 6
- ; GFX908-NEXT: [[S_MOV_B32_8:%[0-9]+]]:sgpr_32 = S_MOV_B32 7
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_7]], implicit [[S_MOV_B32_8]]
- ; GFX908-NEXT: [[S_MOV_B32_9:%[0-9]+]]:sgpr_32 = S_MOV_B32 8
- ; GFX908-NEXT: [[S_MOV_B32_10:%[0-9]+]]:sgpr_32 = S_MOV_B32 9
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_9]], implicit [[S_MOV_B32_10]]
- ; GFX908-NEXT: [[S_MOV_B32_11:%[0-9]+]]:sgpr_32 = S_MOV_B32 10
- ; GFX908-NEXT: [[S_MOV_B32_12:%[0-9]+]]:sgpr_32 = S_MOV_B32 11
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_11]], implicit [[S_MOV_B32_12]]
- ; GFX908-NEXT: [[S_MOV_B32_13:%[0-9]+]]:sgpr_32 = S_MOV_B32 12
- ; GFX908-NEXT: [[S_MOV_B32_14:%[0-9]+]]:sgpr_32 = S_MOV_B32 13
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_13]], implicit [[S_MOV_B32_14]]
- ; GFX908-NEXT: [[S_MOV_B32_15:%[0-9]+]]:sgpr_32 = S_MOV_B32 14
- ; GFX908-NEXT: [[S_MOV_B32_16:%[0-9]+]]:sgpr_32 = S_MOV_B32 15
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_15]], implicit [[S_MOV_B32_16]]
- ; GFX908-NEXT: [[S_MOV_B32_17:%[0-9]+]]:sgpr_32 = S_MOV_B32 16
- ; GFX908-NEXT: [[S_MOV_B32_18:%[0-9]+]]:sgpr_32 = S_MOV_B32 17
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_17]], implicit [[S_MOV_B32_18]]
- ; GFX908-NEXT: [[S_MOV_B32_19:%[0-9]+]]:sgpr_32 = S_MOV_B32 18
- ; GFX908-NEXT: [[S_MOV_B32_20:%[0-9]+]]:sgpr_32 = S_MOV_B32 19
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_19]], implicit [[S_MOV_B32_20]]
- ; GFX908-NEXT: [[S_MOV_B32_21:%[0-9]+]]:sgpr_32 = S_MOV_B32 20
- ; GFX908-NEXT: [[S_MOV_B32_22:%[0-9]+]]:sgpr_32 = S_MOV_B32 21
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_21]], implicit [[S_MOV_B32_22]]
- ; GFX908-NEXT: [[S_MOV_B32_23:%[0-9]+]]:sgpr_32 = S_MOV_B32 22
- ; GFX908-NEXT: [[S_MOV_B32_24:%[0-9]+]]:sgpr_32 = S_MOV_B32 23
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_23]], implicit [[S_MOV_B32_24]]
- ; GFX908-NEXT: [[S_MOV_B32_25:%[0-9]+]]:sgpr_32 = S_MOV_B32 24
- ; GFX908-NEXT: [[S_MOV_B32_26:%[0-9]+]]:sgpr_32 = S_MOV_B32 25
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_25]], implicit [[S_MOV_B32_26]]
- ; GFX908-NEXT: [[S_MOV_B32_27:%[0-9]+]]:sgpr_32 = S_MOV_B32 26
- ; GFX908-NEXT: [[S_MOV_B32_28:%[0-9]+]]:sgpr_32 = S_MOV_B32 27
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_27]], implicit [[S_MOV_B32_28]]
- ; GFX908-NEXT: [[S_MOV_B32_29:%[0-9]+]]:sgpr_32 = S_MOV_B32 28
- ; GFX908-NEXT: [[S_MOV_B32_30:%[0-9]+]]:sgpr_32 = S_MOV_B32 29
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_29]], implicit [[S_MOV_B32_30]]
- ; GFX908-NEXT: [[S_MOV_B32_31:%[0-9]+]]:sgpr_32 = S_MOV_B32 30
- ; GFX908-NEXT: [[S_MOV_B32_32:%[0-9]+]]:sgpr_32 = S_MOV_B32 31
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_31]], implicit [[S_MOV_B32_32]]
- ; GFX908-NEXT: [[S_MOV_B32_33:%[0-9]+]]:sgpr_32 = S_MOV_B32 32
- ; GFX908-NEXT: [[S_MOV_B32_34:%[0-9]+]]:sgpr_32 = S_MOV_B32 33
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_33]], implicit [[S_MOV_B32_34]]
- ; GFX908-NEXT: [[S_MOV_B32_35:%[0-9]+]]:sgpr_32 = S_MOV_B32 34
- ; GFX908-NEXT: [[S_MOV_B32_36:%[0-9]+]]:sgpr_32 = S_MOV_B32 35
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_35]], implicit [[S_MOV_B32_36]]
- ; GFX908-NEXT: [[S_MOV_B32_37:%[0-9]+]]:sgpr_32 = S_MOV_B32 36
- ; GFX908-NEXT: [[S_MOV_B32_38:%[0-9]+]]:sgpr_32 = S_MOV_B32 37
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_37]], implicit [[S_MOV_B32_38]]
- ; GFX908-NEXT: [[S_MOV_B32_39:%[0-9]+]]:sgpr_32 = S_MOV_B32 38
- ; GFX908-NEXT: [[S_MOV_B32_40:%[0-9]+]]:sgpr_32 = S_MOV_B32 39
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_39]], implicit [[S_MOV_B32_40]]
- ; GFX908-NEXT: [[S_MOV_B32_41:%[0-9]+]]:sgpr_32 = S_MOV_B32 40
- ; GFX908-NEXT: [[S_MOV_B32_42:%[0-9]+]]:sgpr_32 = S_MOV_B32 41
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_41]], implicit [[S_MOV_B32_42]]
- ; GFX908-NEXT: [[S_MOV_B32_43:%[0-9]+]]:sgpr_32 = S_MOV_B32 42
- ; GFX908-NEXT: [[S_MOV_B32_44:%[0-9]+]]:sgpr_32 = S_MOV_B32 43
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_43]], implicit [[S_MOV_B32_44]]
- ; GFX908-NEXT: [[S_MOV_B32_45:%[0-9]+]]:sgpr_32 = S_MOV_B32 44
- ; GFX908-NEXT: [[S_MOV_B32_46:%[0-9]+]]:sgpr_32 = S_MOV_B32 45
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_45]], implicit [[S_MOV_B32_46]]
- ; GFX908-NEXT: [[S_MOV_B32_47:%[0-9]+]]:sgpr_32 = S_MOV_B32 46
- ; GFX908-NEXT: [[S_MOV_B32_48:%[0-9]+]]:sgpr_32 = S_MOV_B32 47
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_47]], implicit [[S_MOV_B32_48]]
- ; GFX908-NEXT: [[S_MOV_B32_49:%[0-9]+]]:sgpr_32 = S_MOV_B32 48
- ; GFX908-NEXT: [[S_MOV_B32_50:%[0-9]+]]:sgpr_32 = S_MOV_B32 49
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_49]], implicit [[S_MOV_B32_50]]
- ; GFX908-NEXT: [[S_MOV_B32_51:%[0-9]+]]:sgpr_32 = S_MOV_B32 50
- ; GFX908-NEXT: [[S_MOV_B32_52:%[0-9]+]]:sgpr_32 = S_MOV_B32 51
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_51]], implicit [[S_MOV_B32_52]]
- ; GFX908-NEXT: [[S_MOV_B32_53:%[0-9]+]]:sgpr_32 = S_MOV_B32 52
- ; GFX908-NEXT: [[S_MOV_B32_54:%[0-9]+]]:sgpr_32 = S_MOV_B32 53
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_53]], implicit [[S_MOV_B32_54]]
- ; GFX908-NEXT: [[S_MOV_B32_55:%[0-9]+]]:sgpr_32 = S_MOV_B32 54
- ; GFX908-NEXT: [[S_MOV_B32_56:%[0-9]+]]:sgpr_32 = S_MOV_B32 55
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_55]], implicit [[S_MOV_B32_56]]
- ; GFX908-NEXT: [[S_MOV_B32_57:%[0-9]+]]:sgpr_32 = S_MOV_B32 56
- ; GFX908-NEXT: [[S_MOV_B32_58:%[0-9]+]]:sgpr_32 = S_MOV_B32 57
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_57]], implicit [[S_MOV_B32_58]]
- ; GFX908-NEXT: [[S_MOV_B32_59:%[0-9]+]]:sgpr_32 = S_MOV_B32 58
- ; GFX908-NEXT: [[S_MOV_B32_60:%[0-9]+]]:sgpr_32 = S_MOV_B32 59
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_59]], implicit [[S_MOV_B32_60]]
- ; GFX908-NEXT: [[S_MOV_B32_61:%[0-9]+]]:sgpr_32 = S_MOV_B32 60
- ; GFX908-NEXT: [[S_MOV_B32_62:%[0-9]+]]:sgpr_32 = S_MOV_B32 61
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_61]], implicit [[S_MOV_B32_62]]
- ; GFX908-NEXT: [[S_MOV_B32_63:%[0-9]+]]:sgpr_32 = S_MOV_B32 62
- ; GFX908-NEXT: [[S_MOV_B32_64:%[0-9]+]]:sgpr_32 = S_MOV_B32 63
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_63]], implicit [[S_MOV_B32_64]]
- ; GFX908-NEXT: [[S_MOV_B32_65:%[0-9]+]]:sgpr_32 = S_MOV_B32 64
- ; GFX908-NEXT: [[S_MOV_B32_66:%[0-9]+]]:sgpr_32 = S_MOV_B32 65
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_65]], implicit [[S_MOV_B32_66]]
- ; GFX908-NEXT: [[S_MOV_B32_67:%[0-9]+]]:sgpr_32 = S_MOV_B32 66
- ; GFX908-NEXT: [[S_MOV_B32_68:%[0-9]+]]:sgpr_32 = S_MOV_B32 67
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_67]], implicit [[S_MOV_B32_68]]
- ; GFX908-NEXT: [[S_MOV_B32_69:%[0-9]+]]:sgpr_32 = S_MOV_B32 68
- ; GFX908-NEXT: [[S_MOV_B32_70:%[0-9]+]]:sgpr_32 = S_MOV_B32 69
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_69]], implicit [[S_MOV_B32_70]]
- ; GFX908-NEXT: [[S_MOV_B32_71:%[0-9]+]]:sgpr_32 = S_MOV_B32 70
- ; GFX908-NEXT: [[S_MOV_B32_72:%[0-9]+]]:sgpr_32 = S_MOV_B32 71
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_71]], implicit [[S_MOV_B32_72]]
- ; GFX908-NEXT: [[S_MOV_B32_73:%[0-9]+]]:sgpr_32 = S_MOV_B32 72
- ; GFX908-NEXT: [[S_MOV_B32_74:%[0-9]+]]:sgpr_32 = S_MOV_B32 73
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_73]], implicit [[S_MOV_B32_74]]
- ; GFX908-NEXT: [[S_MOV_B32_75:%[0-9]+]]:sgpr_32 = S_MOV_B32 74
- ; GFX908-NEXT: [[S_MOV_B32_76:%[0-9]+]]:sgpr_32 = S_MOV_B32 75
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_75]], implicit [[S_MOV_B32_76]]
- ; GFX908-NEXT: [[S_MOV_B32_77:%[0-9]+]]:sgpr_32 = S_MOV_B32 76
- ; GFX908-NEXT: [[S_MOV_B32_78:%[0-9]+]]:sgpr_32 = S_MOV_B32 77
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_77]], implicit [[S_MOV_B32_78]]
- ; GFX908-NEXT: [[S_MOV_B32_79:%[0-9]+]]:sgpr_32 = S_MOV_B32 78
- ; GFX908-NEXT: [[S_MOV_B32_80:%[0-9]+]]:sgpr_32 = S_MOV_B32 79
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_79]], implicit [[S_MOV_B32_80]]
- ; GFX908-NEXT: [[S_MOV_B32_81:%[0-9]+]]:sgpr_32 = S_MOV_B32 80
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_81]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_73]]
; GFX908-NEXT: S_ENDPGM 0
bb.0:
liveins: $vgpr0, $sgpr0_sgpr1
@@ -2457,30 +2457,30 @@ body: |
; GFX908-NEXT: [[V_CVT_I32_F64_e32_20:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 20, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_21:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 21, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.1:
; GFX908-NEXT: successors: %bb.2(0x80000000)
; GFX908-NEXT: {{ $}}
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_23]]
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_24]]
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.2:
; GFX908-NEXT: successors: %bb.3(0x80000000)
; GFX908-NEXT: {{ $}}
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_24]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_23]]
; GFX908-NEXT: S_NOP 0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 25, implicit $exec, implicit $mode
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.3:
; GFX908-NEXT: successors: %bb.4(0x80000000)
; GFX908-NEXT: {{ $}}
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 26, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_26:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 27, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_25]], implicit [[V_CVT_I32_F64_e32_26]]
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_26:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 26, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 27, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_26]], implicit [[V_CVT_I32_F64_e32_27]]
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.4:
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 25, implicit $exec, implicit $mode
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_27]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_25]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_]], implicit [[V_CVT_I32_F64_e32_1]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_2]], implicit [[V_CVT_I32_F64_e32_3]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_4]], implicit [[V_CVT_I32_F64_e32_5]]
@@ -5669,13 +5669,13 @@ body: |
; GFX908-NEXT: [[V_CVT_I32_F64_e32_20:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 20, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_21:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 21, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.1:
; GFX908-NEXT: successors: %bb.2(0x80000000)
; GFX908-NEXT: {{ $}}
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_24]], implicit [[V_CVT_I32_F64_e32_23]]
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_23]], implicit [[V_CVT_I32_F64_e32_24]]
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.2:
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_]], implicit [[V_CVT_I32_F64_e32_1]]
@@ -5981,6 +5981,7 @@ body: |
; GFX908-NEXT: [[V_CVT_I32_F64_e32_19:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 19, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_20:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 20, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_21:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 21, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_F64_I32_e32_:%[0-9]+]]:vreg_64 = nofpexcept V_CVT_F64_I32_e32 22, implicit $exec, implicit $mode
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.1:
; GFX908-NEXT: successors: %bb.2(0x80000000)
@@ -5989,7 +5990,6 @@ body: |
; GFX908-NEXT: [[V_CVT_I32_F64_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 25, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_22]], implicit [[V_CVT_I32_F64_e32_23]], implicit [[V_CVT_I32_F64_e32_24]]
- ; GFX908-NEXT: [[V_CVT_F64_I32_e32_:%[0-9]+]]:vreg_64 = nofpexcept V_CVT_F64_I32_e32 22, implicit $exec, implicit $mode
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_F64_I32_e32_]]
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.2:
@@ -6088,12 +6088,12 @@ body: |
; GFX908-NEXT: [[V_CVT_I32_F64_e32_19:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 19, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_20:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 20, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_21:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 21, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_F64_I32_e32_:%[0-9]+]]:vreg_64 = nofpexcept V_CVT_F64_I32_e32 22, implicit $exec, implicit $mode
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.1:
; GFX908-NEXT: successors: %bb.2(0x80000000)
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: [[V_CVT_I32_F64_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[V_CVT_F64_I32_e32_:%[0-9]+]]:vreg_64 = nofpexcept V_CVT_F64_I32_e32 22, implicit $exec, implicit $mode
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_F64_I32_e32_]], implicit [[V_CVT_I32_F64_e32_22]]
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.2:
@@ -6514,55 +6514,56 @@ body: |
; GFX908: bb.0:
; GFX908-NEXT: successors: %bb.1(0x80000000)
; GFX908-NEXT: {{ $}}
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode
+ ; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.1:
; GFX908-NEXT: successors: %bb.2(0x80000000)
; GFX908-NEXT: {{ $}}
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 4, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_3:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 5, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_4:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 6, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_5:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 7, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_6:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 8, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_7:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 9, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_8:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 10, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_9:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 11, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_10:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 12, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_11:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 13, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_12:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 14, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_13:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 15, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_14:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 16, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_15:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 17, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_16:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 18, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_17:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 19, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_18:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 20, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_19:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 21, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_20:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_21:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 25, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 26, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 27, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_26:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 28, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 29, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 30, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 31, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: dead [[V_CVT_I32_F64_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 32, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_3:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 4, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_4:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 5, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_5:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 6, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_6:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 7, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_7:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 8, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_8:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 9, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_9:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 10, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_10:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 11, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_11:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 12, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_12:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 13, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_13:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 14, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_14:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 15, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_15:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 16, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_16:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 17, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_17:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 18, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_18:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 19, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_19:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 20, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_20:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 21, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_21:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 25, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 26, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_26:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 27, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 28, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 29, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 30, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 31, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: dead [[V_CVT_I32_F64_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 32, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.2:
; GFX908-NEXT: successors: %bb.3(0x80000000)
; GFX908-NEXT: {{ $}}
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_]], implicit [[V_CVT_I32_F64_e32_1]], implicit [[V_CVT_I32_F64_e32_2]], implicit [[V_CVT_I32_F64_e32_3]], implicit [[V_CVT_I32_F64_e32_4]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_5]], implicit [[V_CVT_I32_F64_e32_6]], implicit [[V_CVT_I32_F64_e32_7]], implicit [[V_CVT_I32_F64_e32_8]], implicit [[V_CVT_I32_F64_e32_9]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_10]], implicit [[V_CVT_I32_F64_e32_11]], implicit [[V_CVT_I32_F64_e32_12]], implicit [[V_CVT_I32_F64_e32_13]], implicit [[V_CVT_I32_F64_e32_14]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_15]], implicit [[V_CVT_I32_F64_e32_16]], implicit [[V_CVT_I32_F64_e32_17]], implicit [[V_CVT_I32_F64_e32_18]], implicit [[V_CVT_I32_F64_e32_19]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_20]], implicit [[V_CVT_I32_F64_e32_21]], implicit [[V_CVT_I32_F64_e32_22]], implicit [[V_CVT_I32_F64_e32_23]], implicit [[V_CVT_I32_F64_e32_24]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_25]], implicit [[V_CVT_I32_F64_e32_26]], implicit [[V_CVT_I32_F64_e32_27]], implicit [[V_CVT_I32_F64_e32_28]], implicit [[V_CVT_I32_F64_e32_29]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_1]], implicit [[V_CVT_I32_F64_e32_2]], implicit [[V_CVT_I32_F64_e32_3]], implicit [[V_CVT_I32_F64_e32_4]], implicit [[V_CVT_I32_F64_e32_5]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_6]], implicit [[V_CVT_I32_F64_e32_7]], implicit [[V_CVT_I32_F64_e32_8]], implicit [[V_CVT_I32_F64_e32_9]], implicit [[V_CVT_I32_F64_e32_10]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_11]], implicit [[V_CVT_I32_F64_e32_12]], implicit [[V_CVT_I32_F64_e32_13]], implicit [[V_CVT_I32_F64_e32_14]], implicit [[V_CVT_I32_F64_e32_15]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_16]], implicit [[V_CVT_I32_F64_e32_17]], implicit [[V_CVT_I32_F64_e32_18]], implicit [[V_CVT_I32_F64_e32_19]], implicit [[V_CVT_I32_F64_e32_20]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_21]], implicit [[V_CVT_I32_F64_e32_22]], implicit [[V_CVT_I32_F64_e32_23]], implicit [[V_CVT_I32_F64_e32_24]], implicit [[V_CVT_I32_F64_e32_25]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_26]], implicit [[V_CVT_I32_F64_e32_27]], implicit [[V_CVT_I32_F64_e32_28]], implicit [[V_CVT_I32_F64_e32_29]], implicit [[V_CVT_I32_F64_e32_30]]
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.3:
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 0, implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_32:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_31]], implicit [[V_CVT_I32_F64_e32_32]]
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_32:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 0, implicit $exec, implicit $mode
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_32]], implicit [[V_CVT_I32_F64_e32_]]
; GFX908-NEXT: S_ENDPGM 0
bb.0:
successors: %bb.1
@@ -6919,6 +6920,7 @@ body: |
; GFX908-NEXT: [[V_CVT_I32_F64_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 29, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 30, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 31, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_32:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 32, implicit $exec, implicit $mode
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.1:
; GFX908-NEXT: successors: %bb.2(0x80000000)
@@ -6929,7 +6931,6 @@ body: |
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_15]], implicit [[V_CVT_I32_F64_e32_16]], implicit [[V_CVT_I32_F64_e32_17]], implicit [[V_CVT_I32_F64_e32_18]], implicit [[V_CVT_I32_F64_e32_19]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_20]], implicit [[V_CVT_I32_F64_e32_21]], implicit [[V_CVT_I32_F64_e32_22]], implicit [[V_CVT_I32_F64_e32_23]], implicit [[V_CVT_I32_F64_e32_24]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_25]], implicit [[V_CVT_I32_F64_e32_26]], implicit [[V_CVT_I32_F64_e32_27]], implicit [[V_CVT_I32_F64_e32_28]], implicit [[V_CVT_I32_F64_e32_29]]
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_32:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 32, implicit $exec, implicit $mode
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_30]], implicit [[V_CVT_I32_F64_e32_31]], implicit [[V_CVT_I32_F64_e32_32]]
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.2:
@@ -7055,14 +7056,14 @@ body: |
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_25]], implicit [[V_CVT_I32_F64_e32_26]], implicit [[V_CVT_I32_F64_e32_27]], implicit [[V_CVT_I32_F64_e32_28]], implicit [[V_CVT_I32_F64_e32_29]]
; GFX908-NEXT: [[V_CVT_I32_F64_e32_32:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 32, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_30]], implicit [[V_CVT_I32_F64_e32_31]], implicit [[V_CVT_I32_F64_e32_32]]
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_33:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 33, implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_34:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 34, implicit $exec, implicit $mode
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.2:
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_]], implicit [[V_CVT_I32_F64_e32_1]], implicit [[V_CVT_I32_F64_e32_2]], implicit [[V_CVT_I32_F64_e32_3]], implicit [[V_CVT_I32_F64_e32_4]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_5]], implicit [[V_CVT_I32_F64_e32_6]], implicit [[V_CVT_I32_F64_e32_7]], implicit [[V_CVT_I32_F64_e32_8]], implicit [[V_CVT_I32_F64_e32_9]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_10]], implicit [[V_CVT_I32_F64_e32_11]], implicit [[V_CVT_I32_F64_e32_12]], implicit [[V_CVT_I32_F64_e32_13]], implicit [[V_CVT_I32_F64_e32_14]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_15]], implicit [[V_CVT_I32_F64_e32_16]], implicit [[V_CVT_I32_F64_e32_17]], implicit [[V_CVT_I32_F64_e32_18]], implicit [[V_CVT_I32_F64_e32_19]]
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_33:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 33, implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_34:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 34, implicit $exec, implicit $mode
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_20]], implicit [[V_CVT_I32_F64_e32_21]], implicit [[V_CVT_I32_F64_e32_22]], implicit [[V_CVT_I32_F64_e32_23]], implicit [[V_CVT_I32_F64_e32_24]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_25]], implicit [[V_CVT_I32_F64_e32_26]], implicit [[V_CVT_I32_F64_e32_27]], implicit [[V_CVT_I32_F64_e32_28]], implicit [[V_CVT_I32_F64_e32_29]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_30]], implicit [[V_CVT_I32_F64_e32_31]]
@@ -7179,6 +7180,9 @@ body: |
; GFX908-NEXT: bb.1:
; GFX908-NEXT: successors: %bb.2(0x80000000)
; GFX908-NEXT: {{ $}}
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_32:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 32, implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_33:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 33, implicit $exec, implicit $mode
+ ; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.2:
; GFX908-NEXT: successors: %bb.3(0x80000000)
; GFX908-NEXT: {{ $}}
@@ -7188,8 +7192,6 @@ body: |
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_15]], implicit [[V_CVT_I32_F64_e32_16]], implicit [[V_CVT_I32_F64_e32_17]], implicit [[V_CVT_I32_F64_e32_18]], implicit [[V_CVT_I32_F64_e32_19]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_20]], implicit [[V_CVT_I32_F64_e32_21]], implicit [[V_CVT_I32_F64_e32_22]], implicit [[V_CVT_I32_F64_e32_23]], implicit [[V_CVT_I32_F64_e32_24]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_25]], implicit [[V_CVT_I32_F64_e32_26]], implicit [[V_CVT_I32_F64_e32_27]], implicit [[V_CVT_I32_F64_e32_28]], implicit [[V_CVT_I32_F64_e32_29]]
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_32:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 32, implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_33:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 33, implicit $exec, implicit $mode
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_30]], implicit [[V_CVT_I32_F64_e32_31]], implicit [[V_CVT_I32_F64_e32_32]], implicit [[V_CVT_I32_F64_e32_33]]
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.3:
@@ -7280,9 +7282,10 @@ body: |
; GFX908-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr0_sgpr1
; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
; GFX908-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 52, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4)
+ ; GFX908-NEXT: undef [[S_MOV_B32_:%[0-9]+]].sub1:sreg_64 = S_MOV_B32 0
; GFX908-NEXT: $vgpr8 = IMPLICIT_DEF
; GFX908-NEXT: $vgpr9 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: dead [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
@@ -7299,61 +7302,60 @@ body: |
; GFX908-NEXT: [[DEF14:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF15:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: dead [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF16]], implicit $exec, implicit $mode
; GFX908-NEXT: [[DEF17:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: dead [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF17]], implicit $exec, implicit $mode
; GFX908-NEXT: [[DEF18:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF19:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: dead [[V_CMP_GT_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_GT_U32_e64 [[S_LOAD_DWORDX2_IMM]].sub0, [[COPY1]](s32), implicit $exec
+ ; GFX908-NEXT: dead undef [[S_MOV_B32_:%[0-9]+]].sub0:sreg_64 = COPY [[S_LOAD_DWORDX2_IMM]].sub1
; GFX908-NEXT: [[V_CVT_I32_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 $vgpr8, implicit $exec, implicit $mode
; GFX908-NEXT: [[V_CVT_I32_F32_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 $vgpr9, implicit $exec, implicit $mode
- ; GFX908-NEXT: dead [[V_CMP_GT_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_GT_U32_e64 [[S_LOAD_DWORDX2_IMM]].sub0, [[COPY1]](s32), implicit $exec
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_3:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_4:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF1]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_5:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF2]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_6:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF3]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_7:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF4]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_8:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF5]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_9:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF6]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_10:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF7]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_11:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF8]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_12:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF9]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_13:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF10]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_14:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF11]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_15:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF12]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_16:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF13]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_17:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF14]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_18:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF15]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_19:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF17]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_20:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF18]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[DEF19:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_21:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF19]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_3:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF1]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_4:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF2]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_5:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF3]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_6:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF4]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_7:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF5]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_8:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF6]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_9:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF7]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_10:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF8]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_11:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF9]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_12:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF10]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_13:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF11]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_14:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF12]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_15:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF13]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_16:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF14]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_17:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF15]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_18:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF16]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_19:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF18]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_20:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF19]], implicit $exec, implicit $mode
; GFX908-NEXT: [[DEF20:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF20]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_21:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF20]], implicit $exec, implicit $mode
; GFX908-NEXT: [[DEF21:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF21]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF21]], implicit $exec, implicit $mode
; GFX908-NEXT: [[DEF22:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF22]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF22]], implicit $exec, implicit $mode
; GFX908-NEXT: [[DEF23:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF23]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF23]], implicit $exec, implicit $mode
; GFX908-NEXT: [[DEF24:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_26:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF24]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF24]], implicit $exec, implicit $mode
; GFX908-NEXT: [[DEF25:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF25]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_26:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF25]], implicit $exec, implicit $mode
; GFX908-NEXT: [[DEF26:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF26]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF26]], implicit $exec, implicit $mode
; GFX908-NEXT: [[DEF27:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF27]], implicit $exec, implicit $mode
- ; GFX908-NEXT: undef [[S_MOV_B32_:%[0-9]+]].sub1:sreg_64 = S_MOV_B32 0
- ; GFX908-NEXT: dead [[DEF28:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: dead undef [[S_MOV_B32_:%[0-9]+]].sub0:sreg_64 = COPY [[S_LOAD_DWORDX2_IMM]].sub1
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF27]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF28:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF28]], implicit $exec, implicit $mode
; GFX908-NEXT: [[DEF29:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF29]], implicit $exec, implicit $mode
; GFX908-NEXT: [[DEF30:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GFX908-NEXT: S_BRANCH %bb.1
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.1:
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_3]], implicit [[V_CVT_I32_F32_e32_11]]
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF29]], implicit $exec, implicit $mode
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_1]], implicit [[V_CVT_I32_F32_e32_30]], implicit [[DEF29]]
; GFX908-NEXT: [[V_CVT_I32_F32_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF30]], implicit $exec, implicit $mode
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_2]], implicit [[V_CVT_I32_F32_e32_31]], implicit [[DEF30]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_1]], implicit [[V_CVT_I32_F32_e32_31]], implicit [[DEF30]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_2]], implicit [[V_CVT_I32_F32_e32_30]], implicit [[DEF29]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_4]], implicit [[V_CVT_I32_F32_e32_12]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_5]], implicit [[V_CVT_I32_F32_e32_13]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_6]], implicit [[V_CVT_I32_F32_e32_14]]
@@ -7480,7 +7482,8 @@ body: |
; GFX908-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr0_sgpr1
; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
; GFX908-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 52, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4)
- ; GFX908-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: undef [[S_MOV_B32_:%[0-9]+]].sub1:sreg_64 = S_MOV_B32 0
+ ; GFX908-NEXT: dead [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
@@ -7497,65 +7500,64 @@ body: |
; GFX908-NEXT: [[DEF14:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF15:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: dead [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF16]], implicit $exec, implicit $mode
; GFX908-NEXT: [[DEF17:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: dead [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF17]], implicit $exec, implicit $mode
; GFX908-NEXT: [[DEF18:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF1]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_3:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF2]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_4:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF3]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_5:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF4]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_6:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF5]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_7:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF6]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_8:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF7]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_9:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF8]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_10:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF9]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_11:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF10]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_12:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF11]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_13:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF12]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_14:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF13]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_15:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF14]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_16:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF15]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_17:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF17]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_18:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF18]], implicit $exec, implicit $mode
; GFX908-NEXT: [[DEF19:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_19:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF19]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: dead [[V_CMP_GT_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_GT_U32_e64 [[S_LOAD_DWORDX2_IMM]].sub0, [[COPY1]](s32), implicit $exec
+ ; GFX908-NEXT: dead undef [[S_MOV_B32_:%[0-9]+]].sub0:sreg_64 = COPY [[S_LOAD_DWORDX2_IMM]].sub1
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF1]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF2]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_3:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF3]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_4:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF4]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_5:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF5]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_6:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF6]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_7:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF7]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_8:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF8]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_9:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF9]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_10:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF10]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_11:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF11]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_12:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF12]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_13:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF13]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_14:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF14]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_15:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF15]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_16:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF16]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_17:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF18]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_18:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF19]], implicit $exec, implicit $mode
; GFX908-NEXT: [[DEF20:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_20:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF20]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_19:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF20]], implicit $exec, implicit $mode
; GFX908-NEXT: [[DEF21:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_21:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF21]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_20:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF21]], implicit $exec, implicit $mode
; GFX908-NEXT: [[DEF22:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF22]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_21:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF22]], implicit $exec, implicit $mode
; GFX908-NEXT: [[DEF23:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF23]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF23]], implicit $exec, implicit $mode
; GFX908-NEXT: [[DEF24:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF24]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF24]], implicit $exec, implicit $mode
; GFX908-NEXT: [[DEF25:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF25]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF25]], implicit $exec, implicit $mode
; GFX908-NEXT: [[DEF26:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_26:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF26]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF26]], implicit $exec, implicit $mode
; GFX908-NEXT: [[DEF27:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF27]], implicit $exec, implicit $mode
- ; GFX908-NEXT: undef [[S_MOV_B32_:%[0-9]+]].sub1:sreg_64 = S_MOV_B32 0
- ; GFX908-NEXT: dead [[DEF28:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: dead [[V_CMP_GT_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_GT_U32_e64 [[S_LOAD_DWORDX2_IMM]].sub0, [[COPY1]](s32), implicit $exec
- ; GFX908-NEXT: dead undef [[S_MOV_B32_:%[0-9]+]].sub0:sreg_64 = COPY [[S_LOAD_DWORDX2_IMM]].sub1
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_26:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF27]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF28:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF28]], implicit $exec, implicit $mode
; GFX908-NEXT: [[DEF29:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GFX908-NEXT: $vgpr8 = IMPLICIT_DEF
; GFX908-NEXT: $vgpr9 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF30:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 2, implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF29]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF30]], implicit $exec, implicit $mode
; GFX908-NEXT: S_BRANCH %bb.1
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.1:
; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 255
; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 [[S_MOV_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_1]], implicit [[V_CVT_I32_F32_e32_9]]
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 1, implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF29]], implicit $exec, implicit $mode
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_28]], implicit [[V_CVT_I32_F32_e32_29]], implicit [[DEF29]]
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 2, implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF30]], implicit $exec, implicit $mode
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_30]], implicit [[V_CVT_I32_F32_e32_31]], implicit [[DEF30]]
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 1, implicit $exec, implicit $mode
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_31]], implicit [[V_CVT_I32_F32_e32_29]], implicit [[DEF29]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_28]], implicit [[V_CVT_I32_F32_e32_30]], implicit [[DEF30]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_2]], implicit [[V_CVT_I32_F32_e32_10]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_3]], implicit [[V_CVT_I32_F32_e32_11]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_4]], implicit [[V_CVT_I32_F32_e32_12]]
@@ -7736,32 +7738,32 @@ body: |
; GFX908-NEXT: [[DEF25:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF26:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF27:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: dead [[V_CMP_GT_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_GT_U32_e64 [[S_LOAD_DWORDX2_IMM]].sub0, [[COPY1]](s32), implicit $exec
; GFX908-NEXT: [[DEF28:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF29:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF22]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF23]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF24]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF25]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_26:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF26]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF27]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: dead [[V_CMP_GT_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_GT_U32_e64 [[S_LOAD_DWORDX2_IMM]].sub0, [[COPY1]](s32), implicit $exec
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF28]], implicit $exec, implicit $mode
; GFX908-NEXT: undef [[S_MOV_B32_:%[0-9]+]].sub1:sreg_64 = S_MOV_B32 0
; GFX908-NEXT: dead undef [[S_MOV_B32_:%[0-9]+]].sub0:sreg_64 = COPY [[S_LOAD_DWORDX2_IMM]].sub1
- ; GFX908-NEXT: [[DEF29:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF29]], implicit $exec, implicit $mode
; GFX908-NEXT: [[DEF30:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF31:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GFX908-NEXT: S_BRANCH %bb.1
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.1:
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_1]], implicit [[V_CVT_I32_F32_e32_6]]
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF22]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF27]], implicit $exec, implicit $mode
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_22]], implicit [[V_CVT_I32_F32_e32_23]], implicit [[DEF22]], implicit [[DEF27]]
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF23]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF28]], implicit $exec, implicit $mode
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_24]], implicit [[V_CVT_I32_F32_e32_25]], implicit [[DEF23]], implicit [[DEF28]]
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_26:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF24]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF29]], implicit $exec, implicit $mode
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_26]], implicit [[V_CVT_I32_F32_e32_27]], implicit [[DEF24]], implicit [[DEF29]]
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF25]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF30]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF26]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF30]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_30]], implicit [[V_CVT_I32_F32_e32_25]], implicit [[DEF30]], implicit [[DEF25]]
; GFX908-NEXT: [[V_CVT_I32_F32_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF31]], implicit $exec, implicit $mode
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_28]], implicit [[V_CVT_I32_F32_e32_29]], implicit [[DEF25]], implicit [[DEF30]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_30]], implicit [[V_CVT_I32_F32_e32_31]], implicit [[DEF26]], implicit [[DEF31]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_31]], implicit [[V_CVT_I32_F32_e32_26]], implicit [[DEF31]], implicit [[DEF26]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_22]], implicit [[V_CVT_I32_F32_e32_27]], implicit [[DEF22]], implicit [[DEF27]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_23]], implicit [[V_CVT_I32_F32_e32_28]], implicit [[DEF23]], implicit [[DEF28]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_24]], implicit [[V_CVT_I32_F32_e32_29]], implicit [[DEF24]], implicit [[DEF29]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_2]], implicit [[V_CVT_I32_F32_e32_7]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_3]], implicit [[V_CVT_I32_F32_e32_8]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_4]], implicit [[V_CVT_I32_F32_e32_9]]
@@ -8082,58 +8084,58 @@ body: |
; GFX908-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr0_sgpr1
; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
; GFX908-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 52, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4)
- ; GFX908-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: undef [[S_MOV_B32_:%[0-9]+]].sub1:sreg_64 = S_MOV_B32 0
+ ; GFX908-NEXT: undef [[DEF:%[0-9]+]].sub0:vreg_512 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF:%[0-9]+]].sub1:vreg_512 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF:%[0-9]+]].sub2:vreg_512 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF:%[0-9]+]].sub3:vreg_512 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: dead [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF2]], implicit $exec, implicit $mode
; GFX908-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: dead [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF3]], implicit $exec, implicit $mode
; GFX908-NEXT: [[DEF4:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF1]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_3:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF3]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_4:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF4]], implicit $exec, implicit $mode
; GFX908-NEXT: [[DEF5:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_5:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF5]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_6:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF6]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: dead [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: dead [[V_CMP_GT_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_GT_U32_e64 [[S_LOAD_DWORDX2_IMM]].sub0, [[COPY1]](s32), implicit $exec
+ ; GFX908-NEXT: dead undef [[S_MOV_B32_:%[0-9]+]].sub0:sreg_64 = COPY [[S_LOAD_DWORDX2_IMM]].sub1
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF1]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF2]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_3:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF4]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_4:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF5]], implicit $exec, implicit $mode
; GFX908-NEXT: [[DEF7:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_7:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF7]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_5:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF7]], implicit $exec, implicit $mode
; GFX908-NEXT: [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_8:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF8]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_6:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF8]], implicit $exec, implicit $mode
; GFX908-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_9:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF9]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_7:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF9]], implicit $exec, implicit $mode
; GFX908-NEXT: [[DEF10:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_10:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF10]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_8:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF10]], implicit $exec, implicit $mode
; GFX908-NEXT: [[DEF11:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_11:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF11]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_9:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF11]], implicit $exec, implicit $mode
; GFX908-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_12:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF12]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_10:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF12]], implicit $exec, implicit $mode
; GFX908-NEXT: [[DEF13:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_13:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF13]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_11:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF13]], implicit $exec, implicit $mode
; GFX908-NEXT: [[DEF14:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_14:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF14]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_12:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF14]], implicit $exec, implicit $mode
; GFX908-NEXT: [[DEF15:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_15:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF15]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_13:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF15]], implicit $exec, implicit $mode
; GFX908-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_16:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF16]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_14:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF16]], implicit $exec, implicit $mode
; GFX908-NEXT: [[DEF17:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_17:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF17]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_15:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF17]], implicit $exec, implicit $mode
; GFX908-NEXT: [[DEF18:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_18:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF18]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_16:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF18]], implicit $exec, implicit $mode
; GFX908-NEXT: [[DEF19:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_19:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF19]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_17:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF19]], implicit $exec, implicit $mode
; GFX908-NEXT: [[DEF20:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_20:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF20]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_18:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF20]], implicit $exec, implicit $mode
; GFX908-NEXT: [[DEF21:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_21:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF21]], implicit $exec, implicit $mode
- ; GFX908-NEXT: undef [[S_MOV_B32_:%[0-9]+]].sub1:sreg_64 = S_MOV_B32 0
- ; GFX908-NEXT: undef [[DEF22:%[0-9]+]].sub1:vreg_512 = IMPLICIT_DEF
- ; GFX908-NEXT: dead [[V_CMP_GT_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_GT_U32_e64 [[S_LOAD_DWORDX2_IMM]].sub0, [[COPY1]](s32), implicit $exec
- ; GFX908-NEXT: undef [[DEF22:%[0-9]+]].sub0:vreg_512 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF22:%[0-9]+]].sub2:vreg_512 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF22:%[0-9]+]].sub3:vreg_512 = IMPLICIT_DEF
- ; GFX908-NEXT: dead [[DEF23:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: dead undef [[S_MOV_B32_:%[0-9]+]].sub0:sreg_64 = COPY [[S_LOAD_DWORDX2_IMM]].sub1
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_19:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF21]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF22:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_20:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF22]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF23:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_21:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF23]], implicit $exec, implicit $mode
; GFX908-NEXT: [[DEF24:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF25:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF26:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
@@ -8142,25 +8144,25 @@ body: |
; GFX908-NEXT: [[DEF29:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF30:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF31:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF24]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF25]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF26]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF27]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_26:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF]].sub2, implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF28]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF29]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF30]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF31]], implicit $exec, implicit $mode
; GFX908-NEXT: S_BRANCH %bb.1
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.1:
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_1]], implicit [[V_CVT_I32_F32_e32_6]]
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF22]].sub0, implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF22]].sub2, implicit $exec, implicit $mode
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_22]], implicit [[V_CVT_I32_F32_e32_23]], implicit [[DEF22]].sub0, implicit [[DEF22]].sub2
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF24]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF28]], implicit $exec, implicit $mode
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_24]], implicit [[V_CVT_I32_F32_e32_25]], implicit [[DEF24]], implicit [[DEF28]]
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_26:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF25]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF29]], implicit $exec, implicit $mode
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_26]], implicit [[V_CVT_I32_F32_e32_27]], implicit [[DEF25]], implicit [[DEF29]]
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF26]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF30]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF27]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF31]], implicit $exec, implicit $mode
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_28]], implicit [[V_CVT_I32_F32_e32_29]], implicit [[DEF26]], implicit [[DEF30]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_30]], implicit [[V_CVT_I32_F32_e32_31]], implicit [[DEF27]], implicit [[DEF31]]
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF]].sub0, implicit $exec, implicit $mode
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_31]], implicit [[V_CVT_I32_F32_e32_26]], implicit [[DEF]].sub0, implicit [[DEF]].sub2
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_22]], implicit [[V_CVT_I32_F32_e32_27]], implicit [[DEF24]], implicit [[DEF28]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_23]], implicit [[V_CVT_I32_F32_e32_28]], implicit [[DEF25]], implicit [[DEF29]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_24]], implicit [[V_CVT_I32_F32_e32_29]], implicit [[DEF26]], implicit [[DEF30]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_25]], implicit [[V_CVT_I32_F32_e32_30]], implicit [[DEF27]], implicit [[DEF31]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_2]], implicit [[V_CVT_I32_F32_e32_7]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_3]], implicit [[V_CVT_I32_F32_e32_8]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_4]], implicit [[V_CVT_I32_F32_e32_9]]
@@ -8553,24 +8555,24 @@ body: |
; GFX908-NEXT: [[DEF30:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF31:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[V_CVT_I32_F32_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF25]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF29]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF30]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_26:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF31]], implicit $exec, implicit $mode
; GFX908-NEXT: undef [[V_CVT_I32_F32_e32_1:%[0-9]+]].sub0:vreg_64 = nofpexcept V_CVT_I32_F32_e32 [[DEF]].sub0, implicit $exec, implicit $mode
; GFX908-NEXT: S_BRANCH %bb.1
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.1:
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_2]], implicit [[V_CVT_I32_F32_e32_7]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_3]], implicit [[V_CVT_I32_F32_e32_8]]
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF]].sub2, implicit $exec, implicit $mode
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_1]].sub0, implicit [[V_CVT_I32_F32_e32_24]], implicit [[DEF]].sub0, implicit [[DEF]].sub2
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF28]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_26:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF29]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF26]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF30]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF27]], implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F32_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF31]], implicit $exec, implicit $mode
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_23]], implicit [[V_CVT_I32_F32_e32_25]], implicit [[DEF1]], implicit [[DEF28]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_23]], implicit [[V_CVT_I32_F32_e32_26]], implicit [[DEF25]], implicit [[DEF29]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_27]], implicit [[V_CVT_I32_F32_e32_28]], implicit [[DEF26]], implicit [[DEF30]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_29]], implicit [[V_CVT_I32_F32_e32_30]], implicit [[DEF27]], implicit [[DEF31]]
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF]].sub2, implicit $exec, implicit $mode
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_1]].sub0, implicit [[V_CVT_I32_F32_e32_27]], implicit [[DEF]].sub0, implicit [[DEF]].sub2
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF28]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF26]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF27]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_23]], implicit [[V_CVT_I32_F32_e32_28]], implicit [[DEF1]], implicit [[DEF28]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_23]], implicit [[V_CVT_I32_F32_e32_24]], implicit [[DEF25]], implicit [[DEF29]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_29]], implicit [[V_CVT_I32_F32_e32_25]], implicit [[DEF26]], implicit [[DEF30]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_30]], implicit [[V_CVT_I32_F32_e32_26]], implicit [[DEF27]], implicit [[DEF31]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_4]], implicit [[V_CVT_I32_F32_e32_9]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_5]], implicit [[V_CVT_I32_F32_e32_10]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_6]], implicit [[V_CVT_I32_F32_e32_11]]
@@ -8739,6 +8741,7 @@ body: |
; GFX908-NEXT: [[V_CVT_I32_F64_e32_41:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 51, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_42:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 52, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: undef [[V_RCP_F32_e32_:%[0-9]+]].sub0:vreg_64_align2 = nnan ninf nsz arcp contract afn reassoc nofpexcept V_RCP_F32_e32 [[DEF]].sub0, implicit $mode, implicit $exec
+ ; GFX908-NEXT: [[DEF2:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.1:
; GFX908-NEXT: successors: %bb.2(0x80000000)
@@ -8755,9 +8758,8 @@ body: |
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_9]], implicit [[V_CVT_I32_F64_e32_19]], implicit [[V_CVT_I32_F64_e32_29]], implicit [[V_CVT_I32_F64_e32_39]]
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.2:
- ; GFX908-NEXT: dead [[DEF2:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF3:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
- ; GFX908-NEXT: undef [[V_FMA_F32_e64_:%[0-9]+]].sub0:vreg_64_align2 = nnan ninf nsz arcp contract afn reassoc nofpexcept V_FMA_F32_e64 0, [[DEF3]].sub1, 0, [[V_RCP_F32_e32_]].sub0, 0, [[DEF3]].sub0, 0, 0, implicit $mode, implicit $exec
+ ; GFX908-NEXT: dead [[DEF3:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+ ; GFX908-NEXT: undef [[V_FMA_F32_e64_:%[0-9]+]].sub0:vreg_64_align2 = nnan ninf nsz arcp contract afn reassoc nofpexcept V_FMA_F32_e64 0, [[DEF2]].sub1, 0, [[V_RCP_F32_e32_]].sub0, 0, [[DEF2]].sub0, 0, 0, implicit $mode, implicit $exec
; GFX908-NEXT: %temp2:vreg_64_align2 = IMPLICIT_DEF
; GFX908-NEXT: [[V_PK_MUL_F32_:%[0-9]+]]:vreg_64_align2 = nnan ninf nsz arcp contract afn reassoc nofpexcept V_PK_MUL_F32 0, [[V_RCP_F32_e32_]], 8, [[DEF]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec
; GFX908-NEXT: dead [[V_PK_FMA_F32_:%[0-9]+]]:vreg_64_align2 = nnan ninf nsz arcp contract afn reassoc nofpexcept V_PK_FMA_F32 0, [[V_FMA_F32_e64_]], 8, %temp2, 11, [[V_PK_MUL_F32_]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec
diff --git a/llvm/test/CodeGen/AMDGPU/machine-sink-temporal-divergence-swdev407790.ll b/llvm/test/CodeGen/AMDGPU/machine-sink-temporal-divergence-swdev407790.ll
index ff43609bfec53..680942fcb4d4b 100644
--- a/llvm/test/CodeGen/AMDGPU/machine-sink-temporal-divergence-swdev407790.ll
+++ b/llvm/test/CodeGen/AMDGPU/machine-sink-temporal-divergence-swdev407790.ll
@@ -221,11 +221,11 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
; CHECK-NEXT: s_mov_b32 s12, s51
; CHECK-NEXT: s_mov_b32 s13, s50
; CHECK-NEXT: s_mov_b32 s14, s33
+; CHECK-NEXT: v_add_nc_u32_e32 v60, 1, v58
; CHECK-NEXT: v_add_nc_u32_e32 v47, 1, v47
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
; CHECK-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; CHECK-NEXT: v_add_nc_u32_e32 v1, 1, v58
-; CHECK-NEXT: ds_write_b32 v0, v1
+; CHECK-NEXT: ds_write_b32 v0, v60
; CHECK-NEXT: .LBB0_12: ; in Loop: Header=BB0_8 Depth=2
; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s81
; CHECK-NEXT: ds_read_u8 v0, v59 offset:2
@@ -247,11 +247,11 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
; CHECK-NEXT: s_mov_b32 s12, s51
; CHECK-NEXT: s_mov_b32 s13, s50
; CHECK-NEXT: s_mov_b32 s14, s33
+; CHECK-NEXT: v_add_nc_u32_e32 v60, 2, v58
; CHECK-NEXT: v_add_nc_u32_e32 v47, 1, v47
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
; CHECK-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; CHECK-NEXT: v_add_nc_u32_e32 v1, 2, v58
-; CHECK-NEXT: ds_write_b32 v0, v1
+; CHECK-NEXT: ds_write_b32 v0, v60
; CHECK-NEXT: .LBB0_14: ; in Loop: Header=BB0_8 Depth=2
; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s81
; CHECK-NEXT: ds_read_u8 v0, v59 offset:3
@@ -790,6 +790,7 @@ define protected amdgpu_kernel void @kernel_round1_short(ptr addrspace(1) nocapt
; CHECK-NEXT: s_addc_u32 s13, s13, 0
; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s12
; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s13
+; CHECK-NEXT: s_load_dwordx2 s[54:55], s[8:9], 0x10
; CHECK-NEXT: s_add_u32 s0, s0, s17
; CHECK-NEXT: s_mov_b64 s[38:39], s[8:9]
; CHECK-NEXT: s_addc_u32 s1, s1, 0
@@ -843,24 +844,22 @@ define protected amdgpu_kernel void @kernel_round1_short(ptr addrspace(1) nocapt
; CHECK-NEXT: ds_write_b32 v43, v43 offset:15360
; CHECK-NEXT: v_add_nc_u32_e32 v44, 0x3c04, v46
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
-; CHECK-NEXT: s_load_dwordx2 s[4:5], s[38:39], 0x10
; CHECK-NEXT: v_lshrrev_b32_e32 v0, 1, v42
; CHECK-NEXT: v_lshlrev_b32_e32 v1, 2, v42
; CHECK-NEXT: v_mov_b32_e32 v31, v40
; CHECK-NEXT: s_getpc_b64 s[16:17]
; CHECK-NEXT: s_add_u32 s16, s16, _Z3minjj@rel32@lo+4
; CHECK-NEXT: s_addc_u32 s17, s17, _Z3minjj@rel32@hi+12
-; CHECK-NEXT: s_mov_b64 s[6:7], s[36:37]
+; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49]
; CHECK-NEXT: v_and_b32_e32 v0, 0x7ffffffc, v0
; CHECK-NEXT: v_and_b32_e32 v1, 28, v1
+; CHECK-NEXT: s_mov_b64 s[6:7], s[36:37]
; CHECK-NEXT: s_mov_b64 s[8:9], s[52:53]
; CHECK-NEXT: s_mov_b64 s[10:11], s[34:35]
+; CHECK-NEXT: global_load_dword v0, v0, s[54:55]
; CHECK-NEXT: s_mov_b32 s12, s51
; CHECK-NEXT: s_mov_b32 s13, s50
; CHECK-NEXT: s_mov_b32 s14, s33
-; CHECK-NEXT: s_waitcnt lgkmcnt(0)
-; CHECK-NEXT: global_load_dword v0, v0, s[4:5]
-; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49]
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: v_bfe_u32 v0, v0, v1, 4
; CHECK-NEXT: v_mov_b32_e32 v1, 12
diff --git a/llvm/test/CodeGen/AMDGPU/sched-assert-onlydbg-value-empty-region.mir b/llvm/test/CodeGen/AMDGPU/sched-assert-onlydbg-value-empty-region.mir
index 7eb6e82226c31..2cd78062ccbd7 100644
--- a/llvm/test/CodeGen/AMDGPU/sched-assert-onlydbg-value-empty-region.mir
+++ b/llvm/test/CodeGen/AMDGPU/sched-assert-onlydbg-value-empty-region.mir
@@ -49,6 +49,7 @@ body: |
; CHECK-NEXT: dead [[GLOBAL_LOAD_DWORD5:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[DEF5]], 0, 0, implicit $exec
; CHECK-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF7:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; CHECK-NEXT: [[V_LSHLREV_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHLREV_B64_e64 2, [[DEF2]], implicit $exec
; CHECK-NEXT: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
@@ -66,7 +67,6 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: S_NOP 0, implicit [[COPY]]
- ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: S_NOP 0, implicit [[DEF8]]
; CHECK-NEXT: S_ENDPGM 0
bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/schedule-ilp.mir b/llvm/test/CodeGen/AMDGPU/schedule-ilp.mir
index 28f36161727f7..f1a8af42e6347 100644
--- a/llvm/test/CodeGen/AMDGPU/schedule-ilp.mir
+++ b/llvm/test/CodeGen/AMDGPU/schedule-ilp.mir
@@ -29,15 +29,15 @@ body: |
; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
- ; CHECK-NEXT: $exec = S_OR_B64 $exec, [[DEF]], implicit-def $scc
- ; CHECK-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[DEF1]], 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
- ; CHECK-NEXT: [[GLOBAL_LOAD_DWORD1:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[DEF1]], 4, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
- ; CHECK-NEXT: [[GLOBAL_LOAD_DWORD2:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[DEF1]], 8, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN [[DEF2]], %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
- ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN [[DEF3]], %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 0, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
+ ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: $exec = S_OR_B64 $exec, [[DEF]], implicit-def $scc
+ ; CHECK-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[DEF3]], 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
+ ; CHECK-NEXT: [[GLOBAL_LOAD_DWORD1:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[DEF3]], 4, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
+ ; CHECK-NEXT: [[GLOBAL_LOAD_DWORD2:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[DEF3]], 8, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
+ ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN [[DEF1]], %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
+ ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN [[DEF2]], %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 0, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
; CHECK-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 0, implicit $exec
; CHECK-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 -1, [[GLOBAL_LOAD_DWORD]], implicit $exec
; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec
diff --git a/llvm/test/CodeGen/AMDGPU/spill-empty-live-interval.mir b/llvm/test/CodeGen/AMDGPU/spill-empty-live-interval.mir
index dfcd90ba2a067..40be0c6b67ee9 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-empty-live-interval.mir
+++ b/llvm/test/CodeGen/AMDGPU/spill-empty-live-interval.mir
@@ -57,9 +57,9 @@ body: |
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: undef [[V_MOV_B32_e32_:%[0-9]+]].sub2:vreg_128 = V_MOV_B32_e32 0, implicit $exec
+ ; CHECK-NEXT: undef [[V_MOV_B32_e32_1:%[0-9]+]].sub2:vreg_128 = V_MOV_B32_e32 1786773504, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
- ; CHECK-NEXT: undef [[V_MOV_B32_e32_1:%[0-9]+]].sub2:vreg_128 = V_MOV_B32_e32 1786773504, implicit $exec
; CHECK-NEXT: S_NOP 0, implicit [[V_MOV_B32_e32_1]].sub2
; CHECK-NEXT: S_NOP 0, implicit undef [[V_MOV_B32_e32_]].sub0
; CHECK-NEXT: S_NOP 0, implicit [[V_MOV_B32_e32_]].sub2
diff --git a/llvm/test/CodeGen/AMDGPU/uniform-alignbit.ll b/llvm/test/CodeGen/AMDGPU/uniform-alignbit.ll
index 5de84eed410c6..fa0342bed513b 100644
--- a/llvm/test/CodeGen/AMDGPU/uniform-alignbit.ll
+++ b/llvm/test/CodeGen/AMDGPU/uniform-alignbit.ll
@@ -11,11 +11,11 @@ define amdgpu_kernel void @uniform_build_vector(i64 %in, ptr addrspace(1) %out)
; GCN-NEXT: s_mov_b32 s6, s5
; GCN-NEXT: s_mov_b32 s7, s5
; GCN-NEXT: buffer_load_dwordx4 v[0:3], off, s[4:7], 0
-; GCN-NEXT: ; sched_barrier mask(0x00000000)
; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: v_mov_b32_e32 v1, 0
+; GCN-NEXT: ; sched_barrier mask(0x00000000)
; GCN-NEXT: v_and_b32_e32 v0, 1, v0
; GCN-NEXT: v_cvt_f32_ubyte0_e32 v0, v0
-; GCN-NEXT: v_mov_b32_e32 v1, 0
; GCN-NEXT: global_store_dword v1, v0, s[2:3]
; GCN-NEXT: s_endpgm
entry:
diff --git a/llvm/test/CodeGen/AMDGPU/vni8-across-blocks.ll b/llvm/test/CodeGen/AMDGPU/vni8-across-blocks.ll
index d24d993d335df..b045c761436de 100644
--- a/llvm/test/CodeGen/AMDGPU/vni8-across-blocks.ll
+++ b/llvm/test/CodeGen/AMDGPU/vni8-across-blocks.ll
@@ -57,20 +57,20 @@ define amdgpu_kernel void @v4i8_liveout(ptr addrspace(1) %src1, ptr addrspace(1)
; GFX942: ; %bb.0: ; %entry
; GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX942-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
-; GFX942-NEXT: v_and_b32_e32 v2, 0x3ff, v0
-; GFX942-NEXT: v_lshlrev_b32_e32 v0, 2, v2
-; GFX942-NEXT: v_cmp_gt_u32_e32 vcc, 15, v2
+; GFX942-NEXT: v_and_b32_e32 v3, 0x3ff, v0
+; GFX942-NEXT: v_lshlrev_b32_e32 v1, 2, v3
+; GFX942-NEXT: v_mov_b32_e32 v0, 0
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
-; GFX942-NEXT: global_load_dword v1, v0, s[0:1]
+; GFX942-NEXT: global_load_dword v2, v1, s[0:1]
+; GFX942-NEXT: v_cmp_gt_u32_e32 vcc, 15, v3
; GFX942-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX942-NEXT: s_cbranch_execz .LBB1_2
; GFX942-NEXT: ; %bb.1: ; %bb.1
-; GFX942-NEXT: global_load_dword v1, v0, s[2:3]
+; GFX942-NEXT: global_load_dword v2, v1, s[2:3]
; GFX942-NEXT: .LBB1_2: ; %bb.2
; GFX942-NEXT: s_or_b64 exec, exec, s[0:1]
-; GFX942-NEXT: v_mov_b32_e32 v0, 0
; GFX942-NEXT: s_waitcnt vmcnt(0)
-; GFX942-NEXT: global_store_dword v0, v1, s[6:7]
+; GFX942-NEXT: global_store_dword v0, v2, s[6:7]
; GFX942-NEXT: s_endpgm
entry:
%idx = call i32 @llvm.amdgcn.workitem.id.x()
@@ -135,18 +135,18 @@ define amdgpu_kernel void @v8i8_liveout(ptr addrspace(1) %src1, ptr addrspace(1)
; GFX942: ; %bb.0: ; %entry
; GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX942-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
-; GFX942-NEXT: v_and_b32_e32 v1, 0x3ff, v0
-; GFX942-NEXT: v_lshlrev_b32_e32 v0, 3, v1
-; GFX942-NEXT: v_cmp_gt_u32_e32 vcc, 15, v1
+; GFX942-NEXT: v_and_b32_e32 v4, 0x3ff, v0
+; GFX942-NEXT: v_lshlrev_b32_e32 v1, 3, v4
+; GFX942-NEXT: v_mov_b32_e32 v0, 0
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
-; GFX942-NEXT: global_load_dwordx2 v[2:3], v0, s[0:1]
+; GFX942-NEXT: global_load_dwordx2 v[2:3], v1, s[0:1]
+; GFX942-NEXT: v_cmp_gt_u32_e32 vcc, 15, v4
; GFX942-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX942-NEXT: s_cbranch_execz .LBB3_2
; GFX942-NEXT: ; %bb.1: ; %bb.1
-; GFX942-NEXT: global_load_dwordx2 v[2:3], v0, s[2:3]
+; GFX942-NEXT: global_load_dwordx2 v[2:3], v1, s[2:3]
; GFX942-NEXT: .LBB3_2: ; %bb.2
; GFX942-NEXT: s_or_b64 exec, exec, s[0:1]
-; GFX942-NEXT: v_mov_b32_e32 v0, 0
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: global_store_dwordx2 v0, v[2:3], s[6:7]
; GFX942-NEXT: s_endpgm
@@ -172,18 +172,18 @@ define amdgpu_kernel void @v16i8_liveout(ptr addrspace(1) %src1, ptr addrspace(1
; GFX942: ; %bb.0: ; %entry
; GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX942-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
-; GFX942-NEXT: v_and_b32_e32 v1, 0x3ff, v0
-; GFX942-NEXT: v_lshlrev_b32_e32 v0, 4, v1
-; GFX942-NEXT: v_cmp_gt_u32_e32 vcc, 15, v1
+; GFX942-NEXT: v_and_b32_e32 v6, 0x3ff, v0
+; GFX942-NEXT: v_lshlrev_b32_e32 v1, 4, v6
+; GFX942-NEXT: v_mov_b32_e32 v0, 0
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
-; GFX942-NEXT: global_load_dwordx4 v[2:5], v0, s[0:1]
+; GFX942-NEXT: global_load_dwordx4 v[2:5], v1, s[0:1]
+; GFX942-NEXT: v_cmp_gt_u32_e32 vcc, 15, v6
; GFX942-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX942-NEXT: s_cbranch_execz .LBB4_2
; GFX942-NEXT: ; %bb.1: ; %bb.1
-; GFX942-NEXT: global_load_dwordx4 v[2:5], v0, s[2:3]
+; GFX942-NEXT: global_load_dwordx4 v[2:5], v1, s[2:3]
; GFX942-NEXT: .LBB4_2: ; %bb.2
; GFX942-NEXT: s_or_b64 exec, exec, s[0:1]
-; GFX942-NEXT: v_mov_b32_e32 v0, 0
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: global_store_dwordx4 v0, v[2:5], s[6:7]
; GFX942-NEXT: s_endpgm
@@ -617,30 +617,30 @@ define amdgpu_kernel void @v8i8_multi_block(ptr addrspace(1) %src1, ptr addrspac
; GFX942-LABEL: v8i8_multi_block:
; GFX942: ; %bb.0: ; %entry
; GFX942-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x24
-; GFX942-NEXT: v_and_b32_e32 v2, 0x3ff, v0
-; GFX942-NEXT: v_lshlrev_b32_e32 v3, 3, v2
-; GFX942-NEXT: v_cmp_gt_u32_e32 vcc, 15, v2
+; GFX942-NEXT: v_and_b32_e32 v3, 0x3ff, v0
+; GFX942-NEXT: v_lshlrev_b32_e32 v4, 3, v3
+; GFX942-NEXT: v_mov_b32_e32 v2, 0
+; GFX942-NEXT: v_cmp_gt_u32_e32 vcc, 15, v3
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
-; GFX942-NEXT: global_load_dwordx2 v[0:1], v3, s[8:9]
+; GFX942-NEXT: global_load_dwordx2 v[0:1], v4, s[8:9]
; GFX942-NEXT: s_waitcnt vmcnt(0)
-; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[0:1]
+; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[0:1]
; GFX942-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX942-NEXT: s_cbranch_execz .LBB11_4
; GFX942-NEXT: ; %bb.1: ; %bb.1
-; GFX942-NEXT: global_load_dwordx2 v[4:5], v3, s[10:11]
-; GFX942-NEXT: v_cmp_gt_u32_e32 vcc, 7, v2
+; GFX942-NEXT: global_load_dwordx2 v[6:7], v4, s[10:11]
+; GFX942-NEXT: v_cmp_gt_u32_e32 vcc, 7, v3
; GFX942-NEXT: s_and_saveexec_b64 s[2:3], vcc
; GFX942-NEXT: s_cbranch_execz .LBB11_3
; GFX942-NEXT: ; %bb.2: ; %bb.2
-; GFX942-NEXT: v_mov_b32_e32 v2, 0
-; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[12:13]
+; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[12:13]
; GFX942-NEXT: .LBB11_3: ; %Flow
; GFX942-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX942-NEXT: .LBB11_4: ; %bb.3
; GFX942-NEXT: s_or_b64 exec, exec, s[0:1]
-; GFX942-NEXT: v_mov_b32_e32 v0, 0
; GFX942-NEXT: s_waitcnt vmcnt(0)
-; GFX942-NEXT: global_store_dwordx2 v0, v[4:5], s[14:15]
+; GFX942-NEXT: global_store_dwordx2 v2, v[6:7], s[14:15]
; GFX942-NEXT: s_endpgm
entry:
%idx = call i32 @llvm.amdgcn.workitem.id.x()
@@ -858,19 +858,19 @@ define amdgpu_kernel void @v8i8_mfma_i8(ptr addrspace(1) %src1, ptr addrspace(1)
; GFX942-LABEL: v8i8_mfma_i8:
; GFX942: ; %bb.0: ; %entry
; GFX942-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x24
-; GFX942-NEXT: v_and_b32_e32 v1, 0x3ff, v0
-; GFX942-NEXT: v_lshlrev_b32_e32 v0, 3, v1
-; GFX942-NEXT: v_cmp_gt_u32_e32 vcc, 15, v1
+; GFX942-NEXT: v_and_b32_e32 v4, 0x3ff, v0
+; GFX942-NEXT: v_lshlrev_b32_e32 v1, 3, v4
+; GFX942-NEXT: v_mov_b32_e32 v0, 0
+; GFX942-NEXT: v_cmp_gt_u32_e32 vcc, 15, v4
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
-; GFX942-NEXT: global_load_dwordx2 v[2:3], v0, s[8:9]
+; GFX942-NEXT: global_load_dwordx2 v[2:3], v1, s[8:9]
; GFX942-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX942-NEXT: s_cbranch_execz .LBB14_2
; GFX942-NEXT: ; %bb.1: ; %bb.1
-; GFX942-NEXT: global_load_dwordx2 v[2:3], v0, s[10:11]
+; GFX942-NEXT: global_load_dwordx2 v[2:3], v1, s[10:11]
; GFX942-NEXT: .LBB14_2: ; %bb.2
; GFX942-NEXT: s_or_b64 exec, exec, s[0:1]
; GFX942-NEXT: s_load_dwordx4 s[0:3], s[14:15], 0x0
-; GFX942-NEXT: v_mov_b32_e32 v0, 0
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-NEXT: v_accvgpr_write_b32 a0, s0
; GFX942-NEXT: v_accvgpr_write_b32 a1, s1
@@ -995,20 +995,20 @@ define amdgpu_kernel void @v8i8_intrinsic(ptr addrspace(1) %src1, ptr addrspace(
; GFX942: ; %bb.0: ; %entry
; GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX942-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
-; GFX942-NEXT: v_and_b32_e32 v3, 0x3ff, v0
-; GFX942-NEXT: v_lshlrev_b32_e32 v2, 3, v3
-; GFX942-NEXT: v_cmp_gt_u32_e32 vcc, 15, v3
+; GFX942-NEXT: v_and_b32_e32 v4, 0x3ff, v0
+; GFX942-NEXT: v_lshlrev_b32_e32 v3, 3, v4
+; GFX942-NEXT: v_mov_b32_e32 v2, 0
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
-; GFX942-NEXT: global_load_dwordx2 v[0:1], v2, s[0:1]
+; GFX942-NEXT: global_load_dwordx2 v[0:1], v3, s[0:1]
+; GFX942-NEXT: v_cmp_gt_u32_e32 vcc, 15, v4
; GFX942-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX942-NEXT: s_cbranch_execz .LBB16_2
; GFX942-NEXT: ; %bb.1: ; %bb.1
-; GFX942-NEXT: global_load_dwordx2 v[0:1], v2, s[2:3]
+; GFX942-NEXT: global_load_dwordx2 v[0:1], v3, s[2:3]
; GFX942-NEXT: .LBB16_2: ; %bb.2
; GFX942-NEXT: s_or_b64 exec, exec, s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: v_pk_fma_f32 v[0:1], v[0:1], v[0:1], v[0:1]
-; GFX942-NEXT: v_mov_b32_e32 v2, 0
; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[6:7]
; GFX942-NEXT: s_endpgm
entry:
diff --git a/llvm/test/CodeGen/AMDGPU/wave32.ll b/llvm/test/CodeGen/AMDGPU/wave32.ll
index c7827ed300ffe..75db3879e7b03 100644
--- a/llvm/test/CodeGen/AMDGPU/wave32.ll
+++ b/llvm/test/CodeGen/AMDGPU/wave32.ll
@@ -2250,13 +2250,13 @@ define amdgpu_ps void @test_wqm_vote(float %a) {
; GFX1032: ; %bb.0:
; GFX1032-NEXT: v_cmp_neq_f32_e32 vcc_lo, 0, v0
; GFX1032-NEXT: s_mov_b32 s0, exec_lo
+; GFX1032-NEXT: v_mov_b32_e32 v0, 0
; GFX1032-NEXT: s_wqm_b32 s1, vcc_lo
; GFX1032-NEXT: s_andn2_b32 s1, exec_lo, s1
; GFX1032-NEXT: s_andn2_b32 s0, s0, s1
; GFX1032-NEXT: s_cbranch_scc0 .LBB44_2
; GFX1032-NEXT: ; %bb.1:
; GFX1032-NEXT: s_and_b32 exec_lo, exec_lo, s0
-; GFX1032-NEXT: v_mov_b32_e32 v0, 0
; GFX1032-NEXT: exp mrt0 off, off, off, off
; GFX1032-NEXT: s_endpgm
; GFX1032-NEXT: .LBB44_2:
@@ -2268,13 +2268,13 @@ define amdgpu_ps void @test_wqm_vote(float %a) {
; GFX1064: ; %bb.0:
; GFX1064-NEXT: v_cmp_neq_f32_e32 vcc, 0, v0
; GFX1064-NEXT: s_mov_b64 s[0:1], exec
+; GFX1064-NEXT: v_mov_b32_e32 v0, 0
; GFX1064-NEXT: s_wqm_b64 s[2:3], vcc
; GFX1064-NEXT: s_andn2_b64 s[2:3], exec, s[2:3]
; GFX1064-NEXT: s_andn2_b64 s[0:1], s[0:1], s[2:3]
; GFX1064-NEXT: s_cbranch_scc0 .LBB44_2
; GFX1064-NEXT: ; %bb.1:
; GFX1064-NEXT: s_and_b64 exec, exec, s[0:1]
-; GFX1064-NEXT: v_mov_b32_e32 v0, 0
; GFX1064-NEXT: exp mrt0 off, off, off, off
; GFX1064-NEXT: s_endpgm
; GFX1064-NEXT: .LBB44_2:
>From d452fa98da0d9162a3b7473d8d5b08d09edb078f Mon Sep 17 00:00:00 2001
From: Lucas Ramirez <lucas.rami at proton.me>
Date: Tue, 7 Oct 2025 15:34:40 +0000
Subject: [PATCH 09/10] Fix tests
---
.../AMDGPU/buffer-fat-pointers-memcpy.ll | 12 +++++-----
...ne-scheduler-rematerialization-scoring.mir | 23 -------------------
...dgpu_generated_funcs.ll.generated.expected | 8 +++----
...pu_generated_funcs.ll.nogenerated.expected | 8 +++----
4 files changed, 12 insertions(+), 39 deletions(-)
diff --git a/llvm/test/CodeGen/AMDGPU/buffer-fat-pointers-memcpy.ll b/llvm/test/CodeGen/AMDGPU/buffer-fat-pointers-memcpy.ll
index 6780ca55423e9..fd301d3c34a0f 100644
--- a/llvm/test/CodeGen/AMDGPU/buffer-fat-pointers-memcpy.ll
+++ b/llvm/test/CodeGen/AMDGPU/buffer-fat-pointers-memcpy.ll
@@ -981,11 +981,11 @@ define amdgpu_kernel void @memcpy_known_medium(ptr addrspace(7) %src, ptr addrsp
; GISEL-GFX942-NEXT: s_mov_b32 s2, s7
; GISEL-GFX942-NEXT: s_waitcnt lgkmcnt(0)
; GISEL-GFX942-NEXT: s_or_b64 s[6:7], s[6:7], s[2:3]
-; GISEL-GFX942-NEXT: v_mov_b32_e32 v0, 0x100
-; GISEL-GFX942-NEXT: v_mov_b32_e32 v1, s16
+; GISEL-GFX942-NEXT: v_mov_b32_e32 v0, s16
+; GISEL-GFX942-NEXT: v_mov_b32_e32 v1, 0x100
; GISEL-GFX942-NEXT: .LBB1_1: ; %load-store-loop
; GISEL-GFX942-NEXT: ; =>This Inner Loop Header: Depth=1
-; GISEL-GFX942-NEXT: v_add_u32_e32 v62, s0, v1
+; GISEL-GFX942-NEXT: v_add_u32_e32 v62, s0, v0
; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[2:5], v62, s[8:11], 0 offen
; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[6:9], v62, s[8:11], 0 offen offset:16
; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[10:13], v62, s[8:11], 0 offen offset:32
@@ -1002,9 +1002,9 @@ define amdgpu_kernel void @memcpy_known_medium(ptr addrspace(7) %src, ptr addrsp
; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[54:57], v62, s[8:11], 0 offen offset:208
; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[58:61], v62, s[8:11], 0 offen offset:224
; GISEL-GFX942-NEXT: buffer_load_dwordx4 a[0:3], v62, s[8:11], 0 offen offset:240
-; GISEL-GFX942-NEXT: v_add_u32_e32 v63, s12, v1
-; GISEL-GFX942-NEXT: v_add_u32_e32 v1, 0x100, v1
-; GISEL-GFX942-NEXT: v_cmp_lt_u32_e32 vcc, v1, v0
+; GISEL-GFX942-NEXT: v_add_u32_e32 v63, s12, v0
+; GISEL-GFX942-NEXT: v_add_u32_e32 v0, 0x100, v0
+; GISEL-GFX942-NEXT: v_cmp_lt_u32_e32 vcc, v0, v1
; GISEL-GFX942-NEXT: s_waitcnt vmcnt(0)
; GISEL-GFX942-NEXT: scratch_store_dwordx4 off, a[0:3], off ; 16-byte Folded Spill
; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[2:5], v63, s[4:7], 0 offen
diff --git a/llvm/test/CodeGen/AMDGPU/machine-scheduler-rematerialization-scoring.mir b/llvm/test/CodeGen/AMDGPU/machine-scheduler-rematerialization-scoring.mir
index e3ebc4e9789c4..0bfcb638038fd 100644
--- a/llvm/test/CodeGen/AMDGPU/machine-scheduler-rematerialization-scoring.mir
+++ b/llvm/test/CodeGen/AMDGPU/machine-scheduler-rematerialization-scoring.mir
@@ -28,29 +28,6 @@
# | 5 |
# +---+
---- |
- define void @small_num_sgprs_as_spill() "amdgpu-num-sgpr"="85" {
- ret void
- }
- define void @small_num_vgprs_as_spill() "amdgpu-num-vgpr"="14" {
- ret void
- }
- define void @dont_remat_waves_per_eu() "amdgpu-flat-work-group-size"="1024,1024" "amdgpu-waves-per-eu"="7,7" {
- ret void
- }
- define void @dont_remat_at_max_occ() "amdgpu-waves-per-eu"="8,8" {
- ret void
- }
- define void @reduce_arch_and_acc_vgrp_spill() "amdgpu-waves-per-eu"="8,8" {
- ret void
- }
- define void @reduce_spill_archvgpr_above_addressable_limit() "amdgpu-flat-work-group-size"="1,64" "amdgpu-waves-per-eu"="1,2" {
- ret void
- }
- define void @reduce_spill_agpr_above_addressable_limit() "amdgpu-flat-work-group-size"="1,64" "amdgpu-waves-per-eu"="1,2" {
- ret void
- }
----
# %32's defining and using region frequencies are identical, so it is the
# best register to rematerialize.
name: favor_same_frequency
diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/amdgpu_generated_funcs.ll.generated.expected b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/amdgpu_generated_funcs.ll.generated.expected
index bf8f3655be118..429bee4195fa9 100644
--- a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/amdgpu_generated_funcs.ll.generated.expected
+++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/amdgpu_generated_funcs.ll.generated.expected
@@ -83,11 +83,9 @@ attributes #0 = { noredzone nounwind ssp uwtable "frame-pointer"="all" }
; CHECK-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:8
; CHECK-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:12
; CHECK-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:16
-; CHECK-NEXT: s_mov_b64 s[4:5], exec
-; CHECK-NEXT: s_mov_b64 s[6:7], 0
-; CHECK-NEXT: s_and_b64 s[6:7], s[4:5], s[6:7]
-; CHECK-NEXT: s_xor_b64 s[4:5], s[6:7], s[4:5]
-; CHECK-NEXT: s_mov_b64 exec, s[6:7]
+; CHECK-NEXT: s_mov_b64 s[4:5], 0
+; CHECK-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]
+; CHECK-NEXT: s_xor_b64 s[4:5], exec, s[6:7]
; CHECK-NEXT: s_cbranch_execz .LBB0_2
; CHECK-NEXT: ; %bb.1:
; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:4
diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/amdgpu_generated_funcs.ll.nogenerated.expected b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/amdgpu_generated_funcs.ll.nogenerated.expected
index f558c88505e85..842fd8836da7e 100644
--- a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/amdgpu_generated_funcs.ll.nogenerated.expected
+++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/amdgpu_generated_funcs.ll.nogenerated.expected
@@ -24,11 +24,9 @@ define dso_local i32 @check_boundaries() #0 {
; CHECK-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:8
; CHECK-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:12
; CHECK-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:16
-; CHECK-NEXT: s_mov_b64 s[4:5], exec
-; CHECK-NEXT: s_mov_b64 s[6:7], 0
-; CHECK-NEXT: s_and_b64 s[6:7], s[4:5], s[6:7]
-; CHECK-NEXT: s_xor_b64 s[4:5], s[6:7], s[4:5]
-; CHECK-NEXT: s_mov_b64 exec, s[6:7]
+; CHECK-NEXT: s_mov_b64 s[4:5], 0
+; CHECK-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]
+; CHECK-NEXT: s_xor_b64 s[4:5], exec, s[6:7]
; CHECK-NEXT: s_cbranch_execz .LBB0_2
; CHECK-NEXT: ; %bb.1:
; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:4
>From 76a4111edb8041da13082faab91a314d48a39725 Mon Sep 17 00:00:00 2001
From: Lucas Ramirez <lucas.rami at proton.me>
Date: Wed, 8 Oct 2025 12:02:28 +0000
Subject: [PATCH 10/10] Improve code for frequency-based calculations
- Add some explanatory comments for the calculations.
- Normalize frequencies to the minimum observed frequency to avoid overflows (see the sketch below).
- Create helper struct to hold all frequency information (helpful for
further improvements).
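
The following is a rough, self-contained sketch of the normalization idea only,
not the in-tree implementation: the frequency values are made up, and the
handling of the case where every frequency is unknown is simplified.

  #include <algorithm>
  #include <cstdint>
  #include <iostream>
  #include <vector>

  int main() {
    // Raw per-region frequencies; 0 encodes an unknown frequency and stays 0.
    std::vector<uint64_t> Freqs = {0, 4096, 8192, 1024};
    uint64_t MinFreq = UINT64_MAX, MaxFreq = 0;
    for (uint64_t F : Freqs) {
      if (F && F < MinFreq)
        MinFreq = F;
      MaxFreq = std::max(MaxFreq, F);
    }
    if (MinFreq != UINT64_MAX) {
      // Dividing by the smallest known frequency keeps later sums of
      // frequencies comfortably away from the uint64_t overflow range.
      for (uint64_t &F : Freqs)
        F /= MinFreq;     // -> {0, 4, 8, 1}
      MaxFreq /= MinFreq; // -> 8
    }
    for (uint64_t F : Freqs)
      std::cout << F << ' ';
    std::cout << "| max=" << MaxFreq << '\n'; // prints: 0 4 8 1 | max=8
  }
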
---
llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp | 128 +++++++++++---------
llvm/lib/Target/AMDGPU/GCNSchedStrategy.h | 36 +++---
2 files changed, 95 insertions(+), 69 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index e669daebd2d37..f5080dddb7fa3 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -1193,34 +1193,7 @@ bool PreRARematStage::initGCNSchedStage() {
printTargetRegions(/*PrintAll=*/TargetRegions.none());
});
- // Compute region frequencies. 0 encodes an unknown region frequency.
- SmallVector<uint64_t> RegionFreq;
- RegionFreq.reserve(NumRegions);
- assert(DAG.MLI && "MLI not defined in DAG");
- MachineBranchProbabilityInfo MBPI;
- MachineBlockFrequencyInfo MBFI(MF, MBPI, *DAG.MLI);
- uint64_t MinFreq = MBFI.getEntryFreq().getFrequency(), MaxFreq = 0;
- for (const MachineBasicBlock *MBB : RegionBB) {
- uint64_t BlockFreq = MBFI.getBlockFreq(MBB).getFrequency();
- RegionFreq.push_back(BlockFreq);
- if (BlockFreq < MinFreq)
- MinFreq = BlockFreq;
- else if (BlockFreq > MaxFreq)
- MaxFreq = BlockFreq;
- }
- REMAT_DEBUG({
- dbgs() << "Region frequencies:\n";
- for (auto [I, Freq] : enumerate(RegionFreq)) {
- dbgs() << REMAT_PREFIX << " [" << I << "] ";
- if (Freq)
- dbgs() << Freq;
- else
- dbgs() << "unknown ";
- dbgs() << " | " << *DAG.Regions[I].first;
- }
- });
-
- if (!collectRematRegs(MIRegion, RegionFreq)) {
+ if (!collectRematRegs(MIRegion)) {
REMAT_DEBUG(dbgs() << "No rematerializable registers\n");
return false;
}
@@ -1230,20 +1203,21 @@ bool PreRARematStage::initGCNSchedStage() {
Remat.print();
});
+ const ScoredRemat::FreqInfo FreqInfo(MF, DAG);
SmallVector<ScoredRemat> ScoredRemats;
for (const RematReg &Remat : RematRegs)
- ScoredRemats.emplace_back(&Remat, MinFreq, MaxFreq, DAG);
- BitVector RecomputeRP(NumRegions);
+ ScoredRemats.emplace_back(&Remat, FreqInfo, DAG);
// Rematerialize registers in successive rounds until all RP targets are
// satisfied or until we run out of rematerialization candidates.
#ifndef NDEBUG
unsigned RoundNum = 0;
#endif
+ BitVector RecomputeRP(NumRegions);
do {
// (Re-)Score and (re-)sort all remats in increasing score order.
for (ScoredRemat &Remat : ScoredRemats)
- Remat.update(TargetRegions, RPTargets, RegionFreq, !TargetOcc);
+ Remat.update(TargetRegions, RPTargets, FreqInfo, !TargetOcc);
sort(ScoredRemats);
REMAT_DEBUG({
@@ -1885,10 +1859,7 @@ bool PreRARematStage::setObjective() {
}
bool PreRARematStage::collectRematRegs(
- const DenseMap<MachineInstr *, unsigned> &MIRegion,
- ArrayRef<uint64_t> RegionFreq) {
- assert(RegionFreq.size() == DAG.Regions.size());
-
+ const DenseMap<MachineInstr *, unsigned> &MIRegion) {
// We need up-to-date live-out info. to query live-out register masks in
// regions containing rematerializable instructions.
DAG.RegionLiveOuts.buildLiveRegMap();
@@ -1948,7 +1919,7 @@ bool PreRARematStage::collectRematRegs(
// Add the instruction to the rematerializable list.
RematRegSet.insert(Reg);
- RematRegs.emplace_back(&DefMI, UseMI, DAG, MIRegion, RegionFreq);
+ RematRegs.emplace_back(&DefMI, UseMI, DAG, MIRegion);
}
}
@@ -1957,12 +1928,10 @@ bool PreRARematStage::collectRematRegs(
PreRARematStage::RematReg::RematReg(
MachineInstr *DefMI, MachineInstr *UseMI, GCNScheduleDAGMILive &DAG,
- const DenseMap<MachineInstr *, unsigned> &MIRegion,
- ArrayRef<uint64_t> RegionFreq)
+ const DenseMap<MachineInstr *, unsigned> &MIRegion)
: DefMI(DefMI), UseMI(UseMI), LiveIn(DAG.Regions.size()),
LiveOut(DAG.Regions.size()), Live(DAG.Regions.size()),
- DefRegion(MIRegion.at(DefMI)), UseRegion(MIRegion.at(UseMI)),
- DefFrequency(RegionFreq[DefRegion]), UseFrequency(RegionFreq[UseRegion]) {
+ DefRegion(MIRegion.at(DefMI)), UseRegion(MIRegion.at(UseMI)) {
// Mark regions in which the rematerializable register is live.
Register Reg = getReg();
@@ -2004,11 +1973,50 @@ void PreRARematStage::RematReg::insertMI(unsigned RegionIdx,
DAG.LIS->createAndComputeVirtRegInterval(RematMI->getOperand(0).getReg());
}
+PreRARematStage::ScoredRemat::FreqInfo::FreqInfo(
+ MachineFunction &MF, const GCNScheduleDAGMILive &DAG) {
+ assert(DAG.MLI && "MLI not defined in DAG");
+ MachineBranchProbabilityInfo MBPI;
+ MachineBlockFrequencyInfo MBFI(MF, MBPI, *DAG.MLI);
+
+ const unsigned NumRegions = DAG.Regions.size();
+ uint64_t MinFreq = MBFI.getEntryFreq().getFrequency();
+ Regions.reserve(NumRegions);
+ MaxFreq = 0;
+ for (unsigned I = 0; I < NumRegions; ++I) {
+ MachineBasicBlock *MBB = DAG.Regions[I].first->getParent();
+ uint64_t BlockFreq = MBFI.getBlockFreq(MBB).getFrequency();
+ Regions.push_back(BlockFreq);
+ if (BlockFreq && BlockFreq < MinFreq)
+ MinFreq = BlockFreq;
+ else if (BlockFreq > MaxFreq)
+ MaxFreq = BlockFreq;
+ }
+ if (MinFreq) {
+ // Normalize to minimum observed frequency to avoid overflows when adding up
+ // frequencies.
+ for (uint64_t &Freq : Regions)
+ Freq /= MinFreq;
+ MaxFreq /= MinFreq;
+ }
+
+ REMAT_DEBUG({
+ dbgs() << "Region frequencies\n";
+ for (auto [I, Freq] : enumerate(Regions)) {
+ dbgs() << REMAT_PREFIX << " [" << I << "] ";
+ if (Freq)
+ dbgs() << Freq;
+ else
+ dbgs() << "unknown ";
+ dbgs() << " | " << *DAG.Regions[I].first;
+ }
+ });
+}
+
PreRARematStage::ScoredRemat::ScoredRemat(const RematReg *Remat,
- uint64_t MinFreq, uint64_t MaxFreq,
+ const FreqInfo &Freq,
const GCNScheduleDAGMILive &DAG)
- : Remat(Remat), NumRegs(getNumRegs(DAG)),
- FreqDiff(getFreqDiff(MinFreq, MaxFreq)) {}
+ : Remat(Remat), NumRegs(getNumRegs(DAG)), FreqDiff(getFreqDiff(Freq)) {}
unsigned PreRARematStage::ScoredRemat::getNumRegs(
const GCNScheduleDAGMILive &DAG) const {
@@ -2021,23 +2029,35 @@ unsigned PreRARematStage::ScoredRemat::getNumRegs(
return divideCeil(DAG.TRI->getRegSizeInBits(RC), 32);
}
-uint64_t PreRARematStage::ScoredRemat::getFreqDiff(uint64_t MinFreq,
- uint64_t MaxFreq) const {
- uint64_t DefOrMin = Remat->DefFrequency ? Remat->DefFrequency : MinFreq;
- uint64_t UseOrMax = Remat->UseFrequency ? Remat->UseFrequency : MaxFreq;
- uint64_t MaxDiff = MaxFreq - MinFreq;
- // This is equivalent to (2 * MaxDiff) / 2^NumBitsLatency.
- uint64_t RescaleDenom = MaxDiff >> (FreqDiffWidth - 1);
- RescaleDenom = std::max(RescaleDenom, (uint64_t)1);
+uint64_t PreRARematStage::ScoredRemat::getFreqDiff(const FreqInfo &Info) const {
+ // Get frequencies of defining and using regions. A rematerialization from the
+ // least frequent region to the most frequent region will yield the greatest
+ // latency penalty and therefore should get minimum score. Conversely, a
+ // rematerialization in the other direction should get maximum score. Default
+ // to values that will yield the worst possible score given known frequencies
+ // in order to penalize rematerializations from or into regions whose
+ // frequency is unknown.
+ uint64_t DefOrOne = Info.Regions[Remat->DefRegion];
+ if (!DefOrOne)
+ DefOrOne = 1;
+ uint64_t UseOrMax = Info.Regions[Remat->UseRegion];
+ if (!UseOrMax)
+ UseOrMax = Info.MaxFreq;
+
+ // Maximum difference in frequency between defining and using regions.
+ const uint64_t MaxDiff = Info.MaxFreq - 1;
+ // This is equivalent to max((2 * MaxDiff) / 2^FreqDiffWidth, 1).
+ const uint64_t RescaleDenom =
+ std::max(MaxDiff >> (FreqDiffWidth - 1), (uint64_t)1);
// The difference between defining and using frequency is in the range
// [-MaxDiff, MaxDiff], shift it to [0,2 x MaxDiff] to stay in the positive
// range, then rescale to [0, 2^NumBitsLatency - 1]
- return (MaxDiff + (DefOrMin - UseOrMax)) / RescaleDenom;
+ return (MaxDiff + (DefOrOne - UseOrMax)) / RescaleDenom;
}
void PreRARematStage::ScoredRemat::update(const BitVector &TargetRegions,
ArrayRef<GCNRPTarget> RPTargets,
- ArrayRef<uint64_t> RegionFreq,
+ const FreqInfo &FreqInfo,
bool ReduceSpill) {
setNullScore();
if (!Remat->maybeBeneficial(TargetRegions, RPTargets))
@@ -2055,7 +2075,7 @@ void PreRARematStage::ScoredRemat::update(const BitVector &TargetRegions,
NumBenefitingRegions += UnusedLT ? 2 : 1;
if (ReduceSpill) {
- uint64_t Freq = RegionFreq[I];
+ uint64_t Freq = FreqInfo.Regions[I];
if (!UnusedLT) {
// Apply a frequency penalty in regions in which we are not sure that RP
// will decrease.
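
The rescaling performed in getFreqDiff above can be illustrated with a small
standalone example. The frequency values below are made up, and FreqDiffWidth
is assumed to be 16 to match the [31:16] score field; this is a sketch under
those assumptions, not the in-tree code.

  #include <algorithm>
  #include <cstdint>
  #include <iostream>

  int main() {
    constexpr unsigned FreqDiffWidth = 16; // assumed width of the score field
    // Hypothetical normalized frequencies: the defining region is hot and the
    // using region is cold, so rematerializing should score highly.
    uint64_t MaxFreq = 64, DefFreq = 64, UseFreq = 2;
    uint64_t MaxDiff = MaxFreq - 1; // 63; the minimum normalized frequency is 1
    uint64_t Denom =
        std::max<uint64_t>(MaxDiff >> (FreqDiffWidth - 1), 1); // 1 here
    // DefFreq - UseFreq lies in [-MaxDiff, MaxDiff]; adding MaxDiff shifts it
    // into [0, 2 * MaxDiff] before rescaling into [0, 2^FreqDiffWidth - 1].
    uint64_t FreqDiff = (MaxDiff + (DefFreq - UseFreq)) / Denom;
    std::cout << FreqDiff << '\n'; // 125, close to the maximum of 126
  }
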
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
index 3ddd9f91ca454..d827bbd292697 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
@@ -448,7 +448,7 @@ class ClusteredLowOccStage : public GCNSchedStage {
/// rematerialized.
class PreRARematStage : public GCNSchedStage {
private:
- /// Groups information about a rematerializable register.
+ /// A rematerializable register.
struct RematReg {
/// Single MI defining the rematerializable register.
MachineInstr *DefMI;
@@ -460,13 +460,10 @@ class PreRARematStage : public GCNSchedStage {
LaneBitmask Mask;
/// Defining and using regions.
unsigned DefRegion, UseRegion;
- /// Frequency of defining/using regions. 0 when unknown.
- uint64_t DefFrequency, UseFrequency;
RematReg(MachineInstr *DefMI, MachineInstr *UseMI,
GCNScheduleDAGMILive &DAG,
- const DenseMap<MachineInstr *, unsigned> &MIRegion,
- ArrayRef<uint64_t> RegionFreq);
+ const DenseMap<MachineInstr *, unsigned> &MIRegion);
/// Returns the rematerializable register. Do not call after deleting the
/// original defining instruction.
@@ -502,15 +499,26 @@ class PreRARematStage : public GCNSchedStage {
/// The rematerializable register under consideration.
const RematReg *Remat;
+ /// Execution frequency information required by scoring heuristics.
+ struct FreqInfo {
+ /// Per-region execution frequencies, normalized to minimum observed
+ /// frequency. 0 when unknown.
+ SmallVector<uint64_t> Regions;
+ /// Maximum observed frequency, normalized to minimum observed frequency.
+ uint64_t MaxFreq;
+
+ FreqInfo(MachineFunction &MF, const GCNScheduleDAGMILive &DAG);
+ };
+
/// This only initializes state-independent characteristics of \p Remat, not
/// the actual score.
- ScoredRemat(const RematReg *Remat, uint64_t MinFreq, uint64_t MaxFreq,
+ ScoredRemat(const RematReg *Remat, const FreqInfo &Freq,
const GCNScheduleDAGMILive &DAG);
/// Updates the rematerialization's score w.r.t. the current \p RPTargets.
/// \p RegionFreq indicates the frequency of each region
void update(const BitVector &TargetRegions, ArrayRef<GCNRPTarget> RPTargets,
- ArrayRef<uint64_t> RegionFreq, bool ReduceSpill);
+ const FreqInfo &Freq, bool ReduceSpill);
/// Returns whether the current score is null.
bool hasNullScore() const { return !Score; }
@@ -541,8 +549,8 @@ class PreRARematStage : public GCNSchedStage {
const uint64_t FreqDiff;
using ScoreTy = uint64_t;
- /// Overall rematerialization score. Scoring components are mapped to bit
- /// ranges in the overall score.
+ /// Rematerialization score. Scoring components are mapped to bit ranges in
+ /// the score.
///
/// [63:32] : maximum frequency in benefiting target region (spilling only)
/// [31:16] : frequency difference between defining and using region
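
A rough sketch of how such a bit-packed score could be assembled from the two
documented fields; the field names, shift amounts, and values below are
illustrative only and do not come from the patch.

  #include <cstdint>
  #include <iostream>

  int main() {
    // Illustrative component values, already clamped to their field widths.
    uint64_t MaxBenefitFreq = 8; // goes into bits [63:32]
    uint64_t FreqDiff = 125;     // goes into bits [31:16]
    uint64_t Score = (MaxBenefitFreq << 32) | (FreqDiff << 16);
    std::cout << std::hex << Score << '\n'; // prints 8007d0000
  }

Packing the components into one integer lets a single comparison order
candidates, with the higher bit ranges taking precedence over the lower ones.
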
@@ -572,7 +580,7 @@ class PreRARematStage : public GCNSchedStage {
unsigned getNumRegs(const GCNScheduleDAGMILive &DAG) const;
- uint64_t getFreqDiff(uint64_t MinFreq, uint64_t MaxFreq) const;
+ uint64_t getFreqDiff(const FreqInfo &Freq) const;
};
/// Holds enough information to rollback a rematerialization decision post
@@ -624,11 +632,9 @@ class PreRARematStage : public GCNSchedStage {
bool updateAndVerifyRPTargets(const BitVector &Regions);
/// Collects all rematerializable registers and appends them to \ref
- /// RematRegs. \p MIRegion maps MIs to their region and \p RegionFreq contains
- /// the frequency of each region, 0 indicating an unknown frequency. Returns
- /// whether any rematerializable register was found.
- bool collectRematRegs(const DenseMap<MachineInstr *, unsigned> &MIRegion,
- ArrayRef<uint64_t> RegionFreq);
+ /// RematRegs. \p MIRegion maps MIs to their region. Returns whether any
+ /// rematerializable register was found.
+ bool collectRematRegs(const DenseMap<MachineInstr *, unsigned> &MIRegion);
/// Rematerializes \p Remat. This removes the rematerialized register from
/// live-in/out lists in the DAG and updates RP targets in all affected