[llvm] Revert "[AMDGPU][Scheduler] Scoring system for rematerializations (#175050)" (PR #175813)
Lucas Ramirez via llvm-commits
llvm-commits at lists.llvm.org
Tue Jan 13 11:01:16 PST 2026
https://github.com/lucas-rami created https://github.com/llvm/llvm-project/pull/175813
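This PR contains two revert commits, 8ab79377740789f6a34fc6f04ee321a39ab73724 and f21e3593371c049380f056a539a1601a843df558, which revert 125d24af767e11c4f4730f9e9f72aee2dbecec49 (#175755) and 6aaa7fd7fbd9981d8947f6d13441bcd9d2052fc1 (#175050) respectively; the reverted changes are causing a HIP failure in a Blender test.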
>From 8ab79377740789f6a34fc6f04ee321a39ab73724 Mon Sep 17 00:00:00 2001
From: Lucas Ramirez <lucas.rami at proton.me>
Date: Tue, 13 Jan 2026 18:53:22 +0000
Subject: [PATCH 1/2] Revert "[AMDGPU][Scheduler] Fix compile failure due to
const/sort interaction (#175755)"
This reverts commit 125d24af767e11c4f4730f9e9f72aee2dbecec49.
---
llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp | 5 +++--
llvm/lib/Target/AMDGPU/GCNSchedStrategy.h | 8 ++++----
2 files changed, 7 insertions(+), 6 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index a032dd2092140..2df9f232364c3 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -1396,7 +1396,7 @@ bool PreRARematStage::initGCNSchedStage() {
});
SmallVector<ScoredRemat> ScoredRemats;
- for (RematReg &Remat : RematRegs)
+ for (const RematReg &Remat : RematRegs)
ScoredRemats.emplace_back(&Remat, FreqInfo, DAG);
// Rematerialize registers in successive rounds until all RP targets are
@@ -2215,7 +2215,8 @@ PreRARematStage::ScoredRemat::FreqInfo::FreqInfo(
}
}
-PreRARematStage::ScoredRemat::ScoredRemat(RematReg *Remat, const FreqInfo &Freq,
+PreRARematStage::ScoredRemat::ScoredRemat(const RematReg *Remat,
+ const FreqInfo &Freq,
const GCNScheduleDAGMILive &DAG)
: Remat(Remat), NumRegs(getNumRegs(DAG)), FreqDiff(getFreqDiff(Freq)) {}
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
index 00876601cbc77..a5c4c960b1f31 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
@@ -512,7 +512,7 @@ class PreRARematStage : public GCNSchedStage {
/// is not helpful to reduce RP in target regions.
struct ScoredRemat {
/// The rematerializable register under consideration.
- RematReg *Remat;
+ const RematReg *Remat;
/// Execution frequency information required by scoring heuristics.
/// Frequencies are scaled down if they are high to avoid overflow/underflow
@@ -531,7 +531,7 @@ class PreRARematStage : public GCNSchedStage {
/// This only initializes state-independent characteristics of \p Remat, not
/// the actual score.
- ScoredRemat(RematReg *Remat, const FreqInfo &Freq,
+ ScoredRemat(const RematReg *Remat, const FreqInfo &Freq,
const GCNScheduleDAGMILive &DAG);
/// Updates the rematerialization's score w.r.t. the current \p RPTargets.
@@ -570,7 +570,7 @@ class PreRARematStage : public GCNSchedStage {
private:
/// Number of 32-bit registers this rematerialization covers.
- unsigned NumRegs;
+ const unsigned NumRegs;
// The three members below are the scoring components, top to bottom from
// most important to least important when comparing candidates.
@@ -582,7 +582,7 @@ class PreRARematStage : public GCNSchedStage {
/// Frequency difference between defining and using regions. Negative values
/// indicate we are rematerializing to higher frequency regions; positive
/// values indicate the contrary.
- int64_t FreqDiff;
+ const int64_t FreqDiff;
/// Expected number of target regions impacted by the rematerialization,
/// scaled by the size of the register being rematerialized.
unsigned RegionImpact;
>From f21e3593371c049380f056a539a1601a843df558 Mon Sep 17 00:00:00 2001
From: Lucas Ramirez <lucas.rami at proton.me>
Date: Tue, 13 Jan 2026 18:59:08 +0000
Subject: [PATCH 2/2] Revert "[AMDGPU][Scheduler] Scoring system for
rematerializations (#175050)"
This reverts commit 6aaa7fd7fbd9981d8947f6d13441bcd9d2052fc1.
---
llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp | 796 +++++++-----------
llvm/lib/Target/AMDGPU/GCNSchedStrategy.h | 256 ++----
...ne-scheduler-rematerialization-scoring.mir | 523 ------------
...ine-scheduler-sink-trivial-remats-attr.mir | 388 ++++-----
...ne-scheduler-sink-trivial-remats-debug.mir | 10 +-
.../machine-scheduler-sink-trivial-remats.mir | 277 +-----
llvm/test/CodeGen/AMDGPU/mfma-loop.ll | 2 +-
7 files changed, 575 insertions(+), 1677 deletions(-)
delete mode 100644 llvm/test/CodeGen/AMDGPU/machine-scheduler-rematerialization-scoring.mir
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index 2df9f232364c3..c8ce3aab3f303 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -28,20 +28,11 @@
#include "GCNRegPressure.h"
#include "SIMachineFunctionInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
-#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/CalcSpillWeights.h"
-#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
-#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/MC/LaneBitmask.h"
-#include "llvm/MC/MCInstrItineraries.h"
-#include "llvm/MC/MCSchedule.h"
-#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/ErrorHandling.h"
-#include <limits>
-#include <string>
#define DEBUG_TYPE "machine-scheduler"
@@ -979,8 +970,6 @@ void GCNScheduleDAGMILive::schedule() {
GCNRegPressure
GCNScheduleDAGMILive::getRealRegPressure(unsigned RegionIdx) const {
- if (Regions[RegionIdx].first == Regions[RegionIdx].second)
- return llvm::getRegPressure(MRI, LiveIns[RegionIdx]);
GCNDownwardRPTracker RPTracker(*LIS);
RPTracker.advance(Regions[RegionIdx].first, Regions[RegionIdx].second,
&LiveIns[RegionIdx]);
@@ -1283,222 +1272,33 @@ bool ClusteredLowOccStage::initGCNSchedStage() {
#define REMAT_PREFIX "[PreRARemat] "
#define REMAT_DEBUG(X) LLVM_DEBUG(dbgs() << REMAT_PREFIX; X;)
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-Printable PreRARematStage::ScoredRemat::print() const {
- return Printable([&](raw_ostream &OS) {
- OS << '(' << MaxFreq << ", " << FreqDiff << ", " << RegionImpact << ')';
- });
-}
-#endif
-
bool PreRARematStage::initGCNSchedStage() {
// FIXME: This pass will invalidate cached BBLiveInMap and MBBLiveIns for
// regions inbetween the defs and region we sinked the def to. Will need to be
// fixed if there is another pass after this pass.
assert(!S.hasNextStage());
- if (!GCNSchedStage::initGCNSchedStage() || DAG.Regions.size() <= 1)
+ if (!GCNSchedStage::initGCNSchedStage() || DAG.Regions.size() == 1)
return false;
- // Maps all MIs (except lone terminators, which are not part of any region) to
- // their parent region. Non-lone terminators are considered part of the region
- // they delimitate.
- DenseMap<MachineInstr *, unsigned> MIRegion(MF.getInstructionCount());
-
// Before performing any IR modification record the parent region of each MI
// and the parent MBB of each region.
const unsigned NumRegions = DAG.Regions.size();
+ RegionBB.reserve(NumRegions);
for (unsigned I = 0; I < NumRegions; ++I) {
RegionBoundaries Region = DAG.Regions[I];
for (auto MI = Region.first; MI != Region.second; ++MI)
MIRegion.insert({&*MI, I});
- MachineBasicBlock *ParentMBB = Region.first->getParent();
- if (Region.second != ParentMBB->end())
- MIRegion.insert({&*Region.second, I});
- RegionBB.push_back(ParentMBB);
- }
-
-#ifndef NDEBUG
- auto PrintTargetRegions = [&]() -> void {
- if (TargetRegions.none()) {
- dbgs() << REMAT_PREFIX << "No target regions\n";
- return;
- }
- dbgs() << REMAT_PREFIX << "Target regions:\n";
- for (unsigned I : TargetRegions.set_bits())
- dbgs() << REMAT_PREFIX << " [" << I << "] " << RPTargets[I] << '\n';
- };
- auto PrintRematReg = [&](const RematReg &Remat) -> Printable {
- return Printable([&, Remat](raw_ostream &OS) {
- // Concatenate all region numbers in which the register is unused and
- // live-through.
- bool HasLiveThroughRegion = false;
- OS << '[' << Remat.DefRegion << " -";
- for (unsigned I = 0; I < NumRegions; ++I) {
- if (Remat.isUnusedLiveThrough(I)) {
- if (HasLiveThroughRegion) {
- OS << ',';
- } else {
- OS << "- ";
- HasLiveThroughRegion = true;
- }
- OS << I;
- }
- }
- if (HasLiveThroughRegion)
- OS << " -";
- OS << "-> " << Remat.UseRegion << "] ";
- Remat.DefMI->print(OS, /*IsStandalone=*/true, /*SkipOpers=*/false,
- /*SkipDebugLoc=*/false, /*AddNewLine=*/false);
- });
- };
-#endif
-
- // Set an objective for the stage based on current RP in each region.
- REMAT_DEBUG({
- dbgs() << "Analyzing ";
- MF.getFunction().printAsOperand(dbgs(), false);
- dbgs() << ": ";
- });
- if (!setObjective()) {
- LLVM_DEBUG(dbgs() << "no objective to achieve, occupancy is maximal at "
- << MFI.getMaxWavesPerEU() << '\n');
- return false;
+ RegionBB.push_back(Region.first->getParent());
}
- LLVM_DEBUG({
- if (TargetOcc) {
- dbgs() << "increase occupancy from " << *TargetOcc - 1 << '\n';
- } else {
- dbgs() << "reduce spilling (minimum target occupancy is "
- << MFI.getMinWavesPerEU() << ")\n";
- }
- PrintTargetRegions();
- });
-
- if (!collectRematRegs(MIRegion)) {
- REMAT_DEBUG(dbgs() << "No rematerializable registers\n");
- return false;
- }
- const ScoredRemat::FreqInfo FreqInfo(MF, DAG);
- REMAT_DEBUG({
- dbgs() << "Rematerializable registers:\n";
- for (const RematReg &Remat : RematRegs)
- dbgs() << REMAT_PREFIX << " " << PrintRematReg(Remat) << '\n';
- dbgs() << REMAT_PREFIX << "Region frequencies\n";
- for (auto [I, Freq] : enumerate(FreqInfo.Regions)) {
- dbgs() << REMAT_PREFIX << " [" << I << "] ";
- if (Freq)
- dbgs() << Freq;
- else
- dbgs() << "unknown ";
- dbgs() << " | " << *DAG.Regions[I].first;
- }
- });
- SmallVector<ScoredRemat> ScoredRemats;
- for (const RematReg &Remat : RematRegs)
- ScoredRemats.emplace_back(&Remat, FreqInfo, DAG);
-
-// Rematerialize registers in successive rounds until all RP targets are
-// satisifed or until we run out of rematerialization candidates.
-#ifndef NDEBUG
- unsigned RoundNum = 0;
-#endif
- BitVector RecomputeRP(NumRegions);
- do {
- assert(!ScoredRemats.empty() && "no more remat candidates");
-
- // (Re-)Score and (re-)sort all remats in increasing score order.
- for (ScoredRemat &Remat : ScoredRemats)
- Remat.update(TargetRegions, RPTargets, FreqInfo, !TargetOcc);
- sort(ScoredRemats);
-
- REMAT_DEBUG({
- dbgs() << "==== ROUND " << RoundNum++ << " ====\n"
- << REMAT_PREFIX
- << "Candidates with non-null score, in rematerialization order:\n";
- for (const ScoredRemat &RematDecision : reverse(ScoredRemats)) {
- if (RematDecision.hasNullScore())
- break;
- dbgs() << REMAT_PREFIX << " " << RematDecision.print() << " | "
- << *RematDecision.Remat->DefMI;
- }
- PrintTargetRegions();
- });
-
- RecomputeRP.reset();
- unsigned RematIdx = ScoredRemats.size();
-
- // Rematerialize registers in decreasing score order until we estimate
- // that all RP targets are satisfied or until rematerialization candidates
- // are no longer useful to decrease RP.
- for (; RematIdx && TargetRegions.any(); --RematIdx) {
- const ScoredRemat &Candidate = ScoredRemats[RematIdx - 1];
- // Stop rematerializing on encountering a null score. Since scores
- // monotonically decrease as we rematerialize, we know there is nothing
- // useful left to do in such cases, even if we were to re-score.
- if (Candidate.hasNullScore()) {
- RematIdx = 0;
- break;
- }
-
- const RematReg &Remat = *Candidate.Remat;
- // When previous rematerializations in this round have already satisfied
- // RP targets in all regions this rematerialization can impact, we have a
- // good indication that our scores have diverged significantly from
- // reality, in which case we interrupt this round and re-score. This also
- // ensures that every rematerialization we perform is possibly impactful
- // in at least one target region.
- if (!Remat.maybeBeneficial(TargetRegions, RPTargets))
- break;
-
- REMAT_DEBUG(dbgs() << "** REMAT " << PrintRematReg(Remat) << '\n';);
- // Every rematerialization we do here is likely to move the instruction
- // into a higher frequency region, increasing the total sum latency of the
- // instruction itself. This is acceptable if we are eliminating a spill in
- // the process, but when the goal is increasing occupancy we get nothing
- // out of rematerialization if occupancy is not increased in the end; in
- // such cases we want to roll back the rematerialization.
- RollbackInfo *Rollback =
- TargetOcc ? &Rollbacks.emplace_back(&Remat) : nullptr;
- rematerialize(Remat, RecomputeRP, Rollback);
- unsetSatisifedRPTargets(Remat.Live);
- }
-
- REMAT_DEBUG({
- if (!TargetRegions.any()) {
- dbgs() << "** Interrupt round on all targets achieved\n";
- } else if (RematIdx) {
- dbgs() << "** Interrupt round on stale score for "
- << *ScoredRemats[RematIdx - 1].Remat->DefMI;
- } else {
- dbgs() << "** Stop on exhausted rematerialization candidates\n";
- }
- });
-
- // Peel off registers we already rematerialized from the vector's tail.
- ScoredRemats.truncate(RematIdx);
- } while ((updateAndVerifyRPTargets(RecomputeRP) || TargetRegions.any()) &&
- !ScoredRemats.empty());
- if (RescheduleRegions.none())
+ if (!canIncreaseOccupancyOrReduceSpill())
return false;
- // Commit all pressure changes to the DAG and compute minimum achieved
- // occupancy in impacted regions.
- REMAT_DEBUG(dbgs() << "==== REMAT RESULTS ====\n");
- unsigned DynamicVGPRBlockSize = MFI.getDynamicVGPRBlockSize();
- for (unsigned I : RescheduleRegions.set_bits()) {
- DAG.Pressure[I] = RPTargets[I].getCurrentRP();
- REMAT_DEBUG(dbgs() << '[' << I << "] Achieved occupancy "
- << DAG.Pressure[I].getOccupancy(ST, DynamicVGPRBlockSize)
- << " (" << RPTargets[I] << ")\n");
- }
- AchievedOcc = MFI.getMaxWavesPerEU();
- for (const GCNRegPressure &RP : DAG.Pressure) {
- AchievedOcc =
- std::min(AchievedOcc, RP.getOccupancy(ST, DynamicVGPRBlockSize));
- }
-
+ // Rematerialize identified instructions and update scheduler's state.
+ rematerialize();
+ if (GCNTrackers)
+ DAG.RegionLiveOuts.buildLiveRegMap();
REMAT_DEBUG({
dbgs() << "Retrying function scheduling with new min. occupancy of "
<< AchievedOcc << " from rematerializing (original was "
@@ -1507,6 +1307,7 @@ bool PreRARematStage::initGCNSchedStage() {
dbgs() << ", target was " << *TargetOcc;
dbgs() << ")\n";
});
+
if (AchievedOcc > DAG.MinOccupancy) {
DAG.MinOccupancy = AchievedOcc;
SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
@@ -1540,10 +1341,6 @@ void UnclusteredHighRPStage::finalizeGCNSchedStage() {
}
bool GCNSchedStage::initGCNRegion() {
- // Skip empty scheduling region.
- if (DAG.begin() == DAG.end())
- return false;
-
// Check whether this new region is also a new block.
if (DAG.RegionBegin->getParent() != CurrentMBB)
setupNewBlock();
@@ -1551,8 +1348,8 @@ bool GCNSchedStage::initGCNRegion() {
unsigned NumRegionInstrs = std::distance(DAG.begin(), DAG.end());
DAG.enterRegion(CurrentMBB, DAG.begin(), DAG.end(), NumRegionInstrs);
- // Skip regions with 1 schedulable instruction.
- if (DAG.begin() == std::prev(DAG.end()))
+ // Skip empty scheduling regions (0 or 1 schedulable instructions).
+ if (DAG.begin() == DAG.end() || DAG.begin() == std::prev(DAG.end()))
return false;
LLVM_DEBUG(dbgs() << "********** MI Scheduling **********\n");
@@ -2040,20 +1837,27 @@ void GCNSchedStage::revertScheduling() {
DAG.Regions[RegionIdx] = std::pair(DAG.RegionBegin, DAG.RegionEnd);
}
-bool PreRARematStage::setObjective() {
+bool PreRARematStage::canIncreaseOccupancyOrReduceSpill() {
const Function &F = MF.getFunction();
- // Set up "spilling targets" for all regions.
+ // Maps optimizable regions (i.e., regions at minimum and register-limited
+ // occupancy, or regions with spilling) to the target RP we would like to
+ // reach.
+ DenseMap<unsigned, GCNRPTarget> OptRegions;
unsigned MaxSGPRs = ST.getMaxNumSGPRs(F);
unsigned MaxVGPRs = ST.getMaxNumVGPRs(F);
- for (unsigned I = 0, E = DAG.Regions.size(); I != E; ++I) {
- const GCNRegPressure &RP = DAG.Pressure[I];
- GCNRPTarget &Target = RPTargets.emplace_back(MaxSGPRs, MaxVGPRs, MF, RP);
- if (!Target.satisfied())
- TargetRegions.set(I);
- }
+ auto ResetTargetRegions = [&]() {
+ OptRegions.clear();
+ for (unsigned I = 0, E = DAG.Regions.size(); I != E; ++I) {
+ const GCNRegPressure &RP = DAG.Pressure[I];
+ GCNRPTarget Target(MaxSGPRs, MaxVGPRs, MF, RP);
+ if (!Target.satisfied())
+ OptRegions.insert({I, Target});
+ }
+ };
- if (TargetRegions.any() || DAG.MinOccupancy >= MFI.getMaxWavesPerEU()) {
+ ResetTargetRegions();
+ if (!OptRegions.empty() || DAG.MinOccupancy >= MFI.getMaxWavesPerEU()) {
// In addition to register usage being above addressable limits, occupancy
// below the minimum is considered like "spilling" as well.
TargetOcc = std::nullopt;
@@ -2061,68 +1865,94 @@ bool PreRARematStage::setObjective() {
// There is no spilling and room to improve occupancy; set up "increased
// occupancy targets" for all regions.
TargetOcc = DAG.MinOccupancy + 1;
- const unsigned VGPRBlockSize = MFI.getDynamicVGPRBlockSize();
+ unsigned VGPRBlockSize =
+ MF.getInfo<SIMachineFunctionInfo>()->getDynamicVGPRBlockSize();
MaxSGPRs = ST.getMaxNumSGPRs(*TargetOcc, false);
MaxVGPRs = ST.getMaxNumVGPRs(*TargetOcc, VGPRBlockSize);
- for (auto [I, Target] : enumerate(RPTargets)) {
- Target.setTarget(MaxSGPRs, MaxVGPRs);
- if (!Target.satisfied())
- TargetRegions.set(I);
- }
+ ResetTargetRegions();
}
+ REMAT_DEBUG({
+ dbgs() << "Analyzing ";
+ MF.getFunction().printAsOperand(dbgs(), false);
+ dbgs() << ": ";
+ if (OptRegions.empty()) {
+ dbgs() << "no objective to achieve, occupancy is maximal at "
+ << MFI.getMaxWavesPerEU();
+ } else if (!TargetOcc) {
+ dbgs() << "reduce spilling (minimum target occupancy is "
+ << MFI.getMinWavesPerEU() << ')';
+ } else {
+ dbgs() << "increase occupancy from " << DAG.MinOccupancy << " to "
+ << TargetOcc;
+ }
+ dbgs() << '\n';
+ for (unsigned I = 0, E = DAG.Regions.size(); I != E; ++I) {
+ if (auto OptIt = OptRegions.find(I); OptIt != OptRegions.end()) {
+ dbgs() << REMAT_PREFIX << " [" << I << "] " << OptIt->getSecond()
+ << '\n';
+ }
+ }
+ });
+ if (OptRegions.empty())
+ return false;
- return TargetRegions.any();
-}
+ // Accounts for a reduction in RP in an optimizable region. Returns whether we
+ // estimate that we have identified enough rematerialization opportunities to
+ // achieve our goal, and sets Progress to true when this particular reduction
+ // in pressure was helpful toward that goal.
+ auto ReduceRPInRegion = [&](auto OptIt, Register Reg, LaneBitmask Mask,
+ bool &Progress) -> bool {
+ GCNRPTarget &Target = OptIt->getSecond();
+ if (!Target.isSaveBeneficial(Reg))
+ return false;
+ Progress = true;
+ Target.saveReg(Reg, Mask, DAG.MRI);
+ if (Target.satisfied())
+ OptRegions.erase(OptIt->getFirst());
+ return OptRegions.empty();
+ };
-bool PreRARematStage::collectRematRegs(
- const DenseMap<MachineInstr *, unsigned> &MIRegion) {
// We need up-to-date live-out info. to query live-out register masks in
// regions containing rematerializable instructions.
DAG.RegionLiveOuts.buildLiveRegMap();
- // Set of registers already marked for potential remterialization; used to
- // avoid rematerialization chains.
- SmallSet<Register, 4> MarkedRegs;
- auto IsMarkedForRemat = [&MarkedRegs](const MachineOperand &MO) -> bool {
- return MO.isReg() && MarkedRegs.contains(MO.getReg());
- };
+ // Cache set of registers that are going to be rematerialized.
+ DenseSet<unsigned> RematRegs;
// Identify rematerializable instructions in the function.
for (unsigned I = 0, E = DAG.Regions.size(); I != E; ++I) {
- RegionBoundaries Bounds = DAG.Regions[I];
- for (auto MI = Bounds.first; MI != Bounds.second; ++MI) {
+ auto Region = DAG.Regions[I];
+ for (auto MI = Region.first; MI != Region.second; ++MI) {
// The instruction must be rematerializable.
MachineInstr &DefMI = *MI;
if (!isReMaterializable(DefMI))
continue;
- // We only support rematerializing virtual registers with one
- // definition.
+ // We only support rematerializing virtual registers with one definition.
Register Reg = DefMI.getOperand(0).getReg();
if (!Reg.isVirtual() || !DAG.MRI.hasOneDef(Reg))
continue;
// We only care to rematerialize the instruction if it has a single
- // non-debug user in a different region.
- // FIXME: Allow rematerializations with multiple uses. This should be
- // relatively easy to support using the current cost model.
+ // non-debug user in a different region. The using MI may not belong to a
+ // region if it is a lone region terminator.
MachineInstr *UseMI = DAG.MRI.getOneNonDBGUser(Reg);
if (!UseMI)
continue;
auto UseRegion = MIRegion.find(UseMI);
- if (UseRegion == MIRegion.end() || UseRegion->second == I)
+ if (UseRegion != MIRegion.end() && UseRegion->second == I)
continue;
// Do not rematerialize an instruction if it uses or is used by an
// instruction that we have designated for rematerialization.
// FIXME: Allow for rematerialization chains: this requires 1. updating
- // remat points to account for uses that are rematerialized, and 2.
- // either rematerializing the candidates in careful ordering, or
- // deferring the MBB RP walk until the entire chain has been
- // rematerialized.
- const MachineOperand &UseMO = UseMI->getOperand(0);
- if (IsMarkedForRemat(UseMO) ||
- llvm::any_of(DefMI.operands(), IsMarkedForRemat))
+ // remat points to account for uses that are rematerialized, and 2. either
+ // rematerializing the candidates in careful ordering, or deferring the
+ // MBB RP walk until the entire chain has been rematerialized.
+ if (Rematerializations.contains(UseMI) ||
+ llvm::any_of(DefMI.operands(), [&RematRegs](MachineOperand &MO) {
+ return MO.isReg() && RematRegs.contains(MO.getReg());
+ }))
continue;
// Do not rematerialize an instruction it it uses registers that aren't
@@ -2133,182 +1963,106 @@ bool PreRARematStage::collectRematRegs(
*DAG.TII))
continue;
- // Add the instruction to the rematerializable list.
- MarkedRegs.insert(Reg);
- RematRegs.emplace_back(&DefMI, UseMI, DAG, MIRegion);
- }
- }
-
- return !RematRegs.empty();
-}
+ REMAT_DEBUG(dbgs() << "Region " << I << ": remat instruction " << DefMI);
+ RematInstruction &Remat =
+ Rematerializations.try_emplace(&DefMI, UseMI).first->second;
+
+ bool RematUseful = false;
+ if (auto It = OptRegions.find(I); It != OptRegions.end()) {
+ // Optimistically consider that moving the instruction out of its
+ // defining region will reduce RP in the latter; this assumes that
+ // maximum RP in the region is reached somewhere between the defining
+ // instruction and the end of the region.
+ REMAT_DEBUG(dbgs() << " Defining region is optimizable\n");
+ LaneBitmask Mask = DAG.RegionLiveOuts.getLiveRegsForRegionIdx(I)[Reg];
+ if (ReduceRPInRegion(It, Reg, Mask, RematUseful))
+ return true;
+ }
-PreRARematStage::RematReg::RematReg(
- MachineInstr *DefMI, MachineInstr *UseMI, GCNScheduleDAGMILive &DAG,
- const DenseMap<MachineInstr *, unsigned> &MIRegion)
- : DefMI(DefMI), UseMI(UseMI), LiveIn(DAG.Regions.size()),
- LiveOut(DAG.Regions.size()), Live(DAG.Regions.size()),
- DefRegion(MIRegion.at(DefMI)), UseRegion(MIRegion.at(UseMI)) {
+ for (unsigned LIRegion = 0; LIRegion != E; ++LIRegion) {
+ // We are only collecting regions in which the register is a live-in
+ // (and may be live-through).
+ auto It = DAG.LiveIns[LIRegion].find(Reg);
+ if (It == DAG.LiveIns[LIRegion].end() || It->second.none())
+ continue;
+ Remat.LiveInRegions.insert(LIRegion);
+
+ // Account for the reduction in RP due to the rematerialization in an
+ // optimizable region in which the defined register is a live-in. This
+ // is exact for live-through region but optimistic in the using region,
+ // where RP is actually reduced only if maximum RP is reached somewhere
+ // between the beginning of the region and the rematerializable
+ // instruction's use.
+ if (auto It = OptRegions.find(LIRegion); It != OptRegions.end()) {
+ REMAT_DEBUG(dbgs() << " Live-in in region " << LIRegion << '\n');
+ if (ReduceRPInRegion(It, Reg, DAG.LiveIns[LIRegion][Reg],
+ RematUseful))
+ return true;
+ }
+ }
- // Mark regions in which the rematerializable register is live.
- Register Reg = getReg();
- for (unsigned I = 0, E = DAG.Regions.size(); I != E; ++I) {
- auto LiveInIt = DAG.LiveIns[I].find(Reg);
- if (LiveInIt != DAG.LiveIns[I].end())
- LiveIn.set(I);
- const auto &LiveOuts = DAG.RegionLiveOuts.getLiveRegsForRegionIdx(I);
- if (auto LiveOutIt = LiveOuts.find(Reg); LiveOutIt != LiveOuts.end())
- LiveOut.set(I);
- }
- Live |= LiveIn;
- Live |= LiveOut;
- Mask = DAG.RegionLiveOuts.getLiveRegsForRegionIdx(DefRegion).at(Reg);
-}
-
-bool PreRARematStage::RematReg::maybeBeneficial(
- const BitVector &TargetRegions, ArrayRef<GCNRPTarget> RPTargets) const {
- Register Reg = getReg();
- for (unsigned I : TargetRegions.set_bits()) {
- if (Live[I] && RPTargets[I].isSaveBeneficial(Reg))
- return true;
+ // If the instruction is not a live-in or live-out in any optimizable
+ // region then there is no point in rematerializing it.
+ if (!RematUseful) {
+ Rematerializations.pop_back();
+ REMAT_DEBUG(dbgs() << " No impact, not rematerializing instruction\n");
+ } else {
+ RematRegs.insert(Reg);
+ }
+ }
}
- return false;
-}
-void PreRARematStage::RematReg::insertMI(unsigned RegionIdx,
- MachineInstr *RematMI,
- GCNScheduleDAGMILive &DAG) const {
- RegionBoundaries &Bounds = DAG.Regions[RegionIdx];
- if (Bounds.first == std::next(MachineBasicBlock::iterator(RematMI)))
- Bounds.first = RematMI;
- DAG.LIS->InsertMachineInstrInMaps(*RematMI);
- DAG.LIS->createAndComputeVirtRegInterval(RematMI->getOperand(0).getReg());
+ if (TargetOcc) {
+ // We were trying to increase occupancy but failed, abort the stage.
+ REMAT_DEBUG(dbgs() << "Cannot increase occupancy\n");
+ Rematerializations.clear();
+ return false;
+ }
+ REMAT_DEBUG(dbgs() << "Can reduce but not eliminate spilling\n");
+ return !Rematerializations.empty();
}
-PreRARematStage::ScoredRemat::FreqInfo::FreqInfo(
- MachineFunction &MF, const GCNScheduleDAGMILive &DAG) {
- assert(DAG.MLI && "MLI not defined in DAG");
- MachineBranchProbabilityInfo MBPI;
- MachineBlockFrequencyInfo MBFI(MF, MBPI, *DAG.MLI);
-
- const unsigned NumRegions = DAG.Regions.size();
- MinFreq = MBFI.getEntryFreq().getFrequency();
- MaxFreq = 0;
- Regions.reserve(NumRegions);
- for (unsigned I = 0; I < NumRegions; ++I) {
- MachineBasicBlock *MBB = DAG.Regions[I].first->getParent();
- uint64_t BlockFreq = MBFI.getBlockFreq(MBB).getFrequency();
- Regions.push_back(BlockFreq);
- if (BlockFreq && BlockFreq < MinFreq)
- MinFreq = BlockFreq;
- else if (BlockFreq > MaxFreq)
- MaxFreq = BlockFreq;
- }
- if (!MinFreq)
- return;
-
- // Scale everything down if frequencies are high.
- if (MinFreq >= ScaleFactor * ScaleFactor) {
- for (uint64_t &Freq : Regions)
- Freq /= ScaleFactor;
- MinFreq /= ScaleFactor;
- MaxFreq /= ScaleFactor;
- }
-}
-
-PreRARematStage::ScoredRemat::ScoredRemat(const RematReg *Remat,
- const FreqInfo &Freq,
- const GCNScheduleDAGMILive &DAG)
- : Remat(Remat), NumRegs(getNumRegs(DAG)), FreqDiff(getFreqDiff(Freq)) {}
-
-unsigned PreRARematStage::ScoredRemat::getNumRegs(
- const GCNScheduleDAGMILive &DAG) const {
- const TargetRegisterClass &RC = *DAG.MRI.getRegClass(Remat->getReg());
- unsigned RegSize = DAG.TRI->getRegSizeInBits(RC);
- if (unsigned SubIdx = Remat->DefMI->getOperand(0).getSubReg()) {
- // The following may return -1 (i.e., a large unsigned number) on indices
- // that may be used to access subregisters of multiple sizes; in such cases
- // fallback on the size derived from the register class.
- unsigned SubRegSize = DAG.TRI->getSubRegIdxSize(SubIdx);
- if (SubRegSize < RegSize)
- RegSize = SubRegSize;
- }
- return divideCeil(RegSize, 32);
-}
-
-int64_t PreRARematStage::ScoredRemat::getFreqDiff(const FreqInfo &Freq) const {
- // Get frequencies of defining and using regions. A rematerialization from the
- // least frequent region to the most frequent region will yield the greatest
- // latency penalty and therefore should get minimum score. Reciprocally, a
- // rematerialization in the other direction should get maximum score. Default
- // to values that will yield the worst possible score given known frequencies
- // in order to penalize rematerializations from or into regions whose
- // frequency is unknown.
- int64_t DefOrMin = std::max(Freq.Regions[Remat->DefRegion], Freq.MinFreq);
- int64_t UseOrMax = Freq.Regions[Remat->UseRegion];
- if (!UseOrMax)
- UseOrMax = Freq.MaxFreq;
- return DefOrMin - UseOrMax;
-}
-
-void PreRARematStage::ScoredRemat::update(const BitVector &TargetRegions,
- ArrayRef<GCNRPTarget> RPTargets,
- const FreqInfo &FreqInfo,
- bool ReduceSpill) {
- MaxFreq = 0;
- RegionImpact = 0;
- for (unsigned I : TargetRegions.set_bits()) {
- if (!Remat->Live[I] || !RPTargets[I].isSaveBeneficial(Remat->getReg()))
- continue;
- bool UnusedLT = Remat->isUnusedLiveThrough(I);
-
- // Regions in which RP is guaranteed to decrease have more weight.
- RegionImpact += UnusedLT ? 2 : 1;
+void PreRARematStage::rematerialize() {
+ const SIInstrInfo *TII = MF.getSubtarget<GCNSubtarget>().getInstrInfo();
- if (ReduceSpill) {
- uint64_t Freq = FreqInfo.Regions[I];
- if (!UnusedLT) {
- // Apply a frequency penalty in regions in which we are not sure that RP
- // will decrease.
- Freq /= 2;
- }
- MaxFreq = std::max(MaxFreq, Freq);
+ // Collect regions whose RP changes in unpredictable way; we will have to
+ // fully recompute their RP after all rematerailizations.
+ DenseSet<unsigned> RecomputeRP;
+
+ // Rematerialize all instructions.
+ for (auto &[DefMI, Remat] : Rematerializations) {
+ MachineBasicBlock::iterator InsertPos(Remat.UseMI);
+ Register Reg = DefMI->getOperand(0).getReg();
+ unsigned DefRegion = MIRegion.at(DefMI);
+
+ // Rematerialize DefMI to its use block.
+ TII->reMaterialize(*InsertPos->getParent(), InsertPos, Reg,
+ AMDGPU::NoSubRegister, *DefMI);
+ Remat.RematMI = &*std::prev(InsertPos);
+ DAG.LIS->InsertMachineInstrInMaps(*Remat.RematMI);
+
+ // Update region boundaries in regions we sinked from (remove defining MI)
+ // and to (insert MI rematerialized in use block). Only then we can erase
+ // the original MI.
+ DAG.updateRegionBoundaries(DAG.Regions[DefRegion], DefMI, nullptr);
+ auto UseRegion = MIRegion.find(Remat.UseMI);
+ if (UseRegion != MIRegion.end()) {
+ DAG.updateRegionBoundaries(DAG.Regions[UseRegion->second], InsertPos,
+ Remat.RematMI);
}
- }
- RegionImpact *= NumRegs;
-}
+ DAG.LIS->RemoveMachineInstrFromMaps(*DefMI);
+ DefMI->eraseFromParent();
-void PreRARematStage::rematerialize(const RematReg &Remat,
- BitVector &RecomputeRP,
- RollbackInfo *Rollback) {
- const SIInstrInfo *TII = MF.getSubtarget<GCNSubtarget>().getInstrInfo();
- MachineInstr &DefMI = *Remat.DefMI;
- Register Reg = DefMI.getOperand(0).getReg();
- Register NewReg = DAG.MRI.cloneVirtualRegister(Reg);
-
- // Rematerialize the register in the region where it is used.
- MachineBasicBlock::iterator InsertPos = Remat.UseMI;
- TII->reMaterialize(*InsertPos->getParent(), InsertPos, NewReg, 0, DefMI);
- MachineInstr *RematMI = &*std::prev(InsertPos);
- Remat.UseMI->substituteRegister(Reg, NewReg, 0, *DAG.TRI);
- Remat.insertMI(Remat.UseRegion, RematMI, DAG);
- if (Rollback) {
- Rollback->RematMI = RematMI;
- // Make the original MI a debug instruction so that it does not influence
- // scheduling.
- DefMI.setDesc(TII->get(TargetOpcode::DBG_VALUE));
- } else {
- // Just delete the original instruction if it cannot be rolled back.
- DAG.deleteMI(Remat.DefRegion, &DefMI);
- }
+ // Collect all regions impacted by the rematerialization and update their
+ // live-in/RP information.
+ for (unsigned I : Remat.LiveInRegions) {
+ ImpactedRegions.insert({I, DAG.Pressure[I]});
+ GCNRPTracker::LiveRegSet &RegionLiveIns = DAG.LiveIns[I];
- // Remove the register from all regions where it is a live-in or live-out
- // and adjust RP targets.
- for (unsigned I : Remat.Live.set_bits()) {
#ifdef EXPENSIVE_CHECKS
- if (!Remat.LiveIn[I] && Remat.LiveOut[I]) {
- // All uses are known to be available / live at the remat point. Thus,
- // the uses should already be live in to the region.
- for (MachineOperand &MO : DefMI.operands()) {
+ // All uses are known to be available / live at the remat point. Thus, the
+ // uses should already be live in to the region.
+ for (MachineOperand &MO : DefMI->operands()) {
if (!MO.isReg() || !MO.getReg() || !MO.readsReg())
continue;
@@ -2321,7 +2075,7 @@ void PreRARematStage::rematerialize(const RematReg &Remat,
if (LI.hasSubRanges() && MO.getSubReg())
LM = DAG.TRI->getSubRegIndexLaneMask(MO.getSubReg());
- LaneBitmask LiveInMask = DAG.LiveIns[I].at(UseReg);
+ LaneBitmask LiveInMask = RegionLiveIns.at(UseReg);
LaneBitmask UncoveredLanes = LM & ~(LiveInMask & LM);
// If this register has lanes not covered by the LiveIns, be sure they
// do not map to any subrange. ref:
@@ -2332,74 +2086,65 @@ void PreRARematStage::rematerialize(const RematReg &Remat,
assert((SR.LaneMask & UncoveredLanes).none());
}
}
- }
#endif
- // This save is guaranteed in regions in which the register is live-through
- // and unused but optimistic in all other regions where the register is
- // live.
- RPTargets[I].saveReg(Reg, Remat.Mask, DAG.MRI);
- DAG.LiveIns[I].erase(Reg);
- DAG.RegionLiveOuts.getLiveRegsForRegionIdx(I).erase(Reg);
- if (!Remat.isUnusedLiveThrough(I))
- RecomputeRP.set(I);
- }
-
- RescheduleRegions |= Remat.Live;
-}
-
-void PreRARematStage::rollback(const RollbackInfo &Rollback,
- BitVector &RecomputeRP) const {
- auto &[Remat, RematMI] = Rollback;
-
- // Switch back to using the original register and delete the
- // rematerialization.
- Remat->DefMI->setDesc(DAG.TII->get(RematMI->getOpcode()));
- Register Reg = RematMI->getOperand(0).getReg();
- Register OriginalReg = Remat->DefMI->getOperand(0).getReg();
- Remat->UseMI->substituteRegister(Reg, OriginalReg, 0, *DAG.TRI);
- REMAT_DEBUG(dbgs() << '[' << Remat->UseRegion
- << "] Deleting rematerialization " << *RematMI);
- DAG.deleteMI(Remat->UseRegion, RematMI);
-
- // Regenerate the original register's interval as slot indices may have
- // changed slightly from before re-scheduling, and re-add it as a
- // live-in/live-out in all regions it used to be one in.
- DAG.LIS->removeInterval(OriginalReg);
- DAG.LIS->createAndComputeVirtRegInterval(OriginalReg);
- std::pair<Register, LaneBitmask> LiveReg(OriginalReg, Remat->Mask);
- for (unsigned I : Remat->LiveIn.set_bits())
- DAG.LiveIns[I].insert(LiveReg);
- for (unsigned I : Remat->LiveOut.set_bits())
- DAG.RegionLiveOuts.getLiveRegsForRegionIdx(I).insert(LiveReg);
- RecomputeRP |= Remat->Live;
-}
-
-void PreRARematStage::unsetSatisifedRPTargets(const BitVector &Regions) {
- for (unsigned I : Regions.set_bits()) {
- if (TargetRegions[I] && RPTargets[I].satisfied()) {
- REMAT_DEBUG(dbgs() << " [" << I << "] Target reached!\n");
- TargetRegions.reset(I);
+ // The register is no longer a live-in in all regions but the one that
+ // contains the single use. In live-through regions, maximum register
+ // pressure decreases predictably so we can directly update it. In the
+ // using region, maximum RP may or may not decrease, so we will mark it
+ // for re-computation after all rematerializations have taken place.
+ LaneBitmask PrevMask = RegionLiveIns[Reg];
+ RegionLiveIns.erase(Reg);
+ RegMasks.insert({{I, Remat.RematMI->getOperand(0).getReg()}, PrevMask});
+ if (Remat.UseMI->getParent() != DAG.Regions[I].first->getParent())
+ DAG.Pressure[I].inc(Reg, PrevMask, LaneBitmask::getNone(), DAG.MRI);
+ else
+ RecomputeRP.insert(I);
}
- }
-}
-
-bool PreRARematStage::updateAndVerifyRPTargets(const BitVector &Regions) {
- bool TooOptimistic = false;
- for (unsigned I : Regions.set_bits()) {
- GCNRPTarget &Target = RPTargets[I];
- Target.setRP(DAG.getRealRegPressure(I));
+ // RP in the region from which the instruction was rematerialized may or may
+ // not decrease.
+ ImpactedRegions.insert({DefRegion, DAG.Pressure[DefRegion]});
+ RecomputeRP.insert(DefRegion);
+
+ // Recompute live interval to reflect the register's rematerialization.
+ Register RematReg = Remat.RematMI->getOperand(0).getReg();
+ DAG.LIS->removeInterval(RematReg);
+ DAG.LIS->createAndComputeVirtRegInterval(RematReg);
+ }
+
+ // All regions impacted by at least one rematerialization must be rescheduled.
+ // Maximum pressure must also be recomputed for all regions where it changed
+ // non-predictably and checked against the target occupancy.
+ unsigned DynamicVGPRBlockSize =
+ MF.getInfo<SIMachineFunctionInfo>()->getDynamicVGPRBlockSize();
+ AchievedOcc = MFI.getMaxWavesPerEU();
+ for (auto &[I, OriginalRP] : ImpactedRegions) {
+ bool IsEmptyRegion = DAG.Regions[I].first == DAG.Regions[I].second;
+ RescheduleRegions[I] = !IsEmptyRegion;
+ if (!RecomputeRP.contains(I))
+ continue;
- // Since we were optimistic in assessing RP decreases in these regions, we
- // may need to remark the target as a target region if RP didn't decrease
- // as expected.
- if (!TargetRegions[I] && !Target.satisfied()) {
- REMAT_DEBUG(dbgs() << " [" << I << "] Incorrect RP estimation\n");
- TooOptimistic = true;
- TargetRegions.set(I);
+ GCNRegPressure RP;
+ if (IsEmptyRegion) {
+ RP = getRegPressure(DAG.MRI, DAG.LiveIns[I]);
+ } else {
+ GCNDownwardRPTracker RPT(*DAG.LIS);
+ auto *NonDbgMI = &*skipDebugInstructionsForward(DAG.Regions[I].first,
+ DAG.Regions[I].second);
+ if (NonDbgMI == DAG.Regions[I].second) {
+ // Region is non-empty but contains only debug instructions.
+ RP = getRegPressure(DAG.MRI, DAG.LiveIns[I]);
+ } else {
+ RPT.reset(*NonDbgMI, &DAG.LiveIns[I]);
+ RPT.advance(DAG.Regions[I].second);
+ RP = RPT.moveMaxPressure();
+ }
}
+ DAG.Pressure[I] = RP;
+ AchievedOcc =
+ std::min(AchievedOcc, RP.getOccupancy(ST, DynamicVGPRBlockSize));
}
- return TooOptimistic;
+ REMAT_DEBUG(dbgs() << "Achieved occupancy " << AchievedOcc << "\n");
}
// Copied from MachineLICM
@@ -2423,37 +2168,78 @@ bool PreRARematStage::isReMaterializable(const MachineInstr &MI) {
void PreRARematStage::finalizeGCNSchedStage() {
// We consider that reducing spilling is always beneficial so we never
// rollback rematerializations in such cases. It's also possible that
- // rescheduling lowers occupancy over the one achieved just through remats,
- // in which case we do not want to rollback either (the rescheduling was
- // already reverted in PreRARematStage::shouldRevertScheduling in such
- // cases).
+ // rescheduling lowers occupancy over the one achieved just through remats, in
+ // which case we do not want to rollback either (the rescheduling was already
+ // reverted in PreRARematStage::shouldRevertScheduling in such cases).
unsigned MaxOcc = std::max(AchievedOcc, DAG.MinOccupancy);
- if (!TargetOcc || MaxOcc >= *TargetOcc) {
- // Fully delete the original MIs that were rematerialized.
- for (const RollbackInfo &Rollback : Rollbacks)
- DAG.deleteMI(Rollback.Remat->DefRegion, Rollback.Remat->DefMI);
+ if (!TargetOcc || MaxOcc >= *TargetOcc)
return;
+
+ REMAT_DEBUG(dbgs() << "Rolling back all rematerializations\n");
+ const SIInstrInfo *TII = MF.getSubtarget<GCNSubtarget>().getInstrInfo();
+
+ // Rollback the rematerializations.
+ for (const auto &[DefMI, Remat] : Rematerializations) {
+ MachineInstr &RematMI = *Remat.RematMI;
+ unsigned DefRegion = MIRegion.at(DefMI);
+ MachineBasicBlock::iterator InsertPos(DAG.Regions[DefRegion].second);
+ MachineBasicBlock *MBB = RegionBB[DefRegion];
+ Register Reg = RematMI.getOperand(0).getReg();
+
+ // Re-rematerialize MI at the end of its original region. Note that it may
+ // not be rematerialized exactly in the same position as originally within
+ // the region, but it should not matter much.
+ TII->reMaterialize(*MBB, InsertPos, Reg, AMDGPU::NoSubRegister, RematMI);
+ MachineInstr *NewMI = &*std::prev(InsertPos);
+ DAG.LIS->InsertMachineInstrInMaps(*NewMI);
+
+ auto UseRegion = MIRegion.find(Remat.UseMI);
+ if (UseRegion != MIRegion.end()) {
+ DAG.updateRegionBoundaries(DAG.Regions[UseRegion->second], RematMI,
+ nullptr);
+ }
+ DAG.updateRegionBoundaries(DAG.Regions[DefRegion], InsertPos, NewMI);
+
+ // Erase rematerialized MI.
+ DAG.LIS->RemoveMachineInstrFromMaps(RematMI);
+ RematMI.eraseFromParent();
+
+ // Recompute live interval for the re-rematerialized register
+ DAG.LIS->removeInterval(Reg);
+ DAG.LIS->createAndComputeVirtRegInterval(Reg);
+
+ // Re-add the register as a live-in in all regions it used to be one in.
+ for (unsigned LIRegion : Remat.LiveInRegions)
+ DAG.LiveIns[LIRegion].insert({Reg, RegMasks.at({LIRegion, Reg})});
}
- // Rollback, then recompute pressure in all affected regions.
- REMAT_DEBUG(dbgs() << "==== ROLLBACK ====\n");
- BitVector RecomputeRP(DAG.Regions.size());
- for (const RollbackInfo &Rollback : Rollbacks)
- rollback(Rollback, RecomputeRP);
- for (unsigned I : RecomputeRP.set_bits())
- DAG.Pressure[I] = DAG.getRealRegPressure(I);
+ // Reset RP in all impacted regions.
+ for (auto &[I, OriginalRP] : ImpactedRegions)
+ DAG.Pressure[I] = OriginalRP;
GCNSchedStage::finalizeGCNSchedStage();
}
-void GCNScheduleDAGMILive::deleteMI(unsigned RegionIdx, MachineInstr *MI) {
- // It's not possible for the deleted instruction to be upper region boundary
- // since we don't delete region terminators.
- if (Regions[RegionIdx].first == MI)
- Regions[RegionIdx].first = std::next(MachineBasicBlock::iterator(MI));
- LIS->removeInterval(MI->getOperand(0).getReg());
- LIS->RemoveMachineInstrFromMaps(*MI);
- MI->eraseFromParent();
+void GCNScheduleDAGMILive::updateRegionBoundaries(
+ RegionBoundaries &RegionBounds, MachineBasicBlock::iterator MI,
+ MachineInstr *NewMI) {
+ assert((!NewMI || NewMI != RegionBounds.second) &&
+ "cannot remove at region end");
+
+ if (RegionBounds.first == RegionBounds.second) {
+ assert(NewMI && "cannot remove from an empty region");
+ RegionBounds.first = NewMI;
+ return;
+ }
+
+ // We only care for modifications at the beginning of a non-empty region since
+ // the upper region boundary is exclusive.
+ if (MI != RegionBounds.first)
+ return;
+ if (!NewMI)
+ RegionBounds.first = std::next(MI); // Removal
+ else
+ RegionBounds.first = NewMI; // Insertion
}
static bool hasIGLPInstrs(ScheduleDAGInstrs *DAG) {
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
index a5c4c960b1f31..95a931b9beb2a 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
@@ -18,8 +18,6 @@
#include "llvm/ADT/MapVector.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineScheduler.h"
-#include <cstdint>
-#include <limits>
namespace llvm {
@@ -302,12 +300,18 @@ class GCNScheduleDAGMILive final : public ScheduleDAGMILive {
// Compute and cache live-ins and pressure for all regions in block.
void computeBlockPressure(unsigned RegionIdx, const MachineBasicBlock *MBB);
+ /// If necessary, updates a region's boundaries following insertion ( \p NewMI
+ /// != nullptr) or removal ( \p NewMI == nullptr) of a \p MI in the region.
+ /// For an MI removal, this must be called before the MI is actually erased
+ /// from its parent MBB.
+ void updateRegionBoundaries(RegionBoundaries &RegionBounds,
+ MachineBasicBlock::iterator MI,
+ MachineInstr *NewMI);
+
void runSchedStages();
std::unique_ptr<GCNSchedStage> createSchedStage(GCNSchedStageID SchedStageID);
- void deleteMI(unsigned RegionIdx, MachineInstr *MI);
-
public:
GCNScheduleDAGMILive(MachineSchedContext *C,
std::unique_ptr<MachineSchedStrategy> S);
@@ -447,222 +451,65 @@ class ClusteredLowOccStage : public GCNSchedStage {
};
/// Attempts to reduce function spilling or, if there is no spilling, to
-/// increase function occupancy by one with respect to register usage by sinking
-/// rematerializable instructions to their use. When the stage estimates that
-/// reducing spilling or increasing occupancy is possible, it tries to
-/// rematerialize as few registers as possible to reduce potential negative
+/// increase function occupancy by one with respect to ArchVGPR usage by sinking
+/// rematerializable instructions to their use. When the stage estimates
+/// that reducing spilling or increasing occupancy is possible, as few
+/// instructions as possible are rematerialized to reduce potential negative
/// effects on function latency.
-///
-/// The stage only supports rematerializing registers that meet all of the
-/// following constraints.
-/// 1. The register is virtual and has a single defining instruction.
-/// 2. The single defining instruction is either deemed rematerializable by the
-/// target-independent logic, or if not, has no non-constant and
-/// non-ignorable physical register use.
-/// 3 The register has no virtual register use whose live range would be
-/// extended by the rematerialization.
-/// 4. The register has a single non-debug user in a different region from its
-/// defining region.
-/// 5. The register is not used by or using another register that is going to be
-/// rematerialized.
class PreRARematStage : public GCNSchedStage {
private:
- /// A rematerializable register.
- struct RematReg {
- /// Single MI defining the rematerializable register.
- MachineInstr *DefMI;
- /// Single user of the rematerializable register.
+ /// Useful information about a rematerializable instruction.
+ struct RematInstruction {
+ /// Single use of the rematerializable instruction's defined register,
+ /// located in a different block.
MachineInstr *UseMI;
- /// Regions in which the register is live-in/live-out/live anywhere.
- BitVector LiveIn, LiveOut, Live;
- /// The rematerializable register's lane bitmask.
- LaneBitmask Mask;
- /// Defining and using regions.
- unsigned DefRegion, UseRegion;
-
- RematReg(MachineInstr *DefMI, MachineInstr *UseMI,
- GCNScheduleDAGMILive &DAG,
- const DenseMap<MachineInstr *, unsigned> &MIRegion);
-
- /// Returns the rematerializable register. Do not call after deleting the
- /// original defining instruction.
- Register getReg() const { return DefMI->getOperand(0).getReg(); }
-
- /// Determines whether this rematerialization may be beneficial in at least
- /// one target region.
- bool maybeBeneficial(const BitVector &TargetRegions,
- ArrayRef<GCNRPTarget> RPTargets) const;
-
- /// Determines if the register is both unused and live-through in region \p
- /// I. This guarantees that rematerializing it will reduce RP in the region.
- bool isUnusedLiveThrough(unsigned I) const {
- assert(I < Live.size() && "region index out of range");
- return LiveIn[I] && LiveOut[I] && I != UseRegion;
- }
-
- /// Updates internal structures following a MI rematerialization. Part of
- /// the stage instead of the DAG because it makes assumptions that are
- /// specific to the rematerialization process.
- void insertMI(unsigned RegionIdx, MachineInstr *RematMI,
- GCNScheduleDAGMILive &DAG) const;
- };
-
- /// A scored rematerialization candidate. Higher scores indicate more
- /// beneficial rematerializations. A null score indicate the rematerialization
- /// is not helpful to reduce RP in target regions.
- struct ScoredRemat {
- /// The rematerializable register under consideration.
- const RematReg *Remat;
-
- /// Execution frequency information required by scoring heuristics.
- /// Frequencies are scaled down if they are high to avoid overflow/underflow
- /// when combining them.
- struct FreqInfo {
- /// Per-region execution frequencies. 0 when unknown.
- SmallVector<uint64_t> Regions;
- /// Minimum and maximum observed frequencies.
- uint64_t MinFreq, MaxFreq;
-
- FreqInfo(MachineFunction &MF, const GCNScheduleDAGMILive &DAG);
-
- private:
- static const uint64_t ScaleFactor = 1024;
- };
-
- /// This only initializes state-independent characteristics of \p Remat, not
- /// the actual score.
- ScoredRemat(const RematReg *Remat, const FreqInfo &Freq,
- const GCNScheduleDAGMILive &DAG);
-
- /// Updates the rematerialization's score w.r.t. the current \p RPTargets.
- /// \p RegionFreq indicates the frequency of each region
- void update(const BitVector &TargetRegions, ArrayRef<GCNRPTarget> RPTargets,
- const FreqInfo &Freq, bool ReduceSpill);
-
- /// Returns whether the current score is null, indicating the
- /// rematerialization is useless.
- bool hasNullScore() const { return !RegionImpact; }
-
- /// Compare score components of non-null scores pair-wise. A null score is
- /// always strictly lesser than another non-null score.
- bool operator<(const ScoredRemat &O) const {
- if (hasNullScore())
- return !O.hasNullScore();
- if (O.hasNullScore())
- return false;
- if (MaxFreq != O.MaxFreq)
- return MaxFreq < O.MaxFreq;
- if (FreqDiff != O.FreqDiff)
- return FreqDiff < O.FreqDiff;
- if (RegionImpact != O.RegionImpact)
- return RegionImpact < O.RegionImpact;
- // Break ties using pointer to rematerializable register. Rematerializable
- // registers are collected in instruction order so, within the same
- // region, this will prefer registers defined earlier that have longer
- // live ranges in their defining region (since the registers we consider
- // are always live-out in their defining region).
- return Remat > O.Remat;
- }
-
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
- Printable print() const;
-#endif
-
- private:
- /// Number of 32-bit registers this rematerialization covers.
- const unsigned NumRegs;
-
- // The three members below are the scoring components, top to bottom from
- // most important to least important when comparing candidates.
-
- /// Frequency of impacted target region with highest known frequency. This
- /// only matters when the stage is trying to reduce spilling, so it is
- /// always 0 when it is not.
- uint64_t MaxFreq;
- /// Frequency difference between defining and using regions. Negative values
- /// indicate we are rematerializing to higher frequency regions; positive
- /// values indicate the contrary.
- const int64_t FreqDiff;
- /// Expected number of target regions impacted by the rematerialization,
- /// scaled by the size of the register being rematerialized.
- unsigned RegionImpact;
-
- unsigned getNumRegs(const GCNScheduleDAGMILive &DAG) const;
-
- int64_t getFreqDiff(const FreqInfo &Freq) const;
- };
-
- /// Holds enough information to rollback a rematerialization decision post
- /// re-scheduling.
- struct RollbackInfo {
- /// The rematerializable register under consideration.
- const RematReg *Remat;
- /// The rematerialized MI replacing the original defining MI.
+ /// Rematerialized version of the defining MI, set in
+ /// PreRARematStage::rematerialize. Used for reverting rematerializations.
MachineInstr *RematMI;
+ /// Set of regions in which the rematerializable instruction's defined
+ /// register is a live-in.
+ SmallDenseSet<unsigned, 4> LiveInRegions;
- RollbackInfo(const RematReg *Remat) : Remat(Remat) {}
+ RematInstruction(MachineInstr *UseMI) : UseMI(UseMI) {}
};
+ /// Maps all MIs to their parent region. MI terminators are considered to be
+ /// outside the region they delimit, and as such are not stored in the map.
+ DenseMap<MachineInstr *, unsigned> MIRegion;
/// Parent MBB to each region, in region order.
SmallVector<MachineBasicBlock *> RegionBB;
-
- /// Register pressure targets for all regions.
- SmallVector<GCNRPTarget> RPTargets;
- /// Regions which are above the stage's RP target.
- BitVector TargetRegions;
- /// The target occupancy the set is trying to achieve. Empty when the
+ /// Collects instructions to rematerialize.
+ MapVector<MachineInstr *, RematInstruction> Rematerializations;
+ /// Collects regions whose live-ins or register pressure will change due to
+ /// rematerializations.
+ DenseMap<unsigned, GCNRegPressure> ImpactedRegions;
+ /// In case we need to rollback rematerializations, save lane masks for all
+ /// rematerialized registers in all regions in which they are live-ins.
+ DenseMap<std::pair<unsigned, Register>, LaneBitmask> RegMasks;
+ /// After successful stage initialization, indicates which regions should be
+ /// rescheduled.
+ BitVector RescheduleRegions;
+ /// The target occupancy the stage is trying to achieve. Empty when the
/// objective is spilling reduction.
std::optional<unsigned> TargetOcc;
/// Achieved occupancy *only* through rematerializations (pre-rescheduling).
- /// Smaller than or equal to the target occupancy, when it is defined.
+ /// Smaller than or equal to the target occupancy.
unsigned AchievedOcc;
- /// List of rematerializable registers.
- SmallVector<RematReg> RematRegs;
- /// List of rematerializations to rollback if rematerialization does not end
- /// up being beneficial.
- SmallVector<RollbackInfo> Rollbacks;
- /// After successful stage initialization, indicates which regions should be
- /// rescheduled.
- BitVector RescheduleRegions;
-
- /// Determines the stage's objective (increasing occupancy or reducing
- /// spilling, set in \ref TargetOcc). Defines \ref RPTargets in all regions to
- /// achieve that objective and mark those that don't achieve it in \ref
- /// TargetRegions. Returns whether there is any target region.
- bool setObjective();
-
- /// Unsets target regions in \p Regions whose RP target has been reached.
- void unsetSatisifedRPTargets(const BitVector &Regions);
-
- /// Fully recomputes RP from the DAG in \p Regions. Among those regions, sets
- /// again all \ref TargetRegions that were optimistically marked as satisfied
- /// but are actually not, and returns whether there were any such regions.
- bool updateAndVerifyRPTargets(const BitVector &Regions);
-
- /// Collects all rematerializable registers and appends them to \ref
- /// RematRegs. \p MIRegion maps MIs to their region. Returns whether any
- /// rematerializable register was found.
- bool collectRematRegs(const DenseMap<MachineInstr *, unsigned> &MIRegion);
-
- /// Rematerializes \p Remat. This removes the rematerialized register from
- /// live-in/out lists in the DAG and updates RP targets in all affected
- /// regions, which are also marked in \ref RescheduleRegions. Regions in which
- /// RP savings are not guaranteed are set in \p RecomputeRP. When \p Rollback
- /// is non-null, fills it with required information to be able to rollback the
- /// rematerialization post-rescheduling.
- void rematerialize(const RematReg &Remat, BitVector &RecomputeRP,
- RollbackInfo *Rollback);
-
- /// Rollbacks the rematerialization decision represented by \p Rollback. This
- /// update live-in/out lists in the DAG but does not update cached register
- /// pressures. Regions in which RP may be impacted are marked in \ref
- /// RecomputeRP.
- void rollback(const RollbackInfo &Rollback, BitVector &RecomputeRP) const;
+ /// Returns whether the stage can reduce spilling or increase function
+ /// occupancy by 1 through rematerialization. If it can do either, collects
+ /// instructions in PreRARematStage::Rematerializations and sets the target
+ /// occupancy in PreRARematStage::TargetOcc.
+ bool canIncreaseOccupancyOrReduceSpill();
/// Whether the MI is rematerializable
bool isReMaterializable(const MachineInstr &MI);
+ /// Rematerializes all instructions in PreRARematStage::Rematerializations
+ /// and stores the achieved occupancy after remat in
+ /// PreRARematStage::AchievedOcc.
+ void rematerialize();
+
/// If remat alone did not increase occupancy to the target one, rollbacks all
/// rematerializations and resets live-ins/RP in all regions impacted by the
/// stage to their pre-stage values.
@@ -676,12 +523,7 @@ class PreRARematStage : public GCNSchedStage {
bool shouldRevertScheduling(unsigned WavesAfter) override;
PreRARematStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
- : GCNSchedStage(StageID, DAG), TargetRegions(DAG.Regions.size()),
- RescheduleRegions(DAG.Regions.size()) {
- const unsigned NumRegions = DAG.Regions.size();
- RPTargets.reserve(NumRegions);
- RegionBB.reserve(NumRegions);
- }
+ : GCNSchedStage(StageID, DAG), RescheduleRegions(DAG.Regions.size()) {}
};
class ILPInitialScheduleStage : public GCNSchedStage {
diff --git a/llvm/test/CodeGen/AMDGPU/machine-scheduler-rematerialization-scoring.mir b/llvm/test/CodeGen/AMDGPU/machine-scheduler-rematerialization-scoring.mir
deleted file mode 100644
index 0bfcb638038fd..0000000000000
--- a/llvm/test/CodeGen/AMDGPU/machine-scheduler-rematerialization-scoring.mir
+++ /dev/null
@@ -1,523 +0,0 @@
-# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn -mcpu=gfx908 -run-pass=machine-scheduler -amdgpu-disable-unclustered-high-rp-reschedule -verify-machineinstrs %s -o - | FileCheck %s
-
-# All tests are almost identical, the only differences being that some
-# VGPR-defining instructions are progressively made artificially
-# unrematerializable with an implicit def to test rematerialization
-# priorities. The CFG is the following for all tests in the file.
-#
-# +---+
-# | 0 |
-# +---+
-# |
-# v
-# +---+
-# +------>| 1 |-----+
-# | +---+ |
-# | | v
-# | | +---+
-# | | | 2 |
-# | | +-+-+
-# | v |
-# +---+ +---+ |
-# | 4 |<----| 3 |<----+
-# +---+ +---+
-# |
-# v
-# +---+
-# | 5 |
-# +---+
-
-# %32's defining and using region frequencies are identical therefore it is the
-# best register to rematerialize.
-name: favor_same_frequency
-tracksRegLiveness: true
-machineFunctionInfo:
- isEntryFunction: true
-body: |
- ; CHECK-LABEL: name: favor_same_frequency
- ; CHECK: bb.0:
- ; CHECK-NEXT: successors: %bb.1(0x80000000)
- ; CHECK-NEXT: liveins: $vgpr0, $sgpr0_sgpr1
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: %mem_addr:sgpr_64 = COPY $sgpr0_sgpr1
- ; CHECK-NEXT: %loop_if_bound:vgpr_32 = COPY $vgpr0
- ; CHECK-NEXT: %mem_data:sreg_64_xexec = S_LOAD_DWORDX2_IMM %mem_addr, 52, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4)
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 0, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode
- ; CHECK-NEXT: %exec_loop_mask:sreg_64 = V_CMP_GT_U32_e64 %mem_data.sub0, %loop_if_bound, implicit $exec
- ; CHECK-NEXT: %loop_counter:sreg_32 = COPY %mem_data.sub1
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_3:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_4:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 4, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_5:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 5, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_6:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 6, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_7:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 7, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_8:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 8, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_9:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 9, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_10:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 10, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_11:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 11, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_12:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 12, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_13:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 13, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_14:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 14, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_15:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 15, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_16:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 16, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_17:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 17, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_18:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 18, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_19:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 19, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_20:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 20, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_21:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 21, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 25, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_26:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 26, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 27, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 28, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 29, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 30, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 31, implicit $exec, implicit $mode
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.1:
- ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000)
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: %exec_save_if:sreg_64 = COPY $exec, implicit-def $exec
- ; CHECK-NEXT: %exec_if:sreg_64 = S_AND_B64 %exec_save_if, %exec_loop_mask, implicit-def dead $scc
- ; CHECK-NEXT: $exec = S_MOV_B64_term %exec_if
- ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.3, implicit $exec
- ; CHECK-NEXT: S_BRANCH %bb.2
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.2:
- ; CHECK-NEXT: successors: %bb.3(0x80000000)
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_24]], implicit [[V_CVT_I32_F64_e32_25]], implicit [[V_CVT_I32_F64_e32_26]], implicit [[V_CVT_I32_F64_e32_27]], implicit [[V_CVT_I32_F64_e32_28]], implicit [[V_CVT_I32_F64_e32_29]], implicit [[V_CVT_I32_F64_e32_30]], implicit [[V_CVT_I32_F64_e32_31]]
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.3:
- ; CHECK-NEXT: successors: %bb.4(0x7c000000), %bb.5(0x04000000)
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: $exec = S_OR_B64 $exec, %exec_save_if, implicit-def $scc
- ; CHECK-NEXT: %loop_counter:sreg_32 = S_ADD_I32 %loop_counter, -1, implicit-def dead $scc
- ; CHECK-NEXT: S_CMP_LG_U32 %loop_counter, 0, implicit-def $scc
- ; CHECK-NEXT: S_CBRANCH_SCC0 %bb.5, implicit killed $scc
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.4:
- ; CHECK-NEXT: successors: %bb.1(0x80000000)
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_]], implicit [[V_CVT_I32_F64_e32_1]], implicit [[V_CVT_I32_F64_e32_2]], implicit [[V_CVT_I32_F64_e32_3]], implicit [[V_CVT_I32_F64_e32_4]], implicit [[V_CVT_I32_F64_e32_5]], implicit [[V_CVT_I32_F64_e32_6]], implicit [[V_CVT_I32_F64_e32_7]]
- ; CHECK-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_8]], implicit [[V_CVT_I32_F64_e32_9]], implicit [[V_CVT_I32_F64_e32_10]], implicit [[V_CVT_I32_F64_e32_11]], implicit [[V_CVT_I32_F64_e32_12]], implicit [[V_CVT_I32_F64_e32_13]], implicit [[V_CVT_I32_F64_e32_14]], implicit [[V_CVT_I32_F64_e32_15]]
- ; CHECK-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_16]], implicit [[V_CVT_I32_F64_e32_17]], implicit [[V_CVT_I32_F64_e32_18]], implicit [[V_CVT_I32_F64_e32_19]], implicit [[V_CVT_I32_F64_e32_20]], implicit [[V_CVT_I32_F64_e32_21]], implicit [[V_CVT_I32_F64_e32_22]], implicit [[V_CVT_I32_F64_e32_23]]
- ; CHECK-NEXT: S_BRANCH %bb.1
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.5:
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_32:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 32, implicit $exec, implicit $mode
- ; CHECK-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_32]]
- ; CHECK-NEXT: S_ENDPGM 0
- bb.0:
- liveins: $vgpr0, $sgpr0_sgpr1
-
- %mem_addr:sgpr_64 = COPY $sgpr0_sgpr1
- %loop_if_bound:vgpr_32 = COPY $vgpr0
- %mem_data:sreg_64_xexec = S_LOAD_DWORDX2_IMM %mem_addr, 52, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4)
- %exec_loop_mask:sreg_64 = V_CMP_GT_U32_e64 %mem_data.sub0, killed %loop_if_bound, implicit $exec
- %loop_counter:sreg_32 = COPY %mem_data.sub1
-
- %0:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 0, implicit $exec, implicit $mode
- %1:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode
- %2:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode
- %3:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode
- %4:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 4, implicit $exec, implicit $mode
- %5:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 5, implicit $exec, implicit $mode
- %6:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 6, implicit $exec, implicit $mode
- %7:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 7, implicit $exec, implicit $mode
- %8:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 8, implicit $exec, implicit $mode
- %9:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 9, implicit $exec, implicit $mode
- %10:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 10, implicit $exec, implicit $mode
- %11:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 11, implicit $exec, implicit $mode
- %12:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 12, implicit $exec, implicit $mode
- %13:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 13, implicit $exec, implicit $mode
- %14:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 14, implicit $exec, implicit $mode
- %15:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 15, implicit $exec, implicit $mode
- %16:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 16, implicit $exec, implicit $mode
- %17:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 17, implicit $exec, implicit $mode
- %18:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 18, implicit $exec, implicit $mode
- %19:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 19, implicit $exec, implicit $mode
- %20:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 20, implicit $exec, implicit $mode
- %21:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 21, implicit $exec, implicit $mode
- %22:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode
- %23:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode
- %24:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode
- %25:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 25, implicit $exec, implicit $mode
- %26:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 26, implicit $exec, implicit $mode
- %27:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 27, implicit $exec, implicit $mode
- %28:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 28, implicit $exec, implicit $mode
- %29:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 29, implicit $exec, implicit $mode
- %30:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 30, implicit $exec, implicit $mode
- %31:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 31, implicit $exec, implicit $mode
- %32:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 32, implicit $exec, implicit $mode
-
- bb.1:
- successors: %bb.2, %bb.3
-
- %exec_save_if:sreg_64 = COPY $exec, implicit-def $exec
- %exec_if:sreg_64 = S_AND_B64 %exec_save_if, %exec_loop_mask, implicit-def dead $scc
- $exec = S_MOV_B64_term %exec_if
- S_CBRANCH_EXECZ %bb.3, implicit $exec
- S_BRANCH %bb.2
-
- bb.2:
- successors: %bb.3
-
- S_NOP 0, implicit %24, implicit %25, implicit %26, implicit %27, implicit %28, implicit %29, implicit %30, implicit %31
-
- bb.3:
- successors: %bb.4(0x7c000000), %bb.5(0x04000000)
-
- $exec = S_OR_B64 $exec, %exec_save_if, implicit-def $scc
- %loop_counter:sreg_32 = S_ADD_I32 %loop_counter, -1, implicit-def dead $scc
- S_CMP_LG_U32 %loop_counter, 0, implicit-def $scc
- S_CBRANCH_SCC0 %bb.5, implicit killed $scc
-
- bb.4:
- successors: %bb.1
-
- S_NOP 0, implicit %0, implicit %1, implicit %2, implicit %3, implicit %4, implicit %5, implicit %6, implicit %7
- S_NOP 0, implicit %8, implicit %9, implicit %10, implicit %11, implicit %12, implicit %13, implicit %14, implicit %15
- S_NOP 0, implicit %16, implicit %17, implicit %18, implicit %19, implicit %20, implicit %21, implicit %22, implicit %23
-
- S_BRANCH %bb.1
-
- bb.5:
-
- S_NOP 0, implicit %32
-
- S_ENDPGM 0
-...
----
-# bb.2's frequency is lesser than bb.4's therefore it is preferable to
-# rematerialize registers in bb.2 instead of bb.4.
-name: favor_lower_frequency
-tracksRegLiveness: true
-machineFunctionInfo:
- isEntryFunction: true
-body: |
- ; CHECK-LABEL: name: favor_lower_frequency
- ; CHECK: bb.0:
- ; CHECK-NEXT: successors: %bb.1(0x80000000)
- ; CHECK-NEXT: liveins: $vgpr0, $sgpr0_sgpr1
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: %mem_addr:sgpr_64 = COPY $sgpr0_sgpr1
- ; CHECK-NEXT: %loop_if_bound:vgpr_32 = COPY $vgpr0
- ; CHECK-NEXT: %mem_data:sreg_64_xexec = S_LOAD_DWORDX2_IMM %mem_addr, 52, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4)
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_3:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 4, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_4:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 5, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_5:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 6, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_6:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 7, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_7:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 8, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_8:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 9, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_9:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 10, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_10:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 11, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_11:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 12, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_12:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 13, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_13:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 14, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_14:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 15, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_15:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 16, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_16:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 17, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_17:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 18, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_18:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 19, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_19:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 20, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_20:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 21, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_21:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 32, implicit $exec, implicit $mode, implicit-def $m0
- ; CHECK-NEXT: %exec_loop_mask:sreg_64 = V_CMP_GT_U32_e64 %mem_data.sub0, %loop_if_bound, implicit $exec
- ; CHECK-NEXT: %loop_counter:sreg_32 = COPY %mem_data.sub1
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.1:
- ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000)
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: %exec_save_if:sreg_64 = COPY $exec, implicit-def $exec
- ; CHECK-NEXT: %exec_if:sreg_64 = S_AND_B64 %exec_save_if, %exec_loop_mask, implicit-def dead $scc
- ; CHECK-NEXT: $exec = S_MOV_B64_term %exec_if
- ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.3, implicit $exec
- ; CHECK-NEXT: S_BRANCH %bb.2
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.2:
- ; CHECK-NEXT: successors: %bb.3(0x80000000)
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 25, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_26:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 26, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 27, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 28, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 29, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 30, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 31, implicit $exec, implicit $mode
- ; CHECK-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_24]], implicit [[V_CVT_I32_F64_e32_25]], implicit [[V_CVT_I32_F64_e32_26]], implicit [[V_CVT_I32_F64_e32_27]], implicit [[V_CVT_I32_F64_e32_28]], implicit [[V_CVT_I32_F64_e32_29]], implicit [[V_CVT_I32_F64_e32_30]], implicit [[V_CVT_I32_F64_e32_31]]
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.3:
- ; CHECK-NEXT: successors: %bb.4(0x7c000000), %bb.5(0x04000000)
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: $exec = S_OR_B64 $exec, %exec_save_if, implicit-def $scc
- ; CHECK-NEXT: %loop_counter:sreg_32 = S_ADD_I32 %loop_counter, -1, implicit-def dead $scc
- ; CHECK-NEXT: S_CMP_LG_U32 %loop_counter, 0, implicit-def $scc
- ; CHECK-NEXT: S_CBRANCH_SCC0 %bb.5, implicit killed $scc
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.4:
- ; CHECK-NEXT: successors: %bb.1(0x80000000)
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_32:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 0, implicit $exec, implicit $mode
- ; CHECK-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_32]], implicit [[V_CVT_I32_F64_e32_]], implicit [[V_CVT_I32_F64_e32_1]], implicit [[V_CVT_I32_F64_e32_2]], implicit [[V_CVT_I32_F64_e32_3]], implicit [[V_CVT_I32_F64_e32_4]], implicit [[V_CVT_I32_F64_e32_5]], implicit [[V_CVT_I32_F64_e32_6]]
- ; CHECK-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_7]], implicit [[V_CVT_I32_F64_e32_8]], implicit [[V_CVT_I32_F64_e32_9]], implicit [[V_CVT_I32_F64_e32_10]], implicit [[V_CVT_I32_F64_e32_11]], implicit [[V_CVT_I32_F64_e32_12]], implicit [[V_CVT_I32_F64_e32_13]], implicit [[V_CVT_I32_F64_e32_14]]
- ; CHECK-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_15]], implicit [[V_CVT_I32_F64_e32_16]], implicit [[V_CVT_I32_F64_e32_17]], implicit [[V_CVT_I32_F64_e32_18]], implicit [[V_CVT_I32_F64_e32_19]], implicit [[V_CVT_I32_F64_e32_20]], implicit [[V_CVT_I32_F64_e32_21]], implicit [[V_CVT_I32_F64_e32_22]]
- ; CHECK-NEXT: S_BRANCH %bb.1
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.5:
- ; CHECK-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_23]]
- ; CHECK-NEXT: S_ENDPGM 0
- bb.0:
- liveins: $vgpr0, $sgpr0_sgpr1
-
- %mem_addr:sgpr_64 = COPY $sgpr0_sgpr1
- %loop_if_bound:vgpr_32 = COPY $vgpr0
- %mem_data:sreg_64_xexec = S_LOAD_DWORDX2_IMM %mem_addr, 52, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4)
- %exec_loop_mask:sreg_64 = V_CMP_GT_U32_e64 %mem_data.sub0, killed %loop_if_bound, implicit $exec
- %loop_counter:sreg_32 = COPY %mem_data.sub1
-
- %0:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 0, implicit $exec, implicit $mode
- %1:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode
- %2:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode
- %3:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode
- %4:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 4, implicit $exec, implicit $mode
- %5:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 5, implicit $exec, implicit $mode
- %6:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 6, implicit $exec, implicit $mode
- %7:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 7, implicit $exec, implicit $mode
- %8:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 8, implicit $exec, implicit $mode
- %9:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 9, implicit $exec, implicit $mode
- %10:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 10, implicit $exec, implicit $mode
- %11:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 11, implicit $exec, implicit $mode
- %12:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 12, implicit $exec, implicit $mode
- %13:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 13, implicit $exec, implicit $mode
- %14:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 14, implicit $exec, implicit $mode
- %15:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 15, implicit $exec, implicit $mode
- %16:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 16, implicit $exec, implicit $mode
- %17:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 17, implicit $exec, implicit $mode
- %18:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 18, implicit $exec, implicit $mode
- %19:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 19, implicit $exec, implicit $mode
- %20:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 20, implicit $exec, implicit $mode
- %21:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 21, implicit $exec, implicit $mode
- %22:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode
- %23:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode
- %24:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode
- %25:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 25, implicit $exec, implicit $mode
- %26:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 26, implicit $exec, implicit $mode
- %27:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 27, implicit $exec, implicit $mode
- %28:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 28, implicit $exec, implicit $mode
- %29:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 29, implicit $exec, implicit $mode
- %30:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 30, implicit $exec, implicit $mode
- %31:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 31, implicit $exec, implicit $mode
- %32:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 32, implicit $exec, implicit $mode, implicit-def $m0
-
- bb.1:
- successors: %bb.2, %bb.3
-
- %exec_save_if:sreg_64 = COPY $exec, implicit-def $exec
- %exec_if:sreg_64 = S_AND_B64 %exec_save_if, %exec_loop_mask, implicit-def dead $scc
- $exec = S_MOV_B64_term %exec_if
- S_CBRANCH_EXECZ %bb.3, implicit $exec
- S_BRANCH %bb.2
-
- bb.2:
- successors: %bb.3
-
- S_NOP 0, implicit %24, implicit %25, implicit %26, implicit %27, implicit %28, implicit %29, implicit %30, implicit %31
-
- bb.3:
- successors: %bb.4(0x7c000000), %bb.5(0x04000000)
-
- $exec = S_OR_B64 $exec, %exec_save_if, implicit-def $scc
- %loop_counter:sreg_32 = S_ADD_I32 %loop_counter, -1, implicit-def dead $scc
- S_CMP_LG_U32 %loop_counter, 0, implicit-def $scc
- S_CBRANCH_SCC0 %bb.5, implicit killed $scc
-
- bb.4:
- successors: %bb.1
-
- S_NOP 0, implicit %0, implicit %1, implicit %2, implicit %3, implicit %4, implicit %5, implicit %6, implicit %7
- S_NOP 0, implicit %8, implicit %9, implicit %10, implicit %11, implicit %12, implicit %13, implicit %14, implicit %15
- S_NOP 0, implicit %16, implicit %17, implicit %18, implicit %19, implicit %20, implicit %21, implicit %22, implicit %23
-
- S_BRANCH %bb.1
-
- bb.5:
-
- S_NOP 0, implicit %32
-
- S_ENDPGM 0
-...
----
-# Rematerializing registers used in bb.4 is the only option.
-name: remat_in_only_possible_region
-tracksRegLiveness: true
-machineFunctionInfo:
- isEntryFunction: true
-body: |
- ; CHECK-LABEL: name: remat_in_only_possible_region
- ; CHECK: bb.0:
- ; CHECK-NEXT: successors: %bb.1(0x80000000)
- ; CHECK-NEXT: liveins: $vgpr0, $sgpr0_sgpr1
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: %mem_addr:sgpr_64 = COPY $sgpr0_sgpr1
- ; CHECK-NEXT: %loop_if_bound:vgpr_32 = COPY $vgpr0
- ; CHECK-NEXT: %mem_data:sreg_64_xexec = S_LOAD_DWORDX2_IMM %mem_addr, 52, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4)
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 9, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 10, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 11, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_3:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 12, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_4:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 13, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_5:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 14, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_6:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 15, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_7:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 16, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_8:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 17, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_9:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 18, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_10:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 19, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_11:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 20, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_12:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 21, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_13:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_14:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_15:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_16:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 25, implicit $exec, implicit $mode, implicit-def $m0
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_17:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 26, implicit $exec, implicit $mode, implicit-def $m0
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_18:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 27, implicit $exec, implicit $mode, implicit-def $m0
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_19:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 28, implicit $exec, implicit $mode, implicit-def $m0
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_20:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 29, implicit $exec, implicit $mode, implicit-def $m0
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_21:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 30, implicit $exec, implicit $mode, implicit-def $m0
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 31, implicit $exec, implicit $mode, implicit-def $m0
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 32, implicit $exec, implicit $mode, implicit-def $m0
- ; CHECK-NEXT: %exec_loop_mask:sreg_64 = V_CMP_GT_U32_e64 %mem_data.sub0, %loop_if_bound, implicit $exec
- ; CHECK-NEXT: %loop_counter:sreg_32 = COPY %mem_data.sub1
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.1:
- ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000)
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: %exec_save_if:sreg_64 = COPY $exec, implicit-def $exec
- ; CHECK-NEXT: %exec_if:sreg_64 = S_AND_B64 %exec_save_if, %exec_loop_mask, implicit-def dead $scc
- ; CHECK-NEXT: $exec = S_MOV_B64_term %exec_if
- ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.3, implicit $exec
- ; CHECK-NEXT: S_BRANCH %bb.2
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.2:
- ; CHECK-NEXT: successors: %bb.3(0x80000000)
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_15]], implicit [[V_CVT_I32_F64_e32_16]], implicit [[V_CVT_I32_F64_e32_17]], implicit [[V_CVT_I32_F64_e32_18]], implicit [[V_CVT_I32_F64_e32_19]], implicit [[V_CVT_I32_F64_e32_20]], implicit [[V_CVT_I32_F64_e32_21]], implicit [[V_CVT_I32_F64_e32_22]]
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.3:
- ; CHECK-NEXT: successors: %bb.4(0x7c000000), %bb.5(0x04000000)
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: $exec = S_OR_B64 $exec, %exec_save_if, implicit-def $scc
- ; CHECK-NEXT: %loop_counter:sreg_32 = S_ADD_I32 %loop_counter, -1, implicit-def dead $scc
- ; CHECK-NEXT: S_CMP_LG_U32 %loop_counter, 0, implicit-def $scc
- ; CHECK-NEXT: S_CBRANCH_SCC0 %bb.5, implicit killed $scc
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.4:
- ; CHECK-NEXT: successors: %bb.1(0x80000000)
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 0, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_26:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 4, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 5, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 6, implicit $exec, implicit $mode
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 7, implicit $exec, implicit $mode
- ; CHECK-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_24]], implicit [[V_CVT_I32_F64_e32_25]], implicit [[V_CVT_I32_F64_e32_26]], implicit [[V_CVT_I32_F64_e32_27]], implicit [[V_CVT_I32_F64_e32_28]], implicit [[V_CVT_I32_F64_e32_29]], implicit [[V_CVT_I32_F64_e32_30]], implicit [[V_CVT_I32_F64_e32_31]]
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_32:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 8, implicit $exec, implicit $mode
- ; CHECK-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_32]], implicit [[V_CVT_I32_F64_e32_]], implicit [[V_CVT_I32_F64_e32_1]], implicit [[V_CVT_I32_F64_e32_2]], implicit [[V_CVT_I32_F64_e32_3]], implicit [[V_CVT_I32_F64_e32_4]], implicit [[V_CVT_I32_F64_e32_5]], implicit [[V_CVT_I32_F64_e32_6]]
- ; CHECK-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_7]], implicit [[V_CVT_I32_F64_e32_8]], implicit [[V_CVT_I32_F64_e32_9]], implicit [[V_CVT_I32_F64_e32_10]], implicit [[V_CVT_I32_F64_e32_11]], implicit [[V_CVT_I32_F64_e32_12]], implicit [[V_CVT_I32_F64_e32_13]], implicit [[V_CVT_I32_F64_e32_14]]
- ; CHECK-NEXT: S_BRANCH %bb.1
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.5:
- ; CHECK-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_23]]
- ; CHECK-NEXT: S_ENDPGM 0
- bb.0:
- liveins: $vgpr0, $sgpr0_sgpr1
-
- %mem_addr:sgpr_64 = COPY $sgpr0_sgpr1
- %loop_if_bound:vgpr_32 = COPY $vgpr0
- %mem_data:sreg_64_xexec = S_LOAD_DWORDX2_IMM %mem_addr, 52, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4)
- %exec_loop_mask:sreg_64 = V_CMP_GT_U32_e64 %mem_data.sub0, killed %loop_if_bound, implicit $exec
- %loop_counter:sreg_32 = COPY %mem_data.sub1
-
- %0:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 0, implicit $exec, implicit $mode
- %1:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode
- %2:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode
- %3:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode
- %4:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 4, implicit $exec, implicit $mode
- %5:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 5, implicit $exec, implicit $mode
- %6:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 6, implicit $exec, implicit $mode
- %7:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 7, implicit $exec, implicit $mode
- %8:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 8, implicit $exec, implicit $mode
- %9:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 9, implicit $exec, implicit $mode
- %10:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 10, implicit $exec, implicit $mode
- %11:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 11, implicit $exec, implicit $mode
- %12:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 12, implicit $exec, implicit $mode
- %13:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 13, implicit $exec, implicit $mode
- %14:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 14, implicit $exec, implicit $mode
- %15:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 15, implicit $exec, implicit $mode
- %16:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 16, implicit $exec, implicit $mode
- %17:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 17, implicit $exec, implicit $mode
- %18:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 18, implicit $exec, implicit $mode
- %19:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 19, implicit $exec, implicit $mode
- %20:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 20, implicit $exec, implicit $mode
- %21:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 21, implicit $exec, implicit $mode
- %22:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode
- %23:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode
- %24:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
- %25:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 25, implicit $exec, implicit $mode, implicit-def $m0
- %26:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 26, implicit $exec, implicit $mode, implicit-def $m0
- %27:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 27, implicit $exec, implicit $mode, implicit-def $m0
- %28:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 28, implicit $exec, implicit $mode, implicit-def $m0
- %29:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 29, implicit $exec, implicit $mode, implicit-def $m0
- %30:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 30, implicit $exec, implicit $mode, implicit-def $m0
- %31:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 31, implicit $exec, implicit $mode, implicit-def $m0
- %32:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 32, implicit $exec, implicit $mode, implicit-def $m0
-
- bb.1:
- successors: %bb.2, %bb.3
-
- %exec_save_if:sreg_64 = COPY $exec, implicit-def $exec
- %exec_if:sreg_64 = S_AND_B64 %exec_save_if, %exec_loop_mask, implicit-def dead $scc
- $exec = S_MOV_B64_term %exec_if
- S_CBRANCH_EXECZ %bb.3, implicit $exec
- S_BRANCH %bb.2
-
- bb.2:
- successors: %bb.3
-
- S_NOP 0, implicit %24, implicit %25, implicit %26, implicit %27, implicit %28, implicit %29, implicit %30, implicit %31
-
- bb.3:
- successors: %bb.4(0x7c000000), %bb.5(0x04000000)
-
- $exec = S_OR_B64 $exec, %exec_save_if, implicit-def $scc
- %loop_counter:sreg_32 = S_ADD_I32 %loop_counter, -1, implicit-def dead $scc
- S_CMP_LG_U32 %loop_counter, 0, implicit-def $scc
- S_CBRANCH_SCC0 %bb.5, implicit killed $scc
-
- bb.4:
- successors: %bb.1
-
- S_NOP 0, implicit %0, implicit %1, implicit %2, implicit %3, implicit %4, implicit %5, implicit %6, implicit %7
- S_NOP 0, implicit %8, implicit %9, implicit %10, implicit %11, implicit %12, implicit %13, implicit %14, implicit %15
- S_NOP 0, implicit %16, implicit %17, implicit %18, implicit %19, implicit %20, implicit %21, implicit %22, implicit %23
-
- S_BRANCH %bb.1
-
- bb.5:
-
- S_NOP 0, implicit %32
-
- S_ENDPGM 0
-...
diff --git a/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats-attr.mir b/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats-attr.mir
index 1daa709ab6439..3b3ea3f37db80 100644
--- a/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats-attr.mir
+++ b/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats-attr.mir
@@ -37,89 +37,88 @@ body: |
; GFX908: bb.0:
; GFX908-NEXT: successors: %bb.1(0x80000000)
; GFX908-NEXT: {{ $}}
- ; GFX908-NEXT: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 6
- ; GFX908-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 7
- ; GFX908-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 8
- ; GFX908-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sgpr_32 = S_MOV_B32 9
- ; GFX908-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sgpr_32 = S_MOV_B32 10
- ; GFX908-NEXT: [[S_MOV_B32_5:%[0-9]+]]:sgpr_32 = S_MOV_B32 11
- ; GFX908-NEXT: [[S_MOV_B32_6:%[0-9]+]]:sgpr_32 = S_MOV_B32 12
- ; GFX908-NEXT: [[S_MOV_B32_7:%[0-9]+]]:sgpr_32 = S_MOV_B32 13
- ; GFX908-NEXT: [[S_MOV_B32_8:%[0-9]+]]:sgpr_32 = S_MOV_B32 14
- ; GFX908-NEXT: [[S_MOV_B32_9:%[0-9]+]]:sgpr_32 = S_MOV_B32 15
- ; GFX908-NEXT: [[S_MOV_B32_10:%[0-9]+]]:sgpr_32 = S_MOV_B32 16
- ; GFX908-NEXT: [[S_MOV_B32_11:%[0-9]+]]:sgpr_32 = S_MOV_B32 17
- ; GFX908-NEXT: [[S_MOV_B32_12:%[0-9]+]]:sgpr_32 = S_MOV_B32 18
- ; GFX908-NEXT: [[S_MOV_B32_13:%[0-9]+]]:sgpr_32 = S_MOV_B32 19
- ; GFX908-NEXT: [[S_MOV_B32_14:%[0-9]+]]:sgpr_32 = S_MOV_B32 20
- ; GFX908-NEXT: [[S_MOV_B32_15:%[0-9]+]]:sgpr_32 = S_MOV_B32 21
- ; GFX908-NEXT: [[S_MOV_B32_16:%[0-9]+]]:sgpr_32 = S_MOV_B32 22
- ; GFX908-NEXT: [[S_MOV_B32_17:%[0-9]+]]:sgpr_32 = S_MOV_B32 23
- ; GFX908-NEXT: [[S_MOV_B32_18:%[0-9]+]]:sgpr_32 = S_MOV_B32 24
- ; GFX908-NEXT: [[S_MOV_B32_19:%[0-9]+]]:sgpr_32 = S_MOV_B32 25
- ; GFX908-NEXT: [[S_MOV_B32_20:%[0-9]+]]:sgpr_32 = S_MOV_B32 26
- ; GFX908-NEXT: [[S_MOV_B32_21:%[0-9]+]]:sgpr_32 = S_MOV_B32 27
- ; GFX908-NEXT: [[S_MOV_B32_22:%[0-9]+]]:sgpr_32 = S_MOV_B32 28
- ; GFX908-NEXT: [[S_MOV_B32_23:%[0-9]+]]:sgpr_32 = S_MOV_B32 29
- ; GFX908-NEXT: [[S_MOV_B32_24:%[0-9]+]]:sgpr_32 = S_MOV_B32 30
- ; GFX908-NEXT: [[S_MOV_B32_25:%[0-9]+]]:sgpr_32 = S_MOV_B32 31
- ; GFX908-NEXT: [[S_MOV_B32_26:%[0-9]+]]:sgpr_32 = S_MOV_B32 32
- ; GFX908-NEXT: [[S_MOV_B32_27:%[0-9]+]]:sgpr_32 = S_MOV_B32 33
- ; GFX908-NEXT: [[S_MOV_B32_28:%[0-9]+]]:sgpr_32 = S_MOV_B32 34
- ; GFX908-NEXT: [[S_MOV_B32_29:%[0-9]+]]:sgpr_32 = S_MOV_B32 35
- ; GFX908-NEXT: [[S_MOV_B32_30:%[0-9]+]]:sgpr_32 = S_MOV_B32 36
- ; GFX908-NEXT: [[S_MOV_B32_31:%[0-9]+]]:sgpr_32 = S_MOV_B32 37
- ; GFX908-NEXT: [[S_MOV_B32_32:%[0-9]+]]:sgpr_32 = S_MOV_B32 38
- ; GFX908-NEXT: [[S_MOV_B32_33:%[0-9]+]]:sgpr_32 = S_MOV_B32 39
- ; GFX908-NEXT: [[S_MOV_B32_34:%[0-9]+]]:sgpr_32 = S_MOV_B32 40
- ; GFX908-NEXT: [[S_MOV_B32_35:%[0-9]+]]:sgpr_32 = S_MOV_B32 41
- ; GFX908-NEXT: [[S_MOV_B32_36:%[0-9]+]]:sgpr_32 = S_MOV_B32 42
- ; GFX908-NEXT: [[S_MOV_B32_37:%[0-9]+]]:sgpr_32 = S_MOV_B32 43
- ; GFX908-NEXT: [[S_MOV_B32_38:%[0-9]+]]:sgpr_32 = S_MOV_B32 44
- ; GFX908-NEXT: [[S_MOV_B32_39:%[0-9]+]]:sgpr_32 = S_MOV_B32 45
- ; GFX908-NEXT: [[S_MOV_B32_40:%[0-9]+]]:sgpr_32 = S_MOV_B32 46
- ; GFX908-NEXT: [[S_MOV_B32_41:%[0-9]+]]:sgpr_32 = S_MOV_B32 47
- ; GFX908-NEXT: [[S_MOV_B32_42:%[0-9]+]]:sgpr_32 = S_MOV_B32 48
- ; GFX908-NEXT: [[S_MOV_B32_43:%[0-9]+]]:sgpr_32 = S_MOV_B32 49
- ; GFX908-NEXT: [[S_MOV_B32_44:%[0-9]+]]:sgpr_32 = S_MOV_B32 50
- ; GFX908-NEXT: [[S_MOV_B32_45:%[0-9]+]]:sgpr_32 = S_MOV_B32 51
- ; GFX908-NEXT: [[S_MOV_B32_46:%[0-9]+]]:sgpr_32 = S_MOV_B32 52
- ; GFX908-NEXT: [[S_MOV_B32_47:%[0-9]+]]:sgpr_32 = S_MOV_B32 53
- ; GFX908-NEXT: [[S_MOV_B32_48:%[0-9]+]]:sgpr_32 = S_MOV_B32 54
- ; GFX908-NEXT: [[S_MOV_B32_49:%[0-9]+]]:sgpr_32 = S_MOV_B32 55
- ; GFX908-NEXT: [[S_MOV_B32_50:%[0-9]+]]:sgpr_32 = S_MOV_B32 56
- ; GFX908-NEXT: [[S_MOV_B32_51:%[0-9]+]]:sgpr_32 = S_MOV_B32 57
- ; GFX908-NEXT: [[S_MOV_B32_52:%[0-9]+]]:sgpr_32 = S_MOV_B32 58
- ; GFX908-NEXT: [[S_MOV_B32_53:%[0-9]+]]:sgpr_32 = S_MOV_B32 59
- ; GFX908-NEXT: [[S_MOV_B32_54:%[0-9]+]]:sgpr_32 = S_MOV_B32 60
- ; GFX908-NEXT: [[S_MOV_B32_55:%[0-9]+]]:sgpr_32 = S_MOV_B32 61
- ; GFX908-NEXT: [[S_MOV_B32_56:%[0-9]+]]:sgpr_32 = S_MOV_B32 62
- ; GFX908-NEXT: [[S_MOV_B32_57:%[0-9]+]]:sgpr_32 = S_MOV_B32 63
- ; GFX908-NEXT: [[S_MOV_B32_58:%[0-9]+]]:sgpr_32 = S_MOV_B32 64
- ; GFX908-NEXT: [[S_MOV_B32_59:%[0-9]+]]:sgpr_32 = S_MOV_B32 65
- ; GFX908-NEXT: [[S_MOV_B32_60:%[0-9]+]]:sgpr_32 = S_MOV_B32 66
- ; GFX908-NEXT: [[S_MOV_B32_61:%[0-9]+]]:sgpr_32 = S_MOV_B32 67
- ; GFX908-NEXT: [[S_MOV_B32_62:%[0-9]+]]:sgpr_32 = S_MOV_B32 68
- ; GFX908-NEXT: [[S_MOV_B32_63:%[0-9]+]]:sgpr_32 = S_MOV_B32 69
- ; GFX908-NEXT: [[S_MOV_B32_64:%[0-9]+]]:sgpr_32 = S_MOV_B32 70
- ; GFX908-NEXT: [[S_MOV_B32_65:%[0-9]+]]:sgpr_32 = S_MOV_B32 71
- ; GFX908-NEXT: [[S_MOV_B32_66:%[0-9]+]]:sgpr_32 = S_MOV_B32 72
- ; GFX908-NEXT: [[S_MOV_B32_67:%[0-9]+]]:sgpr_32 = S_MOV_B32 73
- ; GFX908-NEXT: [[S_MOV_B32_68:%[0-9]+]]:sgpr_32 = S_MOV_B32 74
- ; GFX908-NEXT: [[S_MOV_B32_69:%[0-9]+]]:sgpr_32 = S_MOV_B32 75
- ; GFX908-NEXT: [[S_MOV_B32_70:%[0-9]+]]:sgpr_32 = S_MOV_B32 76
- ; GFX908-NEXT: [[S_MOV_B32_71:%[0-9]+]]:sgpr_32 = S_MOV_B32 77
- ; GFX908-NEXT: [[S_MOV_B32_72:%[0-9]+]]:sgpr_32 = S_MOV_B32 78
- ; GFX908-NEXT: [[S_MOV_B32_73:%[0-9]+]]:sgpr_32 = S_MOV_B32 79
+ ; GFX908-NEXT: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 1
+ ; GFX908-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 2
+ ; GFX908-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 3
+ ; GFX908-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sgpr_32 = S_MOV_B32 4
+ ; GFX908-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sgpr_32 = S_MOV_B32 5
+ ; GFX908-NEXT: [[S_MOV_B32_5:%[0-9]+]]:sgpr_32 = S_MOV_B32 6
+ ; GFX908-NEXT: [[S_MOV_B32_6:%[0-9]+]]:sgpr_32 = S_MOV_B32 7
+ ; GFX908-NEXT: [[S_MOV_B32_7:%[0-9]+]]:sgpr_32 = S_MOV_B32 8
+ ; GFX908-NEXT: [[S_MOV_B32_8:%[0-9]+]]:sgpr_32 = S_MOV_B32 9
+ ; GFX908-NEXT: [[S_MOV_B32_9:%[0-9]+]]:sgpr_32 = S_MOV_B32 10
+ ; GFX908-NEXT: [[S_MOV_B32_10:%[0-9]+]]:sgpr_32 = S_MOV_B32 11
+ ; GFX908-NEXT: [[S_MOV_B32_11:%[0-9]+]]:sgpr_32 = S_MOV_B32 12
+ ; GFX908-NEXT: [[S_MOV_B32_12:%[0-9]+]]:sgpr_32 = S_MOV_B32 13
+ ; GFX908-NEXT: [[S_MOV_B32_13:%[0-9]+]]:sgpr_32 = S_MOV_B32 14
+ ; GFX908-NEXT: [[S_MOV_B32_14:%[0-9]+]]:sgpr_32 = S_MOV_B32 15
+ ; GFX908-NEXT: [[S_MOV_B32_15:%[0-9]+]]:sgpr_32 = S_MOV_B32 16
+ ; GFX908-NEXT: [[S_MOV_B32_16:%[0-9]+]]:sgpr_32 = S_MOV_B32 17
+ ; GFX908-NEXT: [[S_MOV_B32_17:%[0-9]+]]:sgpr_32 = S_MOV_B32 18
+ ; GFX908-NEXT: [[S_MOV_B32_18:%[0-9]+]]:sgpr_32 = S_MOV_B32 19
+ ; GFX908-NEXT: [[S_MOV_B32_19:%[0-9]+]]:sgpr_32 = S_MOV_B32 20
+ ; GFX908-NEXT: [[S_MOV_B32_20:%[0-9]+]]:sgpr_32 = S_MOV_B32 21
+ ; GFX908-NEXT: [[S_MOV_B32_21:%[0-9]+]]:sgpr_32 = S_MOV_B32 22
+ ; GFX908-NEXT: [[S_MOV_B32_22:%[0-9]+]]:sgpr_32 = S_MOV_B32 23
+ ; GFX908-NEXT: [[S_MOV_B32_23:%[0-9]+]]:sgpr_32 = S_MOV_B32 24
+ ; GFX908-NEXT: [[S_MOV_B32_24:%[0-9]+]]:sgpr_32 = S_MOV_B32 25
+ ; GFX908-NEXT: [[S_MOV_B32_25:%[0-9]+]]:sgpr_32 = S_MOV_B32 26
+ ; GFX908-NEXT: [[S_MOV_B32_26:%[0-9]+]]:sgpr_32 = S_MOV_B32 27
+ ; GFX908-NEXT: [[S_MOV_B32_27:%[0-9]+]]:sgpr_32 = S_MOV_B32 28
+ ; GFX908-NEXT: [[S_MOV_B32_28:%[0-9]+]]:sgpr_32 = S_MOV_B32 29
+ ; GFX908-NEXT: [[S_MOV_B32_29:%[0-9]+]]:sgpr_32 = S_MOV_B32 30
+ ; GFX908-NEXT: [[S_MOV_B32_30:%[0-9]+]]:sgpr_32 = S_MOV_B32 31
+ ; GFX908-NEXT: [[S_MOV_B32_31:%[0-9]+]]:sgpr_32 = S_MOV_B32 32
+ ; GFX908-NEXT: [[S_MOV_B32_32:%[0-9]+]]:sgpr_32 = S_MOV_B32 33
+ ; GFX908-NEXT: [[S_MOV_B32_33:%[0-9]+]]:sgpr_32 = S_MOV_B32 34
+ ; GFX908-NEXT: [[S_MOV_B32_34:%[0-9]+]]:sgpr_32 = S_MOV_B32 35
+ ; GFX908-NEXT: [[S_MOV_B32_35:%[0-9]+]]:sgpr_32 = S_MOV_B32 36
+ ; GFX908-NEXT: [[S_MOV_B32_36:%[0-9]+]]:sgpr_32 = S_MOV_B32 37
+ ; GFX908-NEXT: [[S_MOV_B32_37:%[0-9]+]]:sgpr_32 = S_MOV_B32 38
+ ; GFX908-NEXT: [[S_MOV_B32_38:%[0-9]+]]:sgpr_32 = S_MOV_B32 39
+ ; GFX908-NEXT: [[S_MOV_B32_39:%[0-9]+]]:sgpr_32 = S_MOV_B32 40
+ ; GFX908-NEXT: [[S_MOV_B32_40:%[0-9]+]]:sgpr_32 = S_MOV_B32 41
+ ; GFX908-NEXT: [[S_MOV_B32_41:%[0-9]+]]:sgpr_32 = S_MOV_B32 42
+ ; GFX908-NEXT: [[S_MOV_B32_42:%[0-9]+]]:sgpr_32 = S_MOV_B32 43
+ ; GFX908-NEXT: [[S_MOV_B32_43:%[0-9]+]]:sgpr_32 = S_MOV_B32 44
+ ; GFX908-NEXT: [[S_MOV_B32_44:%[0-9]+]]:sgpr_32 = S_MOV_B32 45
+ ; GFX908-NEXT: [[S_MOV_B32_45:%[0-9]+]]:sgpr_32 = S_MOV_B32 46
+ ; GFX908-NEXT: [[S_MOV_B32_46:%[0-9]+]]:sgpr_32 = S_MOV_B32 47
+ ; GFX908-NEXT: [[S_MOV_B32_47:%[0-9]+]]:sgpr_32 = S_MOV_B32 48
+ ; GFX908-NEXT: [[S_MOV_B32_48:%[0-9]+]]:sgpr_32 = S_MOV_B32 49
+ ; GFX908-NEXT: [[S_MOV_B32_49:%[0-9]+]]:sgpr_32 = S_MOV_B32 50
+ ; GFX908-NEXT: [[S_MOV_B32_50:%[0-9]+]]:sgpr_32 = S_MOV_B32 51
+ ; GFX908-NEXT: [[S_MOV_B32_51:%[0-9]+]]:sgpr_32 = S_MOV_B32 52
+ ; GFX908-NEXT: [[S_MOV_B32_52:%[0-9]+]]:sgpr_32 = S_MOV_B32 53
+ ; GFX908-NEXT: [[S_MOV_B32_53:%[0-9]+]]:sgpr_32 = S_MOV_B32 54
+ ; GFX908-NEXT: [[S_MOV_B32_54:%[0-9]+]]:sgpr_32 = S_MOV_B32 55
+ ; GFX908-NEXT: [[S_MOV_B32_55:%[0-9]+]]:sgpr_32 = S_MOV_B32 56
+ ; GFX908-NEXT: [[S_MOV_B32_56:%[0-9]+]]:sgpr_32 = S_MOV_B32 57
+ ; GFX908-NEXT: [[S_MOV_B32_57:%[0-9]+]]:sgpr_32 = S_MOV_B32 58
+ ; GFX908-NEXT: [[S_MOV_B32_58:%[0-9]+]]:sgpr_32 = S_MOV_B32 59
+ ; GFX908-NEXT: [[S_MOV_B32_59:%[0-9]+]]:sgpr_32 = S_MOV_B32 60
+ ; GFX908-NEXT: [[S_MOV_B32_60:%[0-9]+]]:sgpr_32 = S_MOV_B32 61
+ ; GFX908-NEXT: [[S_MOV_B32_61:%[0-9]+]]:sgpr_32 = S_MOV_B32 62
+ ; GFX908-NEXT: [[S_MOV_B32_62:%[0-9]+]]:sgpr_32 = S_MOV_B32 63
+ ; GFX908-NEXT: [[S_MOV_B32_63:%[0-9]+]]:sgpr_32 = S_MOV_B32 64
+ ; GFX908-NEXT: [[S_MOV_B32_64:%[0-9]+]]:sgpr_32 = S_MOV_B32 65
+ ; GFX908-NEXT: [[S_MOV_B32_65:%[0-9]+]]:sgpr_32 = S_MOV_B32 66
+ ; GFX908-NEXT: [[S_MOV_B32_66:%[0-9]+]]:sgpr_32 = S_MOV_B32 67
+ ; GFX908-NEXT: [[S_MOV_B32_67:%[0-9]+]]:sgpr_32 = S_MOV_B32 68
+ ; GFX908-NEXT: [[S_MOV_B32_68:%[0-9]+]]:sgpr_32 = S_MOV_B32 69
+ ; GFX908-NEXT: [[S_MOV_B32_69:%[0-9]+]]:sgpr_32 = S_MOV_B32 70
+ ; GFX908-NEXT: [[S_MOV_B32_70:%[0-9]+]]:sgpr_32 = S_MOV_B32 71
+ ; GFX908-NEXT: [[S_MOV_B32_71:%[0-9]+]]:sgpr_32 = S_MOV_B32 72
+ ; GFX908-NEXT: [[S_MOV_B32_72:%[0-9]+]]:sgpr_32 = S_MOV_B32 73
+ ; GFX908-NEXT: [[S_MOV_B32_73:%[0-9]+]]:sgpr_32 = S_MOV_B32 74
+ ; GFX908-NEXT: [[S_MOV_B32_74:%[0-9]+]]:sgpr_32 = S_MOV_B32 75
+ ; GFX908-NEXT: [[S_MOV_B32_75:%[0-9]+]]:sgpr_32 = S_MOV_B32 76
+ ; GFX908-NEXT: [[S_MOV_B32_76:%[0-9]+]]:sgpr_32 = S_MOV_B32 77
+ ; GFX908-NEXT: [[S_MOV_B32_77:%[0-9]+]]:sgpr_32 = S_MOV_B32 78
+ ; GFX908-NEXT: [[S_MOV_B32_78:%[0-9]+]]:sgpr_32 = S_MOV_B32 79
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.1:
- ; GFX908-NEXT: [[S_MOV_B32_74:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
- ; GFX908-NEXT: [[S_MOV_B32_75:%[0-9]+]]:sgpr_32 = S_MOV_B32 1
- ; GFX908-NEXT: [[S_MOV_B32_76:%[0-9]+]]:sgpr_32 = S_MOV_B32 2
- ; GFX908-NEXT: [[S_MOV_B32_77:%[0-9]+]]:sgpr_32 = S_MOV_B32 3
- ; GFX908-NEXT: [[S_MOV_B32_78:%[0-9]+]]:sgpr_32 = S_MOV_B32 4
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_74]], implicit [[S_MOV_B32_75]], implicit [[S_MOV_B32_76]], implicit [[S_MOV_B32_77]], implicit [[S_MOV_B32_78]]
- ; GFX908-NEXT: [[S_MOV_B32_79:%[0-9]+]]:sgpr_32 = S_MOV_B32 5
+ ; GFX908-NEXT: [[S_MOV_B32_79:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_79]], implicit [[S_MOV_B32_]], implicit [[S_MOV_B32_1]], implicit [[S_MOV_B32_2]], implicit [[S_MOV_B32_3]]
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_4]], implicit [[S_MOV_B32_5]], implicit [[S_MOV_B32_6]], implicit [[S_MOV_B32_7]], implicit [[S_MOV_B32_8]]
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_9]], implicit [[S_MOV_B32_10]], implicit [[S_MOV_B32_11]], implicit [[S_MOV_B32_12]], implicit [[S_MOV_B32_13]]
@@ -135,95 +134,95 @@ body: |
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_59]], implicit [[S_MOV_B32_60]], implicit [[S_MOV_B32_61]], implicit [[S_MOV_B32_62]], implicit [[S_MOV_B32_63]]
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_64]], implicit [[S_MOV_B32_65]], implicit [[S_MOV_B32_66]], implicit [[S_MOV_B32_67]], implicit [[S_MOV_B32_68]]
; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_69]], implicit [[S_MOV_B32_70]], implicit [[S_MOV_B32_71]], implicit [[S_MOV_B32_72]], implicit [[S_MOV_B32_73]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_74]], implicit [[S_MOV_B32_75]], implicit [[S_MOV_B32_76]], implicit [[S_MOV_B32_77]], implicit [[S_MOV_B32_78]]
; GFX908-NEXT: S_ENDPGM 0
;
; GFX90A-LABEL: name: small_num_sgprs_as_spill
; GFX90A: bb.0:
; GFX90A-NEXT: successors: %bb.1(0x80000000)
; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 6
- ; GFX90A-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 7
- ; GFX90A-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 8
- ; GFX90A-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sgpr_32 = S_MOV_B32 9
- ; GFX90A-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sgpr_32 = S_MOV_B32 10
- ; GFX90A-NEXT: [[S_MOV_B32_5:%[0-9]+]]:sgpr_32 = S_MOV_B32 11
- ; GFX90A-NEXT: [[S_MOV_B32_6:%[0-9]+]]:sgpr_32 = S_MOV_B32 12
- ; GFX90A-NEXT: [[S_MOV_B32_7:%[0-9]+]]:sgpr_32 = S_MOV_B32 13
- ; GFX90A-NEXT: [[S_MOV_B32_8:%[0-9]+]]:sgpr_32 = S_MOV_B32 14
- ; GFX90A-NEXT: [[S_MOV_B32_9:%[0-9]+]]:sgpr_32 = S_MOV_B32 15
- ; GFX90A-NEXT: [[S_MOV_B32_10:%[0-9]+]]:sgpr_32 = S_MOV_B32 16
- ; GFX90A-NEXT: [[S_MOV_B32_11:%[0-9]+]]:sgpr_32 = S_MOV_B32 17
- ; GFX90A-NEXT: [[S_MOV_B32_12:%[0-9]+]]:sgpr_32 = S_MOV_B32 18
- ; GFX90A-NEXT: [[S_MOV_B32_13:%[0-9]+]]:sgpr_32 = S_MOV_B32 19
- ; GFX90A-NEXT: [[S_MOV_B32_14:%[0-9]+]]:sgpr_32 = S_MOV_B32 20
- ; GFX90A-NEXT: [[S_MOV_B32_15:%[0-9]+]]:sgpr_32 = S_MOV_B32 21
- ; GFX90A-NEXT: [[S_MOV_B32_16:%[0-9]+]]:sgpr_32 = S_MOV_B32 22
- ; GFX90A-NEXT: [[S_MOV_B32_17:%[0-9]+]]:sgpr_32 = S_MOV_B32 23
- ; GFX90A-NEXT: [[S_MOV_B32_18:%[0-9]+]]:sgpr_32 = S_MOV_B32 24
- ; GFX90A-NEXT: [[S_MOV_B32_19:%[0-9]+]]:sgpr_32 = S_MOV_B32 25
- ; GFX90A-NEXT: [[S_MOV_B32_20:%[0-9]+]]:sgpr_32 = S_MOV_B32 26
- ; GFX90A-NEXT: [[S_MOV_B32_21:%[0-9]+]]:sgpr_32 = S_MOV_B32 27
- ; GFX90A-NEXT: [[S_MOV_B32_22:%[0-9]+]]:sgpr_32 = S_MOV_B32 28
- ; GFX90A-NEXT: [[S_MOV_B32_23:%[0-9]+]]:sgpr_32 = S_MOV_B32 29
- ; GFX90A-NEXT: [[S_MOV_B32_24:%[0-9]+]]:sgpr_32 = S_MOV_B32 30
- ; GFX90A-NEXT: [[S_MOV_B32_25:%[0-9]+]]:sgpr_32 = S_MOV_B32 31
- ; GFX90A-NEXT: [[S_MOV_B32_26:%[0-9]+]]:sgpr_32 = S_MOV_B32 32
- ; GFX90A-NEXT: [[S_MOV_B32_27:%[0-9]+]]:sgpr_32 = S_MOV_B32 33
- ; GFX90A-NEXT: [[S_MOV_B32_28:%[0-9]+]]:sgpr_32 = S_MOV_B32 34
- ; GFX90A-NEXT: [[S_MOV_B32_29:%[0-9]+]]:sgpr_32 = S_MOV_B32 35
- ; GFX90A-NEXT: [[S_MOV_B32_30:%[0-9]+]]:sgpr_32 = S_MOV_B32 36
- ; GFX90A-NEXT: [[S_MOV_B32_31:%[0-9]+]]:sgpr_32 = S_MOV_B32 37
- ; GFX90A-NEXT: [[S_MOV_B32_32:%[0-9]+]]:sgpr_32 = S_MOV_B32 38
- ; GFX90A-NEXT: [[S_MOV_B32_33:%[0-9]+]]:sgpr_32 = S_MOV_B32 39
- ; GFX90A-NEXT: [[S_MOV_B32_34:%[0-9]+]]:sgpr_32 = S_MOV_B32 40
- ; GFX90A-NEXT: [[S_MOV_B32_35:%[0-9]+]]:sgpr_32 = S_MOV_B32 41
- ; GFX90A-NEXT: [[S_MOV_B32_36:%[0-9]+]]:sgpr_32 = S_MOV_B32 42
- ; GFX90A-NEXT: [[S_MOV_B32_37:%[0-9]+]]:sgpr_32 = S_MOV_B32 43
- ; GFX90A-NEXT: [[S_MOV_B32_38:%[0-9]+]]:sgpr_32 = S_MOV_B32 44
- ; GFX90A-NEXT: [[S_MOV_B32_39:%[0-9]+]]:sgpr_32 = S_MOV_B32 45
- ; GFX90A-NEXT: [[S_MOV_B32_40:%[0-9]+]]:sgpr_32 = S_MOV_B32 46
- ; GFX90A-NEXT: [[S_MOV_B32_41:%[0-9]+]]:sgpr_32 = S_MOV_B32 47
- ; GFX90A-NEXT: [[S_MOV_B32_42:%[0-9]+]]:sgpr_32 = S_MOV_B32 48
- ; GFX90A-NEXT: [[S_MOV_B32_43:%[0-9]+]]:sgpr_32 = S_MOV_B32 49
- ; GFX90A-NEXT: [[S_MOV_B32_44:%[0-9]+]]:sgpr_32 = S_MOV_B32 50
- ; GFX90A-NEXT: [[S_MOV_B32_45:%[0-9]+]]:sgpr_32 = S_MOV_B32 51
- ; GFX90A-NEXT: [[S_MOV_B32_46:%[0-9]+]]:sgpr_32 = S_MOV_B32 52
- ; GFX90A-NEXT: [[S_MOV_B32_47:%[0-9]+]]:sgpr_32 = S_MOV_B32 53
- ; GFX90A-NEXT: [[S_MOV_B32_48:%[0-9]+]]:sgpr_32 = S_MOV_B32 54
- ; GFX90A-NEXT: [[S_MOV_B32_49:%[0-9]+]]:sgpr_32 = S_MOV_B32 55
- ; GFX90A-NEXT: [[S_MOV_B32_50:%[0-9]+]]:sgpr_32 = S_MOV_B32 56
- ; GFX90A-NEXT: [[S_MOV_B32_51:%[0-9]+]]:sgpr_32 = S_MOV_B32 57
- ; GFX90A-NEXT: [[S_MOV_B32_52:%[0-9]+]]:sgpr_32 = S_MOV_B32 58
- ; GFX90A-NEXT: [[S_MOV_B32_53:%[0-9]+]]:sgpr_32 = S_MOV_B32 59
- ; GFX90A-NEXT: [[S_MOV_B32_54:%[0-9]+]]:sgpr_32 = S_MOV_B32 60
- ; GFX90A-NEXT: [[S_MOV_B32_55:%[0-9]+]]:sgpr_32 = S_MOV_B32 61
- ; GFX90A-NEXT: [[S_MOV_B32_56:%[0-9]+]]:sgpr_32 = S_MOV_B32 62
- ; GFX90A-NEXT: [[S_MOV_B32_57:%[0-9]+]]:sgpr_32 = S_MOV_B32 63
- ; GFX90A-NEXT: [[S_MOV_B32_58:%[0-9]+]]:sgpr_32 = S_MOV_B32 64
- ; GFX90A-NEXT: [[S_MOV_B32_59:%[0-9]+]]:sgpr_32 = S_MOV_B32 65
- ; GFX90A-NEXT: [[S_MOV_B32_60:%[0-9]+]]:sgpr_32 = S_MOV_B32 66
- ; GFX90A-NEXT: [[S_MOV_B32_61:%[0-9]+]]:sgpr_32 = S_MOV_B32 67
- ; GFX90A-NEXT: [[S_MOV_B32_62:%[0-9]+]]:sgpr_32 = S_MOV_B32 68
- ; GFX90A-NEXT: [[S_MOV_B32_63:%[0-9]+]]:sgpr_32 = S_MOV_B32 69
- ; GFX90A-NEXT: [[S_MOV_B32_64:%[0-9]+]]:sgpr_32 = S_MOV_B32 70
- ; GFX90A-NEXT: [[S_MOV_B32_65:%[0-9]+]]:sgpr_32 = S_MOV_B32 71
- ; GFX90A-NEXT: [[S_MOV_B32_66:%[0-9]+]]:sgpr_32 = S_MOV_B32 72
- ; GFX90A-NEXT: [[S_MOV_B32_67:%[0-9]+]]:sgpr_32 = S_MOV_B32 73
- ; GFX90A-NEXT: [[S_MOV_B32_68:%[0-9]+]]:sgpr_32 = S_MOV_B32 74
- ; GFX90A-NEXT: [[S_MOV_B32_69:%[0-9]+]]:sgpr_32 = S_MOV_B32 75
- ; GFX90A-NEXT: [[S_MOV_B32_70:%[0-9]+]]:sgpr_32 = S_MOV_B32 76
- ; GFX90A-NEXT: [[S_MOV_B32_71:%[0-9]+]]:sgpr_32 = S_MOV_B32 77
- ; GFX90A-NEXT: [[S_MOV_B32_72:%[0-9]+]]:sgpr_32 = S_MOV_B32 78
- ; GFX90A-NEXT: [[S_MOV_B32_73:%[0-9]+]]:sgpr_32 = S_MOV_B32 79
+ ; GFX90A-NEXT: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 1
+ ; GFX90A-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 2
+ ; GFX90A-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 3
+ ; GFX90A-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sgpr_32 = S_MOV_B32 4
+ ; GFX90A-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sgpr_32 = S_MOV_B32 5
+ ; GFX90A-NEXT: [[S_MOV_B32_5:%[0-9]+]]:sgpr_32 = S_MOV_B32 6
+ ; GFX90A-NEXT: [[S_MOV_B32_6:%[0-9]+]]:sgpr_32 = S_MOV_B32 7
+ ; GFX90A-NEXT: [[S_MOV_B32_7:%[0-9]+]]:sgpr_32 = S_MOV_B32 8
+ ; GFX90A-NEXT: [[S_MOV_B32_8:%[0-9]+]]:sgpr_32 = S_MOV_B32 9
+ ; GFX90A-NEXT: [[S_MOV_B32_9:%[0-9]+]]:sgpr_32 = S_MOV_B32 10
+ ; GFX90A-NEXT: [[S_MOV_B32_10:%[0-9]+]]:sgpr_32 = S_MOV_B32 11
+ ; GFX90A-NEXT: [[S_MOV_B32_11:%[0-9]+]]:sgpr_32 = S_MOV_B32 12
+ ; GFX90A-NEXT: [[S_MOV_B32_12:%[0-9]+]]:sgpr_32 = S_MOV_B32 13
+ ; GFX90A-NEXT: [[S_MOV_B32_13:%[0-9]+]]:sgpr_32 = S_MOV_B32 14
+ ; GFX90A-NEXT: [[S_MOV_B32_14:%[0-9]+]]:sgpr_32 = S_MOV_B32 15
+ ; GFX90A-NEXT: [[S_MOV_B32_15:%[0-9]+]]:sgpr_32 = S_MOV_B32 16
+ ; GFX90A-NEXT: [[S_MOV_B32_16:%[0-9]+]]:sgpr_32 = S_MOV_B32 17
+ ; GFX90A-NEXT: [[S_MOV_B32_17:%[0-9]+]]:sgpr_32 = S_MOV_B32 18
+ ; GFX90A-NEXT: [[S_MOV_B32_18:%[0-9]+]]:sgpr_32 = S_MOV_B32 19
+ ; GFX90A-NEXT: [[S_MOV_B32_19:%[0-9]+]]:sgpr_32 = S_MOV_B32 20
+ ; GFX90A-NEXT: [[S_MOV_B32_20:%[0-9]+]]:sgpr_32 = S_MOV_B32 21
+ ; GFX90A-NEXT: [[S_MOV_B32_21:%[0-9]+]]:sgpr_32 = S_MOV_B32 22
+ ; GFX90A-NEXT: [[S_MOV_B32_22:%[0-9]+]]:sgpr_32 = S_MOV_B32 23
+ ; GFX90A-NEXT: [[S_MOV_B32_23:%[0-9]+]]:sgpr_32 = S_MOV_B32 24
+ ; GFX90A-NEXT: [[S_MOV_B32_24:%[0-9]+]]:sgpr_32 = S_MOV_B32 25
+ ; GFX90A-NEXT: [[S_MOV_B32_25:%[0-9]+]]:sgpr_32 = S_MOV_B32 26
+ ; GFX90A-NEXT: [[S_MOV_B32_26:%[0-9]+]]:sgpr_32 = S_MOV_B32 27
+ ; GFX90A-NEXT: [[S_MOV_B32_27:%[0-9]+]]:sgpr_32 = S_MOV_B32 28
+ ; GFX90A-NEXT: [[S_MOV_B32_28:%[0-9]+]]:sgpr_32 = S_MOV_B32 29
+ ; GFX90A-NEXT: [[S_MOV_B32_29:%[0-9]+]]:sgpr_32 = S_MOV_B32 30
+ ; GFX90A-NEXT: [[S_MOV_B32_30:%[0-9]+]]:sgpr_32 = S_MOV_B32 31
+ ; GFX90A-NEXT: [[S_MOV_B32_31:%[0-9]+]]:sgpr_32 = S_MOV_B32 32
+ ; GFX90A-NEXT: [[S_MOV_B32_32:%[0-9]+]]:sgpr_32 = S_MOV_B32 33
+ ; GFX90A-NEXT: [[S_MOV_B32_33:%[0-9]+]]:sgpr_32 = S_MOV_B32 34
+ ; GFX90A-NEXT: [[S_MOV_B32_34:%[0-9]+]]:sgpr_32 = S_MOV_B32 35
+ ; GFX90A-NEXT: [[S_MOV_B32_35:%[0-9]+]]:sgpr_32 = S_MOV_B32 36
+ ; GFX90A-NEXT: [[S_MOV_B32_36:%[0-9]+]]:sgpr_32 = S_MOV_B32 37
+ ; GFX90A-NEXT: [[S_MOV_B32_37:%[0-9]+]]:sgpr_32 = S_MOV_B32 38
+ ; GFX90A-NEXT: [[S_MOV_B32_38:%[0-9]+]]:sgpr_32 = S_MOV_B32 39
+ ; GFX90A-NEXT: [[S_MOV_B32_39:%[0-9]+]]:sgpr_32 = S_MOV_B32 40
+ ; GFX90A-NEXT: [[S_MOV_B32_40:%[0-9]+]]:sgpr_32 = S_MOV_B32 41
+ ; GFX90A-NEXT: [[S_MOV_B32_41:%[0-9]+]]:sgpr_32 = S_MOV_B32 42
+ ; GFX90A-NEXT: [[S_MOV_B32_42:%[0-9]+]]:sgpr_32 = S_MOV_B32 43
+ ; GFX90A-NEXT: [[S_MOV_B32_43:%[0-9]+]]:sgpr_32 = S_MOV_B32 44
+ ; GFX90A-NEXT: [[S_MOV_B32_44:%[0-9]+]]:sgpr_32 = S_MOV_B32 45
+ ; GFX90A-NEXT: [[S_MOV_B32_45:%[0-9]+]]:sgpr_32 = S_MOV_B32 46
+ ; GFX90A-NEXT: [[S_MOV_B32_46:%[0-9]+]]:sgpr_32 = S_MOV_B32 47
+ ; GFX90A-NEXT: [[S_MOV_B32_47:%[0-9]+]]:sgpr_32 = S_MOV_B32 48
+ ; GFX90A-NEXT: [[S_MOV_B32_48:%[0-9]+]]:sgpr_32 = S_MOV_B32 49
+ ; GFX90A-NEXT: [[S_MOV_B32_49:%[0-9]+]]:sgpr_32 = S_MOV_B32 50
+ ; GFX90A-NEXT: [[S_MOV_B32_50:%[0-9]+]]:sgpr_32 = S_MOV_B32 51
+ ; GFX90A-NEXT: [[S_MOV_B32_51:%[0-9]+]]:sgpr_32 = S_MOV_B32 52
+ ; GFX90A-NEXT: [[S_MOV_B32_52:%[0-9]+]]:sgpr_32 = S_MOV_B32 53
+ ; GFX90A-NEXT: [[S_MOV_B32_53:%[0-9]+]]:sgpr_32 = S_MOV_B32 54
+ ; GFX90A-NEXT: [[S_MOV_B32_54:%[0-9]+]]:sgpr_32 = S_MOV_B32 55
+ ; GFX90A-NEXT: [[S_MOV_B32_55:%[0-9]+]]:sgpr_32 = S_MOV_B32 56
+ ; GFX90A-NEXT: [[S_MOV_B32_56:%[0-9]+]]:sgpr_32 = S_MOV_B32 57
+ ; GFX90A-NEXT: [[S_MOV_B32_57:%[0-9]+]]:sgpr_32 = S_MOV_B32 58
+ ; GFX90A-NEXT: [[S_MOV_B32_58:%[0-9]+]]:sgpr_32 = S_MOV_B32 59
+ ; GFX90A-NEXT: [[S_MOV_B32_59:%[0-9]+]]:sgpr_32 = S_MOV_B32 60
+ ; GFX90A-NEXT: [[S_MOV_B32_60:%[0-9]+]]:sgpr_32 = S_MOV_B32 61
+ ; GFX90A-NEXT: [[S_MOV_B32_61:%[0-9]+]]:sgpr_32 = S_MOV_B32 62
+ ; GFX90A-NEXT: [[S_MOV_B32_62:%[0-9]+]]:sgpr_32 = S_MOV_B32 63
+ ; GFX90A-NEXT: [[S_MOV_B32_63:%[0-9]+]]:sgpr_32 = S_MOV_B32 64
+ ; GFX90A-NEXT: [[S_MOV_B32_64:%[0-9]+]]:sgpr_32 = S_MOV_B32 65
+ ; GFX90A-NEXT: [[S_MOV_B32_65:%[0-9]+]]:sgpr_32 = S_MOV_B32 66
+ ; GFX90A-NEXT: [[S_MOV_B32_66:%[0-9]+]]:sgpr_32 = S_MOV_B32 67
+ ; GFX90A-NEXT: [[S_MOV_B32_67:%[0-9]+]]:sgpr_32 = S_MOV_B32 68
+ ; GFX90A-NEXT: [[S_MOV_B32_68:%[0-9]+]]:sgpr_32 = S_MOV_B32 69
+ ; GFX90A-NEXT: [[S_MOV_B32_69:%[0-9]+]]:sgpr_32 = S_MOV_B32 70
+ ; GFX90A-NEXT: [[S_MOV_B32_70:%[0-9]+]]:sgpr_32 = S_MOV_B32 71
+ ; GFX90A-NEXT: [[S_MOV_B32_71:%[0-9]+]]:sgpr_32 = S_MOV_B32 72
+ ; GFX90A-NEXT: [[S_MOV_B32_72:%[0-9]+]]:sgpr_32 = S_MOV_B32 73
+ ; GFX90A-NEXT: [[S_MOV_B32_73:%[0-9]+]]:sgpr_32 = S_MOV_B32 74
+ ; GFX90A-NEXT: [[S_MOV_B32_74:%[0-9]+]]:sgpr_32 = S_MOV_B32 75
+ ; GFX90A-NEXT: [[S_MOV_B32_75:%[0-9]+]]:sgpr_32 = S_MOV_B32 76
+ ; GFX90A-NEXT: [[S_MOV_B32_76:%[0-9]+]]:sgpr_32 = S_MOV_B32 77
+ ; GFX90A-NEXT: [[S_MOV_B32_77:%[0-9]+]]:sgpr_32 = S_MOV_B32 78
+ ; GFX90A-NEXT: [[S_MOV_B32_78:%[0-9]+]]:sgpr_32 = S_MOV_B32 79
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.1:
- ; GFX90A-NEXT: [[S_MOV_B32_74:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
- ; GFX90A-NEXT: [[S_MOV_B32_75:%[0-9]+]]:sgpr_32 = S_MOV_B32 1
- ; GFX90A-NEXT: [[S_MOV_B32_76:%[0-9]+]]:sgpr_32 = S_MOV_B32 2
- ; GFX90A-NEXT: [[S_MOV_B32_77:%[0-9]+]]:sgpr_32 = S_MOV_B32 3
- ; GFX90A-NEXT: [[S_MOV_B32_78:%[0-9]+]]:sgpr_32 = S_MOV_B32 4
- ; GFX90A-NEXT: S_NOP 0, implicit [[S_MOV_B32_74]], implicit [[S_MOV_B32_75]], implicit [[S_MOV_B32_76]], implicit [[S_MOV_B32_77]], implicit [[S_MOV_B32_78]]
- ; GFX90A-NEXT: [[S_MOV_B32_79:%[0-9]+]]:sgpr_32 = S_MOV_B32 5
+ ; GFX90A-NEXT: [[S_MOV_B32_79:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
; GFX90A-NEXT: S_NOP 0, implicit [[S_MOV_B32_79]], implicit [[S_MOV_B32_]], implicit [[S_MOV_B32_1]], implicit [[S_MOV_B32_2]], implicit [[S_MOV_B32_3]]
; GFX90A-NEXT: S_NOP 0, implicit [[S_MOV_B32_4]], implicit [[S_MOV_B32_5]], implicit [[S_MOV_B32_6]], implicit [[S_MOV_B32_7]], implicit [[S_MOV_B32_8]]
; GFX90A-NEXT: S_NOP 0, implicit [[S_MOV_B32_9]], implicit [[S_MOV_B32_10]], implicit [[S_MOV_B32_11]], implicit [[S_MOV_B32_12]], implicit [[S_MOV_B32_13]]
@@ -239,6 +238,7 @@ body: |
; GFX90A-NEXT: S_NOP 0, implicit [[S_MOV_B32_59]], implicit [[S_MOV_B32_60]], implicit [[S_MOV_B32_61]], implicit [[S_MOV_B32_62]], implicit [[S_MOV_B32_63]]
; GFX90A-NEXT: S_NOP 0, implicit [[S_MOV_B32_64]], implicit [[S_MOV_B32_65]], implicit [[S_MOV_B32_66]], implicit [[S_MOV_B32_67]], implicit [[S_MOV_B32_68]]
; GFX90A-NEXT: S_NOP 0, implicit [[S_MOV_B32_69]], implicit [[S_MOV_B32_70]], implicit [[S_MOV_B32_71]], implicit [[S_MOV_B32_72]], implicit [[S_MOV_B32_73]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[S_MOV_B32_74]], implicit [[S_MOV_B32_75]], implicit [[S_MOV_B32_76]], implicit [[S_MOV_B32_77]], implicit [[S_MOV_B32_78]]
; GFX90A-NEXT: S_ENDPGM 0
bb.0:
successors: %bb.1
@@ -796,6 +796,9 @@ body: |
; GFX908-NEXT: [[DEF26:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF27:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF28:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF29:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF30:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF31:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[V_CVT_I32_F64_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_3:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode, implicit-def $m0
@@ -836,18 +839,15 @@ body: |
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_20]], implicit [[V_CVT_I32_F64_e32_21]], implicit [[V_CVT_I32_F64_e32_22]], implicit [[V_CVT_I32_F64_e32_23]], implicit [[V_CVT_I32_F64_e32_24]]
; GFX908-NEXT: [[V_CVT_I32_F64_e32_32:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 27, implicit $exec, implicit $mode
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_25]], implicit [[V_CVT_I32_F64_e32_26]], implicit [[V_CVT_I32_F64_e32_32]], implicit [[V_CVT_I32_F64_e32_27]], implicit [[V_CVT_I32_F64_e32_28]]
- ; GFX908-NEXT: [[DEF29:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF30:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: [[DEF31:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_29]], implicit [[V_CVT_I32_F64_e32_30]], implicit [[DEF29]], implicit [[DEF30]], implicit [[DEF31]]
; GFX908-NEXT: [[DEF32:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: S_NOP 0, implicit [[DEF32]], implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]]
- ; GFX908-NEXT: S_NOP 0, implicit [[DEF4]], implicit [[DEF5]], implicit [[DEF6]], implicit [[DEF7]], implicit [[DEF8]]
- ; GFX908-NEXT: S_NOP 0, implicit [[DEF9]], implicit [[DEF10]], implicit [[DEF11]], implicit [[DEF12]], implicit [[DEF13]]
- ; GFX908-NEXT: S_NOP 0, implicit [[DEF14]], implicit [[DEF15]], implicit [[DEF16]], implicit [[DEF17]], implicit [[DEF18]]
- ; GFX908-NEXT: S_NOP 0, implicit [[DEF19]], implicit [[DEF20]], implicit [[DEF21]], implicit [[DEF22]], implicit [[DEF23]]
- ; GFX908-NEXT: S_NOP 0, implicit [[DEF24]], implicit [[DEF25]], implicit [[DEF26]], implicit [[DEF27]], implicit [[V_CVT_I32_F64_e32_31]]
- ; GFX908-NEXT: S_NOP 0, implicit [[DEF28]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_29]], implicit [[V_CVT_I32_F64_e32_30]], implicit [[DEF32]], implicit [[DEF]], implicit [[DEF1]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]], implicit [[DEF5]], implicit [[DEF6]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF7]], implicit [[DEF8]], implicit [[DEF9]], implicit [[DEF10]], implicit [[DEF11]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF12]], implicit [[DEF13]], implicit [[DEF14]], implicit [[DEF15]], implicit [[DEF16]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF17]], implicit [[DEF18]], implicit [[DEF19]], implicit [[DEF20]], implicit [[DEF21]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF22]], implicit [[DEF23]], implicit [[DEF24]], implicit [[DEF25]], implicit [[DEF26]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF27]], implicit [[DEF28]], implicit [[DEF29]], implicit [[DEF30]], implicit [[V_CVT_I32_F64_e32_31]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF31]]
; GFX908-NEXT: S_ENDPGM 0
;
; GFX90A-LABEL: name: reduce_arch_and_acc_vgrp_spill
@@ -910,6 +910,9 @@ body: |
; GFX90A-NEXT: [[DEF26:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF27:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF28:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF29:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF30:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF31:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[V_CVT_I32_F64_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 28, implicit $exec, implicit $mode
; GFX90A-NEXT: [[V_CVT_I32_F64_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 29, implicit $exec, implicit $mode
; GFX90A-NEXT: [[V_CVT_I32_F64_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 30, implicit $exec, implicit $mode
@@ -924,18 +927,15 @@ body: |
; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_20]], implicit [[V_CVT_I32_F64_e32_21]], implicit [[V_CVT_I32_F64_e32_22]], implicit [[V_CVT_I32_F64_e32_23]], implicit [[V_CVT_I32_F64_e32_24]]
; GFX90A-NEXT: [[V_CVT_I32_F64_e32_32:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 27, implicit $exec, implicit $mode
; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_25]], implicit [[V_CVT_I32_F64_e32_26]], implicit [[V_CVT_I32_F64_e32_32]], implicit [[V_CVT_I32_F64_e32_27]], implicit [[V_CVT_I32_F64_e32_28]]
- ; GFX90A-NEXT: [[DEF29:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX90A-NEXT: [[DEF30:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX90A-NEXT: [[DEF31:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_29]], implicit [[V_CVT_I32_F64_e32_30]], implicit [[DEF29]], implicit [[DEF30]], implicit [[DEF31]]
; GFX90A-NEXT: [[DEF32:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX90A-NEXT: S_NOP 0, implicit [[DEF32]], implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]]
- ; GFX90A-NEXT: S_NOP 0, implicit [[DEF4]], implicit [[DEF5]], implicit [[DEF6]], implicit [[DEF7]], implicit [[DEF8]]
- ; GFX90A-NEXT: S_NOP 0, implicit [[DEF9]], implicit [[DEF10]], implicit [[DEF11]], implicit [[DEF12]], implicit [[DEF13]]
- ; GFX90A-NEXT: S_NOP 0, implicit [[DEF14]], implicit [[DEF15]], implicit [[DEF16]], implicit [[DEF17]], implicit [[DEF18]]
- ; GFX90A-NEXT: S_NOP 0, implicit [[DEF19]], implicit [[DEF20]], implicit [[DEF21]], implicit [[DEF22]], implicit [[DEF23]]
- ; GFX90A-NEXT: S_NOP 0, implicit [[DEF24]], implicit [[DEF25]], implicit [[DEF26]], implicit [[DEF27]], implicit [[V_CVT_I32_F64_e32_31]]
- ; GFX90A-NEXT: S_NOP 0, implicit [[DEF28]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_29]], implicit [[V_CVT_I32_F64_e32_30]], implicit [[DEF32]], implicit [[DEF]], implicit [[DEF1]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]], implicit [[DEF5]], implicit [[DEF6]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF7]], implicit [[DEF8]], implicit [[DEF9]], implicit [[DEF10]], implicit [[DEF11]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF12]], implicit [[DEF13]], implicit [[DEF14]], implicit [[DEF15]], implicit [[DEF16]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF17]], implicit [[DEF18]], implicit [[DEF19]], implicit [[DEF20]], implicit [[DEF21]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF22]], implicit [[DEF23]], implicit [[DEF24]], implicit [[DEF25]], implicit [[DEF26]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF27]], implicit [[DEF28]], implicit [[DEF29]], implicit [[DEF30]], implicit [[V_CVT_I32_F64_e32_31]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF31]]
; GFX90A-NEXT: S_ENDPGM 0
bb.0:
successors: %bb.1
@@ -2174,8 +2174,6 @@ body: |
; GFX908-NEXT: [[DEF243:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF244:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF245:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: {{ $}}
- ; GFX908-NEXT: bb.1:
; GFX908-NEXT: [[DEF246:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF247:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF248:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
@@ -2186,7 +2184,8 @@ body: |
; GFX908-NEXT: [[DEF253:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF254:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF255:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX908-NEXT: S_NOP 0, implicit [[DEF246]], implicit [[DEF247]], implicit [[DEF248]], implicit [[DEF249]], implicit [[DEF250]], implicit [[DEF251]], implicit [[DEF252]], implicit [[DEF253]], implicit [[DEF254]], implicit [[DEF255]]
+ ; GFX908-NEXT: {{ $}}
+ ; GFX908-NEXT: bb.1:
; GFX908-NEXT: [[DEF256:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: S_NOP 0, implicit [[DEF256]], implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]], implicit [[DEF5]], implicit [[DEF6]], implicit [[DEF7]], implicit [[DEF8]]
; GFX908-NEXT: S_NOP 0, implicit [[DEF9]], implicit [[DEF10]], implicit [[DEF11]], implicit [[DEF12]], implicit [[DEF13]], implicit [[DEF14]], implicit [[DEF15]], implicit [[DEF16]], implicit [[DEF17]], implicit [[DEF18]]
@@ -2212,7 +2211,8 @@ body: |
; GFX908-NEXT: S_NOP 0, implicit [[DEF209]], implicit [[DEF210]], implicit [[DEF211]], implicit [[DEF212]], implicit [[DEF213]], implicit [[DEF214]], implicit [[DEF215]], implicit [[DEF216]], implicit [[DEF217]], implicit [[DEF218]]
; GFX908-NEXT: S_NOP 0, implicit [[DEF219]], implicit [[DEF220]], implicit [[DEF221]], implicit [[DEF222]], implicit [[DEF223]], implicit [[DEF224]], implicit [[DEF225]], implicit [[DEF226]], implicit [[DEF227]], implicit [[DEF228]]
; GFX908-NEXT: S_NOP 0, implicit [[DEF229]], implicit [[DEF230]], implicit [[DEF231]], implicit [[DEF232]], implicit [[DEF233]], implicit [[DEF234]], implicit [[DEF235]], implicit [[DEF236]], implicit [[DEF237]], implicit [[DEF238]]
- ; GFX908-NEXT: S_NOP 0, implicit [[DEF239]], implicit [[DEF240]], implicit [[DEF241]], implicit [[DEF242]], implicit [[DEF243]], implicit [[DEF244]], implicit [[DEF245]], implicit [[V_CVT_I32_F64_e32_]], implicit [[V_CVT_I32_F64_e32_1]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF239]], implicit [[DEF240]], implicit [[DEF241]], implicit [[DEF242]], implicit [[DEF243]], implicit [[DEF244]], implicit [[DEF245]], implicit [[DEF246]], implicit [[DEF247]], implicit [[DEF248]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF249]], implicit [[DEF250]], implicit [[DEF251]], implicit [[DEF252]], implicit [[DEF253]], implicit [[DEF254]], implicit [[DEF255]], implicit [[V_CVT_I32_F64_e32_]], implicit [[V_CVT_I32_F64_e32_1]]
; GFX908-NEXT: S_ENDPGM 0
;
; GFX90A-LABEL: name: reduce_spill_agpr_above_addressable_limit
@@ -2465,10 +2465,6 @@ body: |
; GFX90A-NEXT: [[DEF243:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF244:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF245:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 257, implicit $exec, implicit $mode
- ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 258, implicit $exec, implicit $mode
- ; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: bb.1:
; GFX90A-NEXT: [[DEF246:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF247:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF248:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
@@ -2479,7 +2475,10 @@ body: |
; GFX90A-NEXT: [[DEF253:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF254:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF255:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX90A-NEXT: S_NOP 0, implicit [[DEF246]], implicit [[DEF247]], implicit [[DEF248]], implicit [[DEF249]], implicit [[DEF250]], implicit [[DEF251]], implicit [[DEF252]], implicit [[DEF253]], implicit [[DEF254]], implicit [[DEF255]]
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 257, implicit $exec, implicit $mode
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 258, implicit $exec, implicit $mode
+ ; GFX90A-NEXT: {{ $}}
+ ; GFX90A-NEXT: bb.1:
; GFX90A-NEXT: [[DEF256:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: S_NOP 0, implicit [[DEF256]], implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]], implicit [[DEF5]], implicit [[DEF6]], implicit [[DEF7]], implicit [[DEF8]]
; GFX90A-NEXT: S_NOP 0, implicit [[DEF9]], implicit [[DEF10]], implicit [[DEF11]], implicit [[DEF12]], implicit [[DEF13]], implicit [[DEF14]], implicit [[DEF15]], implicit [[DEF16]], implicit [[DEF17]], implicit [[DEF18]]
@@ -2505,7 +2504,8 @@ body: |
; GFX90A-NEXT: S_NOP 0, implicit [[DEF209]], implicit [[DEF210]], implicit [[DEF211]], implicit [[DEF212]], implicit [[DEF213]], implicit [[DEF214]], implicit [[DEF215]], implicit [[DEF216]], implicit [[DEF217]], implicit [[DEF218]]
; GFX90A-NEXT: S_NOP 0, implicit [[DEF219]], implicit [[DEF220]], implicit [[DEF221]], implicit [[DEF222]], implicit [[DEF223]], implicit [[DEF224]], implicit [[DEF225]], implicit [[DEF226]], implicit [[DEF227]], implicit [[DEF228]]
; GFX90A-NEXT: S_NOP 0, implicit [[DEF229]], implicit [[DEF230]], implicit [[DEF231]], implicit [[DEF232]], implicit [[DEF233]], implicit [[DEF234]], implicit [[DEF235]], implicit [[DEF236]], implicit [[DEF237]], implicit [[DEF238]]
- ; GFX90A-NEXT: S_NOP 0, implicit [[DEF239]], implicit [[DEF240]], implicit [[DEF241]], implicit [[DEF242]], implicit [[DEF243]], implicit [[DEF244]], implicit [[DEF245]], implicit [[V_CVT_I32_F64_e32_]], implicit [[V_CVT_I32_F64_e32_1]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF239]], implicit [[DEF240]], implicit [[DEF241]], implicit [[DEF242]], implicit [[DEF243]], implicit [[DEF244]], implicit [[DEF245]], implicit [[DEF246]], implicit [[DEF247]], implicit [[DEF248]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF249]], implicit [[DEF250]], implicit [[DEF251]], implicit [[DEF252]], implicit [[DEF253]], implicit [[DEF254]], implicit [[DEF255]], implicit [[V_CVT_I32_F64_e32_]], implicit [[V_CVT_I32_F64_e32_1]]
; GFX90A-NEXT: S_ENDPGM 0
bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats-debug.mir b/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats-debug.mir
index 06b742f14e682..371753801d1a3 100644
--- a/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats-debug.mir
+++ b/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats-debug.mir
@@ -19,8 +19,8 @@ body: |
; DEBUG: Machine code for function sink_and_inc_idx_when_skipping_small_region_1: IsSSA, NoPHIs, TracksLiveness
; DEBUG: [PreRARemat] Retrying function scheduling with new min. occupancy of 10 from rematerializing (original was 9, target was 10)
; DEBUG-NEXT: ********** MI Scheduling **********
- ; DEBUG-NEXT: sink_and_inc_idx_when_skipping_small_region_1:%bb.1
- ; DEBUG-NEXT: From: %23:vgpr_32 = nofpexcept DBG_VALUE 23, implicit $exec, implicit $mode
+ ; DEBUG-NEXT: sink_and_inc_idx_when_skipping_small_region_1:%bb.2
+ ; DEBUG-NEXT: From: %24:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
; DEBUG-NEXT: To: End RegionInstrs: 2
bb.0:
successors: %bb.1
@@ -91,9 +91,9 @@ body: |
; DEBUG: Machine code for function sink_and_inc_idx_when_skipping_small_regions_2: IsSSA, NoPHIs, TracksLiveness
; DEBUG: [PreRARemat] Retrying function scheduling with new min. occupancy of 10 from rematerializing (original was 9, target was 10)
; DEBUG-NEXT: ********** MI Scheduling **********
- ; DEBUG-NEXT: sink_and_inc_idx_when_skipping_small_regions_2:%bb.1
- ; DEBUG-NEXT: From: %23:vgpr_32 = nofpexcept DBG_VALUE 23, implicit $exec, implicit $mode
- ; DEBUG-NEXT: To: End RegionInstrs: 2
+ ; DEBUG-NEXT: sink_and_inc_idx_when_skipping_small_regions_2:%bb.2
+ ; DEBUG-NEXT: From: %24:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
+ ; DEBUG-NEXT: To: End RegionInstrs: 4
bb.0:
successors: %bb.1
diff --git a/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats.mir b/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats.mir
index 2cf1aea037b7c..8d24f6ba66968 100644
--- a/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats.mir
+++ b/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats.mir
@@ -3084,13 +3084,9 @@ body: |
; GFX908-NEXT: [[S_MOV_B32_58:%[0-9]+]]:sgpr_32 = S_MOV_B32 69
; GFX908-NEXT: [[S_MOV_B32_59:%[0-9]+]]:sgpr_32 = S_MOV_B32 70
; GFX908-NEXT: [[S_MOV_B32_60:%[0-9]+]]:sgpr_32 = S_MOV_B32 71
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_20:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 20, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[S_MOV_B32_61:%[0-9]+]]:sgpr_32 = S_MOV_B32 72
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_21:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 21, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[S_MOV_B32_62:%[0-9]+]]:sgpr_32 = S_MOV_B32 73
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[S_MOV_B32_63:%[0-9]+]]:sgpr_32 = S_MOV_B32 74
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode
; GFX908-NEXT: [[S_MOV_B32_64:%[0-9]+]]:sgpr_32 = S_MOV_B32 75
; GFX908-NEXT: [[S_MOV_B32_65:%[0-9]+]]:sgpr_32 = S_MOV_B32 76
; GFX908-NEXT: [[S_MOV_B32_66:%[0-9]+]]:sgpr_32 = S_MOV_B32 77
@@ -3100,7 +3096,11 @@ body: |
; GFX908-NEXT: [[S_MOV_B32_70:%[0-9]+]]:sgpr_32 = S_MOV_B32 81
; GFX908-NEXT: [[S_MOV_B32_71:%[0-9]+]]:sgpr_32 = S_MOV_B32 82
; GFX908-NEXT: [[S_MOV_B32_72:%[0-9]+]]:sgpr_32 = S_MOV_B32 83
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_20:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 20, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[S_MOV_B32_73:%[0-9]+]]:sgpr_32 = S_MOV_B32 84
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_21:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 21, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.1:
; GFX908-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000)
@@ -9419,7 +9419,7 @@ body: |
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_23]]
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.2:
- ; GFX908-NEXT: DBG_VALUE %23:vgpr_32, 0, 0
+ ; GFX908-NEXT: DBG_VALUE %23, 0, 0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_24]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_]], implicit [[V_CVT_I32_F64_e32_1]]
@@ -9471,7 +9471,7 @@ body: |
; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_23]]
; GFX908-GCNTRACKERS-NEXT: {{ $}}
; GFX908-GCNTRACKERS-NEXT: bb.2:
- ; GFX908-GCNTRACKERS-NEXT: DBG_VALUE %23:vgpr_32, 0, 0
+ ; GFX908-GCNTRACKERS-NEXT: DBG_VALUE %23, 0, 0
; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F64_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode
; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_24]]
; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_]], implicit [[V_CVT_I32_F64_e32_1]]
@@ -10269,13 +10269,13 @@ body: |
; GFX908-NEXT: [[V_CVT_I32_F64_e32_7:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 7, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_8:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 8, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_9:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 9, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_10:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 10, implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_11:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 11, implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_10:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 10, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_11:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 11, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_12:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 12, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_13:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 13, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_14:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 14, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_15:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 15, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_16:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 16, implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_16:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 16, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_17:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 17, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_18:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 18, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_19:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 19, implicit $exec, implicit $mode, implicit-def $m0
@@ -10291,9 +10291,7 @@ body: |
; GFX908-NEXT: [[V_CVT_I32_F64_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 29, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 30, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 31, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_32:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 32, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_33:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 33, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_34:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 34, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_32:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 32, implicit $exec, implicit $mode
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.1:
; GFX908-NEXT: successors: %bb.2(0x80000000)
@@ -10304,16 +10302,16 @@ body: |
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_15]], implicit [[V_CVT_I32_F64_e32_16]], implicit [[V_CVT_I32_F64_e32_17]], implicit [[V_CVT_I32_F64_e32_18]], implicit [[V_CVT_I32_F64_e32_19]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_20]], implicit [[V_CVT_I32_F64_e32_21]], implicit [[V_CVT_I32_F64_e32_22]], implicit [[V_CVT_I32_F64_e32_23]], implicit [[V_CVT_I32_F64_e32_24]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_25]], implicit [[V_CVT_I32_F64_e32_26]], implicit [[V_CVT_I32_F64_e32_27]], implicit [[V_CVT_I32_F64_e32_28]], implicit [[V_CVT_I32_F64_e32_29]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_30]], implicit [[V_CVT_I32_F64_e32_31]], implicit [[V_CVT_I32_F64_e32_32]], implicit [[V_CVT_I32_F64_e32_33]], implicit [[V_CVT_I32_F64_e32_34]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_30]], implicit [[V_CVT_I32_F64_e32_31]], implicit [[V_CVT_I32_F64_e32_32]]
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.2:
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_]], implicit [[V_CVT_I32_F64_e32_1]], implicit [[V_CVT_I32_F64_e32_2]], implicit [[V_CVT_I32_F64_e32_3]], implicit [[V_CVT_I32_F64_e32_4]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_5]], implicit [[V_CVT_I32_F64_e32_6]], implicit [[V_CVT_I32_F64_e32_7]], implicit [[V_CVT_I32_F64_e32_8]], implicit [[V_CVT_I32_F64_e32_9]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_12]], implicit [[V_CVT_I32_F64_e32_13]], implicit [[V_CVT_I32_F64_e32_14]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_15]], implicit [[V_CVT_I32_F64_e32_17]], implicit [[V_CVT_I32_F64_e32_18]], implicit [[V_CVT_I32_F64_e32_19]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_10]], implicit [[V_CVT_I32_F64_e32_11]], implicit [[V_CVT_I32_F64_e32_12]], implicit [[V_CVT_I32_F64_e32_13]], implicit [[V_CVT_I32_F64_e32_14]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_15]], implicit [[V_CVT_I32_F64_e32_16]], implicit [[V_CVT_I32_F64_e32_17]], implicit [[V_CVT_I32_F64_e32_18]], implicit [[V_CVT_I32_F64_e32_19]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_20]], implicit [[V_CVT_I32_F64_e32_21]], implicit [[V_CVT_I32_F64_e32_22]], implicit [[V_CVT_I32_F64_e32_23]], implicit [[V_CVT_I32_F64_e32_24]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_25]], implicit [[V_CVT_I32_F64_e32_26]], implicit [[V_CVT_I32_F64_e32_27]], implicit [[V_CVT_I32_F64_e32_28]], implicit [[V_CVT_I32_F64_e32_29]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_30]], implicit [[V_CVT_I32_F64_e32_31]], implicit [[V_CVT_I32_F64_e32_32]], implicit [[V_CVT_I32_F64_e32_33]], implicit [[V_CVT_I32_F64_e32_34]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_30]], implicit [[V_CVT_I32_F64_e32_31]]
; GFX908-NEXT: S_ENDPGM 0
;
; GFX908-GCNTRACKERS-LABEL: name: test_rollback_remat_defregion_above_target
@@ -10330,13 +10328,13 @@ body: |
; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F64_e32_7:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 7, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F64_e32_8:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 8, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F64_e32_9:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 9, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F64_e32_10:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 10, implicit $exec, implicit $mode
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F64_e32_11:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 11, implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F64_e32_10:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 10, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F64_e32_11:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 11, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F64_e32_12:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 12, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F64_e32_13:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 13, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F64_e32_14:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 14, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F64_e32_15:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 15, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F64_e32_16:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 16, implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F64_e32_16:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 16, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F64_e32_17:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 17, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F64_e32_18:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 18, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F64_e32_19:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 19, implicit $exec, implicit $mode, implicit-def $m0
@@ -10352,9 +10350,7 @@ body: |
; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F64_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 29, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F64_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 30, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F64_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 31, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F64_e32_32:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 32, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F64_e32_33:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 33, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F64_e32_34:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 34, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F64_e32_32:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 32, implicit $exec, implicit $mode
; GFX908-GCNTRACKERS-NEXT: {{ $}}
; GFX908-GCNTRACKERS-NEXT: bb.1:
; GFX908-GCNTRACKERS-NEXT: successors: %bb.2(0x80000000)
@@ -10365,16 +10361,16 @@ body: |
; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_15]], implicit [[V_CVT_I32_F64_e32_16]], implicit [[V_CVT_I32_F64_e32_17]], implicit [[V_CVT_I32_F64_e32_18]], implicit [[V_CVT_I32_F64_e32_19]]
; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_20]], implicit [[V_CVT_I32_F64_e32_21]], implicit [[V_CVT_I32_F64_e32_22]], implicit [[V_CVT_I32_F64_e32_23]], implicit [[V_CVT_I32_F64_e32_24]]
; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_25]], implicit [[V_CVT_I32_F64_e32_26]], implicit [[V_CVT_I32_F64_e32_27]], implicit [[V_CVT_I32_F64_e32_28]], implicit [[V_CVT_I32_F64_e32_29]]
- ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_30]], implicit [[V_CVT_I32_F64_e32_31]], implicit [[V_CVT_I32_F64_e32_32]], implicit [[V_CVT_I32_F64_e32_33]], implicit [[V_CVT_I32_F64_e32_34]]
+ ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_30]], implicit [[V_CVT_I32_F64_e32_31]], implicit [[V_CVT_I32_F64_e32_32]]
; GFX908-GCNTRACKERS-NEXT: {{ $}}
; GFX908-GCNTRACKERS-NEXT: bb.2:
; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_]], implicit [[V_CVT_I32_F64_e32_1]], implicit [[V_CVT_I32_F64_e32_2]], implicit [[V_CVT_I32_F64_e32_3]], implicit [[V_CVT_I32_F64_e32_4]]
; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_5]], implicit [[V_CVT_I32_F64_e32_6]], implicit [[V_CVT_I32_F64_e32_7]], implicit [[V_CVT_I32_F64_e32_8]], implicit [[V_CVT_I32_F64_e32_9]]
- ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_12]], implicit [[V_CVT_I32_F64_e32_13]], implicit [[V_CVT_I32_F64_e32_14]]
- ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_15]], implicit [[V_CVT_I32_F64_e32_17]], implicit [[V_CVT_I32_F64_e32_18]], implicit [[V_CVT_I32_F64_e32_19]]
+ ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_10]], implicit [[V_CVT_I32_F64_e32_11]], implicit [[V_CVT_I32_F64_e32_12]], implicit [[V_CVT_I32_F64_e32_13]], implicit [[V_CVT_I32_F64_e32_14]]
+ ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_15]], implicit [[V_CVT_I32_F64_e32_16]], implicit [[V_CVT_I32_F64_e32_17]], implicit [[V_CVT_I32_F64_e32_18]], implicit [[V_CVT_I32_F64_e32_19]]
; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_20]], implicit [[V_CVT_I32_F64_e32_21]], implicit [[V_CVT_I32_F64_e32_22]], implicit [[V_CVT_I32_F64_e32_23]], implicit [[V_CVT_I32_F64_e32_24]]
; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_25]], implicit [[V_CVT_I32_F64_e32_26]], implicit [[V_CVT_I32_F64_e32_27]], implicit [[V_CVT_I32_F64_e32_28]], implicit [[V_CVT_I32_F64_e32_29]]
- ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_30]], implicit [[V_CVT_I32_F64_e32_31]], implicit [[V_CVT_I32_F64_e32_32]], implicit [[V_CVT_I32_F64_e32_33]], implicit [[V_CVT_I32_F64_e32_34]]
+ ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_30]], implicit [[V_CVT_I32_F64_e32_31]]
; GFX908-GCNTRACKERS-NEXT: S_ENDPGM 0
bb.0:
successors: %bb.1
@@ -10389,13 +10385,13 @@ body: |
%7:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 7, implicit $exec, implicit $mode, implicit-def $m0
%8:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 8, implicit $exec, implicit $mode, implicit-def $m0
%9:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 9, implicit $exec, implicit $mode, implicit-def $m0
- %10:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 10, implicit $exec, implicit $mode
- %11:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 11, implicit $exec, implicit $mode
+ %10:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 10, implicit $exec, implicit $mode, implicit-def $m0
+ %11:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 11, implicit $exec, implicit $mode, implicit-def $m0
%12:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 12, implicit $exec, implicit $mode, implicit-def $m0
%13:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 13, implicit $exec, implicit $mode, implicit-def $m0
%14:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 14, implicit $exec, implicit $mode, implicit-def $m0
%15:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 15, implicit $exec, implicit $mode, implicit-def $m0
- %16:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 16, implicit $exec, implicit $mode
+ %16:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 16, implicit $exec, implicit $mode, implicit-def $m0
%17:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 17, implicit $exec, implicit $mode, implicit-def $m0
%18:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 18, implicit $exec, implicit $mode, implicit-def $m0
%19:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 19, implicit $exec, implicit $mode, implicit-def $m0
@@ -10411,30 +10407,28 @@ body: |
%29:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 29, implicit $exec, implicit $mode, implicit-def $m0
%30:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 30, implicit $exec, implicit $mode, implicit-def $m0
%31:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 31, implicit $exec, implicit $mode, implicit-def $m0
- %32:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 32, implicit $exec, implicit $mode, implicit-def $m0
- %33:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 33, implicit $exec, implicit $mode, implicit-def $m0
- %34:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 34, implicit $exec, implicit $mode, implicit-def $m0
+ %32:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 32, implicit $exec, implicit $mode
bb.1:
successors: %bb.2
- S_NOP 0, implicit %0, implicit %1, implicit %2, implicit %3, implicit %4
+ S_NOP 0, implicit %0, implicit %1, implicit %2, implicit %3, implicit %4,
S_NOP 0, implicit %5, implicit %6, implicit %7, implicit %8, implicit %9
S_NOP 0, implicit %10, implicit %11, implicit %12, implicit %13, implicit %14
S_NOP 0, implicit %15, implicit %16, implicit %17, implicit %18, implicit %19
S_NOP 0, implicit %20, implicit %21, implicit %22, implicit %23, implicit %24
S_NOP 0, implicit %25, implicit %26, implicit %27, implicit %28, implicit %29
- S_NOP 0, implicit %30, implicit %31, implicit %32, implicit %33, implicit %34
+ S_NOP 0, implicit %30, implicit %31, implicit %32
bb.2:
- S_NOP 0, implicit %0, implicit %1, implicit %2, implicit %3, implicit %4
+ S_NOP 0, implicit %0, implicit %1, implicit %2, implicit %3, implicit %4,
S_NOP 0, implicit %5, implicit %6, implicit %7, implicit %8, implicit %9
- S_NOP 0, implicit %12, implicit %13, implicit %14
- S_NOP 0, implicit %15, implicit %17, implicit %18, implicit %19
+ S_NOP 0, implicit %10, implicit %11, implicit %12, implicit %13, implicit %14
+ S_NOP 0, implicit %15, implicit %16, implicit %17, implicit %18, implicit %19
S_NOP 0, implicit %20, implicit %21, implicit %22, implicit %23, implicit %24
S_NOP 0, implicit %25, implicit %26, implicit %27, implicit %28, implicit %29
- S_NOP 0, implicit %30, implicit %31, implicit %32, implicit %33, implicit %34
+ S_NOP 0, implicit %30, implicit %31
S_ENDPGM 0
...
@@ -10633,12 +10627,12 @@ body: |
S_ENDPGM 0
...
---
-name: test_rollback_remats_emptydefregion_block
+name: test_rollback_remats_emptydefregion
tracksRegLiveness: true
machineFunctionInfo:
isEntryFunction: true
body: |
- ; GFX908-LABEL: name: test_rollback_remats_emptydefregion_block
+ ; GFX908-LABEL: name: test_rollback_remats_emptydefregion
; GFX908: bb.0:
; GFX908-NEXT: successors: %bb.1(0x80000000)
; GFX908-NEXT: {{ $}}
@@ -10702,7 +10696,7 @@ body: |
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_30]], implicit [[V_CVT_I32_F64_e32_31]]
; GFX908-NEXT: S_ENDPGM 0
;
- ; GFX908-GCNTRACKERS-LABEL: name: test_rollback_remats_emptydefregion_block
+ ; GFX908-GCNTRACKERS-LABEL: name: test_rollback_remats_emptydefregion
; GFX908-GCNTRACKERS: bb.0:
; GFX908-GCNTRACKERS-NEXT: successors: %bb.1(0x80000000)
; GFX908-GCNTRACKERS-NEXT: {{ $}}
@@ -10831,207 +10825,6 @@ body: |
S_ENDPGM 0
...
---
-name: test_rollback_remats_emptydefregion_barrier
-tracksRegLiveness: true
-machineFunctionInfo:
- isEntryFunction: true
-body: |
- ; GFX908-LABEL: name: test_rollback_remats_emptydefregion_barrier
- ; GFX908: bb.0:
- ; GFX908-NEXT: successors: %bb.1(0x80000000)
- ; GFX908-NEXT: {{ $}}
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 0, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_3:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_4:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 4, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_5:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 5, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_6:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 6, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_7:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 7, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_8:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 8, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_9:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 9, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_10:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 10, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_11:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 11, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_12:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 12, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_13:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 13, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_14:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 14, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_15:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 15, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_16:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 16, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_17:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 17, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_18:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 18, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_19:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 19, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_20:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 20, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_21:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 21, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 25, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_26:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 26, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 27, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 28, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 29, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 30, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 31, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: {{ $}}
- ; GFX908-NEXT: bb.1:
- ; GFX908-NEXT: successors: %bb.2(0x80000000)
- ; GFX908-NEXT: {{ $}}
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_32:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 32, implicit $exec, implicit $mode
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_33:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 33, implicit $exec, implicit $mode
- ; GFX908-NEXT: SCHED_BARRIER 0
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_]], implicit [[V_CVT_I32_F64_e32_1]], implicit [[V_CVT_I32_F64_e32_2]], implicit [[V_CVT_I32_F64_e32_3]], implicit [[V_CVT_I32_F64_e32_4]]
- ; GFX908-NEXT: {{ $}}
- ; GFX908-NEXT: bb.2:
- ; GFX908-NEXT: successors: %bb.3(0x80000000)
- ; GFX908-NEXT: {{ $}}
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_5]], implicit [[V_CVT_I32_F64_e32_6]], implicit [[V_CVT_I32_F64_e32_7]], implicit [[V_CVT_I32_F64_e32_8]], implicit [[V_CVT_I32_F64_e32_9]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_10]], implicit [[V_CVT_I32_F64_e32_11]], implicit [[V_CVT_I32_F64_e32_12]], implicit [[V_CVT_I32_F64_e32_13]], implicit [[V_CVT_I32_F64_e32_14]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_15]], implicit [[V_CVT_I32_F64_e32_16]], implicit [[V_CVT_I32_F64_e32_17]], implicit [[V_CVT_I32_F64_e32_18]], implicit [[V_CVT_I32_F64_e32_19]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_20]], implicit [[V_CVT_I32_F64_e32_21]], implicit [[V_CVT_I32_F64_e32_22]], implicit [[V_CVT_I32_F64_e32_23]], implicit [[V_CVT_I32_F64_e32_24]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_25]], implicit [[V_CVT_I32_F64_e32_26]], implicit [[V_CVT_I32_F64_e32_27]], implicit [[V_CVT_I32_F64_e32_28]], implicit [[V_CVT_I32_F64_e32_29]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_30]], implicit [[V_CVT_I32_F64_e32_31]], implicit [[V_CVT_I32_F64_e32_32]], implicit [[V_CVT_I32_F64_e32_33]]
- ; GFX908-NEXT: {{ $}}
- ; GFX908-NEXT: bb.3:
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_]], implicit [[V_CVT_I32_F64_e32_1]], implicit [[V_CVT_I32_F64_e32_2]], implicit [[V_CVT_I32_F64_e32_3]], implicit [[V_CVT_I32_F64_e32_4]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_5]], implicit [[V_CVT_I32_F64_e32_6]], implicit [[V_CVT_I32_F64_e32_7]], implicit [[V_CVT_I32_F64_e32_8]], implicit [[V_CVT_I32_F64_e32_9]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_10]], implicit [[V_CVT_I32_F64_e32_11]], implicit [[V_CVT_I32_F64_e32_12]], implicit [[V_CVT_I32_F64_e32_13]], implicit [[V_CVT_I32_F64_e32_14]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_15]], implicit [[V_CVT_I32_F64_e32_16]], implicit [[V_CVT_I32_F64_e32_17]], implicit [[V_CVT_I32_F64_e32_18]], implicit [[V_CVT_I32_F64_e32_19]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_20]], implicit [[V_CVT_I32_F64_e32_21]], implicit [[V_CVT_I32_F64_e32_22]], implicit [[V_CVT_I32_F64_e32_23]], implicit [[V_CVT_I32_F64_e32_24]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_25]], implicit [[V_CVT_I32_F64_e32_26]], implicit [[V_CVT_I32_F64_e32_27]], implicit [[V_CVT_I32_F64_e32_28]], implicit [[V_CVT_I32_F64_e32_29]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_30]], implicit [[V_CVT_I32_F64_e32_31]]
- ; GFX908-NEXT: S_ENDPGM 0
- ;
- ; GFX908-GCNTRACKERS-LABEL: name: test_rollback_remats_emptydefregion_barrier
- ; GFX908-GCNTRACKERS: bb.0:
- ; GFX908-GCNTRACKERS-NEXT: successors: %bb.1(0x80000000)
- ; GFX908-GCNTRACKERS-NEXT: {{ $}}
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F64_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 0, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F64_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F64_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F64_e32_3:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F64_e32_4:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 4, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F64_e32_5:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 5, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F64_e32_6:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 6, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F64_e32_7:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 7, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F64_e32_8:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 8, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F64_e32_9:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 9, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F64_e32_10:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 10, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F64_e32_11:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 11, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F64_e32_12:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 12, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F64_e32_13:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 13, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F64_e32_14:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 14, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F64_e32_15:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 15, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F64_e32_16:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 16, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F64_e32_17:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 17, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F64_e32_18:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 18, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F64_e32_19:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 19, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F64_e32_20:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 20, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F64_e32_21:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 21, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F64_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F64_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F64_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F64_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 25, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F64_e32_26:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 26, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F64_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 27, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F64_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 28, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F64_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 29, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F64_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 30, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F64_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 31, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-GCNTRACKERS-NEXT: {{ $}}
- ; GFX908-GCNTRACKERS-NEXT: bb.1:
- ; GFX908-GCNTRACKERS-NEXT: successors: %bb.2(0x80000000)
- ; GFX908-GCNTRACKERS-NEXT: {{ $}}
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F64_e32_32:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 32, implicit $exec, implicit $mode
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F64_e32_33:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 33, implicit $exec, implicit $mode
- ; GFX908-GCNTRACKERS-NEXT: SCHED_BARRIER 0
- ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_]], implicit [[V_CVT_I32_F64_e32_1]], implicit [[V_CVT_I32_F64_e32_2]], implicit [[V_CVT_I32_F64_e32_3]], implicit [[V_CVT_I32_F64_e32_4]]
- ; GFX908-GCNTRACKERS-NEXT: {{ $}}
- ; GFX908-GCNTRACKERS-NEXT: bb.2:
- ; GFX908-GCNTRACKERS-NEXT: successors: %bb.3(0x80000000)
- ; GFX908-GCNTRACKERS-NEXT: {{ $}}
- ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_5]], implicit [[V_CVT_I32_F64_e32_6]], implicit [[V_CVT_I32_F64_e32_7]], implicit [[V_CVT_I32_F64_e32_8]], implicit [[V_CVT_I32_F64_e32_9]]
- ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_10]], implicit [[V_CVT_I32_F64_e32_11]], implicit [[V_CVT_I32_F64_e32_12]], implicit [[V_CVT_I32_F64_e32_13]], implicit [[V_CVT_I32_F64_e32_14]]
- ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_15]], implicit [[V_CVT_I32_F64_e32_16]], implicit [[V_CVT_I32_F64_e32_17]], implicit [[V_CVT_I32_F64_e32_18]], implicit [[V_CVT_I32_F64_e32_19]]
- ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_20]], implicit [[V_CVT_I32_F64_e32_21]], implicit [[V_CVT_I32_F64_e32_22]], implicit [[V_CVT_I32_F64_e32_23]], implicit [[V_CVT_I32_F64_e32_24]]
- ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_25]], implicit [[V_CVT_I32_F64_e32_26]], implicit [[V_CVT_I32_F64_e32_27]], implicit [[V_CVT_I32_F64_e32_28]], implicit [[V_CVT_I32_F64_e32_29]]
- ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_30]], implicit [[V_CVT_I32_F64_e32_31]], implicit [[V_CVT_I32_F64_e32_32]], implicit [[V_CVT_I32_F64_e32_33]]
- ; GFX908-GCNTRACKERS-NEXT: {{ $}}
- ; GFX908-GCNTRACKERS-NEXT: bb.3:
- ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_]], implicit [[V_CVT_I32_F64_e32_1]], implicit [[V_CVT_I32_F64_e32_2]], implicit [[V_CVT_I32_F64_e32_3]], implicit [[V_CVT_I32_F64_e32_4]]
- ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_5]], implicit [[V_CVT_I32_F64_e32_6]], implicit [[V_CVT_I32_F64_e32_7]], implicit [[V_CVT_I32_F64_e32_8]], implicit [[V_CVT_I32_F64_e32_9]]
- ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_10]], implicit [[V_CVT_I32_F64_e32_11]], implicit [[V_CVT_I32_F64_e32_12]], implicit [[V_CVT_I32_F64_e32_13]], implicit [[V_CVT_I32_F64_e32_14]]
- ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_15]], implicit [[V_CVT_I32_F64_e32_16]], implicit [[V_CVT_I32_F64_e32_17]], implicit [[V_CVT_I32_F64_e32_18]], implicit [[V_CVT_I32_F64_e32_19]]
- ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_20]], implicit [[V_CVT_I32_F64_e32_21]], implicit [[V_CVT_I32_F64_e32_22]], implicit [[V_CVT_I32_F64_e32_23]], implicit [[V_CVT_I32_F64_e32_24]]
- ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_25]], implicit [[V_CVT_I32_F64_e32_26]], implicit [[V_CVT_I32_F64_e32_27]], implicit [[V_CVT_I32_F64_e32_28]], implicit [[V_CVT_I32_F64_e32_29]]
- ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_30]], implicit [[V_CVT_I32_F64_e32_31]]
- ; GFX908-GCNTRACKERS-NEXT: S_ENDPGM 0
- bb.0:
- successors: %bb.1
-
- %0:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 0, implicit $exec, implicit $mode, implicit-def $m0
- %1:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode, implicit-def $m0
- %2:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode, implicit-def $m0
- %3:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode, implicit-def $m0
- %4:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 4, implicit $exec, implicit $mode, implicit-def $m0
- %5:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 5, implicit $exec, implicit $mode, implicit-def $m0
- %6:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 6, implicit $exec, implicit $mode, implicit-def $m0
- %7:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 7, implicit $exec, implicit $mode, implicit-def $m0
- %8:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 8, implicit $exec, implicit $mode, implicit-def $m0
- %9:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 9, implicit $exec, implicit $mode, implicit-def $m0
- %10:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 10, implicit $exec, implicit $mode, implicit-def $m0
- %11:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 11, implicit $exec, implicit $mode, implicit-def $m0
- %12:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 12, implicit $exec, implicit $mode, implicit-def $m0
- %13:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 13, implicit $exec, implicit $mode, implicit-def $m0
- %14:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 14, implicit $exec, implicit $mode, implicit-def $m0
- %15:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 15, implicit $exec, implicit $mode, implicit-def $m0
- %16:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 16, implicit $exec, implicit $mode, implicit-def $m0
- %17:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 17, implicit $exec, implicit $mode, implicit-def $m0
- %18:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 18, implicit $exec, implicit $mode, implicit-def $m0
- %19:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 19, implicit $exec, implicit $mode, implicit-def $m0
- %20:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 20, implicit $exec, implicit $mode, implicit-def $m0
- %21:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 21, implicit $exec, implicit $mode, implicit-def $m0
- %22:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode, implicit-def $m0
- %23:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode, implicit-def $m0
- %24:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
- %25:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 25, implicit $exec, implicit $mode, implicit-def $m0
- %26:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 26, implicit $exec, implicit $mode, implicit-def $m0
- %27:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 27, implicit $exec, implicit $mode, implicit-def $m0
- %28:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 28, implicit $exec, implicit $mode, implicit-def $m0
- %29:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 29, implicit $exec, implicit $mode, implicit-def $m0
- %30:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 30, implicit $exec, implicit $mode, implicit-def $m0
- %31:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 31, implicit $exec, implicit $mode, implicit-def $m0
-
- bb.1:
- successors: %bb.2
-
- %32:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 32, implicit $exec, implicit $mode
- %33:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 33, implicit $exec, implicit $mode
- SCHED_BARRIER 0
- S_NOP 0, implicit %0, implicit %1, implicit %2, implicit %3, implicit %4
-
- bb.2:
- successors: %bb.3
-
- S_NOP 0, implicit %5, implicit %6, implicit %7, implicit %8, implicit %9
- S_NOP 0, implicit %10, implicit %11, implicit %12, implicit %13, implicit %14
- S_NOP 0, implicit %15, implicit %16, implicit %17, implicit %18, implicit %19
- S_NOP 0, implicit %20, implicit %21, implicit %22, implicit %23, implicit %24
- S_NOP 0, implicit %25, implicit %26, implicit %27, implicit %28, implicit %29
- S_NOP 0, implicit %30, implicit %31, implicit %32, implicit %33
-
- bb.3:
-
- S_NOP 0, implicit %0, implicit %1, implicit %2, implicit %3, implicit %4,
- S_NOP 0, implicit %5, implicit %6, implicit %7, implicit %8, implicit %9
- S_NOP 0, implicit %10, implicit %11, implicit %12, implicit %13, implicit %14
- S_NOP 0, implicit %15, implicit %16, implicit %17, implicit %18, implicit %19
- S_NOP 0, implicit %20, implicit %21, implicit %22, implicit %23, implicit %24
- S_NOP 0, implicit %25, implicit %26, implicit %27, implicit %28, implicit %29
- S_NOP 0, implicit %30, implicit %31
-
- S_ENDPGM 0
-...
----
name: test_occ_8_physreg_use
tracksRegLiveness: true
machineFunctionInfo:
diff --git a/llvm/test/CodeGen/AMDGPU/mfma-loop.ll b/llvm/test/CodeGen/AMDGPU/mfma-loop.ll
index 63c48121e40c9..8b6bb9b8c5fcd 100644
--- a/llvm/test/CodeGen/AMDGPU/mfma-loop.ll
+++ b/llvm/test/CodeGen/AMDGPU/mfma-loop.ll
@@ -3902,8 +3902,8 @@ define <32 x float> @test_mfma_loop_non_splat_ret_use() #0 {
; GFX908-NEXT: v_accvgpr_write_b32 a3, 0
; GFX908-NEXT: v_accvgpr_write_b32 a2, 0
; GFX908-NEXT: v_accvgpr_write_b32 a0, 0
-; GFX908-NEXT: v_mov_b32_e32 v0, 1.0
; GFX908-NEXT: s_mov_b32 s4, 16
+; GFX908-NEXT: v_mov_b32_e32 v0, 1.0
; GFX908-NEXT: v_mov_b32_e32 v1, 2.0
; GFX908-NEXT: .LBB11_1: ; %for.cond.preheader
; GFX908-NEXT: ; =>This Inner Loop Header: Depth=1
More information about the llvm-commits
mailing list