[llvm] [AMDGPU] Add `GCNRPTarget` to track register pressure against a target (PR #145765)
Lucas Ramirez via llvm-commits
llvm-commits at lists.llvm.org
Wed Jun 25 11:57:53 PDT 2025
https://github.com/lucas-rami created https://github.com/llvm/llvm-project/pull/145765
This adds the `GCNRPTarget` class which models a register pressure target (i.e., maximum number of SGPRs/VGPRS) that one can track register savings against. The only current use of this class is in the scheduler's rematerialization stage. It replaces the more ad-hoc (and now deleted) `ExcessRP` class which used to serve the same purpose.
This is only NFC~ish because `GCNRPTarget` tracks VGPR usage more accurately than `ExcessRP` used to. To estimate required combined VGPR savings we now additionally take into account the number of available VGPRs in both banks (ArchVGPR and AGPR) at the time where the RP target is created, whereas we used to only consider explicit savings made from the starting RP. This makes VGPR savings estimations more accurate in cases where we allow for savings in one VGPR bank to help towards reducing pressure in another VGPR bank (see `GCNRPTarget::CombineVGPRSavings`). This is the cause for unit test changes.
>From 1b7074c4cd237c382247e58b68edcc6642431a9b Mon Sep 17 00:00:00 2001
From: Lucas Ramirez <lucas.rami at proton.me>
Date: Wed, 25 Jun 2025 16:49:56 +0000
Subject: [PATCH] Move excess RP tracking logic to `GCNRPTarget`
---
llvm/lib/Target/AMDGPU/GCNRegPressure.cpp | 63 +++++
llvm/lib/Target/AMDGPU/GCNRegPressure.h | 118 +++++++--
llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp | 226 +++---------------
...ine-scheduler-sink-trivial-remats-attr.mir | 157 ++++++------
4 files changed, 272 insertions(+), 292 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
index eed3fb20f5be8..7d6723a6108be 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
@@ -361,6 +361,69 @@ static LaneBitmask findUseBetween(unsigned Reg, LaneBitmask LastUseMask,
return LastUseMask;
}
+////////////////////////////////////////////////////////////////////////////////
+// GCNRPTarget
+
+GCNRPTarget::GCNRPTarget(const MachineFunction &MF, const GCNRegPressure &RP,
+ bool CombineVGPRSavings)
+ : RP(RP), CombineVGPRSavings(CombineVGPRSavings) {
+ const Function &F = MF.getFunction();
+ const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+ setRegLimits(ST.getMaxNumSGPRs(F), ST.getMaxNumVGPRs(F), MF);
+}
+
+GCNRPTarget::GCNRPTarget(unsigned NumSGPRs, unsigned NumVGPRs,
+ const MachineFunction &MF, const GCNRegPressure &RP,
+ bool CombineVGPRSavings)
+ : RP(RP), CombineVGPRSavings(CombineVGPRSavings) {
+ setRegLimits(NumSGPRs, NumVGPRs, MF);
+}
+
+GCNRPTarget::GCNRPTarget(unsigned Occupancy, const MachineFunction &MF,
+ const GCNRegPressure &RP, bool CombineVGPRSavings)
+ : RP(RP), CombineVGPRSavings(CombineVGPRSavings) {
+ const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+ unsigned DynamicVGPRBlockSize =
+ MF.getInfo<SIMachineFunctionInfo>()->getDynamicVGPRBlockSize();
+ setRegLimits(ST.getMaxNumSGPRs(Occupancy, /*Addressable=*/false),
+ ST.getMaxNumVGPRs(Occupancy, DynamicVGPRBlockSize), MF);
+}
+
+void GCNRPTarget::setRegLimits(unsigned NumSGPRs, unsigned NumVGPRs,
+ const MachineFunction &MF) {
+ const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+ unsigned DynamicVGPRBlockSize =
+ MF.getInfo<SIMachineFunctionInfo>()->getDynamicVGPRBlockSize();
+ MaxSGPRs = std::min(ST.getAddressableNumSGPRs(), NumSGPRs);
+ MaxVGPRs = std::min(ST.getAddressableNumArchVGPRs(), NumVGPRs);
+ MaxUnifiedVGPRs =
+ ST.hasGFX90AInsts()
+ ? std::min(ST.getAddressableNumVGPRs(DynamicVGPRBlockSize), NumVGPRs)
+ : 0;
+}
+
+bool GCNRPTarget::isSaveBeneficial(Register Reg,
+ const MachineRegisterInfo &MRI) const {
+ const TargetRegisterClass *RC = MRI.getRegClass(Reg);
+ const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
+ const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo *>(TRI);
+
+ if (SRI->isSGPRClass(RC))
+ return RP.getSGPRNum() > MaxSGPRs;
+ unsigned NumVGPRs =
+ SRI->isAGPRClass(RC) ? RP.getAGPRNum() : RP.getArchVGPRNum();
+ return isVGPRBankSaveBeneficial(NumVGPRs);
+}
+
+bool GCNRPTarget::satisfied() const {
+ if (RP.getSGPRNum() > MaxSGPRs)
+ return false;
+ if (RP.getVGPRNum(false) > MaxVGPRs &&
+ (!CombineVGPRSavings || !satisifiesVGPRBanksTarget()))
+ return false;
+ return satisfiesUnifiedTarget();
+}
+
///////////////////////////////////////////////////////////////////////////////
// GCNRPTracker
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.h b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
index 397e891c8d806..32db3b78aaae5 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.h
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
@@ -31,9 +31,7 @@ class SlotIndex;
struct GCNRegPressure {
enum RegKind { SGPR, VGPR, AGPR, TOTAL_KINDS };
- GCNRegPressure() {
- clear();
- }
+ GCNRegPressure() { clear(); }
bool empty() const { return !Value[SGPR] && !Value[VGPR] && !Value[AGPR]; }
@@ -76,9 +74,7 @@ struct GCNRegPressure {
DynamicVGPRBlockSize));
}
- void inc(unsigned Reg,
- LaneBitmask PrevMask,
- LaneBitmask NewMask,
+ void inc(unsigned Reg, LaneBitmask PrevMask, LaneBitmask NewMask,
const MachineRegisterInfo &MRI);
bool higherOccupancy(const GCNSubtarget &ST, const GCNRegPressure &O,
@@ -106,9 +102,7 @@ struct GCNRegPressure {
return std::equal(&Value[0], &Value[ValueArraySize], O.Value);
}
- bool operator!=(const GCNRegPressure &O) const {
- return !(*this == O);
- }
+ bool operator!=(const GCNRegPressure &O) const { return !(*this == O); }
GCNRegPressure &operator+=(const GCNRegPressure &RHS) {
for (unsigned I = 0; I < ValueArraySize; ++I)
@@ -134,8 +128,7 @@ struct GCNRegPressure {
static unsigned getRegKind(const TargetRegisterClass *RC,
const SIRegisterInfo *STI);
- friend GCNRegPressure max(const GCNRegPressure &P1,
- const GCNRegPressure &P2);
+ friend GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2);
friend Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST,
unsigned DynamicVGPRBlockSize);
@@ -162,6 +155,101 @@ inline GCNRegPressure operator-(const GCNRegPressure &P1,
return Diff;
}
+////////////////////////////////////////////////////////////////////////////////
+// GCNRPTarget
+
+/// Models a register pressure target, allowing to evaluate and track register
+/// savings against that target from a starting \ref GCNRegPressure.
+class GCNRPTarget {
+public:
+ /// Sets up the target such that the register pressure starting at \p RP does
+ /// not show register spilling on function \p MF (w.r.t. the function's
+ /// mininum target occupancy).
+ GCNRPTarget(const MachineFunction &MF, const GCNRegPressure &RP,
+ bool CombineVGPRSavings = false);
+
+ /// Sets up the target such that the register pressure starting at \p RP does
+ /// not use more than \p NumSGPRs SGPRs and \p NumVGPRs VGPRs on function \p
+ /// MF.
+ GCNRPTarget(unsigned NumSGPRs, unsigned NumVGPRs, const MachineFunction &MF,
+ const GCNRegPressure &RP, bool CombineVGPRSavings = false);
+
+ /// Sets up the target such that the register pressure starting at \p RP does
+ /// not prevent achieving an occupancy of at least \p Occupancy on function
+ /// \p MF.
+ GCNRPTarget(unsigned Occupancy, const MachineFunction &MF,
+ const GCNRegPressure &RP, bool CombineVGPRSavings = false);
+
+ const GCNRegPressure &getCurrentRP() const { return RP; }
+
+ void setRP(const GCNRegPressure &NewRP) { RP = NewRP; }
+
+ /// Determines whether saving virtual register \p Reg will be beneficial
+ /// towards achieving the RP target.
+ bool isSaveBeneficial(Register Reg, const MachineRegisterInfo &MRI) const;
+
+ /// Saves virtual register \p Reg with lanemask \p Mask.
+ void saveReg(Register Reg, LaneBitmask Mask, const MachineRegisterInfo &MRI) {
+ RP.inc(Reg, Mask, LaneBitmask::getNone(), MRI);
+ }
+
+ /// Whether the current RP is at or below the defined pressure target.
+ bool satisfied() const;
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ friend raw_ostream &operator<<(raw_ostream &OS, const GCNRPTarget &Target) {
+ OS << "Actual/Target: " << Target.RP.getSGPRNum() << '/' << Target.MaxSGPRs
+ << " SGPRs, " << Target.RP.getArchVGPRNum() << '/' << Target.MaxVGPRs
+ << " ArchVGPRs, " << Target.RP.getAGPRNum() << '/' << Target.MaxVGPRs
+ << " AGPRs";
+
+ if (Target.MaxUnifiedVGPRs) {
+ OS << ", " << Target.RP.getVGPRNum(true) << '/' << Target.MaxUnifiedVGPRs
+ << " VGPRs (unified)";
+ } else if (Target.CombineVGPRSavings) {
+ OS << ", " << Target.RP.getArchVGPRNum() + Target.RP.getAGPRNum() << '/'
+ << 2 * Target.MaxVGPRs << " VGPRs (combined target)";
+ }
+ return OS;
+ }
+#endif
+
+private:
+ /// Current register pressure.
+ GCNRegPressure RP;
+
+ /// Target number of SGPRs.
+ unsigned MaxSGPRs;
+ /// Target number of ArchVGPRs and AGPRs.
+ unsigned MaxVGPRs;
+ /// Target number of overall VGPRs for subtargets with unified RFs. Always 0
+ /// for subtargets with non-unified RFs.
+ unsigned MaxUnifiedVGPRs;
+ /// Whether we consider that the register allocator will be able to swap
+ /// between ArchVGPRs and AGPRs by copying them to a super register class.
+ /// Concretely, this allows savings in one of the VGPR banks to help toward
+ /// savings in the other VGPR bank.
+ bool CombineVGPRSavings;
+
+ inline bool satisifiesVGPRBanksTarget() const {
+ assert(CombineVGPRSavings && "only makes sense with combined savings");
+ return RP.getArchVGPRNum() + RP.getAGPRNum() <= 2 * MaxVGPRs;
+ }
+
+ /// Always satisified when the subtarget doesn't have a unified RF.
+ inline bool satisfiesUnifiedTarget() const {
+ return !MaxUnifiedVGPRs || RP.getVGPRNum(true) <= MaxUnifiedVGPRs;
+ }
+
+ inline bool isVGPRBankSaveBeneficial(unsigned NumVGPRs) const {
+ return NumVGPRs > MaxVGPRs || !satisfiesUnifiedTarget() ||
+ (CombineVGPRSavings && !satisifiesVGPRBanksTarget());
+ }
+
+ void setRegLimits(unsigned MaxSGPRs, unsigned MaxVGPRs,
+ const MachineFunction &MF);
+};
+
///////////////////////////////////////////////////////////////////////////////
// GCNRPTracker
@@ -197,9 +285,7 @@ class GCNRPTracker {
GCNRegPressure getPressure() const { return CurPressure; }
- decltype(LiveRegs) moveLiveRegs() {
- return std::move(LiveRegs);
- }
+ decltype(LiveRegs) moveLiveRegs() { return std::move(LiveRegs); }
};
GCNRPTracker::LiveRegSet getLiveRegs(SlotIndex SI, const LiveIntervals &LIS,
@@ -345,7 +431,7 @@ GCNRPTracker::LiveRegSet getLiveRegs(SlotIndex SI, const LiveIntervals &LIS,
/// Note: there is no entry in the map for instructions with empty live reg set
/// Complexity = O(NumVirtRegs * averageLiveRangeSegmentsPerReg * lg(R))
template <typename Range>
-DenseMap<MachineInstr*, GCNRPTracker::LiveRegSet>
+DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet>
getLiveRegMap(Range &&R, bool After, LiveIntervals &LIS) {
std::vector<SlotIndex> Indexes;
Indexes.reserve(std::distance(R.begin(), R.end()));
@@ -370,7 +456,7 @@ getLiveRegMap(Range &&R, bool After, LiveIntervals &LIS) {
if (!LI.hasSubRanges()) {
for (auto SI : LiveIdxs)
LiveRegMap[SII.getInstructionFromIndex(SI)][Reg] =
- MRI.getMaxLaneMaskForVReg(Reg);
+ MRI.getMaxLaneMaskForVReg(Reg);
} else
for (const auto &S : LI.subranges()) {
// constrain search for subranges by indexes live at main range
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index 8aae340de791e..fce8f36d45969 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -1699,184 +1699,17 @@ bool PreRARematStage::allUsesAvailableAt(const MachineInstr *InstToRemat,
return true;
}
-namespace {
-/// Models excess register pressure in a region and tracks our progress as we
-/// identify rematerialization opportunities.
-struct ExcessRP {
- /// Number of excess SGPRs.
- unsigned SGPRs = 0;
- /// Number of excess ArchVGPRs.
- unsigned ArchVGPRs = 0;
- /// Number of excess AGPRs.
- unsigned AGPRs = 0;
- /// For unified register files, number of excess VGPRs. 0 otherwise.
- unsigned VGPRs = 0;
- /// For unified register files with AGPR usage, number of excess ArchVGPRs to
- /// save before we are able to save a whole allocation granule.
- unsigned ArchVGPRsToAlignment = 0;
- /// Whether the region uses AGPRs.
- bool HasAGPRs = false;
- /// Whether the subtarget has a unified RF.
- bool UnifiedRF;
- /// Whether we consider that the register allocator will be able to swap
- /// between ArchVGPRs and AGPRs by copying them to a super register class.
- /// Concretely, this allows savings of one kind of VGPR to help toward savings
- /// the other kind of VGPR.
- bool CombineVGPRSavings;
-
- /// Constructs the excess RP model; determines the excess pressure w.r.t. a
- /// maximum number of allowed SGPRs/VGPRs.
- ExcessRP(const GCNSubtarget &ST, const GCNRegPressure &RP, unsigned MaxSGPRs,
- unsigned MaxVGPRs, bool CombineVGPRSavings);
-
- /// Accounts for \p NumRegs saved SGPRs in the model. Returns whether saving
- /// these SGPRs helped reduce excess pressure.
- bool saveSGPRs(unsigned NumRegs) { return saveRegs(SGPRs, NumRegs); }
-
- /// Accounts for \p NumRegs saved ArchVGPRs in the model. Returns whether
- /// saving these ArchGPRs helped reduce excess pressure.
- bool saveArchVGPRs(unsigned NumRegs);
-
- /// Accounts for \p NumRegs saved AGPRs in the model. Returns whether saving
- /// these AGPRs helped reduce excess pressure.
- bool saveAGPRs(unsigned NumRegs);
-
- /// Returns whether there is any excess register pressure.
- operator bool() const { return SGPRs || ArchVGPRs || AGPRs || VGPRs; }
-
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
- friend raw_ostream &operator<<(raw_ostream &OS, const ExcessRP &Excess) {
- OS << Excess.SGPRs << " SGPRs, " << Excess.ArchVGPRs << " ArchVGPRs, and "
- << Excess.AGPRs << " AGPRs, (" << Excess.VGPRs
- << " VGPRs in total, next ArchVGPR aligment in "
- << Excess.ArchVGPRsToAlignment << " registers)\n";
- return OS;
- }
-#endif
-
-private:
- static inline bool saveRegs(unsigned &LeftToSave, unsigned &NumRegs) {
- unsigned NumSaved = std::min(LeftToSave, NumRegs);
- NumRegs -= NumSaved;
- LeftToSave -= NumSaved;
- return NumSaved;
- }
-};
-} // namespace
-
-ExcessRP::ExcessRP(const GCNSubtarget &ST, const GCNRegPressure &RP,
- unsigned MaxSGPRs, unsigned MaxVGPRs,
- bool CombineVGPRSavings)
- : UnifiedRF(ST.hasGFX90AInsts()), CombineVGPRSavings(CombineVGPRSavings) {
- // Compute excess SGPR pressure.
- unsigned NumSGPRs = RP.getSGPRNum();
- if (NumSGPRs > MaxSGPRs)
- SGPRs = NumSGPRs - MaxSGPRs;
-
- // Compute excess ArchVGPR/AGPR pressure.
- unsigned NumArchVGPRs = RP.getArchVGPRNum();
- unsigned NumAGPRs = RP.getAGPRNum();
- HasAGPRs = NumAGPRs;
- if (!UnifiedRF) {
- // Non-unified RF. Account for excess pressure for ArchVGPRs and AGPRs
- // independently.
- if (NumArchVGPRs > MaxVGPRs)
- ArchVGPRs = NumArchVGPRs - MaxVGPRs;
- if (NumAGPRs > MaxVGPRs)
- AGPRs = NumAGPRs - MaxVGPRs;
- return;
- }
-
- // Independently of whether overall VGPR pressure is under the limit, we still
- // have to check whether ArchVGPR pressure or AGPR pressure alone exceeds the
- // number of addressable registers in each category.
- const unsigned MaxArchVGPRs = ST.getAddressableNumArchVGPRs();
- if (NumArchVGPRs > MaxArchVGPRs) {
- ArchVGPRs = NumArchVGPRs - MaxArchVGPRs;
- NumArchVGPRs = MaxArchVGPRs;
- }
- if (NumAGPRs > MaxArchVGPRs) {
- AGPRs = NumAGPRs - MaxArchVGPRs;
- NumAGPRs = MaxArchVGPRs;
- }
-
- // Check overall VGPR usage against the limit; any excess above addressable
- // register limits has already been accounted for.
- const unsigned Granule = AMDGPU::IsaInfo::getArchVGPRAllocGranule();
- unsigned NumVGPRs = GCNRegPressure::getUnifiedVGPRNum(NumArchVGPRs, NumAGPRs);
- if (NumVGPRs > MaxVGPRs) {
- VGPRs = NumVGPRs - MaxVGPRs;
- ArchVGPRsToAlignment = NumArchVGPRs - alignDown(NumArchVGPRs, Granule);
- if (!ArchVGPRsToAlignment)
- ArchVGPRsToAlignment = Granule;
- }
-}
-
-bool ExcessRP::saveArchVGPRs(unsigned NumRegs) {
- bool Progress = saveRegs(ArchVGPRs, NumRegs);
- if (!NumRegs)
- return Progress;
-
- if (!UnifiedRF) {
- if (CombineVGPRSavings)
- Progress |= saveRegs(AGPRs, NumRegs);
- } else if (HasAGPRs && (VGPRs || (CombineVGPRSavings && AGPRs))) {
- // There is progress as long as there are VGPRs left to save, even if the
- // save induced by this particular call does not cross an ArchVGPR alignment
- // barrier.
- Progress = true;
-
- // ArchVGPRs can only be allocated as a multiple of a granule in unified RF.
- unsigned NumSavedRegs = 0;
-
- // Count the number of whole ArchVGPR allocation granules we can save.
- const unsigned Granule = AMDGPU::IsaInfo::getArchVGPRAllocGranule();
- if (unsigned NumGranules = NumRegs / Granule; NumGranules) {
- NumSavedRegs = NumGranules * Granule;
- NumRegs -= NumSavedRegs;
- }
-
- // We may be able to save one more whole ArchVGPR allocation granule.
- if (NumRegs >= ArchVGPRsToAlignment) {
- NumSavedRegs += Granule;
- ArchVGPRsToAlignment = Granule - (NumRegs - ArchVGPRsToAlignment);
- } else {
- ArchVGPRsToAlignment -= NumRegs;
- }
-
- // Prioritize saving generic VGPRs, then AGPRs if we consider that the
- // register allocator will be able to replace an AGPR with an ArchVGPR.
- saveRegs(VGPRs, NumSavedRegs);
- if (CombineVGPRSavings)
- saveRegs(AGPRs, NumSavedRegs);
- } else {
- // No AGPR usage in the region i.e., no allocation granule to worry about.
- Progress |= saveRegs(VGPRs, NumRegs);
- }
- return Progress;
-}
-
-bool ExcessRP::saveAGPRs(unsigned NumRegs) {
- bool Progress = saveRegs(AGPRs, NumRegs);
- if (UnifiedRF)
- Progress |= saveRegs(VGPRs, NumRegs);
- if (CombineVGPRSavings)
- Progress |= saveRegs(ArchVGPRs, NumRegs);
- return Progress;
-}
-
bool PreRARematStage::canIncreaseOccupancyOrReduceSpill() {
- const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo *>(DAG.TRI);
-
REMAT_DEBUG({
dbgs() << "Collecting rematerializable instructions in ";
MF.getFunction().printAsOperand(dbgs(), false);
dbgs() << '\n';
});
- // Maps optimizable regions (i.e., regions at minimum and VGPR-limited
- // occupancy, or regions with VGPR spilling) to a model of their excess RP.
- DenseMap<unsigned, ExcessRP> OptRegions;
+ // Maps optimizable regions (i.e., regions at minimum and register-limited
+ // occupancy, or regions with spilling) to the target RP we would like to
+ // reach.
+ DenseMap<unsigned, GCNRPTarget> OptRegions;
const Function &F = MF.getFunction();
unsigned DynamicVGPRBlockSize =
MF.getInfo<SIMachineFunctionInfo>()->getDynamicVGPRBlockSize();
@@ -1901,32 +1734,35 @@ bool PreRARematStage::canIncreaseOccupancyOrReduceSpill() {
// the register allocator i.e., the scheduler will not be able to reason
// about these savings and will not report an increase in the achievable
// occupancy, triggering rollbacks.
- ExcessRP Excess(ST, RP, MaxSGPRsNoSpill, MaxVGPRsNoSpill,
- /*CombineVGPRSavings=*/true);
- if (Excess && IncreaseOccupancy) {
+ GCNRPTarget Target(MaxSGPRsNoSpill, MaxVGPRsNoSpill, MF, RP,
+ /*CombineVGPRSavings=*/true);
+ if (!Target.satisfied() && IncreaseOccupancy) {
// There is spilling in the region and we were so far trying to increase
// occupancy. Strop trying that and focus on reducing spilling.
IncreaseOccupancy = false;
OptRegions.clear();
} else if (IncreaseOccupancy) {
// There is no spilling in the region, try to increase occupancy.
- Excess = ExcessRP(ST, RP, MaxSGPRsIncOcc, MaxVGPRsIncOcc,
- /*CombineVGPRSavings=*/false);
+ Target = GCNRPTarget(MaxSGPRsIncOcc, MaxVGPRsIncOcc, MF, RP,
+ /*CombineVGPRSavings=*/false);
}
- if (Excess)
- OptRegions.insert({I, Excess});
+ if (!Target.satisfied())
+ OptRegions.insert({I, Target});
}
if (OptRegions.empty())
return false;
#ifndef NDEBUG
- if (IncreaseOccupancy)
- REMAT_DEBUG(dbgs() << "Occupancy minimal in regions:\n");
- else
- REMAT_DEBUG(dbgs() << "Spilling in regions:\n");
+ if (IncreaseOccupancy) {
+ REMAT_DEBUG(dbgs() << "Occupancy minimal (" << DAG.MinOccupancy
+ << ") in regions:\n");
+ } else {
+ REMAT_DEBUG(dbgs() << "Spilling w.r.t. minimum target occupancy ("
+ << WavesPerEU.first << ") in regions:\n");
+ }
for (unsigned I = 0, E = DAG.Regions.size(); I != E; ++I) {
if (auto OptIt = OptRegions.find(I); OptIt != OptRegions.end())
- REMAT_DEBUG(dbgs() << " " << I << ": " << OptIt->getSecond() << '\n');
+ REMAT_DEBUG(dbgs() << " [" << I << "] " << OptIt->getSecond() << '\n');
}
#endif
@@ -1941,18 +1777,14 @@ bool PreRARematStage::canIncreaseOccupancyOrReduceSpill() {
// estimate that we have identified enough rematerialization opportunities to
// achieve our goal, and sets Progress to true when this particular reduction
// in pressure was helpful toward that goal.
- auto ReduceRPInRegion = [&](auto OptIt, LaneBitmask Mask,
- const TargetRegisterClass *RC,
+ auto ReduceRPInRegion = [&](auto OptIt, Register Reg, LaneBitmask Mask,
bool &Progress) -> bool {
- ExcessRP &Excess = OptIt->getSecond();
- unsigned NumRegs = SIRegisterInfo::getNumCoveredRegs(Mask);
- if (SRI->isSGPRClass(RC))
- Progress |= Excess.saveSGPRs(NumRegs);
- else if (SRI->isAGPRClass(RC))
- Progress |= Excess.saveAGPRs(NumRegs);
- else
- Progress |= Excess.saveArchVGPRs(NumRegs);
- if (!Excess)
+ GCNRPTarget &Target = OptIt->getSecond();
+ if (!Target.isSaveBeneficial(Reg, DAG.MRI))
+ return false;
+ Progress = true;
+ Target.saveReg(Reg, Mask, DAG.MRI);
+ if (Target.satisfied())
OptRegions.erase(OptIt->getFirst());
return OptRegions.empty();
};
@@ -2013,7 +1845,6 @@ bool PreRARematStage::canIncreaseOccupancyOrReduceSpill() {
Rematerializations.try_emplace(&DefMI, UseMI).first->second;
bool RematUseful = false;
- const TargetRegisterClass *RC = DAG.MRI.getRegClass(Reg);
if (auto It = OptRegions.find(I); It != OptRegions.end()) {
// Optimistically consider that moving the instruction out of its
// defining region will reduce RP in the latter; this assumes that
@@ -2021,7 +1852,7 @@ bool PreRARematStage::canIncreaseOccupancyOrReduceSpill() {
// instruction and the end of the region.
REMAT_DEBUG(dbgs() << " Defining region is optimizable\n");
LaneBitmask Mask = DAG.RegionLiveOuts.getLiveRegsForRegionIdx(I)[Reg];
- if (ReduceRPInRegion(It, Mask, RC, RematUseful))
+ if (ReduceRPInRegion(It, Reg, Mask, RematUseful))
return true;
}
@@ -2041,7 +1872,8 @@ bool PreRARematStage::canIncreaseOccupancyOrReduceSpill() {
// instruction's use.
if (auto It = OptRegions.find(LIRegion); It != OptRegions.end()) {
REMAT_DEBUG(dbgs() << " Live-in in region " << LIRegion << '\n');
- if (ReduceRPInRegion(It, DAG.LiveIns[LIRegion][Reg], RC, RematUseful))
+ if (ReduceRPInRegion(It, Reg, DAG.LiveIns[LIRegion][Reg],
+ RematUseful))
return true;
}
}
diff --git a/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats-attr.mir b/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats-attr.mir
index 7ef4b0f9b6108..23412aaeb2e23 100644
--- a/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats-attr.mir
+++ b/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats-attr.mir
@@ -6,7 +6,7 @@
define void @small_num_sgprs_as_spill() "amdgpu-num-sgpr"="85" {
ret void
}
- define void @small_num_vgprs_as_spill() "amdgpu-num-vgpr"="28" {
+ define void @small_num_vgprs_as_spill() "amdgpu-num-vgpr"="14" {
ret void
}
define void @dont_remat_waves_per_eu() "amdgpu-flat-work-group-size"="1024,1024" "amdgpu-waves-per-eu"="7,7" {
@@ -391,14 +391,14 @@ body: |
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_]], implicit [[V_CVT_I32_F64_e32_1]], implicit [[V_CVT_I32_F64_e32_2]], implicit [[V_CVT_I32_F64_e32_3]], implicit [[V_CVT_I32_F64_e32_4]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_5]], implicit [[V_CVT_I32_F64_e32_6]], implicit [[V_CVT_I32_F64_e32_7]], implicit [[V_CVT_I32_F64_e32_8]], implicit [[V_CVT_I32_F64_e32_9]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_10]], implicit [[V_CVT_I32_F64_e32_11]], implicit [[V_CVT_I32_F64_e32_12]], implicit [[V_CVT_I32_F64_e32_13]], implicit [[V_CVT_I32_F64_e32_14]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_15]], implicit [[V_CVT_I32_F64_e32_16]], implicit [[V_CVT_I32_F64_e32_17]], implicit [[V_CVT_I32_F64_e32_18]], implicit [[V_CVT_I32_F64_e32_19]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_20]], implicit [[V_CVT_I32_F64_e32_21]], implicit [[V_CVT_I32_F64_e32_22]], implicit [[V_CVT_I32_F64_e32_23]], implicit [[V_CVT_I32_F64_e32_24]]
; GFX908-NEXT: [[V_CVT_I32_F64_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 27, implicit $exec, implicit $mode
; GFX908-NEXT: [[V_CVT_I32_F64_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 28, implicit $exec, implicit $mode
; GFX908-NEXT: [[V_CVT_I32_F64_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 29, implicit $exec, implicit $mode
; GFX908-NEXT: [[V_CVT_I32_F64_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 30, implicit $exec, implicit $mode
; GFX908-NEXT: [[V_CVT_I32_F64_e32_32:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 31, implicit $exec, implicit $mode
; GFX908-NEXT: [[V_CVT_I32_F64_e32_33:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 32, implicit $exec, implicit $mode
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_15]], implicit [[V_CVT_I32_F64_e32_16]], implicit [[V_CVT_I32_F64_e32_17]], implicit [[V_CVT_I32_F64_e32_18]], implicit [[V_CVT_I32_F64_e32_19]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_20]], implicit [[V_CVT_I32_F64_e32_21]], implicit [[V_CVT_I32_F64_e32_22]], implicit [[V_CVT_I32_F64_e32_23]], implicit [[V_CVT_I32_F64_e32_24]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_25]], implicit [[V_CVT_I32_F64_e32_26]], implicit [[V_CVT_I32_F64_e32_28]], implicit [[V_CVT_I32_F64_e32_29]], implicit [[V_CVT_I32_F64_e32_30]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_31]], implicit [[V_CVT_I32_F64_e32_32]], implicit [[V_CVT_I32_F64_e32_33]], implicit [[V_CVT_I32_F64_e32_27]]
; GFX908-NEXT: S_ENDPGM 0
@@ -434,13 +434,7 @@ body: |
; GFX90A-NEXT: [[V_CVT_I32_F64_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
; GFX90A-NEXT: [[V_CVT_I32_F64_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 25, implicit $exec, implicit $mode, implicit-def $m0
; GFX90A-NEXT: [[V_CVT_I32_F64_e32_26:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 26, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 27, implicit $exec, implicit $mode
- ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 28, implicit $exec, implicit $mode
- ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 29, implicit $exec, implicit $mode
- ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 30, implicit $exec, implicit $mode
- ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 31, implicit $exec, implicit $mode
- ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_32:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 32, implicit $exec, implicit $mode
- ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_33:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 33, implicit $exec, implicit $mode
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 33, implicit $exec, implicit $mode
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.1:
; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_]], implicit [[V_CVT_I32_F64_e32_1]], implicit [[V_CVT_I32_F64_e32_2]], implicit [[V_CVT_I32_F64_e32_3]], implicit [[V_CVT_I32_F64_e32_4]]
@@ -448,8 +442,14 @@ body: |
; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_10]], implicit [[V_CVT_I32_F64_e32_11]], implicit [[V_CVT_I32_F64_e32_12]], implicit [[V_CVT_I32_F64_e32_13]], implicit [[V_CVT_I32_F64_e32_14]]
; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_15]], implicit [[V_CVT_I32_F64_e32_16]], implicit [[V_CVT_I32_F64_e32_17]], implicit [[V_CVT_I32_F64_e32_18]], implicit [[V_CVT_I32_F64_e32_19]]
; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_20]], implicit [[V_CVT_I32_F64_e32_21]], implicit [[V_CVT_I32_F64_e32_22]], implicit [[V_CVT_I32_F64_e32_23]], implicit [[V_CVT_I32_F64_e32_24]]
- ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_25]], implicit [[V_CVT_I32_F64_e32_26]], implicit [[V_CVT_I32_F64_e32_27]], implicit [[V_CVT_I32_F64_e32_28]], implicit [[V_CVT_I32_F64_e32_29]]
- ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_30]], implicit [[V_CVT_I32_F64_e32_31]], implicit [[V_CVT_I32_F64_e32_32]], implicit [[V_CVT_I32_F64_e32_33]]
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 27, implicit $exec, implicit $mode
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 28, implicit $exec, implicit $mode
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 29, implicit $exec, implicit $mode
+ ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_25]], implicit [[V_CVT_I32_F64_e32_26]], implicit [[V_CVT_I32_F64_e32_28]], implicit [[V_CVT_I32_F64_e32_29]], implicit [[V_CVT_I32_F64_e32_30]]
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 30, implicit $exec, implicit $mode
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_32:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 31, implicit $exec, implicit $mode
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_33:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 32, implicit $exec, implicit $mode
+ ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_31]], implicit [[V_CVT_I32_F64_e32_32]], implicit [[V_CVT_I32_F64_e32_33]], implicit [[V_CVT_I32_F64_e32_27]]
; GFX90A-NEXT: S_ENDPGM 0
bb.0:
successors: %bb.1
@@ -1090,8 +1090,8 @@ body: |
S_ENDPGM 0
...
# Requested [min,max] occupancy is [1,2]. There are 257 ArchVGPRs in use when
-# only 256 are available. We should just try to eliminate spilling by saving one
-# ArchVGPR.
+# only 256 are available. Nothing to do since it should be possible to use an
+# AGPR for one of the ArchVGPRs.
---
name: reduce_spill_archvgpr_above_addressable_limit
tracksRegLiveness: true
@@ -1103,6 +1103,7 @@ body: |
; GFX908-NEXT: successors: %bb.1(0x80000000)
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: [[V_CVT_I32_F64_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 0, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[DEF:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[V_CVT_I32_F64_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_3:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode, implicit-def $m0
@@ -1386,7 +1387,6 @@ body: |
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_220]], implicit [[V_CVT_I32_F64_e32_221]], implicit [[V_CVT_I32_F64_e32_222]], implicit [[V_CVT_I32_F64_e32_223]], implicit [[V_CVT_I32_F64_e32_224]], implicit [[V_CVT_I32_F64_e32_225]], implicit [[V_CVT_I32_F64_e32_226]], implicit [[V_CVT_I32_F64_e32_227]], implicit [[V_CVT_I32_F64_e32_228]], implicit [[V_CVT_I32_F64_e32_229]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_230]], implicit [[V_CVT_I32_F64_e32_231]], implicit [[V_CVT_I32_F64_e32_232]], implicit [[V_CVT_I32_F64_e32_233]], implicit [[V_CVT_I32_F64_e32_234]], implicit [[V_CVT_I32_F64_e32_235]], implicit [[V_CVT_I32_F64_e32_236]], implicit [[V_CVT_I32_F64_e32_237]], implicit [[V_CVT_I32_F64_e32_238]], implicit [[V_CVT_I32_F64_e32_239]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_240]], implicit [[V_CVT_I32_F64_e32_241]], implicit [[V_CVT_I32_F64_e32_242]], implicit [[V_CVT_I32_F64_e32_243]], implicit [[V_CVT_I32_F64_e32_244]], implicit [[V_CVT_I32_F64_e32_245]], implicit [[V_CVT_I32_F64_e32_246]], implicit [[V_CVT_I32_F64_e32_247]], implicit [[V_CVT_I32_F64_e32_248]], implicit [[V_CVT_I32_F64_e32_249]]
- ; GFX908-NEXT: [[DEF:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_250]], implicit [[V_CVT_I32_F64_e32_251]], implicit [[V_CVT_I32_F64_e32_252]], implicit [[V_CVT_I32_F64_e32_253]], implicit [[V_CVT_I32_F64_e32_254]], implicit [[V_CVT_I32_F64_e32_255]], implicit [[V_CVT_I32_F64_e32_256]], implicit [[DEF]]
; GFX908-NEXT: S_ENDPGM 0
;
@@ -1649,8 +1649,9 @@ body: |
; GFX90A-NEXT: [[V_CVT_I32_F64_e32_252:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 252, implicit $exec, implicit $mode, implicit-def $m0
; GFX90A-NEXT: [[V_CVT_I32_F64_e32_253:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 253, implicit $exec, implicit $mode, implicit-def $m0
; GFX90A-NEXT: [[V_CVT_I32_F64_e32_254:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 254, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_255:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 255, implicit $exec, implicit $mode
- ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_256:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 256, implicit $exec, implicit $mode
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_255:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 256, implicit $exec, implicit $mode
+ ; GFX90A-NEXT: [[DEF:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_256:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 255, implicit $exec, implicit $mode
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.1:
; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_]], implicit [[V_CVT_I32_F64_e32_1]], implicit [[V_CVT_I32_F64_e32_2]], implicit [[V_CVT_I32_F64_e32_3]], implicit [[V_CVT_I32_F64_e32_4]], implicit [[V_CVT_I32_F64_e32_5]], implicit [[V_CVT_I32_F64_e32_6]], implicit [[V_CVT_I32_F64_e32_7]], implicit [[V_CVT_I32_F64_e32_8]], implicit [[V_CVT_I32_F64_e32_9]]
@@ -1678,8 +1679,7 @@ body: |
; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_220]], implicit [[V_CVT_I32_F64_e32_221]], implicit [[V_CVT_I32_F64_e32_222]], implicit [[V_CVT_I32_F64_e32_223]], implicit [[V_CVT_I32_F64_e32_224]], implicit [[V_CVT_I32_F64_e32_225]], implicit [[V_CVT_I32_F64_e32_226]], implicit [[V_CVT_I32_F64_e32_227]], implicit [[V_CVT_I32_F64_e32_228]], implicit [[V_CVT_I32_F64_e32_229]]
; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_230]], implicit [[V_CVT_I32_F64_e32_231]], implicit [[V_CVT_I32_F64_e32_232]], implicit [[V_CVT_I32_F64_e32_233]], implicit [[V_CVT_I32_F64_e32_234]], implicit [[V_CVT_I32_F64_e32_235]], implicit [[V_CVT_I32_F64_e32_236]], implicit [[V_CVT_I32_F64_e32_237]], implicit [[V_CVT_I32_F64_e32_238]], implicit [[V_CVT_I32_F64_e32_239]]
; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_240]], implicit [[V_CVT_I32_F64_e32_241]], implicit [[V_CVT_I32_F64_e32_242]], implicit [[V_CVT_I32_F64_e32_243]], implicit [[V_CVT_I32_F64_e32_244]], implicit [[V_CVT_I32_F64_e32_245]], implicit [[V_CVT_I32_F64_e32_246]], implicit [[V_CVT_I32_F64_e32_247]], implicit [[V_CVT_I32_F64_e32_248]], implicit [[V_CVT_I32_F64_e32_249]]
- ; GFX90A-NEXT: [[DEF:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_250]], implicit [[V_CVT_I32_F64_e32_251]], implicit [[V_CVT_I32_F64_e32_252]], implicit [[V_CVT_I32_F64_e32_253]], implicit [[V_CVT_I32_F64_e32_254]], implicit [[V_CVT_I32_F64_e32_255]], implicit [[V_CVT_I32_F64_e32_256]], implicit [[DEF]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_250]], implicit [[V_CVT_I32_F64_e32_251]], implicit [[V_CVT_I32_F64_e32_252]], implicit [[V_CVT_I32_F64_e32_253]], implicit [[V_CVT_I32_F64_e32_254]], implicit [[V_CVT_I32_F64_e32_256]], implicit [[V_CVT_I32_F64_e32_255]], implicit [[DEF]]
; GFX90A-NEXT: S_ENDPGM 0
bb.0:
successors: %bb.1
@@ -1976,9 +1976,8 @@ body: |
S_ENDPGM 0
...
# Requested [min,max] occupancy is [1,2]. There are 257 AGPRs in use when only
-# 256 are available. We should just try to eliminate spilling by saving one
-# AGPR or ArchVGPR (we assume we will be able to spill AGPRs to ArchVGPRs in
-# such cases).
+# 256 are available. Nothing to do since it should be possible to use an
+# ArchVGPR for one of the AGPRs.
---
name: reduce_spill_agpr_above_addressable_limit
tracksRegLiveness: true
@@ -1989,7 +1988,8 @@ body: |
; GFX908: bb.0:
; GFX908-NEXT: successors: %bb.1(0x80000000)
; GFX908-NEXT: {{ $}}
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 258, implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 257, implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 258, implicit $exec, implicit $mode
; GFX908-NEXT: [[DEF:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF1:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF2:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
@@ -2249,33 +2249,32 @@ body: |
; GFX908-NEXT: [[DEF256:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.1:
- ; GFX908-NEXT: S_NOP 0, implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]], implicit [[DEF5]], implicit [[DEF6]], implicit [[DEF7]], implicit [[DEF8]], implicit [[DEF9]]
- ; GFX908-NEXT: S_NOP 0, implicit [[DEF10]], implicit [[DEF11]], implicit [[DEF12]], implicit [[DEF13]], implicit [[DEF14]], implicit [[DEF15]], implicit [[DEF16]], implicit [[DEF17]], implicit [[DEF18]], implicit [[DEF19]]
- ; GFX908-NEXT: S_NOP 0, implicit [[DEF20]], implicit [[DEF21]], implicit [[DEF22]], implicit [[DEF23]], implicit [[DEF24]], implicit [[DEF25]], implicit [[DEF26]], implicit [[DEF27]], implicit [[DEF28]], implicit [[DEF29]]
- ; GFX908-NEXT: S_NOP 0, implicit [[DEF30]], implicit [[DEF31]], implicit [[DEF32]], implicit [[DEF33]], implicit [[DEF34]], implicit [[DEF35]], implicit [[DEF36]], implicit [[DEF37]], implicit [[DEF38]], implicit [[DEF39]]
- ; GFX908-NEXT: S_NOP 0, implicit [[DEF40]], implicit [[DEF41]], implicit [[DEF42]], implicit [[DEF43]], implicit [[DEF44]], implicit [[DEF45]], implicit [[DEF46]], implicit [[DEF47]], implicit [[DEF48]], implicit [[DEF49]]
- ; GFX908-NEXT: S_NOP 0, implicit [[DEF50]], implicit [[DEF51]], implicit [[DEF52]], implicit [[DEF53]], implicit [[DEF54]], implicit [[DEF55]], implicit [[DEF56]], implicit [[DEF57]], implicit [[DEF58]], implicit [[DEF59]]
- ; GFX908-NEXT: S_NOP 0, implicit [[DEF60]], implicit [[DEF61]], implicit [[DEF62]], implicit [[DEF63]], implicit [[DEF64]], implicit [[DEF65]], implicit [[DEF66]], implicit [[DEF67]], implicit [[DEF68]], implicit [[DEF69]]
- ; GFX908-NEXT: S_NOP 0, implicit [[DEF70]], implicit [[DEF71]], implicit [[DEF72]], implicit [[DEF73]], implicit [[DEF74]], implicit [[DEF75]], implicit [[DEF76]], implicit [[DEF77]], implicit [[DEF78]], implicit [[DEF79]]
- ; GFX908-NEXT: S_NOP 0, implicit [[DEF80]], implicit [[DEF81]], implicit [[DEF82]], implicit [[DEF83]], implicit [[DEF84]], implicit [[DEF85]], implicit [[DEF86]], implicit [[DEF87]], implicit [[DEF88]], implicit [[DEF89]]
- ; GFX908-NEXT: S_NOP 0, implicit [[DEF90]], implicit [[DEF91]], implicit [[DEF92]], implicit [[DEF93]], implicit [[DEF94]], implicit [[DEF95]], implicit [[DEF96]], implicit [[DEF97]], implicit [[DEF98]], implicit [[DEF99]]
- ; GFX908-NEXT: S_NOP 0, implicit [[DEF100]], implicit [[DEF101]], implicit [[DEF102]], implicit [[DEF103]], implicit [[DEF104]], implicit [[DEF105]], implicit [[DEF106]], implicit [[DEF107]], implicit [[DEF108]], implicit [[DEF109]]
- ; GFX908-NEXT: S_NOP 0, implicit [[DEF110]], implicit [[DEF111]], implicit [[DEF112]], implicit [[DEF113]], implicit [[DEF114]], implicit [[DEF115]], implicit [[DEF116]], implicit [[DEF117]], implicit [[DEF118]], implicit [[DEF119]]
- ; GFX908-NEXT: S_NOP 0, implicit [[DEF120]], implicit [[DEF121]], implicit [[DEF122]], implicit [[DEF123]], implicit [[DEF124]], implicit [[DEF125]], implicit [[DEF126]], implicit [[DEF127]], implicit [[DEF128]], implicit [[DEF129]]
- ; GFX908-NEXT: S_NOP 0, implicit [[DEF130]], implicit [[DEF131]], implicit [[DEF132]], implicit [[DEF133]], implicit [[DEF134]], implicit [[DEF135]], implicit [[DEF136]], implicit [[DEF137]], implicit [[DEF138]], implicit [[DEF139]]
- ; GFX908-NEXT: S_NOP 0, implicit [[DEF140]], implicit [[DEF141]], implicit [[DEF142]], implicit [[DEF143]], implicit [[DEF144]], implicit [[DEF145]], implicit [[DEF146]], implicit [[DEF147]], implicit [[DEF148]], implicit [[DEF149]]
- ; GFX908-NEXT: S_NOP 0, implicit [[DEF150]], implicit [[DEF151]], implicit [[DEF152]], implicit [[DEF153]], implicit [[DEF154]], implicit [[DEF155]], implicit [[DEF156]], implicit [[DEF157]], implicit [[DEF158]], implicit [[DEF159]]
- ; GFX908-NEXT: S_NOP 0, implicit [[DEF160]], implicit [[DEF161]], implicit [[DEF162]], implicit [[DEF163]], implicit [[DEF164]], implicit [[DEF165]], implicit [[DEF166]], implicit [[DEF167]], implicit [[DEF168]], implicit [[DEF169]]
- ; GFX908-NEXT: S_NOP 0, implicit [[DEF170]], implicit [[DEF171]], implicit [[DEF172]], implicit [[DEF173]], implicit [[DEF174]], implicit [[DEF175]], implicit [[DEF176]], implicit [[DEF177]], implicit [[DEF178]], implicit [[DEF179]]
- ; GFX908-NEXT: S_NOP 0, implicit [[DEF180]], implicit [[DEF181]], implicit [[DEF182]], implicit [[DEF183]], implicit [[DEF184]], implicit [[DEF185]], implicit [[DEF186]], implicit [[DEF187]], implicit [[DEF188]], implicit [[DEF189]]
- ; GFX908-NEXT: S_NOP 0, implicit [[DEF190]], implicit [[DEF191]], implicit [[DEF192]], implicit [[DEF193]], implicit [[DEF194]], implicit [[DEF195]], implicit [[DEF196]], implicit [[DEF197]], implicit [[DEF198]], implicit [[DEF199]]
- ; GFX908-NEXT: S_NOP 0, implicit [[DEF200]], implicit [[DEF201]], implicit [[DEF202]], implicit [[DEF203]], implicit [[DEF204]], implicit [[DEF205]], implicit [[DEF206]], implicit [[DEF207]], implicit [[DEF208]], implicit [[DEF209]]
- ; GFX908-NEXT: S_NOP 0, implicit [[DEF210]], implicit [[DEF211]], implicit [[DEF212]], implicit [[DEF213]], implicit [[DEF214]], implicit [[DEF215]], implicit [[DEF216]], implicit [[DEF217]], implicit [[DEF218]], implicit [[DEF219]]
- ; GFX908-NEXT: S_NOP 0, implicit [[DEF220]], implicit [[DEF221]], implicit [[DEF222]], implicit [[DEF223]], implicit [[DEF224]], implicit [[DEF225]], implicit [[DEF226]], implicit [[DEF227]], implicit [[DEF228]], implicit [[DEF229]]
- ; GFX908-NEXT: S_NOP 0, implicit [[DEF230]], implicit [[DEF231]], implicit [[DEF232]], implicit [[DEF233]], implicit [[DEF234]], implicit [[DEF235]], implicit [[DEF236]], implicit [[DEF237]], implicit [[DEF238]], implicit [[DEF239]]
- ; GFX908-NEXT: S_NOP 0, implicit [[DEF240]], implicit [[DEF241]], implicit [[DEF242]], implicit [[DEF243]], implicit [[DEF244]], implicit [[DEF245]], implicit [[DEF246]], implicit [[DEF247]], implicit [[DEF248]], implicit [[DEF249]]
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 257, implicit $exec, implicit $mode
- ; GFX908-NEXT: S_NOP 0, implicit [[DEF250]], implicit [[DEF251]], implicit [[DEF252]], implicit [[DEF253]], implicit [[DEF254]], implicit [[DEF255]], implicit [[DEF256]], implicit [[V_CVT_I32_F64_e32_1]], implicit [[V_CVT_I32_F64_e32_]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF128]], implicit [[DEF129]], implicit [[DEF130]], implicit [[DEF131]], implicit [[DEF132]], implicit [[DEF133]], implicit [[DEF134]], implicit [[DEF135]], implicit [[DEF136]], implicit [[DEF137]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF138]], implicit [[DEF139]], implicit [[DEF140]], implicit [[DEF141]], implicit [[DEF142]], implicit [[DEF143]], implicit [[DEF144]], implicit [[DEF145]], implicit [[DEF146]], implicit [[DEF147]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF148]], implicit [[DEF149]], implicit [[DEF150]], implicit [[DEF151]], implicit [[DEF152]], implicit [[DEF153]], implicit [[DEF154]], implicit [[DEF155]], implicit [[DEF156]], implicit [[DEF157]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF158]], implicit [[DEF159]], implicit [[DEF160]], implicit [[DEF161]], implicit [[DEF162]], implicit [[DEF163]], implicit [[DEF164]], implicit [[DEF165]], implicit [[DEF166]], implicit [[DEF167]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF168]], implicit [[DEF169]], implicit [[DEF170]], implicit [[DEF171]], implicit [[DEF172]], implicit [[DEF173]], implicit [[DEF174]], implicit [[DEF175]], implicit [[DEF176]], implicit [[DEF177]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF178]], implicit [[DEF179]], implicit [[DEF180]], implicit [[DEF181]], implicit [[DEF182]], implicit [[DEF183]], implicit [[DEF184]], implicit [[DEF185]], implicit [[DEF186]], implicit [[DEF187]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF188]], implicit [[DEF189]], implicit [[DEF190]], implicit [[DEF191]], implicit [[DEF192]], implicit [[DEF193]], implicit [[DEF194]], implicit [[DEF195]], implicit [[DEF196]], implicit [[DEF197]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF198]], implicit [[DEF199]], implicit [[DEF200]], implicit [[DEF201]], implicit [[DEF202]], implicit [[DEF203]], implicit [[DEF204]], implicit [[DEF205]], implicit [[DEF206]], implicit [[DEF207]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF208]], implicit [[DEF209]], implicit [[DEF210]], implicit [[DEF211]], implicit [[DEF212]], implicit [[DEF213]], implicit [[DEF214]], implicit [[DEF215]], implicit [[DEF216]], implicit [[DEF217]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF218]], implicit [[DEF219]], implicit [[DEF220]], implicit [[DEF221]], implicit [[DEF222]], implicit [[DEF223]], implicit [[DEF224]], implicit [[DEF225]], implicit [[DEF226]], implicit [[DEF227]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF228]], implicit [[DEF229]], implicit [[DEF230]], implicit [[DEF231]], implicit [[DEF232]], implicit [[DEF233]], implicit [[DEF234]], implicit [[DEF235]], implicit [[DEF236]], implicit [[DEF237]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF238]], implicit [[DEF239]], implicit [[DEF240]], implicit [[DEF241]], implicit [[DEF242]], implicit [[DEF243]], implicit [[DEF244]], implicit [[DEF245]], implicit [[DEF246]], implicit [[DEF247]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF248]], implicit [[DEF249]], implicit [[DEF250]], implicit [[DEF251]], implicit [[DEF252]], implicit [[DEF253]], implicit [[DEF254]], implicit [[DEF255]], implicit [[DEF256]], implicit [[DEF]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]], implicit [[DEF5]], implicit [[DEF6]], implicit [[DEF7]], implicit [[DEF8]], implicit [[DEF9]], implicit [[DEF10]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF11]], implicit [[DEF12]], implicit [[DEF13]], implicit [[DEF14]], implicit [[DEF15]], implicit [[DEF16]], implicit [[DEF17]], implicit [[DEF18]], implicit [[DEF19]], implicit [[DEF20]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF21]], implicit [[DEF22]], implicit [[DEF23]], implicit [[DEF24]], implicit [[DEF25]], implicit [[DEF26]], implicit [[DEF27]], implicit [[DEF28]], implicit [[DEF29]], implicit [[DEF30]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF31]], implicit [[DEF32]], implicit [[DEF33]], implicit [[DEF34]], implicit [[DEF35]], implicit [[DEF36]], implicit [[DEF37]], implicit [[DEF38]], implicit [[DEF39]], implicit [[DEF40]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF41]], implicit [[DEF42]], implicit [[DEF43]], implicit [[DEF44]], implicit [[DEF45]], implicit [[DEF46]], implicit [[DEF47]], implicit [[DEF48]], implicit [[DEF49]], implicit [[DEF50]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF51]], implicit [[DEF52]], implicit [[DEF53]], implicit [[DEF54]], implicit [[DEF55]], implicit [[DEF56]], implicit [[DEF57]], implicit [[DEF58]], implicit [[DEF59]], implicit [[DEF60]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF61]], implicit [[DEF62]], implicit [[DEF63]], implicit [[DEF64]], implicit [[DEF65]], implicit [[DEF66]], implicit [[DEF67]], implicit [[DEF68]], implicit [[DEF69]], implicit [[DEF70]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF71]], implicit [[DEF72]], implicit [[DEF73]], implicit [[DEF74]], implicit [[DEF75]], implicit [[DEF76]], implicit [[DEF77]], implicit [[DEF78]], implicit [[DEF79]], implicit [[DEF80]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF81]], implicit [[DEF82]], implicit [[DEF83]], implicit [[DEF84]], implicit [[DEF85]], implicit [[DEF86]], implicit [[DEF87]], implicit [[DEF88]], implicit [[DEF89]], implicit [[DEF90]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF91]], implicit [[DEF92]], implicit [[DEF93]], implicit [[DEF94]], implicit [[DEF95]], implicit [[DEF96]], implicit [[DEF97]], implicit [[DEF98]], implicit [[DEF99]], implicit [[DEF100]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF101]], implicit [[DEF102]], implicit [[DEF103]], implicit [[DEF104]], implicit [[DEF105]], implicit [[DEF106]], implicit [[DEF107]], implicit [[DEF108]], implicit [[DEF109]], implicit [[DEF110]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF111]], implicit [[DEF112]], implicit [[DEF113]], implicit [[DEF114]], implicit [[DEF115]], implicit [[DEF116]], implicit [[DEF117]], implicit [[DEF118]], implicit [[DEF119]], implicit [[DEF120]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF121]], implicit [[DEF122]], implicit [[DEF123]], implicit [[DEF124]], implicit [[DEF125]], implicit [[DEF126]], implicit [[DEF127]], implicit [[V_CVT_I32_F64_e32_]], implicit [[V_CVT_I32_F64_e32_1]]
; GFX908-NEXT: S_ENDPGM 0
;
; GFX90A-LABEL: name: reduce_spill_agpr_above_addressable_limit
@@ -2534,41 +2533,41 @@ body: |
; GFX90A-NEXT: [[DEF249:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF250:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF251:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 257, implicit $exec, implicit $mode
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 258, implicit $exec, implicit $mode
; GFX90A-NEXT: [[DEF252:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF253:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF254:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF255:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 257, implicit $exec, implicit $mode
- ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 258, implicit $exec, implicit $mode
+ ; GFX90A-NEXT: [[DEF256:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.1:
- ; GFX90A-NEXT: [[DEF256:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX90A-NEXT: S_NOP 0, implicit [[DEF256]], implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]], implicit [[DEF5]], implicit [[DEF6]], implicit [[DEF7]], implicit [[DEF8]]
- ; GFX90A-NEXT: S_NOP 0, implicit [[DEF9]], implicit [[DEF10]], implicit [[DEF11]], implicit [[DEF12]], implicit [[DEF13]], implicit [[DEF14]], implicit [[DEF15]], implicit [[DEF16]], implicit [[DEF17]], implicit [[DEF18]]
- ; GFX90A-NEXT: S_NOP 0, implicit [[DEF19]], implicit [[DEF20]], implicit [[DEF21]], implicit [[DEF22]], implicit [[DEF23]], implicit [[DEF24]], implicit [[DEF25]], implicit [[DEF26]], implicit [[DEF27]], implicit [[DEF28]]
- ; GFX90A-NEXT: S_NOP 0, implicit [[DEF29]], implicit [[DEF30]], implicit [[DEF31]], implicit [[DEF32]], implicit [[DEF33]], implicit [[DEF34]], implicit [[DEF35]], implicit [[DEF36]], implicit [[DEF37]], implicit [[DEF38]]
- ; GFX90A-NEXT: S_NOP 0, implicit [[DEF39]], implicit [[DEF40]], implicit [[DEF41]], implicit [[DEF42]], implicit [[DEF43]], implicit [[DEF44]], implicit [[DEF45]], implicit [[DEF46]], implicit [[DEF47]], implicit [[DEF48]]
- ; GFX90A-NEXT: S_NOP 0, implicit [[DEF49]], implicit [[DEF50]], implicit [[DEF51]], implicit [[DEF52]], implicit [[DEF53]], implicit [[DEF54]], implicit [[DEF55]], implicit [[DEF56]], implicit [[DEF57]], implicit [[DEF58]]
- ; GFX90A-NEXT: S_NOP 0, implicit [[DEF59]], implicit [[DEF60]], implicit [[DEF61]], implicit [[DEF62]], implicit [[DEF63]], implicit [[DEF64]], implicit [[DEF65]], implicit [[DEF66]], implicit [[DEF67]], implicit [[DEF68]]
- ; GFX90A-NEXT: S_NOP 0, implicit [[DEF69]], implicit [[DEF70]], implicit [[DEF71]], implicit [[DEF72]], implicit [[DEF73]], implicit [[DEF74]], implicit [[DEF75]], implicit [[DEF76]], implicit [[DEF77]], implicit [[DEF78]]
- ; GFX90A-NEXT: S_NOP 0, implicit [[DEF79]], implicit [[DEF80]], implicit [[DEF81]], implicit [[DEF82]], implicit [[DEF83]], implicit [[DEF84]], implicit [[DEF85]], implicit [[DEF86]], implicit [[DEF87]], implicit [[DEF88]]
- ; GFX90A-NEXT: S_NOP 0, implicit [[DEF89]], implicit [[DEF90]], implicit [[DEF91]], implicit [[DEF92]], implicit [[DEF93]], implicit [[DEF94]], implicit [[DEF95]], implicit [[DEF96]], implicit [[DEF97]], implicit [[DEF98]]
- ; GFX90A-NEXT: S_NOP 0, implicit [[DEF99]], implicit [[DEF100]], implicit [[DEF101]], implicit [[DEF102]], implicit [[DEF103]], implicit [[DEF104]], implicit [[DEF105]], implicit [[DEF106]], implicit [[DEF107]], implicit [[DEF108]]
- ; GFX90A-NEXT: S_NOP 0, implicit [[DEF109]], implicit [[DEF110]], implicit [[DEF111]], implicit [[DEF112]], implicit [[DEF113]], implicit [[DEF114]], implicit [[DEF115]], implicit [[DEF116]], implicit [[DEF117]], implicit [[DEF118]]
- ; GFX90A-NEXT: S_NOP 0, implicit [[DEF119]], implicit [[DEF120]], implicit [[DEF121]], implicit [[DEF122]], implicit [[DEF123]], implicit [[DEF124]], implicit [[DEF125]], implicit [[DEF126]], implicit [[DEF127]], implicit [[DEF128]]
- ; GFX90A-NEXT: S_NOP 0, implicit [[DEF129]], implicit [[DEF130]], implicit [[DEF131]], implicit [[DEF132]], implicit [[DEF133]], implicit [[DEF134]], implicit [[DEF135]], implicit [[DEF136]], implicit [[DEF137]], implicit [[DEF138]]
- ; GFX90A-NEXT: S_NOP 0, implicit [[DEF139]], implicit [[DEF140]], implicit [[DEF141]], implicit [[DEF142]], implicit [[DEF143]], implicit [[DEF144]], implicit [[DEF145]], implicit [[DEF146]], implicit [[DEF147]], implicit [[DEF148]]
- ; GFX90A-NEXT: S_NOP 0, implicit [[DEF149]], implicit [[DEF150]], implicit [[DEF151]], implicit [[DEF152]], implicit [[DEF153]], implicit [[DEF154]], implicit [[DEF155]], implicit [[DEF156]], implicit [[DEF157]], implicit [[DEF158]]
- ; GFX90A-NEXT: S_NOP 0, implicit [[DEF159]], implicit [[DEF160]], implicit [[DEF161]], implicit [[DEF162]], implicit [[DEF163]], implicit [[DEF164]], implicit [[DEF165]], implicit [[DEF166]], implicit [[DEF167]], implicit [[DEF168]]
- ; GFX90A-NEXT: S_NOP 0, implicit [[DEF169]], implicit [[DEF170]], implicit [[DEF171]], implicit [[DEF172]], implicit [[DEF173]], implicit [[DEF174]], implicit [[DEF175]], implicit [[DEF176]], implicit [[DEF177]], implicit [[DEF178]]
- ; GFX90A-NEXT: S_NOP 0, implicit [[DEF179]], implicit [[DEF180]], implicit [[DEF181]], implicit [[DEF182]], implicit [[DEF183]], implicit [[DEF184]], implicit [[DEF185]], implicit [[DEF186]], implicit [[DEF187]], implicit [[DEF188]]
- ; GFX90A-NEXT: S_NOP 0, implicit [[DEF189]], implicit [[DEF190]], implicit [[DEF191]], implicit [[DEF192]], implicit [[DEF193]], implicit [[DEF194]], implicit [[DEF195]], implicit [[DEF196]], implicit [[DEF197]], implicit [[DEF198]]
- ; GFX90A-NEXT: S_NOP 0, implicit [[DEF199]], implicit [[DEF200]], implicit [[DEF201]], implicit [[DEF202]], implicit [[DEF203]], implicit [[DEF204]], implicit [[DEF205]], implicit [[DEF206]], implicit [[DEF207]], implicit [[DEF208]]
- ; GFX90A-NEXT: S_NOP 0, implicit [[DEF209]], implicit [[DEF210]], implicit [[DEF211]], implicit [[DEF212]], implicit [[DEF213]], implicit [[DEF214]], implicit [[DEF215]], implicit [[DEF216]], implicit [[DEF217]], implicit [[DEF218]]
- ; GFX90A-NEXT: S_NOP 0, implicit [[DEF219]], implicit [[DEF220]], implicit [[DEF221]], implicit [[DEF222]], implicit [[DEF223]], implicit [[DEF224]], implicit [[DEF225]], implicit [[DEF226]], implicit [[DEF227]], implicit [[DEF228]]
- ; GFX90A-NEXT: S_NOP 0, implicit [[DEF229]], implicit [[DEF230]], implicit [[DEF231]], implicit [[DEF232]], implicit [[DEF233]], implicit [[DEF234]], implicit [[DEF235]], implicit [[DEF236]], implicit [[DEF237]], implicit [[DEF238]]
- ; GFX90A-NEXT: S_NOP 0, implicit [[DEF239]], implicit [[DEF240]], implicit [[DEF241]], implicit [[DEF242]], implicit [[DEF243]], implicit [[DEF244]], implicit [[DEF245]], implicit [[DEF246]], implicit [[DEF247]], implicit [[DEF248]]
- ; GFX90A-NEXT: S_NOP 0, implicit [[DEF249]], implicit [[DEF250]], implicit [[DEF251]], implicit [[DEF252]], implicit [[DEF253]], implicit [[DEF254]], implicit [[DEF255]], implicit [[V_CVT_I32_F64_e32_]], implicit [[V_CVT_I32_F64_e32_1]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF252]], implicit [[DEF253]], implicit [[DEF254]], implicit [[DEF255]], implicit [[DEF256]], implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF5]], implicit [[DEF6]], implicit [[DEF7]], implicit [[DEF8]], implicit [[DEF9]], implicit [[DEF10]], implicit [[DEF11]], implicit [[DEF12]], implicit [[DEF13]], implicit [[DEF14]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF15]], implicit [[DEF16]], implicit [[DEF17]], implicit [[DEF18]], implicit [[DEF19]], implicit [[DEF20]], implicit [[DEF21]], implicit [[DEF22]], implicit [[DEF23]], implicit [[DEF24]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF25]], implicit [[DEF26]], implicit [[DEF27]], implicit [[DEF28]], implicit [[DEF29]], implicit [[DEF30]], implicit [[DEF31]], implicit [[DEF32]], implicit [[DEF33]], implicit [[DEF34]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF35]], implicit [[DEF36]], implicit [[DEF37]], implicit [[DEF38]], implicit [[DEF39]], implicit [[DEF40]], implicit [[DEF41]], implicit [[DEF42]], implicit [[DEF43]], implicit [[DEF44]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF45]], implicit [[DEF46]], implicit [[DEF47]], implicit [[DEF48]], implicit [[DEF49]], implicit [[DEF50]], implicit [[DEF51]], implicit [[DEF52]], implicit [[DEF53]], implicit [[DEF54]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF55]], implicit [[DEF56]], implicit [[DEF57]], implicit [[DEF58]], implicit [[DEF59]], implicit [[DEF60]], implicit [[DEF61]], implicit [[DEF62]], implicit [[DEF63]], implicit [[DEF64]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF65]], implicit [[DEF66]], implicit [[DEF67]], implicit [[DEF68]], implicit [[DEF69]], implicit [[DEF70]], implicit [[DEF71]], implicit [[DEF72]], implicit [[DEF73]], implicit [[DEF74]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF75]], implicit [[DEF76]], implicit [[DEF77]], implicit [[DEF78]], implicit [[DEF79]], implicit [[DEF80]], implicit [[DEF81]], implicit [[DEF82]], implicit [[DEF83]], implicit [[DEF84]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF85]], implicit [[DEF86]], implicit [[DEF87]], implicit [[DEF88]], implicit [[DEF89]], implicit [[DEF90]], implicit [[DEF91]], implicit [[DEF92]], implicit [[DEF93]], implicit [[DEF94]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF95]], implicit [[DEF96]], implicit [[DEF97]], implicit [[DEF98]], implicit [[DEF99]], implicit [[DEF100]], implicit [[DEF101]], implicit [[DEF102]], implicit [[DEF103]], implicit [[DEF104]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF105]], implicit [[DEF106]], implicit [[DEF107]], implicit [[DEF108]], implicit [[DEF109]], implicit [[DEF110]], implicit [[DEF111]], implicit [[DEF112]], implicit [[DEF113]], implicit [[DEF114]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF115]], implicit [[DEF116]], implicit [[DEF117]], implicit [[DEF118]], implicit [[DEF119]], implicit [[DEF120]], implicit [[DEF121]], implicit [[DEF122]], implicit [[DEF123]], implicit [[DEF124]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF125]], implicit [[DEF126]], implicit [[DEF127]], implicit [[DEF128]], implicit [[DEF129]], implicit [[DEF130]], implicit [[DEF131]], implicit [[DEF132]], implicit [[DEF133]], implicit [[DEF134]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF135]], implicit [[DEF136]], implicit [[DEF137]], implicit [[DEF138]], implicit [[DEF139]], implicit [[DEF140]], implicit [[DEF141]], implicit [[DEF142]], implicit [[DEF143]], implicit [[DEF144]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF145]], implicit [[DEF146]], implicit [[DEF147]], implicit [[DEF148]], implicit [[DEF149]], implicit [[DEF150]], implicit [[DEF151]], implicit [[DEF152]], implicit [[DEF153]], implicit [[DEF154]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF155]], implicit [[DEF156]], implicit [[DEF157]], implicit [[DEF158]], implicit [[DEF159]], implicit [[DEF160]], implicit [[DEF161]], implicit [[DEF162]], implicit [[DEF163]], implicit [[DEF164]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF165]], implicit [[DEF166]], implicit [[DEF167]], implicit [[DEF168]], implicit [[DEF169]], implicit [[DEF170]], implicit [[DEF171]], implicit [[DEF172]], implicit [[DEF173]], implicit [[DEF174]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF175]], implicit [[DEF176]], implicit [[DEF177]], implicit [[DEF178]], implicit [[DEF179]], implicit [[DEF180]], implicit [[DEF181]], implicit [[DEF182]], implicit [[DEF183]], implicit [[DEF184]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF185]], implicit [[DEF186]], implicit [[DEF187]], implicit [[DEF188]], implicit [[DEF189]], implicit [[DEF190]], implicit [[DEF191]], implicit [[DEF192]], implicit [[DEF193]], implicit [[DEF194]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF195]], implicit [[DEF196]], implicit [[DEF197]], implicit [[DEF198]], implicit [[DEF199]], implicit [[DEF200]], implicit [[DEF201]], implicit [[DEF202]], implicit [[DEF203]], implicit [[DEF204]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF205]], implicit [[DEF206]], implicit [[DEF207]], implicit [[DEF208]], implicit [[DEF209]], implicit [[DEF210]], implicit [[DEF211]], implicit [[DEF212]], implicit [[DEF213]], implicit [[DEF214]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF215]], implicit [[DEF216]], implicit [[DEF217]], implicit [[DEF218]], implicit [[DEF219]], implicit [[DEF220]], implicit [[DEF221]], implicit [[DEF222]], implicit [[DEF223]], implicit [[DEF224]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF225]], implicit [[DEF226]], implicit [[DEF227]], implicit [[DEF228]], implicit [[DEF229]], implicit [[DEF230]], implicit [[DEF231]], implicit [[DEF232]], implicit [[DEF233]], implicit [[DEF234]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF235]], implicit [[DEF236]], implicit [[DEF237]], implicit [[DEF238]], implicit [[DEF239]], implicit [[DEF240]], implicit [[DEF241]], implicit [[DEF242]], implicit [[DEF243]], implicit [[DEF244]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF245]], implicit [[DEF246]], implicit [[DEF247]], implicit [[DEF248]], implicit [[DEF249]], implicit [[DEF250]], implicit [[DEF251]], implicit [[V_CVT_I32_F64_e32_]], implicit [[V_CVT_I32_F64_e32_1]]
; GFX90A-NEXT: S_ENDPGM 0
bb.0:
More information about the llvm-commits
mailing list