[llvm] [AMDGPU] Add `GCNRPTarget` to track register pressure against a target (PR #145765)
Lucas Ramirez via llvm-commits
llvm-commits at lists.llvm.org
Wed Jun 25 12:15:27 PDT 2025
https://github.com/lucas-rami updated https://github.com/llvm/llvm-project/pull/145765
>From 1b7074c4cd237c382247e58b68edcc6642431a9b Mon Sep 17 00:00:00 2001
From: Lucas Ramirez <lucas.rami at proton.me>
Date: Wed, 25 Jun 2025 16:49:56 +0000
Subject: [PATCH 1/2] Move excess RP tracking logic to `GCNRPTarget`
---
llvm/lib/Target/AMDGPU/GCNRegPressure.cpp | 63 +++++
llvm/lib/Target/AMDGPU/GCNRegPressure.h | 118 +++++++--
llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp | 226 +++---------------
...ine-scheduler-sink-trivial-remats-attr.mir | 157 ++++++------
4 files changed, 272 insertions(+), 292 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
index eed3fb20f5be8..7d6723a6108be 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
@@ -361,6 +361,69 @@ static LaneBitmask findUseBetween(unsigned Reg, LaneBitmask LastUseMask,
return LastUseMask;
}
+////////////////////////////////////////////////////////////////////////////////
+// GCNRPTarget
+
+GCNRPTarget::GCNRPTarget(const MachineFunction &MF, const GCNRegPressure &RP,
+ bool CombineVGPRSavings)
+ : RP(RP), CombineVGPRSavings(CombineVGPRSavings) {
+ const Function &F = MF.getFunction();
+ const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+ setRegLimits(ST.getMaxNumSGPRs(F), ST.getMaxNumVGPRs(F), MF);
+}
+
+GCNRPTarget::GCNRPTarget(unsigned NumSGPRs, unsigned NumVGPRs,
+ const MachineFunction &MF, const GCNRegPressure &RP,
+ bool CombineVGPRSavings)
+ : RP(RP), CombineVGPRSavings(CombineVGPRSavings) {
+ setRegLimits(NumSGPRs, NumVGPRs, MF);
+}
+
+GCNRPTarget::GCNRPTarget(unsigned Occupancy, const MachineFunction &MF,
+ const GCNRegPressure &RP, bool CombineVGPRSavings)
+ : RP(RP), CombineVGPRSavings(CombineVGPRSavings) {
+ const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+ unsigned DynamicVGPRBlockSize =
+ MF.getInfo<SIMachineFunctionInfo>()->getDynamicVGPRBlockSize();
+ setRegLimits(ST.getMaxNumSGPRs(Occupancy, /*Addressable=*/false),
+ ST.getMaxNumVGPRs(Occupancy, DynamicVGPRBlockSize), MF);
+}
+
+void GCNRPTarget::setRegLimits(unsigned NumSGPRs, unsigned NumVGPRs,
+ const MachineFunction &MF) {
+ const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+ unsigned DynamicVGPRBlockSize =
+ MF.getInfo<SIMachineFunctionInfo>()->getDynamicVGPRBlockSize();
+ MaxSGPRs = std::min(ST.getAddressableNumSGPRs(), NumSGPRs);
+ MaxVGPRs = std::min(ST.getAddressableNumArchVGPRs(), NumVGPRs);
+ MaxUnifiedVGPRs =
+ ST.hasGFX90AInsts()
+ ? std::min(ST.getAddressableNumVGPRs(DynamicVGPRBlockSize), NumVGPRs)
+ : 0;
+}
+
+bool GCNRPTarget::isSaveBeneficial(Register Reg,
+ const MachineRegisterInfo &MRI) const {
+ const TargetRegisterClass *RC = MRI.getRegClass(Reg);
+ const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
+ const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo *>(TRI);
+
+ if (SRI->isSGPRClass(RC))
+ return RP.getSGPRNum() > MaxSGPRs;
+ unsigned NumVGPRs =
+ SRI->isAGPRClass(RC) ? RP.getAGPRNum() : RP.getArchVGPRNum();
+ return isVGPRBankSaveBeneficial(NumVGPRs);
+}
+
+bool GCNRPTarget::satisfied() const {
+ if (RP.getSGPRNum() > MaxSGPRs)
+ return false;
+ if (RP.getVGPRNum(false) > MaxVGPRs &&
+ (!CombineVGPRSavings || !satisifiesVGPRBanksTarget()))
+ return false;
+ return satisfiesUnifiedTarget();
+}
+
///////////////////////////////////////////////////////////////////////////////
// GCNRPTracker
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.h b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
index 397e891c8d806..32db3b78aaae5 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.h
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
@@ -31,9 +31,7 @@ class SlotIndex;
struct GCNRegPressure {
enum RegKind { SGPR, VGPR, AGPR, TOTAL_KINDS };
- GCNRegPressure() {
- clear();
- }
+ GCNRegPressure() { clear(); }
bool empty() const { return !Value[SGPR] && !Value[VGPR] && !Value[AGPR]; }
@@ -76,9 +74,7 @@ struct GCNRegPressure {
DynamicVGPRBlockSize));
}
- void inc(unsigned Reg,
- LaneBitmask PrevMask,
- LaneBitmask NewMask,
+ void inc(unsigned Reg, LaneBitmask PrevMask, LaneBitmask NewMask,
const MachineRegisterInfo &MRI);
bool higherOccupancy(const GCNSubtarget &ST, const GCNRegPressure &O,
@@ -106,9 +102,7 @@ struct GCNRegPressure {
return std::equal(&Value[0], &Value[ValueArraySize], O.Value);
}
- bool operator!=(const GCNRegPressure &O) const {
- return !(*this == O);
- }
+ bool operator!=(const GCNRegPressure &O) const { return !(*this == O); }
GCNRegPressure &operator+=(const GCNRegPressure &RHS) {
for (unsigned I = 0; I < ValueArraySize; ++I)
@@ -134,8 +128,7 @@ struct GCNRegPressure {
static unsigned getRegKind(const TargetRegisterClass *RC,
const SIRegisterInfo *STI);
- friend GCNRegPressure max(const GCNRegPressure &P1,
- const GCNRegPressure &P2);
+ friend GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2);
friend Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST,
unsigned DynamicVGPRBlockSize);
@@ -162,6 +155,101 @@ inline GCNRegPressure operator-(const GCNRegPressure &P1,
return Diff;
}
+////////////////////////////////////////////////////////////////////////////////
+// GCNRPTarget
+
+/// Models a register pressure target, allowing to evaluate and track register
+/// savings against that target from a starting \ref GCNRegPressure.
+class GCNRPTarget {
+public:
+ /// Sets up the target such that the register pressure starting at \p RP does
+ /// not show register spilling on function \p MF (w.r.t. the function's
+ /// mininum target occupancy).
+ GCNRPTarget(const MachineFunction &MF, const GCNRegPressure &RP,
+ bool CombineVGPRSavings = false);
+
+ /// Sets up the target such that the register pressure starting at \p RP does
+ /// not use more than \p NumSGPRs SGPRs and \p NumVGPRs VGPRs on function \p
+ /// MF.
+ GCNRPTarget(unsigned NumSGPRs, unsigned NumVGPRs, const MachineFunction &MF,
+ const GCNRegPressure &RP, bool CombineVGPRSavings = false);
+
+ /// Sets up the target such that the register pressure starting at \p RP does
+ /// not prevent achieving an occupancy of at least \p Occupancy on function
+ /// \p MF.
+ GCNRPTarget(unsigned Occupancy, const MachineFunction &MF,
+ const GCNRegPressure &RP, bool CombineVGPRSavings = false);
+
+ const GCNRegPressure &getCurrentRP() const { return RP; }
+
+ void setRP(const GCNRegPressure &NewRP) { RP = NewRP; }
+
+ /// Determines whether saving virtual register \p Reg will be beneficial
+ /// towards achieving the RP target.
+ bool isSaveBeneficial(Register Reg, const MachineRegisterInfo &MRI) const;
+
+ /// Saves virtual register \p Reg with lanemask \p Mask.
+ void saveReg(Register Reg, LaneBitmask Mask, const MachineRegisterInfo &MRI) {
+ RP.inc(Reg, Mask, LaneBitmask::getNone(), MRI);
+ }
+
+ /// Whether the current RP is at or below the defined pressure target.
+ bool satisfied() const;
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ friend raw_ostream &operator<<(raw_ostream &OS, const GCNRPTarget &Target) {
+ OS << "Actual/Target: " << Target.RP.getSGPRNum() << '/' << Target.MaxSGPRs
+ << " SGPRs, " << Target.RP.getArchVGPRNum() << '/' << Target.MaxVGPRs
+ << " ArchVGPRs, " << Target.RP.getAGPRNum() << '/' << Target.MaxVGPRs
+ << " AGPRs";
+
+ if (Target.MaxUnifiedVGPRs) {
+ OS << ", " << Target.RP.getVGPRNum(true) << '/' << Target.MaxUnifiedVGPRs
+ << " VGPRs (unified)";
+ } else if (Target.CombineVGPRSavings) {
+ OS << ", " << Target.RP.getArchVGPRNum() + Target.RP.getAGPRNum() << '/'
+ << 2 * Target.MaxVGPRs << " VGPRs (combined target)";
+ }
+ return OS;
+ }
+#endif
+
+private:
+ /// Current register pressure.
+ GCNRegPressure RP;
+
+ /// Target number of SGPRs.
+ unsigned MaxSGPRs;
+ /// Target number of ArchVGPRs and AGPRs.
+ unsigned MaxVGPRs;
+ /// Target number of overall VGPRs for subtargets with unified RFs. Always 0
+ /// for subtargets with non-unified RFs.
+ unsigned MaxUnifiedVGPRs;
+ /// Whether we consider that the register allocator will be able to swap
+ /// between ArchVGPRs and AGPRs by copying them to a super register class.
+ /// Concretely, this allows savings in one of the VGPR banks to help toward
+ /// savings in the other VGPR bank.
+ bool CombineVGPRSavings;
+
+ inline bool satisifiesVGPRBanksTarget() const {
+ assert(CombineVGPRSavings && "only makes sense with combined savings");
+ return RP.getArchVGPRNum() + RP.getAGPRNum() <= 2 * MaxVGPRs;
+ }
+
+ /// Always satisified when the subtarget doesn't have a unified RF.
+ inline bool satisfiesUnifiedTarget() const {
+ return !MaxUnifiedVGPRs || RP.getVGPRNum(true) <= MaxUnifiedVGPRs;
+ }
+
+ inline bool isVGPRBankSaveBeneficial(unsigned NumVGPRs) const {
+ return NumVGPRs > MaxVGPRs || !satisfiesUnifiedTarget() ||
+ (CombineVGPRSavings && !satisifiesVGPRBanksTarget());
+ }
+
+ void setRegLimits(unsigned MaxSGPRs, unsigned MaxVGPRs,
+ const MachineFunction &MF);
+};
+
///////////////////////////////////////////////////////////////////////////////
// GCNRPTracker
@@ -197,9 +285,7 @@ class GCNRPTracker {
GCNRegPressure getPressure() const { return CurPressure; }
- decltype(LiveRegs) moveLiveRegs() {
- return std::move(LiveRegs);
- }
+ decltype(LiveRegs) moveLiveRegs() { return std::move(LiveRegs); }
};
GCNRPTracker::LiveRegSet getLiveRegs(SlotIndex SI, const LiveIntervals &LIS,
@@ -345,7 +431,7 @@ GCNRPTracker::LiveRegSet getLiveRegs(SlotIndex SI, const LiveIntervals &LIS,
/// Note: there is no entry in the map for instructions with empty live reg set
/// Complexity = O(NumVirtRegs * averageLiveRangeSegmentsPerReg * lg(R))
template <typename Range>
-DenseMap<MachineInstr*, GCNRPTracker::LiveRegSet>
+DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet>
getLiveRegMap(Range &&R, bool After, LiveIntervals &LIS) {
std::vector<SlotIndex> Indexes;
Indexes.reserve(std::distance(R.begin(), R.end()));
@@ -370,7 +456,7 @@ getLiveRegMap(Range &&R, bool After, LiveIntervals &LIS) {
if (!LI.hasSubRanges()) {
for (auto SI : LiveIdxs)
LiveRegMap[SII.getInstructionFromIndex(SI)][Reg] =
- MRI.getMaxLaneMaskForVReg(Reg);
+ MRI.getMaxLaneMaskForVReg(Reg);
} else
for (const auto &S : LI.subranges()) {
// constrain search for subranges by indexes live at main range
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index 8aae340de791e..fce8f36d45969 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -1699,184 +1699,17 @@ bool PreRARematStage::allUsesAvailableAt(const MachineInstr *InstToRemat,
return true;
}
-namespace {
-/// Models excess register pressure in a region and tracks our progress as we
-/// identify rematerialization opportunities.
-struct ExcessRP {
- /// Number of excess SGPRs.
- unsigned SGPRs = 0;
- /// Number of excess ArchVGPRs.
- unsigned ArchVGPRs = 0;
- /// Number of excess AGPRs.
- unsigned AGPRs = 0;
- /// For unified register files, number of excess VGPRs. 0 otherwise.
- unsigned VGPRs = 0;
- /// For unified register files with AGPR usage, number of excess ArchVGPRs to
- /// save before we are able to save a whole allocation granule.
- unsigned ArchVGPRsToAlignment = 0;
- /// Whether the region uses AGPRs.
- bool HasAGPRs = false;
- /// Whether the subtarget has a unified RF.
- bool UnifiedRF;
- /// Whether we consider that the register allocator will be able to swap
- /// between ArchVGPRs and AGPRs by copying them to a super register class.
- /// Concretely, this allows savings of one kind of VGPR to help toward savings
- /// the other kind of VGPR.
- bool CombineVGPRSavings;
-
- /// Constructs the excess RP model; determines the excess pressure w.r.t. a
- /// maximum number of allowed SGPRs/VGPRs.
- ExcessRP(const GCNSubtarget &ST, const GCNRegPressure &RP, unsigned MaxSGPRs,
- unsigned MaxVGPRs, bool CombineVGPRSavings);
-
- /// Accounts for \p NumRegs saved SGPRs in the model. Returns whether saving
- /// these SGPRs helped reduce excess pressure.
- bool saveSGPRs(unsigned NumRegs) { return saveRegs(SGPRs, NumRegs); }
-
- /// Accounts for \p NumRegs saved ArchVGPRs in the model. Returns whether
- /// saving these ArchGPRs helped reduce excess pressure.
- bool saveArchVGPRs(unsigned NumRegs);
-
- /// Accounts for \p NumRegs saved AGPRs in the model. Returns whether saving
- /// these AGPRs helped reduce excess pressure.
- bool saveAGPRs(unsigned NumRegs);
-
- /// Returns whether there is any excess register pressure.
- operator bool() const { return SGPRs || ArchVGPRs || AGPRs || VGPRs; }
-
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
- friend raw_ostream &operator<<(raw_ostream &OS, const ExcessRP &Excess) {
- OS << Excess.SGPRs << " SGPRs, " << Excess.ArchVGPRs << " ArchVGPRs, and "
- << Excess.AGPRs << " AGPRs, (" << Excess.VGPRs
- << " VGPRs in total, next ArchVGPR aligment in "
- << Excess.ArchVGPRsToAlignment << " registers)\n";
- return OS;
- }
-#endif
-
-private:
- static inline bool saveRegs(unsigned &LeftToSave, unsigned &NumRegs) {
- unsigned NumSaved = std::min(LeftToSave, NumRegs);
- NumRegs -= NumSaved;
- LeftToSave -= NumSaved;
- return NumSaved;
- }
-};
-} // namespace
-
-ExcessRP::ExcessRP(const GCNSubtarget &ST, const GCNRegPressure &RP,
- unsigned MaxSGPRs, unsigned MaxVGPRs,
- bool CombineVGPRSavings)
- : UnifiedRF(ST.hasGFX90AInsts()), CombineVGPRSavings(CombineVGPRSavings) {
- // Compute excess SGPR pressure.
- unsigned NumSGPRs = RP.getSGPRNum();
- if (NumSGPRs > MaxSGPRs)
- SGPRs = NumSGPRs - MaxSGPRs;
-
- // Compute excess ArchVGPR/AGPR pressure.
- unsigned NumArchVGPRs = RP.getArchVGPRNum();
- unsigned NumAGPRs = RP.getAGPRNum();
- HasAGPRs = NumAGPRs;
- if (!UnifiedRF) {
- // Non-unified RF. Account for excess pressure for ArchVGPRs and AGPRs
- // independently.
- if (NumArchVGPRs > MaxVGPRs)
- ArchVGPRs = NumArchVGPRs - MaxVGPRs;
- if (NumAGPRs > MaxVGPRs)
- AGPRs = NumAGPRs - MaxVGPRs;
- return;
- }
-
- // Independently of whether overall VGPR pressure is under the limit, we still
- // have to check whether ArchVGPR pressure or AGPR pressure alone exceeds the
- // number of addressable registers in each category.
- const unsigned MaxArchVGPRs = ST.getAddressableNumArchVGPRs();
- if (NumArchVGPRs > MaxArchVGPRs) {
- ArchVGPRs = NumArchVGPRs - MaxArchVGPRs;
- NumArchVGPRs = MaxArchVGPRs;
- }
- if (NumAGPRs > MaxArchVGPRs) {
- AGPRs = NumAGPRs - MaxArchVGPRs;
- NumAGPRs = MaxArchVGPRs;
- }
-
- // Check overall VGPR usage against the limit; any excess above addressable
- // register limits has already been accounted for.
- const unsigned Granule = AMDGPU::IsaInfo::getArchVGPRAllocGranule();
- unsigned NumVGPRs = GCNRegPressure::getUnifiedVGPRNum(NumArchVGPRs, NumAGPRs);
- if (NumVGPRs > MaxVGPRs) {
- VGPRs = NumVGPRs - MaxVGPRs;
- ArchVGPRsToAlignment = NumArchVGPRs - alignDown(NumArchVGPRs, Granule);
- if (!ArchVGPRsToAlignment)
- ArchVGPRsToAlignment = Granule;
- }
-}
-
-bool ExcessRP::saveArchVGPRs(unsigned NumRegs) {
- bool Progress = saveRegs(ArchVGPRs, NumRegs);
- if (!NumRegs)
- return Progress;
-
- if (!UnifiedRF) {
- if (CombineVGPRSavings)
- Progress |= saveRegs(AGPRs, NumRegs);
- } else if (HasAGPRs && (VGPRs || (CombineVGPRSavings && AGPRs))) {
- // There is progress as long as there are VGPRs left to save, even if the
- // save induced by this particular call does not cross an ArchVGPR alignment
- // barrier.
- Progress = true;
-
- // ArchVGPRs can only be allocated as a multiple of a granule in unified RF.
- unsigned NumSavedRegs = 0;
-
- // Count the number of whole ArchVGPR allocation granules we can save.
- const unsigned Granule = AMDGPU::IsaInfo::getArchVGPRAllocGranule();
- if (unsigned NumGranules = NumRegs / Granule; NumGranules) {
- NumSavedRegs = NumGranules * Granule;
- NumRegs -= NumSavedRegs;
- }
-
- // We may be able to save one more whole ArchVGPR allocation granule.
- if (NumRegs >= ArchVGPRsToAlignment) {
- NumSavedRegs += Granule;
- ArchVGPRsToAlignment = Granule - (NumRegs - ArchVGPRsToAlignment);
- } else {
- ArchVGPRsToAlignment -= NumRegs;
- }
-
- // Prioritize saving generic VGPRs, then AGPRs if we consider that the
- // register allocator will be able to replace an AGPR with an ArchVGPR.
- saveRegs(VGPRs, NumSavedRegs);
- if (CombineVGPRSavings)
- saveRegs(AGPRs, NumSavedRegs);
- } else {
- // No AGPR usage in the region i.e., no allocation granule to worry about.
- Progress |= saveRegs(VGPRs, NumRegs);
- }
- return Progress;
-}
-
-bool ExcessRP::saveAGPRs(unsigned NumRegs) {
- bool Progress = saveRegs(AGPRs, NumRegs);
- if (UnifiedRF)
- Progress |= saveRegs(VGPRs, NumRegs);
- if (CombineVGPRSavings)
- Progress |= saveRegs(ArchVGPRs, NumRegs);
- return Progress;
-}
-
bool PreRARematStage::canIncreaseOccupancyOrReduceSpill() {
- const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo *>(DAG.TRI);
-
REMAT_DEBUG({
dbgs() << "Collecting rematerializable instructions in ";
MF.getFunction().printAsOperand(dbgs(), false);
dbgs() << '\n';
});
- // Maps optimizable regions (i.e., regions at minimum and VGPR-limited
- // occupancy, or regions with VGPR spilling) to a model of their excess RP.
- DenseMap<unsigned, ExcessRP> OptRegions;
+ // Maps optimizable regions (i.e., regions at minimum and register-limited
+ // occupancy, or regions with spilling) to the target RP we would like to
+ // reach.
+ DenseMap<unsigned, GCNRPTarget> OptRegions;
const Function &F = MF.getFunction();
unsigned DynamicVGPRBlockSize =
MF.getInfo<SIMachineFunctionInfo>()->getDynamicVGPRBlockSize();
@@ -1901,32 +1734,35 @@ bool PreRARematStage::canIncreaseOccupancyOrReduceSpill() {
// the register allocator i.e., the scheduler will not be able to reason
// about these savings and will not report an increase in the achievable
// occupancy, triggering rollbacks.
- ExcessRP Excess(ST, RP, MaxSGPRsNoSpill, MaxVGPRsNoSpill,
- /*CombineVGPRSavings=*/true);
- if (Excess && IncreaseOccupancy) {
+ GCNRPTarget Target(MaxSGPRsNoSpill, MaxVGPRsNoSpill, MF, RP,
+ /*CombineVGPRSavings=*/true);
+ if (!Target.satisfied() && IncreaseOccupancy) {
// There is spilling in the region and we were so far trying to increase
// occupancy. Strop trying that and focus on reducing spilling.
IncreaseOccupancy = false;
OptRegions.clear();
} else if (IncreaseOccupancy) {
// There is no spilling in the region, try to increase occupancy.
- Excess = ExcessRP(ST, RP, MaxSGPRsIncOcc, MaxVGPRsIncOcc,
- /*CombineVGPRSavings=*/false);
+ Target = GCNRPTarget(MaxSGPRsIncOcc, MaxVGPRsIncOcc, MF, RP,
+ /*CombineVGPRSavings=*/false);
}
- if (Excess)
- OptRegions.insert({I, Excess});
+ if (!Target.satisfied())
+ OptRegions.insert({I, Target});
}
if (OptRegions.empty())
return false;
#ifndef NDEBUG
- if (IncreaseOccupancy)
- REMAT_DEBUG(dbgs() << "Occupancy minimal in regions:\n");
- else
- REMAT_DEBUG(dbgs() << "Spilling in regions:\n");
+ if (IncreaseOccupancy) {
+ REMAT_DEBUG(dbgs() << "Occupancy minimal (" << DAG.MinOccupancy
+ << ") in regions:\n");
+ } else {
+ REMAT_DEBUG(dbgs() << "Spilling w.r.t. minimum target occupancy ("
+ << WavesPerEU.first << ") in regions:\n");
+ }
for (unsigned I = 0, E = DAG.Regions.size(); I != E; ++I) {
if (auto OptIt = OptRegions.find(I); OptIt != OptRegions.end())
- REMAT_DEBUG(dbgs() << " " << I << ": " << OptIt->getSecond() << '\n');
+ REMAT_DEBUG(dbgs() << " [" << I << "] " << OptIt->getSecond() << '\n');
}
#endif
@@ -1941,18 +1777,14 @@ bool PreRARematStage::canIncreaseOccupancyOrReduceSpill() {
// estimate that we have identified enough rematerialization opportunities to
// achieve our goal, and sets Progress to true when this particular reduction
// in pressure was helpful toward that goal.
- auto ReduceRPInRegion = [&](auto OptIt, LaneBitmask Mask,
- const TargetRegisterClass *RC,
+ auto ReduceRPInRegion = [&](auto OptIt, Register Reg, LaneBitmask Mask,
bool &Progress) -> bool {
- ExcessRP &Excess = OptIt->getSecond();
- unsigned NumRegs = SIRegisterInfo::getNumCoveredRegs(Mask);
- if (SRI->isSGPRClass(RC))
- Progress |= Excess.saveSGPRs(NumRegs);
- else if (SRI->isAGPRClass(RC))
- Progress |= Excess.saveAGPRs(NumRegs);
- else
- Progress |= Excess.saveArchVGPRs(NumRegs);
- if (!Excess)
+ GCNRPTarget &Target = OptIt->getSecond();
+ if (!Target.isSaveBeneficial(Reg, DAG.MRI))
+ return false;
+ Progress = true;
+ Target.saveReg(Reg, Mask, DAG.MRI);
+ if (Target.satisfied())
OptRegions.erase(OptIt->getFirst());
return OptRegions.empty();
};
@@ -2013,7 +1845,6 @@ bool PreRARematStage::canIncreaseOccupancyOrReduceSpill() {
Rematerializations.try_emplace(&DefMI, UseMI).first->second;
bool RematUseful = false;
- const TargetRegisterClass *RC = DAG.MRI.getRegClass(Reg);
if (auto It = OptRegions.find(I); It != OptRegions.end()) {
// Optimistically consider that moving the instruction out of its
// defining region will reduce RP in the latter; this assumes that
@@ -2021,7 +1852,7 @@ bool PreRARematStage::canIncreaseOccupancyOrReduceSpill() {
// instruction and the end of the region.
REMAT_DEBUG(dbgs() << " Defining region is optimizable\n");
LaneBitmask Mask = DAG.RegionLiveOuts.getLiveRegsForRegionIdx(I)[Reg];
- if (ReduceRPInRegion(It, Mask, RC, RematUseful))
+ if (ReduceRPInRegion(It, Reg, Mask, RematUseful))
return true;
}
@@ -2041,7 +1872,8 @@ bool PreRARematStage::canIncreaseOccupancyOrReduceSpill() {
// instruction's use.
if (auto It = OptRegions.find(LIRegion); It != OptRegions.end()) {
REMAT_DEBUG(dbgs() << " Live-in in region " << LIRegion << '\n');
- if (ReduceRPInRegion(It, DAG.LiveIns[LIRegion][Reg], RC, RematUseful))
+ if (ReduceRPInRegion(It, Reg, DAG.LiveIns[LIRegion][Reg],
+ RematUseful))
return true;
}
}
diff --git a/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats-attr.mir b/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats-attr.mir
index 7ef4b0f9b6108..23412aaeb2e23 100644
--- a/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats-attr.mir
+++ b/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats-attr.mir
@@ -6,7 +6,7 @@
define void @small_num_sgprs_as_spill() "amdgpu-num-sgpr"="85" {
ret void
}
- define void @small_num_vgprs_as_spill() "amdgpu-num-vgpr"="28" {
+ define void @small_num_vgprs_as_spill() "amdgpu-num-vgpr"="14" {
ret void
}
define void @dont_remat_waves_per_eu() "amdgpu-flat-work-group-size"="1024,1024" "amdgpu-waves-per-eu"="7,7" {
@@ -391,14 +391,14 @@ body: |
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_]], implicit [[V_CVT_I32_F64_e32_1]], implicit [[V_CVT_I32_F64_e32_2]], implicit [[V_CVT_I32_F64_e32_3]], implicit [[V_CVT_I32_F64_e32_4]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_5]], implicit [[V_CVT_I32_F64_e32_6]], implicit [[V_CVT_I32_F64_e32_7]], implicit [[V_CVT_I32_F64_e32_8]], implicit [[V_CVT_I32_F64_e32_9]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_10]], implicit [[V_CVT_I32_F64_e32_11]], implicit [[V_CVT_I32_F64_e32_12]], implicit [[V_CVT_I32_F64_e32_13]], implicit [[V_CVT_I32_F64_e32_14]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_15]], implicit [[V_CVT_I32_F64_e32_16]], implicit [[V_CVT_I32_F64_e32_17]], implicit [[V_CVT_I32_F64_e32_18]], implicit [[V_CVT_I32_F64_e32_19]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_20]], implicit [[V_CVT_I32_F64_e32_21]], implicit [[V_CVT_I32_F64_e32_22]], implicit [[V_CVT_I32_F64_e32_23]], implicit [[V_CVT_I32_F64_e32_24]]
; GFX908-NEXT: [[V_CVT_I32_F64_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 27, implicit $exec, implicit $mode
; GFX908-NEXT: [[V_CVT_I32_F64_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 28, implicit $exec, implicit $mode
; GFX908-NEXT: [[V_CVT_I32_F64_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 29, implicit $exec, implicit $mode
; GFX908-NEXT: [[V_CVT_I32_F64_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 30, implicit $exec, implicit $mode
; GFX908-NEXT: [[V_CVT_I32_F64_e32_32:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 31, implicit $exec, implicit $mode
; GFX908-NEXT: [[V_CVT_I32_F64_e32_33:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 32, implicit $exec, implicit $mode
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_15]], implicit [[V_CVT_I32_F64_e32_16]], implicit [[V_CVT_I32_F64_e32_17]], implicit [[V_CVT_I32_F64_e32_18]], implicit [[V_CVT_I32_F64_e32_19]]
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_20]], implicit [[V_CVT_I32_F64_e32_21]], implicit [[V_CVT_I32_F64_e32_22]], implicit [[V_CVT_I32_F64_e32_23]], implicit [[V_CVT_I32_F64_e32_24]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_25]], implicit [[V_CVT_I32_F64_e32_26]], implicit [[V_CVT_I32_F64_e32_28]], implicit [[V_CVT_I32_F64_e32_29]], implicit [[V_CVT_I32_F64_e32_30]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_31]], implicit [[V_CVT_I32_F64_e32_32]], implicit [[V_CVT_I32_F64_e32_33]], implicit [[V_CVT_I32_F64_e32_27]]
; GFX908-NEXT: S_ENDPGM 0
@@ -434,13 +434,7 @@ body: |
; GFX90A-NEXT: [[V_CVT_I32_F64_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
; GFX90A-NEXT: [[V_CVT_I32_F64_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 25, implicit $exec, implicit $mode, implicit-def $m0
; GFX90A-NEXT: [[V_CVT_I32_F64_e32_26:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 26, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 27, implicit $exec, implicit $mode
- ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 28, implicit $exec, implicit $mode
- ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 29, implicit $exec, implicit $mode
- ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 30, implicit $exec, implicit $mode
- ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 31, implicit $exec, implicit $mode
- ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_32:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 32, implicit $exec, implicit $mode
- ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_33:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 33, implicit $exec, implicit $mode
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 33, implicit $exec, implicit $mode
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.1:
; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_]], implicit [[V_CVT_I32_F64_e32_1]], implicit [[V_CVT_I32_F64_e32_2]], implicit [[V_CVT_I32_F64_e32_3]], implicit [[V_CVT_I32_F64_e32_4]]
@@ -448,8 +442,14 @@ body: |
; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_10]], implicit [[V_CVT_I32_F64_e32_11]], implicit [[V_CVT_I32_F64_e32_12]], implicit [[V_CVT_I32_F64_e32_13]], implicit [[V_CVT_I32_F64_e32_14]]
; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_15]], implicit [[V_CVT_I32_F64_e32_16]], implicit [[V_CVT_I32_F64_e32_17]], implicit [[V_CVT_I32_F64_e32_18]], implicit [[V_CVT_I32_F64_e32_19]]
; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_20]], implicit [[V_CVT_I32_F64_e32_21]], implicit [[V_CVT_I32_F64_e32_22]], implicit [[V_CVT_I32_F64_e32_23]], implicit [[V_CVT_I32_F64_e32_24]]
- ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_25]], implicit [[V_CVT_I32_F64_e32_26]], implicit [[V_CVT_I32_F64_e32_27]], implicit [[V_CVT_I32_F64_e32_28]], implicit [[V_CVT_I32_F64_e32_29]]
- ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_30]], implicit [[V_CVT_I32_F64_e32_31]], implicit [[V_CVT_I32_F64_e32_32]], implicit [[V_CVT_I32_F64_e32_33]]
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 27, implicit $exec, implicit $mode
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 28, implicit $exec, implicit $mode
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 29, implicit $exec, implicit $mode
+ ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_25]], implicit [[V_CVT_I32_F64_e32_26]], implicit [[V_CVT_I32_F64_e32_28]], implicit [[V_CVT_I32_F64_e32_29]], implicit [[V_CVT_I32_F64_e32_30]]
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 30, implicit $exec, implicit $mode
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_32:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 31, implicit $exec, implicit $mode
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_33:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 32, implicit $exec, implicit $mode
+ ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_31]], implicit [[V_CVT_I32_F64_e32_32]], implicit [[V_CVT_I32_F64_e32_33]], implicit [[V_CVT_I32_F64_e32_27]]
; GFX90A-NEXT: S_ENDPGM 0
bb.0:
successors: %bb.1
@@ -1090,8 +1090,8 @@ body: |
S_ENDPGM 0
...
# Requested [min,max] occupancy is [1,2]. There are 257 ArchVGPRs in use when
-# only 256 are available. We should just try to eliminate spilling by saving one
-# ArchVGPR.
+# only 256 are available. Nothing to do since it should be possible to use an
+# AGPR for one of the ArchVGPRs.
---
name: reduce_spill_archvgpr_above_addressable_limit
tracksRegLiveness: true
@@ -1103,6 +1103,7 @@ body: |
; GFX908-NEXT: successors: %bb.1(0x80000000)
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: [[V_CVT_I32_F64_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 0, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[DEF:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[V_CVT_I32_F64_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_3:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode, implicit-def $m0
@@ -1386,7 +1387,6 @@ body: |
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_220]], implicit [[V_CVT_I32_F64_e32_221]], implicit [[V_CVT_I32_F64_e32_222]], implicit [[V_CVT_I32_F64_e32_223]], implicit [[V_CVT_I32_F64_e32_224]], implicit [[V_CVT_I32_F64_e32_225]], implicit [[V_CVT_I32_F64_e32_226]], implicit [[V_CVT_I32_F64_e32_227]], implicit [[V_CVT_I32_F64_e32_228]], implicit [[V_CVT_I32_F64_e32_229]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_230]], implicit [[V_CVT_I32_F64_e32_231]], implicit [[V_CVT_I32_F64_e32_232]], implicit [[V_CVT_I32_F64_e32_233]], implicit [[V_CVT_I32_F64_e32_234]], implicit [[V_CVT_I32_F64_e32_235]], implicit [[V_CVT_I32_F64_e32_236]], implicit [[V_CVT_I32_F64_e32_237]], implicit [[V_CVT_I32_F64_e32_238]], implicit [[V_CVT_I32_F64_e32_239]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_240]], implicit [[V_CVT_I32_F64_e32_241]], implicit [[V_CVT_I32_F64_e32_242]], implicit [[V_CVT_I32_F64_e32_243]], implicit [[V_CVT_I32_F64_e32_244]], implicit [[V_CVT_I32_F64_e32_245]], implicit [[V_CVT_I32_F64_e32_246]], implicit [[V_CVT_I32_F64_e32_247]], implicit [[V_CVT_I32_F64_e32_248]], implicit [[V_CVT_I32_F64_e32_249]]
- ; GFX908-NEXT: [[DEF:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_250]], implicit [[V_CVT_I32_F64_e32_251]], implicit [[V_CVT_I32_F64_e32_252]], implicit [[V_CVT_I32_F64_e32_253]], implicit [[V_CVT_I32_F64_e32_254]], implicit [[V_CVT_I32_F64_e32_255]], implicit [[V_CVT_I32_F64_e32_256]], implicit [[DEF]]
; GFX908-NEXT: S_ENDPGM 0
;
@@ -1649,8 +1649,9 @@ body: |
; GFX90A-NEXT: [[V_CVT_I32_F64_e32_252:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 252, implicit $exec, implicit $mode, implicit-def $m0
; GFX90A-NEXT: [[V_CVT_I32_F64_e32_253:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 253, implicit $exec, implicit $mode, implicit-def $m0
; GFX90A-NEXT: [[V_CVT_I32_F64_e32_254:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 254, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_255:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 255, implicit $exec, implicit $mode
- ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_256:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 256, implicit $exec, implicit $mode
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_255:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 256, implicit $exec, implicit $mode
+ ; GFX90A-NEXT: [[DEF:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_256:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 255, implicit $exec, implicit $mode
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.1:
; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_]], implicit [[V_CVT_I32_F64_e32_1]], implicit [[V_CVT_I32_F64_e32_2]], implicit [[V_CVT_I32_F64_e32_3]], implicit [[V_CVT_I32_F64_e32_4]], implicit [[V_CVT_I32_F64_e32_5]], implicit [[V_CVT_I32_F64_e32_6]], implicit [[V_CVT_I32_F64_e32_7]], implicit [[V_CVT_I32_F64_e32_8]], implicit [[V_CVT_I32_F64_e32_9]]
@@ -1678,8 +1679,7 @@ body: |
; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_220]], implicit [[V_CVT_I32_F64_e32_221]], implicit [[V_CVT_I32_F64_e32_222]], implicit [[V_CVT_I32_F64_e32_223]], implicit [[V_CVT_I32_F64_e32_224]], implicit [[V_CVT_I32_F64_e32_225]], implicit [[V_CVT_I32_F64_e32_226]], implicit [[V_CVT_I32_F64_e32_227]], implicit [[V_CVT_I32_F64_e32_228]], implicit [[V_CVT_I32_F64_e32_229]]
; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_230]], implicit [[V_CVT_I32_F64_e32_231]], implicit [[V_CVT_I32_F64_e32_232]], implicit [[V_CVT_I32_F64_e32_233]], implicit [[V_CVT_I32_F64_e32_234]], implicit [[V_CVT_I32_F64_e32_235]], implicit [[V_CVT_I32_F64_e32_236]], implicit [[V_CVT_I32_F64_e32_237]], implicit [[V_CVT_I32_F64_e32_238]], implicit [[V_CVT_I32_F64_e32_239]]
; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_240]], implicit [[V_CVT_I32_F64_e32_241]], implicit [[V_CVT_I32_F64_e32_242]], implicit [[V_CVT_I32_F64_e32_243]], implicit [[V_CVT_I32_F64_e32_244]], implicit [[V_CVT_I32_F64_e32_245]], implicit [[V_CVT_I32_F64_e32_246]], implicit [[V_CVT_I32_F64_e32_247]], implicit [[V_CVT_I32_F64_e32_248]], implicit [[V_CVT_I32_F64_e32_249]]
- ; GFX90A-NEXT: [[DEF:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_250]], implicit [[V_CVT_I32_F64_e32_251]], implicit [[V_CVT_I32_F64_e32_252]], implicit [[V_CVT_I32_F64_e32_253]], implicit [[V_CVT_I32_F64_e32_254]], implicit [[V_CVT_I32_F64_e32_255]], implicit [[V_CVT_I32_F64_e32_256]], implicit [[DEF]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_250]], implicit [[V_CVT_I32_F64_e32_251]], implicit [[V_CVT_I32_F64_e32_252]], implicit [[V_CVT_I32_F64_e32_253]], implicit [[V_CVT_I32_F64_e32_254]], implicit [[V_CVT_I32_F64_e32_256]], implicit [[V_CVT_I32_F64_e32_255]], implicit [[DEF]]
; GFX90A-NEXT: S_ENDPGM 0
bb.0:
successors: %bb.1
@@ -1976,9 +1976,8 @@ body: |
S_ENDPGM 0
...
# Requested [min,max] occupancy is [1,2]. There are 257 AGPRs in use when only
-# 256 are available. We should just try to eliminate spilling by saving one
-# AGPR or ArchVGPR (we assume we will be able to spill AGPRs to ArchVGPRs in
-# such cases).
+# 256 are available. Nothing to do since it should be possible to use an
+# ArchVGPR for one of the AGPRs.
---
name: reduce_spill_agpr_above_addressable_limit
tracksRegLiveness: true
@@ -1989,7 +1988,8 @@ body: |
; GFX908: bb.0:
; GFX908-NEXT: successors: %bb.1(0x80000000)
; GFX908-NEXT: {{ $}}
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 258, implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 257, implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 258, implicit $exec, implicit $mode
; GFX908-NEXT: [[DEF:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF1:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: [[DEF2:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
@@ -2249,33 +2249,32 @@ body: |
; GFX908-NEXT: [[DEF256:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.1:
- ; GFX908-NEXT: S_NOP 0, implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]], implicit [[DEF5]], implicit [[DEF6]], implicit [[DEF7]], implicit [[DEF8]], implicit [[DEF9]]
- ; GFX908-NEXT: S_NOP 0, implicit [[DEF10]], implicit [[DEF11]], implicit [[DEF12]], implicit [[DEF13]], implicit [[DEF14]], implicit [[DEF15]], implicit [[DEF16]], implicit [[DEF17]], implicit [[DEF18]], implicit [[DEF19]]
- ; GFX908-NEXT: S_NOP 0, implicit [[DEF20]], implicit [[DEF21]], implicit [[DEF22]], implicit [[DEF23]], implicit [[DEF24]], implicit [[DEF25]], implicit [[DEF26]], implicit [[DEF27]], implicit [[DEF28]], implicit [[DEF29]]
- ; GFX908-NEXT: S_NOP 0, implicit [[DEF30]], implicit [[DEF31]], implicit [[DEF32]], implicit [[DEF33]], implicit [[DEF34]], implicit [[DEF35]], implicit [[DEF36]], implicit [[DEF37]], implicit [[DEF38]], implicit [[DEF39]]
- ; GFX908-NEXT: S_NOP 0, implicit [[DEF40]], implicit [[DEF41]], implicit [[DEF42]], implicit [[DEF43]], implicit [[DEF44]], implicit [[DEF45]], implicit [[DEF46]], implicit [[DEF47]], implicit [[DEF48]], implicit [[DEF49]]
- ; GFX908-NEXT: S_NOP 0, implicit [[DEF50]], implicit [[DEF51]], implicit [[DEF52]], implicit [[DEF53]], implicit [[DEF54]], implicit [[DEF55]], implicit [[DEF56]], implicit [[DEF57]], implicit [[DEF58]], implicit [[DEF59]]
- ; GFX908-NEXT: S_NOP 0, implicit [[DEF60]], implicit [[DEF61]], implicit [[DEF62]], implicit [[DEF63]], implicit [[DEF64]], implicit [[DEF65]], implicit [[DEF66]], implicit [[DEF67]], implicit [[DEF68]], implicit [[DEF69]]
- ; GFX908-NEXT: S_NOP 0, implicit [[DEF70]], implicit [[DEF71]], implicit [[DEF72]], implicit [[DEF73]], implicit [[DEF74]], implicit [[DEF75]], implicit [[DEF76]], implicit [[DEF77]], implicit [[DEF78]], implicit [[DEF79]]
- ; GFX908-NEXT: S_NOP 0, implicit [[DEF80]], implicit [[DEF81]], implicit [[DEF82]], implicit [[DEF83]], implicit [[DEF84]], implicit [[DEF85]], implicit [[DEF86]], implicit [[DEF87]], implicit [[DEF88]], implicit [[DEF89]]
- ; GFX908-NEXT: S_NOP 0, implicit [[DEF90]], implicit [[DEF91]], implicit [[DEF92]], implicit [[DEF93]], implicit [[DEF94]], implicit [[DEF95]], implicit [[DEF96]], implicit [[DEF97]], implicit [[DEF98]], implicit [[DEF99]]
- ; GFX908-NEXT: S_NOP 0, implicit [[DEF100]], implicit [[DEF101]], implicit [[DEF102]], implicit [[DEF103]], implicit [[DEF104]], implicit [[DEF105]], implicit [[DEF106]], implicit [[DEF107]], implicit [[DEF108]], implicit [[DEF109]]
- ; GFX908-NEXT: S_NOP 0, implicit [[DEF110]], implicit [[DEF111]], implicit [[DEF112]], implicit [[DEF113]], implicit [[DEF114]], implicit [[DEF115]], implicit [[DEF116]], implicit [[DEF117]], implicit [[DEF118]], implicit [[DEF119]]
- ; GFX908-NEXT: S_NOP 0, implicit [[DEF120]], implicit [[DEF121]], implicit [[DEF122]], implicit [[DEF123]], implicit [[DEF124]], implicit [[DEF125]], implicit [[DEF126]], implicit [[DEF127]], implicit [[DEF128]], implicit [[DEF129]]
- ; GFX908-NEXT: S_NOP 0, implicit [[DEF130]], implicit [[DEF131]], implicit [[DEF132]], implicit [[DEF133]], implicit [[DEF134]], implicit [[DEF135]], implicit [[DEF136]], implicit [[DEF137]], implicit [[DEF138]], implicit [[DEF139]]
- ; GFX908-NEXT: S_NOP 0, implicit [[DEF140]], implicit [[DEF141]], implicit [[DEF142]], implicit [[DEF143]], implicit [[DEF144]], implicit [[DEF145]], implicit [[DEF146]], implicit [[DEF147]], implicit [[DEF148]], implicit [[DEF149]]
- ; GFX908-NEXT: S_NOP 0, implicit [[DEF150]], implicit [[DEF151]], implicit [[DEF152]], implicit [[DEF153]], implicit [[DEF154]], implicit [[DEF155]], implicit [[DEF156]], implicit [[DEF157]], implicit [[DEF158]], implicit [[DEF159]]
- ; GFX908-NEXT: S_NOP 0, implicit [[DEF160]], implicit [[DEF161]], implicit [[DEF162]], implicit [[DEF163]], implicit [[DEF164]], implicit [[DEF165]], implicit [[DEF166]], implicit [[DEF167]], implicit [[DEF168]], implicit [[DEF169]]
- ; GFX908-NEXT: S_NOP 0, implicit [[DEF170]], implicit [[DEF171]], implicit [[DEF172]], implicit [[DEF173]], implicit [[DEF174]], implicit [[DEF175]], implicit [[DEF176]], implicit [[DEF177]], implicit [[DEF178]], implicit [[DEF179]]
- ; GFX908-NEXT: S_NOP 0, implicit [[DEF180]], implicit [[DEF181]], implicit [[DEF182]], implicit [[DEF183]], implicit [[DEF184]], implicit [[DEF185]], implicit [[DEF186]], implicit [[DEF187]], implicit [[DEF188]], implicit [[DEF189]]
- ; GFX908-NEXT: S_NOP 0, implicit [[DEF190]], implicit [[DEF191]], implicit [[DEF192]], implicit [[DEF193]], implicit [[DEF194]], implicit [[DEF195]], implicit [[DEF196]], implicit [[DEF197]], implicit [[DEF198]], implicit [[DEF199]]
- ; GFX908-NEXT: S_NOP 0, implicit [[DEF200]], implicit [[DEF201]], implicit [[DEF202]], implicit [[DEF203]], implicit [[DEF204]], implicit [[DEF205]], implicit [[DEF206]], implicit [[DEF207]], implicit [[DEF208]], implicit [[DEF209]]
- ; GFX908-NEXT: S_NOP 0, implicit [[DEF210]], implicit [[DEF211]], implicit [[DEF212]], implicit [[DEF213]], implicit [[DEF214]], implicit [[DEF215]], implicit [[DEF216]], implicit [[DEF217]], implicit [[DEF218]], implicit [[DEF219]]
- ; GFX908-NEXT: S_NOP 0, implicit [[DEF220]], implicit [[DEF221]], implicit [[DEF222]], implicit [[DEF223]], implicit [[DEF224]], implicit [[DEF225]], implicit [[DEF226]], implicit [[DEF227]], implicit [[DEF228]], implicit [[DEF229]]
- ; GFX908-NEXT: S_NOP 0, implicit [[DEF230]], implicit [[DEF231]], implicit [[DEF232]], implicit [[DEF233]], implicit [[DEF234]], implicit [[DEF235]], implicit [[DEF236]], implicit [[DEF237]], implicit [[DEF238]], implicit [[DEF239]]
- ; GFX908-NEXT: S_NOP 0, implicit [[DEF240]], implicit [[DEF241]], implicit [[DEF242]], implicit [[DEF243]], implicit [[DEF244]], implicit [[DEF245]], implicit [[DEF246]], implicit [[DEF247]], implicit [[DEF248]], implicit [[DEF249]]
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 257, implicit $exec, implicit $mode
- ; GFX908-NEXT: S_NOP 0, implicit [[DEF250]], implicit [[DEF251]], implicit [[DEF252]], implicit [[DEF253]], implicit [[DEF254]], implicit [[DEF255]], implicit [[DEF256]], implicit [[V_CVT_I32_F64_e32_1]], implicit [[V_CVT_I32_F64_e32_]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF128]], implicit [[DEF129]], implicit [[DEF130]], implicit [[DEF131]], implicit [[DEF132]], implicit [[DEF133]], implicit [[DEF134]], implicit [[DEF135]], implicit [[DEF136]], implicit [[DEF137]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF138]], implicit [[DEF139]], implicit [[DEF140]], implicit [[DEF141]], implicit [[DEF142]], implicit [[DEF143]], implicit [[DEF144]], implicit [[DEF145]], implicit [[DEF146]], implicit [[DEF147]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF148]], implicit [[DEF149]], implicit [[DEF150]], implicit [[DEF151]], implicit [[DEF152]], implicit [[DEF153]], implicit [[DEF154]], implicit [[DEF155]], implicit [[DEF156]], implicit [[DEF157]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF158]], implicit [[DEF159]], implicit [[DEF160]], implicit [[DEF161]], implicit [[DEF162]], implicit [[DEF163]], implicit [[DEF164]], implicit [[DEF165]], implicit [[DEF166]], implicit [[DEF167]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF168]], implicit [[DEF169]], implicit [[DEF170]], implicit [[DEF171]], implicit [[DEF172]], implicit [[DEF173]], implicit [[DEF174]], implicit [[DEF175]], implicit [[DEF176]], implicit [[DEF177]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF178]], implicit [[DEF179]], implicit [[DEF180]], implicit [[DEF181]], implicit [[DEF182]], implicit [[DEF183]], implicit [[DEF184]], implicit [[DEF185]], implicit [[DEF186]], implicit [[DEF187]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF188]], implicit [[DEF189]], implicit [[DEF190]], implicit [[DEF191]], implicit [[DEF192]], implicit [[DEF193]], implicit [[DEF194]], implicit [[DEF195]], implicit [[DEF196]], implicit [[DEF197]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF198]], implicit [[DEF199]], implicit [[DEF200]], implicit [[DEF201]], implicit [[DEF202]], implicit [[DEF203]], implicit [[DEF204]], implicit [[DEF205]], implicit [[DEF206]], implicit [[DEF207]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF208]], implicit [[DEF209]], implicit [[DEF210]], implicit [[DEF211]], implicit [[DEF212]], implicit [[DEF213]], implicit [[DEF214]], implicit [[DEF215]], implicit [[DEF216]], implicit [[DEF217]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF218]], implicit [[DEF219]], implicit [[DEF220]], implicit [[DEF221]], implicit [[DEF222]], implicit [[DEF223]], implicit [[DEF224]], implicit [[DEF225]], implicit [[DEF226]], implicit [[DEF227]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF228]], implicit [[DEF229]], implicit [[DEF230]], implicit [[DEF231]], implicit [[DEF232]], implicit [[DEF233]], implicit [[DEF234]], implicit [[DEF235]], implicit [[DEF236]], implicit [[DEF237]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF238]], implicit [[DEF239]], implicit [[DEF240]], implicit [[DEF241]], implicit [[DEF242]], implicit [[DEF243]], implicit [[DEF244]], implicit [[DEF245]], implicit [[DEF246]], implicit [[DEF247]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF248]], implicit [[DEF249]], implicit [[DEF250]], implicit [[DEF251]], implicit [[DEF252]], implicit [[DEF253]], implicit [[DEF254]], implicit [[DEF255]], implicit [[DEF256]], implicit [[DEF]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]], implicit [[DEF5]], implicit [[DEF6]], implicit [[DEF7]], implicit [[DEF8]], implicit [[DEF9]], implicit [[DEF10]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF11]], implicit [[DEF12]], implicit [[DEF13]], implicit [[DEF14]], implicit [[DEF15]], implicit [[DEF16]], implicit [[DEF17]], implicit [[DEF18]], implicit [[DEF19]], implicit [[DEF20]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF21]], implicit [[DEF22]], implicit [[DEF23]], implicit [[DEF24]], implicit [[DEF25]], implicit [[DEF26]], implicit [[DEF27]], implicit [[DEF28]], implicit [[DEF29]], implicit [[DEF30]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF31]], implicit [[DEF32]], implicit [[DEF33]], implicit [[DEF34]], implicit [[DEF35]], implicit [[DEF36]], implicit [[DEF37]], implicit [[DEF38]], implicit [[DEF39]], implicit [[DEF40]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF41]], implicit [[DEF42]], implicit [[DEF43]], implicit [[DEF44]], implicit [[DEF45]], implicit [[DEF46]], implicit [[DEF47]], implicit [[DEF48]], implicit [[DEF49]], implicit [[DEF50]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF51]], implicit [[DEF52]], implicit [[DEF53]], implicit [[DEF54]], implicit [[DEF55]], implicit [[DEF56]], implicit [[DEF57]], implicit [[DEF58]], implicit [[DEF59]], implicit [[DEF60]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF61]], implicit [[DEF62]], implicit [[DEF63]], implicit [[DEF64]], implicit [[DEF65]], implicit [[DEF66]], implicit [[DEF67]], implicit [[DEF68]], implicit [[DEF69]], implicit [[DEF70]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF71]], implicit [[DEF72]], implicit [[DEF73]], implicit [[DEF74]], implicit [[DEF75]], implicit [[DEF76]], implicit [[DEF77]], implicit [[DEF78]], implicit [[DEF79]], implicit [[DEF80]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF81]], implicit [[DEF82]], implicit [[DEF83]], implicit [[DEF84]], implicit [[DEF85]], implicit [[DEF86]], implicit [[DEF87]], implicit [[DEF88]], implicit [[DEF89]], implicit [[DEF90]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF91]], implicit [[DEF92]], implicit [[DEF93]], implicit [[DEF94]], implicit [[DEF95]], implicit [[DEF96]], implicit [[DEF97]], implicit [[DEF98]], implicit [[DEF99]], implicit [[DEF100]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF101]], implicit [[DEF102]], implicit [[DEF103]], implicit [[DEF104]], implicit [[DEF105]], implicit [[DEF106]], implicit [[DEF107]], implicit [[DEF108]], implicit [[DEF109]], implicit [[DEF110]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF111]], implicit [[DEF112]], implicit [[DEF113]], implicit [[DEF114]], implicit [[DEF115]], implicit [[DEF116]], implicit [[DEF117]], implicit [[DEF118]], implicit [[DEF119]], implicit [[DEF120]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[DEF121]], implicit [[DEF122]], implicit [[DEF123]], implicit [[DEF124]], implicit [[DEF125]], implicit [[DEF126]], implicit [[DEF127]], implicit [[V_CVT_I32_F64_e32_]], implicit [[V_CVT_I32_F64_e32_1]]
; GFX908-NEXT: S_ENDPGM 0
;
; GFX90A-LABEL: name: reduce_spill_agpr_above_addressable_limit
@@ -2534,41 +2533,41 @@ body: |
; GFX90A-NEXT: [[DEF249:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF250:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF251:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 257, implicit $exec, implicit $mode
+ ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 258, implicit $exec, implicit $mode
; GFX90A-NEXT: [[DEF252:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF253:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF254:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[DEF255:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 257, implicit $exec, implicit $mode
- ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 258, implicit $exec, implicit $mode
+ ; GFX90A-NEXT: [[DEF256:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.1:
- ; GFX90A-NEXT: [[DEF256:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
- ; GFX90A-NEXT: S_NOP 0, implicit [[DEF256]], implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]], implicit [[DEF5]], implicit [[DEF6]], implicit [[DEF7]], implicit [[DEF8]]
- ; GFX90A-NEXT: S_NOP 0, implicit [[DEF9]], implicit [[DEF10]], implicit [[DEF11]], implicit [[DEF12]], implicit [[DEF13]], implicit [[DEF14]], implicit [[DEF15]], implicit [[DEF16]], implicit [[DEF17]], implicit [[DEF18]]
- ; GFX90A-NEXT: S_NOP 0, implicit [[DEF19]], implicit [[DEF20]], implicit [[DEF21]], implicit [[DEF22]], implicit [[DEF23]], implicit [[DEF24]], implicit [[DEF25]], implicit [[DEF26]], implicit [[DEF27]], implicit [[DEF28]]
- ; GFX90A-NEXT: S_NOP 0, implicit [[DEF29]], implicit [[DEF30]], implicit [[DEF31]], implicit [[DEF32]], implicit [[DEF33]], implicit [[DEF34]], implicit [[DEF35]], implicit [[DEF36]], implicit [[DEF37]], implicit [[DEF38]]
- ; GFX90A-NEXT: S_NOP 0, implicit [[DEF39]], implicit [[DEF40]], implicit [[DEF41]], implicit [[DEF42]], implicit [[DEF43]], implicit [[DEF44]], implicit [[DEF45]], implicit [[DEF46]], implicit [[DEF47]], implicit [[DEF48]]
- ; GFX90A-NEXT: S_NOP 0, implicit [[DEF49]], implicit [[DEF50]], implicit [[DEF51]], implicit [[DEF52]], implicit [[DEF53]], implicit [[DEF54]], implicit [[DEF55]], implicit [[DEF56]], implicit [[DEF57]], implicit [[DEF58]]
- ; GFX90A-NEXT: S_NOP 0, implicit [[DEF59]], implicit [[DEF60]], implicit [[DEF61]], implicit [[DEF62]], implicit [[DEF63]], implicit [[DEF64]], implicit [[DEF65]], implicit [[DEF66]], implicit [[DEF67]], implicit [[DEF68]]
- ; GFX90A-NEXT: S_NOP 0, implicit [[DEF69]], implicit [[DEF70]], implicit [[DEF71]], implicit [[DEF72]], implicit [[DEF73]], implicit [[DEF74]], implicit [[DEF75]], implicit [[DEF76]], implicit [[DEF77]], implicit [[DEF78]]
- ; GFX90A-NEXT: S_NOP 0, implicit [[DEF79]], implicit [[DEF80]], implicit [[DEF81]], implicit [[DEF82]], implicit [[DEF83]], implicit [[DEF84]], implicit [[DEF85]], implicit [[DEF86]], implicit [[DEF87]], implicit [[DEF88]]
- ; GFX90A-NEXT: S_NOP 0, implicit [[DEF89]], implicit [[DEF90]], implicit [[DEF91]], implicit [[DEF92]], implicit [[DEF93]], implicit [[DEF94]], implicit [[DEF95]], implicit [[DEF96]], implicit [[DEF97]], implicit [[DEF98]]
- ; GFX90A-NEXT: S_NOP 0, implicit [[DEF99]], implicit [[DEF100]], implicit [[DEF101]], implicit [[DEF102]], implicit [[DEF103]], implicit [[DEF104]], implicit [[DEF105]], implicit [[DEF106]], implicit [[DEF107]], implicit [[DEF108]]
- ; GFX90A-NEXT: S_NOP 0, implicit [[DEF109]], implicit [[DEF110]], implicit [[DEF111]], implicit [[DEF112]], implicit [[DEF113]], implicit [[DEF114]], implicit [[DEF115]], implicit [[DEF116]], implicit [[DEF117]], implicit [[DEF118]]
- ; GFX90A-NEXT: S_NOP 0, implicit [[DEF119]], implicit [[DEF120]], implicit [[DEF121]], implicit [[DEF122]], implicit [[DEF123]], implicit [[DEF124]], implicit [[DEF125]], implicit [[DEF126]], implicit [[DEF127]], implicit [[DEF128]]
- ; GFX90A-NEXT: S_NOP 0, implicit [[DEF129]], implicit [[DEF130]], implicit [[DEF131]], implicit [[DEF132]], implicit [[DEF133]], implicit [[DEF134]], implicit [[DEF135]], implicit [[DEF136]], implicit [[DEF137]], implicit [[DEF138]]
- ; GFX90A-NEXT: S_NOP 0, implicit [[DEF139]], implicit [[DEF140]], implicit [[DEF141]], implicit [[DEF142]], implicit [[DEF143]], implicit [[DEF144]], implicit [[DEF145]], implicit [[DEF146]], implicit [[DEF147]], implicit [[DEF148]]
- ; GFX90A-NEXT: S_NOP 0, implicit [[DEF149]], implicit [[DEF150]], implicit [[DEF151]], implicit [[DEF152]], implicit [[DEF153]], implicit [[DEF154]], implicit [[DEF155]], implicit [[DEF156]], implicit [[DEF157]], implicit [[DEF158]]
- ; GFX90A-NEXT: S_NOP 0, implicit [[DEF159]], implicit [[DEF160]], implicit [[DEF161]], implicit [[DEF162]], implicit [[DEF163]], implicit [[DEF164]], implicit [[DEF165]], implicit [[DEF166]], implicit [[DEF167]], implicit [[DEF168]]
- ; GFX90A-NEXT: S_NOP 0, implicit [[DEF169]], implicit [[DEF170]], implicit [[DEF171]], implicit [[DEF172]], implicit [[DEF173]], implicit [[DEF174]], implicit [[DEF175]], implicit [[DEF176]], implicit [[DEF177]], implicit [[DEF178]]
- ; GFX90A-NEXT: S_NOP 0, implicit [[DEF179]], implicit [[DEF180]], implicit [[DEF181]], implicit [[DEF182]], implicit [[DEF183]], implicit [[DEF184]], implicit [[DEF185]], implicit [[DEF186]], implicit [[DEF187]], implicit [[DEF188]]
- ; GFX90A-NEXT: S_NOP 0, implicit [[DEF189]], implicit [[DEF190]], implicit [[DEF191]], implicit [[DEF192]], implicit [[DEF193]], implicit [[DEF194]], implicit [[DEF195]], implicit [[DEF196]], implicit [[DEF197]], implicit [[DEF198]]
- ; GFX90A-NEXT: S_NOP 0, implicit [[DEF199]], implicit [[DEF200]], implicit [[DEF201]], implicit [[DEF202]], implicit [[DEF203]], implicit [[DEF204]], implicit [[DEF205]], implicit [[DEF206]], implicit [[DEF207]], implicit [[DEF208]]
- ; GFX90A-NEXT: S_NOP 0, implicit [[DEF209]], implicit [[DEF210]], implicit [[DEF211]], implicit [[DEF212]], implicit [[DEF213]], implicit [[DEF214]], implicit [[DEF215]], implicit [[DEF216]], implicit [[DEF217]], implicit [[DEF218]]
- ; GFX90A-NEXT: S_NOP 0, implicit [[DEF219]], implicit [[DEF220]], implicit [[DEF221]], implicit [[DEF222]], implicit [[DEF223]], implicit [[DEF224]], implicit [[DEF225]], implicit [[DEF226]], implicit [[DEF227]], implicit [[DEF228]]
- ; GFX90A-NEXT: S_NOP 0, implicit [[DEF229]], implicit [[DEF230]], implicit [[DEF231]], implicit [[DEF232]], implicit [[DEF233]], implicit [[DEF234]], implicit [[DEF235]], implicit [[DEF236]], implicit [[DEF237]], implicit [[DEF238]]
- ; GFX90A-NEXT: S_NOP 0, implicit [[DEF239]], implicit [[DEF240]], implicit [[DEF241]], implicit [[DEF242]], implicit [[DEF243]], implicit [[DEF244]], implicit [[DEF245]], implicit [[DEF246]], implicit [[DEF247]], implicit [[DEF248]]
- ; GFX90A-NEXT: S_NOP 0, implicit [[DEF249]], implicit [[DEF250]], implicit [[DEF251]], implicit [[DEF252]], implicit [[DEF253]], implicit [[DEF254]], implicit [[DEF255]], implicit [[V_CVT_I32_F64_e32_]], implicit [[V_CVT_I32_F64_e32_1]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF252]], implicit [[DEF253]], implicit [[DEF254]], implicit [[DEF255]], implicit [[DEF256]], implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF5]], implicit [[DEF6]], implicit [[DEF7]], implicit [[DEF8]], implicit [[DEF9]], implicit [[DEF10]], implicit [[DEF11]], implicit [[DEF12]], implicit [[DEF13]], implicit [[DEF14]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF15]], implicit [[DEF16]], implicit [[DEF17]], implicit [[DEF18]], implicit [[DEF19]], implicit [[DEF20]], implicit [[DEF21]], implicit [[DEF22]], implicit [[DEF23]], implicit [[DEF24]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF25]], implicit [[DEF26]], implicit [[DEF27]], implicit [[DEF28]], implicit [[DEF29]], implicit [[DEF30]], implicit [[DEF31]], implicit [[DEF32]], implicit [[DEF33]], implicit [[DEF34]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF35]], implicit [[DEF36]], implicit [[DEF37]], implicit [[DEF38]], implicit [[DEF39]], implicit [[DEF40]], implicit [[DEF41]], implicit [[DEF42]], implicit [[DEF43]], implicit [[DEF44]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF45]], implicit [[DEF46]], implicit [[DEF47]], implicit [[DEF48]], implicit [[DEF49]], implicit [[DEF50]], implicit [[DEF51]], implicit [[DEF52]], implicit [[DEF53]], implicit [[DEF54]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF55]], implicit [[DEF56]], implicit [[DEF57]], implicit [[DEF58]], implicit [[DEF59]], implicit [[DEF60]], implicit [[DEF61]], implicit [[DEF62]], implicit [[DEF63]], implicit [[DEF64]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF65]], implicit [[DEF66]], implicit [[DEF67]], implicit [[DEF68]], implicit [[DEF69]], implicit [[DEF70]], implicit [[DEF71]], implicit [[DEF72]], implicit [[DEF73]], implicit [[DEF74]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF75]], implicit [[DEF76]], implicit [[DEF77]], implicit [[DEF78]], implicit [[DEF79]], implicit [[DEF80]], implicit [[DEF81]], implicit [[DEF82]], implicit [[DEF83]], implicit [[DEF84]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF85]], implicit [[DEF86]], implicit [[DEF87]], implicit [[DEF88]], implicit [[DEF89]], implicit [[DEF90]], implicit [[DEF91]], implicit [[DEF92]], implicit [[DEF93]], implicit [[DEF94]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF95]], implicit [[DEF96]], implicit [[DEF97]], implicit [[DEF98]], implicit [[DEF99]], implicit [[DEF100]], implicit [[DEF101]], implicit [[DEF102]], implicit [[DEF103]], implicit [[DEF104]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF105]], implicit [[DEF106]], implicit [[DEF107]], implicit [[DEF108]], implicit [[DEF109]], implicit [[DEF110]], implicit [[DEF111]], implicit [[DEF112]], implicit [[DEF113]], implicit [[DEF114]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF115]], implicit [[DEF116]], implicit [[DEF117]], implicit [[DEF118]], implicit [[DEF119]], implicit [[DEF120]], implicit [[DEF121]], implicit [[DEF122]], implicit [[DEF123]], implicit [[DEF124]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF125]], implicit [[DEF126]], implicit [[DEF127]], implicit [[DEF128]], implicit [[DEF129]], implicit [[DEF130]], implicit [[DEF131]], implicit [[DEF132]], implicit [[DEF133]], implicit [[DEF134]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF135]], implicit [[DEF136]], implicit [[DEF137]], implicit [[DEF138]], implicit [[DEF139]], implicit [[DEF140]], implicit [[DEF141]], implicit [[DEF142]], implicit [[DEF143]], implicit [[DEF144]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF145]], implicit [[DEF146]], implicit [[DEF147]], implicit [[DEF148]], implicit [[DEF149]], implicit [[DEF150]], implicit [[DEF151]], implicit [[DEF152]], implicit [[DEF153]], implicit [[DEF154]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF155]], implicit [[DEF156]], implicit [[DEF157]], implicit [[DEF158]], implicit [[DEF159]], implicit [[DEF160]], implicit [[DEF161]], implicit [[DEF162]], implicit [[DEF163]], implicit [[DEF164]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF165]], implicit [[DEF166]], implicit [[DEF167]], implicit [[DEF168]], implicit [[DEF169]], implicit [[DEF170]], implicit [[DEF171]], implicit [[DEF172]], implicit [[DEF173]], implicit [[DEF174]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF175]], implicit [[DEF176]], implicit [[DEF177]], implicit [[DEF178]], implicit [[DEF179]], implicit [[DEF180]], implicit [[DEF181]], implicit [[DEF182]], implicit [[DEF183]], implicit [[DEF184]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF185]], implicit [[DEF186]], implicit [[DEF187]], implicit [[DEF188]], implicit [[DEF189]], implicit [[DEF190]], implicit [[DEF191]], implicit [[DEF192]], implicit [[DEF193]], implicit [[DEF194]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF195]], implicit [[DEF196]], implicit [[DEF197]], implicit [[DEF198]], implicit [[DEF199]], implicit [[DEF200]], implicit [[DEF201]], implicit [[DEF202]], implicit [[DEF203]], implicit [[DEF204]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF205]], implicit [[DEF206]], implicit [[DEF207]], implicit [[DEF208]], implicit [[DEF209]], implicit [[DEF210]], implicit [[DEF211]], implicit [[DEF212]], implicit [[DEF213]], implicit [[DEF214]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF215]], implicit [[DEF216]], implicit [[DEF217]], implicit [[DEF218]], implicit [[DEF219]], implicit [[DEF220]], implicit [[DEF221]], implicit [[DEF222]], implicit [[DEF223]], implicit [[DEF224]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF225]], implicit [[DEF226]], implicit [[DEF227]], implicit [[DEF228]], implicit [[DEF229]], implicit [[DEF230]], implicit [[DEF231]], implicit [[DEF232]], implicit [[DEF233]], implicit [[DEF234]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF235]], implicit [[DEF236]], implicit [[DEF237]], implicit [[DEF238]], implicit [[DEF239]], implicit [[DEF240]], implicit [[DEF241]], implicit [[DEF242]], implicit [[DEF243]], implicit [[DEF244]]
+ ; GFX90A-NEXT: S_NOP 0, implicit [[DEF245]], implicit [[DEF246]], implicit [[DEF247]], implicit [[DEF248]], implicit [[DEF249]], implicit [[DEF250]], implicit [[DEF251]], implicit [[V_CVT_I32_F64_e32_]], implicit [[V_CVT_I32_F64_e32_1]]
; GFX90A-NEXT: S_ENDPGM 0
bb.0:
>From 5e1d5b495faa3d60499a2c7f9016fee9c8cd3109 Mon Sep 17 00:00:00 2001
From: Lucas Ramirez <lucas.rami at proton.me>
Date: Wed, 25 Jun 2025 19:15:06 +0000
Subject: [PATCH 2/2] Revert spurious formatting changes
---
llvm/lib/Target/AMDGPU/GCNRegPressure.h | 21 +++++++++++++++------
1 file changed, 15 insertions(+), 6 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.h b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
index 32db3b78aaae5..3749b6d1efc63 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.h
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
@@ -31,7 +31,9 @@ class SlotIndex;
struct GCNRegPressure {
enum RegKind { SGPR, VGPR, AGPR, TOTAL_KINDS };
- GCNRegPressure() { clear(); }
+ GCNRegPressure() {
+ clear();
+ }
bool empty() const { return !Value[SGPR] && !Value[VGPR] && !Value[AGPR]; }
@@ -74,7 +76,9 @@ struct GCNRegPressure {
DynamicVGPRBlockSize));
}
- void inc(unsigned Reg, LaneBitmask PrevMask, LaneBitmask NewMask,
+ void inc(unsigned Reg,
+ LaneBitmask PrevMask,
+ LaneBitmask NewMask,
const MachineRegisterInfo &MRI);
bool higherOccupancy(const GCNSubtarget &ST, const GCNRegPressure &O,
@@ -102,7 +106,9 @@ struct GCNRegPressure {
return std::equal(&Value[0], &Value[ValueArraySize], O.Value);
}
- bool operator!=(const GCNRegPressure &O) const { return !(*this == O); }
+ bool operator!=(const GCNRegPressure &O) const {
+ return !(*this == O);
+ }
GCNRegPressure &operator+=(const GCNRegPressure &RHS) {
for (unsigned I = 0; I < ValueArraySize; ++I)
@@ -128,7 +134,8 @@ struct GCNRegPressure {
static unsigned getRegKind(const TargetRegisterClass *RC,
const SIRegisterInfo *STI);
- friend GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2);
+ friend GCNRegPressure max(const GCNRegPressure &P1,
+ const GCNRegPressure &P2);
friend Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST,
unsigned DynamicVGPRBlockSize);
@@ -285,7 +292,9 @@ class GCNRPTracker {
GCNRegPressure getPressure() const { return CurPressure; }
- decltype(LiveRegs) moveLiveRegs() { return std::move(LiveRegs); }
+ decltype(LiveRegs) moveLiveRegs() {
+ return std::move(LiveRegs);
+ }
};
GCNRPTracker::LiveRegSet getLiveRegs(SlotIndex SI, const LiveIntervals &LIS,
@@ -431,7 +440,7 @@ GCNRPTracker::LiveRegSet getLiveRegs(SlotIndex SI, const LiveIntervals &LIS,
/// Note: there is no entry in the map for instructions with empty live reg set
/// Complexity = O(NumVirtRegs * averageLiveRangeSegmentsPerReg * lg(R))
template <typename Range>
-DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet>
+DenseMap<MachineInstr*, GCNRPTracker::LiveRegSet>
getLiveRegMap(Range &&R, bool After, LiveIntervals &LIS) {
std::vector<SlotIndex> Indexes;
Indexes.reserve(std::distance(R.begin(), R.end()));
More information about the llvm-commits
mailing list