[llvm] Do not revert schedule to avoid spilling when spilling via copies (PR #165001)
Tony Linthicum via llvm-commits
llvm-commits at lists.llvm.org
Fri Oct 24 08:35:43 PDT 2025
https://github.com/tlinthic created https://github.com/llvm/llvm-project/pull/165001
Do not revert the schedule to avoid spilling if the spilling takes the form of AGPR<->VGPR copies and the new schedule is more profitable. The behavior is currently guarded by a flag that disallows the optimization (0), allows it provided no spills to memory will result (1), or additionally requires an improved schedule (2).
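For review purposes, here is a condensed, standalone C++ sketch of the heuristic the patch implements (the function name, the plain-int signature, and the RequireBetterSchedule parameter are simplifications of mine; the real code reads these values from GCNRegPressure, the strategy's excess limits, and getScheduleMetrics):

  // Simplified sketch of GCNSchedStage::spillsAsCopiesProfitable.
  bool spillCopiesProfitable(int NumVGPR, int MaxVGPR, int NumAVGPR,
                             int NumAGPR, int MaxAGPR, unsigned LengthBefore,
                             unsigned LengthAfter, unsigned CopyRoundTrip,
                             float LatencyScale, bool RequireBetterSchedule) {
    // Copies needed once AV virtuals are (optimistically) assigned to AGPRs.
    int NumSpillCopies = NumVGPR - MaxVGPR - NumAVGPR;
    if (NumSpillCopies <= 0)
      return true; // The VGPR excess folds entirely into AV registers.
    if (NumAGPR + NumSpillCopies > MaxAGPR)
      return false; // Not enough AGPRs; this would spill to memory instead.
    if (!RequireBetterSchedule)
      return true; // Mode 1: keep any schedule that avoids memory spills.
    // Mode 2: charge each copy a scaled read+write latency and require the
    // new schedule to still come out ahead of the reverted one.
    unsigned CopyCost = NumSpillCopies * CopyRoundTrip * LatencyScale;
    return LengthAfter + CopyCost < LengthBefore;
  }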
From 9464153c0ca70818ef10e55a1f3088cdf3d6d05a Mon Sep 17 00:00:00 2001
From: Tony Linthicum <tlinthic at gmail.com>
Date: Fri, 24 Oct 2025 08:16:45 -0500
Subject: [PATCH] Do not revert schedule to avoid spilling with copies
Do not revert the schedule to avoid spilling if the spilling takes the
form of AGPR<->VGPR copies and the new schedule is more profitable. The
behavior is currently guarded by a flag that disallows the optimization
(0), allows it provided no spills to memory will result (1), or
additionally requires an improved schedule (2).
---
llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp | 88 ++++++++++++++++++++-
llvm/lib/Target/AMDGPU/GCNSchedStrategy.h | 20 ++++-
2 files changed, 102 insertions(+), 6 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index 9fbf9e5fe8eeb..be30e15fbaa86 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -75,6 +75,22 @@ static cl::opt<unsigned> PendingQueueLimit(
"Max (Available+Pending) size to inspect pending queue (0 disables)"),
cl::init(256));
+static cl::opt<float> SpillCopyLatencyScale(
+ "amdgpu-spill-copy-latency-scale", cl::Hidden,
+ cl::desc(
+ "Sets the factor by which we scale the latency impact of"
+ " allowing AGPR/VGPR copies to be inserted for spilling."),
+ cl::init(.5));
+
+static cl::opt<unsigned> AllowAVGPRCopiesForSpill(
+ "amdgpu-allow-avgpr-copies-for-spill", cl::Hidden,
+ cl::desc(
+ "Allow the introduction of avgpr copies for vgpr spilling"
+ "rather than reverting the schedule. 0=disallow (default), "
+ "1=allow if no memory spilling, 2=same as 1, but require"
+ "an improved schedule"),
+ cl::init(0));
+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
#define DUMP_MAX_REG_PRESSURE
static cl::opt<bool> PrintMaxRPRegUsageBeforeScheduler(
@@ -108,6 +124,8 @@ void GCNSchedStrategy::initialize(ScheduleDAGMI *DAG) {
Context->RegClassInfo->getNumAllocatableRegs(&AMDGPU::SGPR_32RegClass);
VGPRExcessLimit =
Context->RegClassInfo->getNumAllocatableRegs(&AMDGPU::VGPR_32RegClass);
+ AGPRExcessLimit =
+ Context->RegClassInfo->getNumAllocatableRegs(&AMDGPU::AGPR_32RegClass);
SIMachineFunctionInfo &MFI = *MF->getInfo<SIMachineFunctionInfo>();
// Set the initial TargetOccupnacy to the maximum occupancy that we can
@@ -145,6 +163,7 @@ void GCNSchedStrategy::initialize(ScheduleDAGMI *DAG) {
VGPRCriticalLimit -= std::min(VGPRLimitBias + ErrorMargin, VGPRCriticalLimit);
SGPRExcessLimit -= std::min(SGPRLimitBias + ErrorMargin, SGPRExcessLimit);
VGPRExcessLimit -= std::min(VGPRLimitBias + ErrorMargin, VGPRExcessLimit);
+ AGPRExcessLimit -= std::min(AGPRLimitBias + ErrorMargin, AGPRExcessLimit);
LLVM_DEBUG(dbgs() << "VGPRCriticalLimit = " << VGPRCriticalLimit
<< ", VGPRExcessLimit = " << VGPRExcessLimit
@@ -1232,6 +1251,7 @@ bool UnclusteredHighRPStage::initGCNSchedStage() {
// stage. Temporarily increase occupancy target in the region.
S.SGPRLimitBias = S.HighRPSGPRBias;
S.VGPRLimitBias = S.HighRPVGPRBias;
+ S.AGPRLimitBias = S.HighRPAGPRBias;
if (MFI.getMaxWavesPerEU() > DAG.MinOccupancy)
MFI.increaseOccupancy(MF, ++DAG.MinOccupancy);
@@ -1318,7 +1338,7 @@ void GCNSchedStage::finalizeGCNSchedStage() {
void UnclusteredHighRPStage::finalizeGCNSchedStage() {
SavedMutations.swap(DAG.Mutations);
- S.SGPRLimitBias = S.VGPRLimitBias = 0;
+ S.SGPRLimitBias = S.VGPRLimitBias = S.AGPRLimitBias = 0;
if (DAG.MinOccupancy > InitialOccupancy) {
LLVM_DEBUG(dbgs() << StageID
<< " stage successfully increased occupancy to "
@@ -1739,9 +1759,73 @@ bool MemoryClauseInitialScheduleStage::shouldRevertScheduling(
return mayCauseSpilling(WavesAfter);
}
+bool GCNSchedStage::spillsAsCopiesProfitable() {
+ if (!AllowAVGPRCopiesForSpill)
+ return false;
+
+ unsigned MaxAGPR = S.AGPRExcessLimit;
+ // AGPRs are not supported on this architecture.
+ if (MaxAGPR == 0)
+ return false;
+
+ // For now, only consider introducing copies profitable if occupancy
+ // was already 1.
+ unsigned TargetOccupancy = std::min(
+ S.getTargetOccupancy(), ST.getOccupancyWithWorkGroupSizes(MF).second);
+ unsigned WavesBefore = std::min(
+ TargetOccupancy,
+ PressureBefore.getOccupancy(ST, DAG.MFI.getDynamicVGPRBlockSize()));
+
+ if (WavesBefore != 1)
+ return false;
+
+ // Only allow copies when VGPR pressure is the problem.
+ unsigned MaxSGPR = S.SGPRExcessLimit;
+ unsigned NumSGPR = PressureAfter.getSGPRNum();
+ if (NumSGPR > MaxSGPR)
+ return false;
+
+ unsigned MaxVGPR = S.VGPRExcessLimit;
+ unsigned NumAGPR = PressureAfter.getAGPRNum();
+ unsigned NumVGPR = PressureAfter.getVGPRNum(ST.hasGFX90AInsts());
+ unsigned NumAVGPR = PressureAfter.getAVGPRNum();
+
+ // We are assuming that, in the presence of excessive VGPR requirements,
+ // AVGPR virtuals will be assigned to AGPRs. This is almost certainly too
+ // optimistic, as these can still generate copies, but we cannot know how
+ // many we will get.
+ int NumSpillCopies = (int)NumVGPR - (int)(MaxVGPR + NumAVGPR);
+ if (NumSpillCopies <= 0)
+ return true;
+
+ assert(NumVGPR > MaxVGPR);
+ if (NumAGPR + NumSpillCopies > MaxAGPR)
+ return false;
+
+ if (AllowAVGPRCopiesForSpill == 1)
+ return true;
+
+ ScheduleMetrics MBefore = getScheduleMetrics(DAG.SUnits);
+ auto LengthBefore = MBefore.getLength();
+ ScheduleMetrics MAfter = getScheduleMetrics(DAG);
+ auto LengthAfter = MAfter.getLength();
+
+ const TargetSchedModel &SM = ST.getInstrInfo()->getSchedModel();
+ unsigned AccReadLatency =
+ SM.computeInstrLatency(AMDGPU::V_ACCVGPR_READ_B32_e64);
+ unsigned AccWriteLatency =
+ SM.computeInstrLatency(AMDGPU::V_ACCVGPR_WRITE_B32_e64);
+ unsigned SpillCopyLatency =
+ NumSpillCopies * (AccReadLatency + AccWriteLatency);
+
+ SpillCopyLatency *= SpillCopyLatencyScale;
+
+ return (LengthAfter + SpillCopyLatency) < LengthBefore;
+}
+
bool GCNSchedStage::mayCauseSpilling(unsigned WavesAfter) {
if (WavesAfter <= MFI.getMinWavesPerEU() && isRegionWithExcessRP() &&
- !PressureAfter.less(MF, PressureBefore)) {
+ !PressureAfter.less(MF, PressureBefore) && !spillsAsCopiesProfitable()) {
LLVM_DEBUG(dbgs() << "New pressure will result in more spilling.\n");
return true;
}
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
index 975781fea9452..0496cdf7ff24d 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
@@ -74,10 +74,6 @@ class GCNSchedStrategy : public GenericScheduler {
std::vector<unsigned> MaxPressure;
- unsigned SGPRExcessLimit;
-
- unsigned VGPRExcessLimit;
-
unsigned TargetOccupancy;
MachineFunction *MF;
@@ -114,14 +110,25 @@ class GCNSchedStrategy : public GenericScheduler {
// Bias for VGPR limits under a high register pressure.
const unsigned HighRPVGPRBias = 7;
+ // Bias for AGPR limits under a high register pressure.
+ const unsigned HighRPAGPRBias = 7;
+
unsigned SGPRCriticalLimit;
unsigned VGPRCriticalLimit;
+ unsigned SGPRExcessLimit;
+
+ unsigned VGPRExcessLimit;
+
+ unsigned AGPRExcessLimit;
+
unsigned SGPRLimitBias = 0;
unsigned VGPRLimitBias = 0;
+ unsigned AGPRLimitBias = 0;
+
GCNSchedStrategy(const MachineSchedContext *C);
SUnit *pickNode(bool &IsTopNode) override;
@@ -394,6 +401,11 @@ class GCNSchedStage {
// The region number this stage is currently working on
unsigned getRegionIdx() { return RegionIdx; }
+ // Returns true if spilling caused by the new schedule will be in
+ // the form of AGPR <-> VGPR copies and adding those copies to
+ // the new schedule is still better than reverting.
+ bool spillsAsCopiesProfitable();
+
// Returns true if the new schedule may result in more spilling.
bool mayCauseSpilling(unsigned WavesAfter);
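As a rough worked example of the mode-2 check (the cycle counts below are made-up placeholders, not values from any real scheduler model): with 8 spill copies and an accvgpr read plus write latency of 4 cycles, the scaled cost at the default -amdgpu-spill-copy-latency-scale=0.5 is 8 * 4 * 0.5 = 16, so the new schedule is kept only if its metric length beats the reverted schedule's by more than 16. The path can be exercised by passing -amdgpu-allow-avgpr-copies-for-spill=1 (or 2) to llc.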