[llvm] AMDGPU: Track AGPR pressure (PR #150288)

Wed Jul 23 12:02:24 PDT 2025

https://github.com/Nicholas-Baron created https://github.com/llvm/llvm-project/pull/150288

None

>From d8784774ce71fc037033468f70a4365c4a6580af Mon Sep 17 00:00:00 2001
From: Nicholas Baron <Nicholas.Baron at amd.com>
Date: Mon, 21 Jul 2025 17:31:17 -0500
Subject: [PATCH 1/4] AMDGPU: Begin tracking AGPR pressure

---
 llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp   | 45 ++++++++++++++++---
 llvm/lib/Target/AMDGPU/GCNSchedStrategy.h     | 10 +++--
 llvm/lib/Target/AMDGPU/GCNSubtarget.cpp       |  5 +++
 llvm/lib/Target/AMDGPU/GCNSubtarget.h         | 16 ++++---
 .../Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp    |  9 ++++
 llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h |  8 ++++
 6 files changed, 77 insertions(+), 16 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index a6553083d722b..6d529110676ae 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -86,6 +86,8 @@ void GCNSchedStrategy::initialize(ScheduleDAGMI *DAG) {
       Context->RegClassInfo->getNumAllocatableRegs(&AMDGPU::SGPR_32RegClass);
   VGPRExcessLimit =
       Context->RegClassInfo->getNumAllocatableRegs(&AMDGPU::VGPR_32RegClass);
+  AGPRExcessLimit =
+      Context->RegClassInfo->getNumAllocatableRegs(&AMDGPU::AGPR_32RegClass);
 
   SIMachineFunctionInfo &MFI = *MF->getInfo<SIMachineFunctionInfo>();
   // Set the initial TargetOccupnacy to the maximum occupancy that we can
@@ -98,6 +100,9 @@ void GCNSchedStrategy::initialize(ScheduleDAGMI *DAG) {
   SGPRCriticalLimit =
       std::min(ST.getMaxNumSGPRs(TargetOccupancy, true), SGPRExcessLimit);
 
+  AGPRCriticalLimit =
+      std::min(ST.getMaxNumAGPRs(TargetOccupancy), AGPRExcessLimit);
+
   if (!KnownExcessRP) {
     VGPRCriticalLimit = std::min(
         ST.getMaxNumVGPRs(TargetOccupancy, MFI.getDynamicVGPRBlockSize()),
@@ -201,7 +206,8 @@ void GCNSchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU,
                                      const RegPressureTracker &RPTracker,
                                      const SIRegisterInfo *SRI,
                                      unsigned SGPRPressure,
-                                     unsigned VGPRPressure, bool IsBottomUp) {
+                                     unsigned VGPRPressure,
+                                     unsigned AGPRPressure, bool IsBottomUp) {
   Cand.SU = SU;
   Cand.AtTop = AtTop;
 
@@ -230,6 +236,7 @@ void GCNSchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU,
     Pressure.resize(4, 0);
     Pressure[AMDGPU::RegisterPressureSets::SReg_32] = SGPRPressure;
     Pressure[AMDGPU::RegisterPressureSets::VGPR_32] = VGPRPressure;
+    Pressure[AMDGPU::RegisterPressureSets::AGPR_32] = AGPRPressure;
 
     for (const auto &Diff : DAG->getPressureDiff(SU)) {
       if (!Diff.isValid())
@@ -247,7 +254,9 @@ void GCNSchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU,
     if (Pressure[AMDGPU::RegisterPressureSets::SReg_32] !=
             CheckPressure[AMDGPU::RegisterPressureSets::SReg_32] ||
         Pressure[AMDGPU::RegisterPressureSets::VGPR_32] !=
-            CheckPressure[AMDGPU::RegisterPressureSets::VGPR_32]) {
+            CheckPressure[AMDGPU::RegisterPressureSets::VGPR_32] ||
+        Pressure[AMDGPU::RegisterPressureSets::AGPR_32] !=
+            CheckPressure[AMDGPU::RegisterPressureSets::AGPR_32]) {
       errs() << "Register Pressure is inaccurate when calculated through "
                 "PressureDiff\n"
              << "SGPR got " << Pressure[AMDGPU::RegisterPressureSets::SReg_32]
@@ -255,7 +264,10 @@ void GCNSchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU,
              << CheckPressure[AMDGPU::RegisterPressureSets::SReg_32] << "\n"
              << "VGPR got " << Pressure[AMDGPU::RegisterPressureSets::VGPR_32]
              << ", expected "
-             << CheckPressure[AMDGPU::RegisterPressureSets::VGPR_32] << "\n";
+             << CheckPressure[AMDGPU::RegisterPressureSets::VGPR_32] << "\n"
+             << "AGPR got " << Pressure[AMDGPU::RegisterPressureSets::AGPR_32]
+             << ", expected "
+             << CheckPressure[AMDGPU::RegisterPressureSets::AGPR_32] << "\n";
       report_fatal_error("inaccurate register pressure calculation");
     }
 #endif
@@ -263,6 +275,7 @@ void GCNSchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU,
 
   unsigned NewSGPRPressure = Pressure[AMDGPU::RegisterPressureSets::SReg_32];
   unsigned NewVGPRPressure = Pressure[AMDGPU::RegisterPressureSets::VGPR_32];
+  unsigned NewAGPRPressure = Pressure[AMDGPU::RegisterPressureSets::AGPR_32];
 
   // If two instructions increase the pressure of different register sets
   // by the same amount, the generic scheduler will prefer to schedule the
@@ -272,7 +285,8 @@ void GCNSchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU,
   // only for VGPRs or only for SGPRs.
 
   // FIXME: Better heuristics to determine whether to prefer SGPRs or VGPRs.
-  const unsigned MaxVGPRPressureInc = 16;
+  static constexpr unsigned MaxVGPRPressureInc = 16;
+  bool ShouldTrackAGPRs = AGPRPressure >= AGPRExcessLimit;
   bool ShouldTrackVGPRs = VGPRPressure + MaxVGPRPressureInc >= VGPRExcessLimit;
   bool ShouldTrackSGPRs = !ShouldTrackVGPRs && SGPRPressure >= SGPRExcessLimit;
 
@@ -291,6 +305,12 @@ void GCNSchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU,
     Cand.RPDelta.Excess.setUnitInc(NewVGPRPressure - VGPRExcessLimit);
   }
 
+  if (ShouldTrackAGPRs && NewAGPRPressure >= AGPRPressure) {
+    HasHighPressure = true;
+    Cand.RPDelta.Excess = PressureChange(AMDGPU::RegisterPressureSets::AGPR_32);
+    Cand.RPDelta.Excess.setUnitInc(NewAGPRPressure - AGPRExcessLimit);
+  }
+
   if (ShouldTrackSGPRs && NewSGPRPressure >= SGPRExcessLimit) {
     HasHighPressure = true;
     Cand.RPDelta.Excess = PressureChange(AMDGPU::RegisterPressureSets::SReg_32);
@@ -304,13 +324,19 @@ void GCNSchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU,
 
   int SGPRDelta = NewSGPRPressure - SGPRCriticalLimit;
   int VGPRDelta = NewVGPRPressure - VGPRCriticalLimit;
+  int AGPRDelta = NewAGPRPressure - AGPRCriticalLimit;
 
-  if (SGPRDelta >= 0 || VGPRDelta >= 0) {
+  if (SGPRDelta >= 0 || VGPRDelta >= 0 || AGPRDelta >= 0) {
     HasHighPressure = true;
+    // Prefer VGPRs unless the SGPR or AGPR delta is strictly larger.
     if (SGPRDelta > VGPRDelta) {
       Cand.RPDelta.CriticalMax =
           PressureChange(AMDGPU::RegisterPressureSets::SReg_32);
       Cand.RPDelta.CriticalMax.setUnitInc(SGPRDelta);
+    } else if (AGPRDelta > VGPRDelta) {
+      Cand.RPDelta.CriticalMax =
+          PressureChange(AMDGPU::RegisterPressureSets::AGPR_32);
+      Cand.RPDelta.CriticalMax.setUnitInc(AGPRDelta);
     } else {
       Cand.RPDelta.CriticalMax =
           PressureChange(AMDGPU::RegisterPressureSets::VGPR_32);
@@ -330,16 +356,19 @@ void GCNSchedStrategy::pickNodeFromQueue(SchedBoundary &Zone,
   ArrayRef<unsigned> Pressure = RPTracker.getRegSetPressureAtPos();
   unsigned SGPRPressure = 0;
   unsigned VGPRPressure = 0;
+  unsigned AGPRPressure = 0;
   if (DAG->isTrackingPressure()) {
     if (!GCNTrackers) {
       SGPRPressure = Pressure[AMDGPU::RegisterPressureSets::SReg_32];
       VGPRPressure = Pressure[AMDGPU::RegisterPressureSets::VGPR_32];
+      AGPRPressure = Pressure[AMDGPU::RegisterPressureSets::AGPR_32];
     } else {
       GCNRPTracker *T = IsBottomUp
                             ? static_cast<GCNRPTracker *>(&UpwardTracker)
                             : static_cast<GCNRPTracker *>(&DownwardTracker);
       SGPRPressure = T->getPressure().getSGPRNum();
       VGPRPressure = T->getPressure().getArchVGPRNum();
+      AGPRPressure = T->getPressure().getAGPRNum();
     }
   }
   ReadyQueue &Q = Zone.Available;
@@ -347,7 +376,7 @@ void GCNSchedStrategy::pickNodeFromQueue(SchedBoundary &Zone,
 
     SchedCandidate TryCand(ZonePolicy);
     initCandidate(TryCand, SU, Zone.isTop(), RPTracker, SRI, SGPRPressure,
-                  VGPRPressure, IsBottomUp);
+                  VGPRPressure, AGPRPressure, IsBottomUp);
     // Pass SchedBoundary only when comparing nodes from the same boundary.
     SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr;
     tryCandidate(Cand, TryCand, ZoneArg);
@@ -1331,6 +1360,10 @@ void GCNSchedStage::checkScheduling() {
   unsigned MaxArchVGPRs = std::min(MaxVGPRs, ST.getAddressableNumArchVGPRs());
   unsigned MaxSGPRs = ST.getMaxNumSGPRs(MF);
 
+  unsigned MaxAGPRs = ST.getMaxNumAGPRs(MF, MaxArchVGPRs);
+
+  assert(MaxAGPRs + MaxArchVGPRs == MaxVGPRs);
+
   if (PressureAfter.getVGPRNum(ST.hasGFX90AInsts()) > MaxVGPRs ||
       PressureAfter.getArchVGPRNum() > MaxArchVGPRs ||
       PressureAfter.getAGPRNum() > MaxArchVGPRs ||
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
index 94cd795bbc8f6..73938dc852a37 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
@@ -53,7 +53,8 @@ class GCNSchedStrategy : public GenericScheduler {
   void initCandidate(SchedCandidate &Cand, SUnit *SU, bool AtTop,
                      const RegPressureTracker &RPTracker,
                      const SIRegisterInfo *SRI, unsigned SGPRPressure,
-                     unsigned VGPRPressure, bool IsBottomUp);
+                     unsigned VGPRPressure, unsigned AGPRPressure,
+                     bool IsBottomUp);
 
   std::vector<unsigned> Pressure;
 
@@ -63,6 +64,8 @@ class GCNSchedStrategy : public GenericScheduler {
 
   unsigned VGPRExcessLimit;
 
+  unsigned AGPRExcessLimit;
+
   unsigned TargetOccupancy;
 
   MachineFunction *MF;
@@ -103,6 +106,8 @@ class GCNSchedStrategy : public GenericScheduler {
 
   unsigned VGPRCriticalLimit;
 
+  unsigned AGPRCriticalLimit;
+
   unsigned SGPRLimitBias = 0;
 
   unsigned VGPRLimitBias = 0;
@@ -183,8 +188,7 @@ class ScheduleMetrics {
 };
 
 inline raw_ostream &operator<<(raw_ostream &OS, const ScheduleMetrics &Sm) {
-  dbgs() << "\n Schedule Metric (scaled by "
-         << ScheduleMetrics::ScaleFactor
+  dbgs() << "\n Schedule Metric (scaled by " << ScheduleMetrics::ScaleFactor
          << " ) is: " << Sm.getMetric() << " [ " << Sm.getBubbles() << "/"
          << Sm.getLength() << " ]\n";
   return OS;
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.cpp b/llvm/lib/Target/AMDGPU/GCNSubtarget.cpp
index 7b8f0f44cbe2c..78e8d03fb10f5 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.cpp
@@ -533,6 +533,11 @@ unsigned GCNSubtarget::getMaxNumVGPRs(const Function &F) const {
           getMaxNumVGPRs(Waves.first, DynamicVGPRBlockSize)});
 }
 
+unsigned GCNSubtarget::getMaxNumAGPRs(const Function &F,
+                                      unsigned ArchVGPRs) const {
+  return getTotalNumVGPRs() - ArchVGPRs;
+}
+
 unsigned GCNSubtarget::getMaxNumVGPRs(const MachineFunction &MF) const {
   return getMaxNumVGPRs(MF.getFunction());
 }
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index 268162bcada47..b1421fa4bad88 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -1632,8 +1632,10 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
   /// unit requirement.
   unsigned getMaxNumVGPRs(const Function &F) const;
 
-  unsigned getMaxNumAGPRs(const Function &F) const {
-    return getMaxNumVGPRs(F);
+  unsigned getMaxNumAGPRs(const Function &F, unsigned ArchVGPRs) const;
+
+  unsigned getMaxNumAGPRs(unsigned WavesPerEU) const {
+    return AMDGPU::IsaInfo::getMaxNumAGPRs(this, WavesPerEU);
   }
 
   /// \returns Maximum number of VGPRs that meets number of waves per execution
@@ -1646,13 +1648,13 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
   /// unit requirement.
   unsigned getMaxNumVGPRs(const MachineFunction &MF) const;
 
-  bool isWave32() const {
-    return getWavefrontSize() == 32;
+  unsigned getMaxNumAGPRs(const MachineFunction &MF, unsigned ArchVGPRs) const {
+    return getMaxNumAGPRs(MF.getFunction(), ArchVGPRs);
   }
 
-  bool isWave64() const {
-    return getWavefrontSize() == 64;
-  }
+  bool isWave32() const { return getWavefrontSize() == 32; }
+
+  bool isWave64() const { return getWavefrontSize() == 64; }
 
   /// Returns if the wavesize of this subtarget is known reliable. This is false
   /// only for the a default target-cpu that does not have an explicit
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 77258810dd68c..ea7345007e86e 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -1449,6 +1449,15 @@ unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
   return std::min(MaxNumVGPRs, AddressableNumVGPRs);
 }
 
+unsigned getMaxNumAGPRs(const MCSubtargetInfo *STI, unsigned int WavesPerEU) {
+  assert(WavesPerEU != 0);
+
+  unsigned MaxNumAGPRs = getTotalNumAGPRs() / WavesPerEU;
+  unsigned AddressableNumAGPRs =
+      getTotalNumVGPRs(STI) - getAddressableNumArchVGPRs(STI);
+  return std::min(MaxNumAGPRs, AddressableNumAGPRs);
+}
+
 unsigned getEncodedNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs,
                                  std::optional<bool> EnableWavefrontSize32) {
   return getGranulatedNumRegisterBlocks(
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index c9d2c286bf237..c28454cdab20c 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -352,6 +352,14 @@ unsigned getNumWavesPerEUWithNumVGPRs(unsigned NumVGPRs, unsigned Granule,
                                       unsigned MaxWaves,
                                       unsigned TotalNumVGPRs);
 
+// TODO: Is this accurate for all subtargets?
+// Remove constexpr later
+constexpr unsigned getTotalNumAGPRs() { return 256; }
+
+/// \returns Maximum number of AGPRs that meets given number of waves per
+/// execution unit requirement for given subtarget \p STI.
+unsigned getMaxNumAGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);
+
 /// \returns Occupancy for a given \p SGPRs usage, \p MaxWaves possible, and \p
 /// Gen.
 unsigned getOccupancyWithNumSGPRs(unsigned SGPRs, unsigned MaxWaves,

>From dc3814ecd39feab5137b4a6a1a3ffbbe85506cec Mon Sep 17 00:00:00 2001
From: Nicholas Baron <nicholas.baron.ten at gmail.com>
Date: Wed, 23 Jul 2025 12:44:01 -0500
Subject: [PATCH 2/4] Divide the VGPR count by the number of waves in an EU

---
 llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp | 2 +-
 llvm/lib/Target/AMDGPU/GCNSubtarget.cpp     | 7 ++++---
 llvm/lib/Target/AMDGPU/GCNSubtarget.h       | 8 +++++---
 3 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index 6d529110676ae..0dbb492efa26b 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -1360,7 +1360,7 @@ void GCNSchedStage::checkScheduling() {
   unsigned MaxArchVGPRs = std::min(MaxVGPRs, ST.getAddressableNumArchVGPRs());
   unsigned MaxSGPRs = ST.getMaxNumSGPRs(MF);
 
-  unsigned MaxAGPRs = ST.getMaxNumAGPRs(MF, MaxArchVGPRs);
+  unsigned MaxAGPRs = ST.getMaxNumAGPRs(MF, MaxArchVGPRs, WavesAfter);
 
   assert(MaxAGPRs + MaxArchVGPRs == MaxVGPRs);
 
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.cpp b/llvm/lib/Target/AMDGPU/GCNSubtarget.cpp
index 78e8d03fb10f5..d24b47983bb06 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.cpp
@@ -533,9 +533,10 @@ unsigned GCNSubtarget::getMaxNumVGPRs(const Function &F) const {
           getMaxNumVGPRs(Waves.first, DynamicVGPRBlockSize)});
 }
 
-unsigned GCNSubtarget::getMaxNumAGPRs(const Function &F,
-                                      unsigned ArchVGPRs) const {
-  return getTotalNumVGPRs() - ArchVGPRs;
+unsigned GCNSubtarget::getMaxNumAGPRs(const Function &F, unsigned ArchVGPRs,
+                                      unsigned WavesPerEU) const {
+  assert(WavesPerEU > 0);
+  return (getTotalNumVGPRs() - ArchVGPRs) / WavesPerEU;
 }
 
 unsigned GCNSubtarget::getMaxNumVGPRs(const MachineFunction &MF) const {
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index b1421fa4bad88..6387ade234ffe 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -1632,7 +1632,8 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
   /// unit requirement.
   unsigned getMaxNumVGPRs(const Function &F) const;
 
-  unsigned getMaxNumAGPRs(const Function &F, unsigned ArchVGPRs) const;
+  unsigned getMaxNumAGPRs(const Function &F, unsigned ArchVGPRs,
+                          unsigned WavesPerEU) const;
 
   unsigned getMaxNumAGPRs(unsigned WavesPerEU) const {
     return AMDGPU::IsaInfo::getMaxNumAGPRs(this, WavesPerEU);
@@ -1648,8 +1649,9 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
   /// unit requirement.
   unsigned getMaxNumVGPRs(const MachineFunction &MF) const;
 
-  unsigned getMaxNumAGPRs(const MachineFunction &MF, unsigned ArchVGPRs) const {
-    return getMaxNumAGPRs(MF.getFunction(), ArchVGPRs);
+  unsigned getMaxNumAGPRs(const MachineFunction &MF, unsigned ArchVGPRs,
+                          unsigned WavesPerEU) const {
+    return getMaxNumAGPRs(MF.getFunction(), ArchVGPRs, WavesPerEU);
   }
 
   bool isWave32() const { return getWavefrontSize() == 32; }

>From be6fb1547ac5483a6d1abcf23f48fee898b486fc Mon Sep 17 00:00:00 2001
From: Nicholas Baron <nicholas.baron.ten at gmail.com>
Date: Wed, 23 Jul 2025 12:45:26 -0500
Subject: [PATCH 3/4] Track AGPRs only if we are not tracking VGPRs

---
 llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index 0dbb492efa26b..551f00ab34437 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -286,8 +286,8 @@ void GCNSchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU,
 
   // FIXME: Better heuristics to determine whether to prefer SGPRs or VGPRs.
   static constexpr unsigned MaxVGPRPressureInc = 16;
-  bool ShouldTrackAGPRs = AGPRPressure >= AGPRExcessLimit;
   bool ShouldTrackVGPRs = VGPRPressure + MaxVGPRPressureInc >= VGPRExcessLimit;
+  bool ShouldTrackAGPRs = !ShouldTrackVGPRs && AGPRPressure >= AGPRExcessLimit;
   bool ShouldTrackSGPRs = !ShouldTrackVGPRs && SGPRPressure >= SGPRExcessLimit;
 
   // FIXME: We have to enter REG-EXCESS before we reach the actual threshold

>From cb1e54d92c25a40367fc0b5d0365874b2750653e Mon Sep 17 00:00:00 2001
From: Nicholas Baron <nicholas.baron.ten at gmail.com>
Date: Wed, 23 Jul 2025 12:50:00 -0500
Subject: [PATCH 4/4] Remove unneeded calculations

---
 llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index 551f00ab34437..c554ff664e842 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -1360,10 +1360,6 @@ void GCNSchedStage::checkScheduling() {
   unsigned MaxArchVGPRs = std::min(MaxVGPRs, ST.getAddressableNumArchVGPRs());
   unsigned MaxSGPRs = ST.getMaxNumSGPRs(MF);
 
-  unsigned MaxAGPRs = ST.getMaxNumAGPRs(MF, MaxArchVGPRs, WavesAfter);
-
-  assert(MaxAGPRs + MaxArchVGPRs == MaxVGPRs);
-
   if (PressureAfter.getVGPRNum(ST.hasGFX90AInsts()) > MaxVGPRs ||
       PressureAfter.getArchVGPRNum() > MaxArchVGPRs ||
       PressureAfter.getAGPRNum() > MaxArchVGPRs ||