[llvm] AMDGPU: Track AGPR pressure (PR #150288)

Thu Aug 14 15:58:11 PDT 2025

https://github.com/Nicholas-Baron updated https://github.com/llvm/llvm-project/pull/150288

>From 3909b876de17b456edc8aa2a39318f2bffddd712 Mon Sep 17 00:00:00 2001
From: Nicholas Baron <Nicholas.Baron at amd.com>
Date: Mon, 21 Jul 2025 17:31:17 -0500
Subject: [PATCH 1/4] AMDGPU: Begin tracking AGPR pressure

Reuse VGPR helpers for AGPR calculations, since in the common case of
unified register files, they are somewhat interchangeable
---
 llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp   | 45 ++++++++++++++++---
 llvm/lib/Target/AMDGPU/GCNSchedStrategy.h     | 10 +++--
 llvm/lib/Target/AMDGPU/GCNSubtarget.cpp       |  6 +++
 llvm/lib/Target/AMDGPU/GCNSubtarget.h         | 18 +++++---
 .../Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp    | 14 ++++++
 llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h |  8 ++++
 6 files changed, 85 insertions(+), 16 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index a6553083d722b..0dbb492efa26b 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -86,6 +86,8 @@ void GCNSchedStrategy::initialize(ScheduleDAGMI *DAG) {
       Context->RegClassInfo->getNumAllocatableRegs(&AMDGPU::SGPR_32RegClass);
   VGPRExcessLimit =
       Context->RegClassInfo->getNumAllocatableRegs(&AMDGPU::VGPR_32RegClass);
+  AGPRExcessLimit =
+      Context->RegClassInfo->getNumAllocatableRegs(&AMDGPU::AGPR_32RegClass);
 
   SIMachineFunctionInfo &MFI = *MF->getInfo<SIMachineFunctionInfo>();
   // Set the initial TargetOccupnacy to the maximum occupancy that we can
@@ -98,6 +100,9 @@ void GCNSchedStrategy::initialize(ScheduleDAGMI *DAG) {
   SGPRCriticalLimit =
       std::min(ST.getMaxNumSGPRs(TargetOccupancy, true), SGPRExcessLimit);
 
+  AGPRCriticalLimit =
+      std::min(ST.getMaxNumAGPRs(TargetOccupancy), AGPRExcessLimit);
+
   if (!KnownExcessRP) {
     VGPRCriticalLimit = std::min(
         ST.getMaxNumVGPRs(TargetOccupancy, MFI.getDynamicVGPRBlockSize()),
@@ -201,7 +206,8 @@ void GCNSchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU,
                                      const RegPressureTracker &RPTracker,
                                      const SIRegisterInfo *SRI,
                                      unsigned SGPRPressure,
-                                     unsigned VGPRPressure, bool IsBottomUp) {
+                                     unsigned VGPRPressure,
+                                     unsigned AGPRPressure, bool IsBottomUp) {
   Cand.SU = SU;
   Cand.AtTop = AtTop;
 
@@ -230,6 +236,7 @@ void GCNSchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU,
     Pressure.resize(4, 0);
     Pressure[AMDGPU::RegisterPressureSets::SReg_32] = SGPRPressure;
     Pressure[AMDGPU::RegisterPressureSets::VGPR_32] = VGPRPressure;
+    Pressure[AMDGPU::RegisterPressureSets::AGPR_32] = AGPRPressure;
 
     for (const auto &Diff : DAG->getPressureDiff(SU)) {
       if (!Diff.isValid())
@@ -247,7 +254,9 @@ void GCNSchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU,
     if (Pressure[AMDGPU::RegisterPressureSets::SReg_32] !=
             CheckPressure[AMDGPU::RegisterPressureSets::SReg_32] ||
         Pressure[AMDGPU::RegisterPressureSets::VGPR_32] !=
-            CheckPressure[AMDGPU::RegisterPressureSets::VGPR_32]) {
+            CheckPressure[AMDGPU::RegisterPressureSets::VGPR_32] ||
+        Pressure[AMDGPU::RegisterPressureSets::AGPR_32] !=
+            CheckPressure[AMDGPU::RegisterPressureSets::AGPR_32]) {
       errs() << "Register Pressure is inaccurate when calculated through "
                 "PressureDiff\n"
              << "SGPR got " << Pressure[AMDGPU::RegisterPressureSets::SReg_32]
@@ -255,7 +264,10 @@ void GCNSchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU,
              << CheckPressure[AMDGPU::RegisterPressureSets::SReg_32] << "\n"
              << "VGPR got " << Pressure[AMDGPU::RegisterPressureSets::VGPR_32]
              << ", expected "
-             << CheckPressure[AMDGPU::RegisterPressureSets::VGPR_32] << "\n";
+             << CheckPressure[AMDGPU::RegisterPressureSets::VGPR_32] << "\n"
+             << "AGPR got " << Pressure[AMDGPU::RegisterPressureSets::AGPR_32]
+             << ", expected "
+             << CheckPressure[AMDGPU::RegisterPressureSets::AGPR_32] << "\n";
       report_fatal_error("inaccurate register pressure calculation");
     }
 #endif
@@ -263,6 +275,7 @@ void GCNSchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU,
 
   unsigned NewSGPRPressure = Pressure[AMDGPU::RegisterPressureSets::SReg_32];
   unsigned NewVGPRPressure = Pressure[AMDGPU::RegisterPressureSets::VGPR_32];
+  unsigned NewAGPRPressure = Pressure[AMDGPU::RegisterPressureSets::AGPR_32];
 
   // If two instructions increase the pressure of different register sets
   // by the same amount, the generic scheduler will prefer to schedule the
@@ -272,7 +285,8 @@ void GCNSchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU,
   // only for VGPRs or only for SGPRs.
 
   // FIXME: Better heuristics to determine whether to prefer SGPRs or VGPRs.
-  const unsigned MaxVGPRPressureInc = 16;
+  static constexpr unsigned MaxVGPRPressureInc = 16;
+  bool ShouldTrackAGPRs = AGPRPressure >= AGPRExcessLimit;
   bool ShouldTrackVGPRs = VGPRPressure + MaxVGPRPressureInc >= VGPRExcessLimit;
   bool ShouldTrackSGPRs = !ShouldTrackVGPRs && SGPRPressure >= SGPRExcessLimit;
 
@@ -291,6 +305,12 @@ void GCNSchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU,
     Cand.RPDelta.Excess.setUnitInc(NewVGPRPressure - VGPRExcessLimit);
   }
 
+  if (ShouldTrackAGPRs && NewAGPRPressure >= AGPRPressure) {
+    HasHighPressure = true;
+    Cand.RPDelta.Excess = PressureChange(AMDGPU::RegisterPressureSets::AGPR_32);
+    Cand.RPDelta.Excess.setUnitInc(NewAGPRPressure - AGPRExcessLimit);
+  }
+
   if (ShouldTrackSGPRs && NewSGPRPressure >= SGPRExcessLimit) {
     HasHighPressure = true;
     Cand.RPDelta.Excess = PressureChange(AMDGPU::RegisterPressureSets::SReg_32);
@@ -304,13 +324,19 @@ void GCNSchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU,
 
   int SGPRDelta = NewSGPRPressure - SGPRCriticalLimit;
   int VGPRDelta = NewVGPRPressure - VGPRCriticalLimit;
+  int AGPRDelta = NewAGPRPressure - AGPRCriticalLimit;
 
-  if (SGPRDelta >= 0 || VGPRDelta >= 0) {
+  if (SGPRDelta >= 0 || VGPRDelta >= 0 || AGPRDelta >= 0) {
     HasHighPressure = true;
+    // Prioritize reducing the VGPRDelta if both are >= 0
     if (SGPRDelta > VGPRDelta) {
       Cand.RPDelta.CriticalMax =
           PressureChange(AMDGPU::RegisterPressureSets::SReg_32);
       Cand.RPDelta.CriticalMax.setUnitInc(SGPRDelta);
+    } else if (AGPRDelta > VGPRDelta) {
+      Cand.RPDelta.CriticalMax =
+          PressureChange(AMDGPU::RegisterPressureSets::AGPR_32);
+      Cand.RPDelta.CriticalMax.setUnitInc(AGPRDelta);
     } else {
       Cand.RPDelta.CriticalMax =
           PressureChange(AMDGPU::RegisterPressureSets::VGPR_32);
@@ -330,16 +356,19 @@ void GCNSchedStrategy::pickNodeFromQueue(SchedBoundary &Zone,
   ArrayRef<unsigned> Pressure = RPTracker.getRegSetPressureAtPos();
   unsigned SGPRPressure = 0;
   unsigned VGPRPressure = 0;
+  unsigned AGPRPressure = 0;
   if (DAG->isTrackingPressure()) {
     if (!GCNTrackers) {
       SGPRPressure = Pressure[AMDGPU::RegisterPressureSets::SReg_32];
       VGPRPressure = Pressure[AMDGPU::RegisterPressureSets::VGPR_32];
+      AGPRPressure = Pressure[AMDGPU::RegisterPressureSets::AGPR_32];
     } else {
       GCNRPTracker *T = IsBottomUp
                             ? static_cast<GCNRPTracker *>(&UpwardTracker)
                             : static_cast<GCNRPTracker *>(&DownwardTracker);
       SGPRPressure = T->getPressure().getSGPRNum();
       VGPRPressure = T->getPressure().getArchVGPRNum();
+      AGPRPressure = T->getPressure().getAGPRNum();
     }
   }
   ReadyQueue &Q = Zone.Available;
@@ -347,7 +376,7 @@ void GCNSchedStrategy::pickNodeFromQueue(SchedBoundary &Zone,
 
     SchedCandidate TryCand(ZonePolicy);
     initCandidate(TryCand, SU, Zone.isTop(), RPTracker, SRI, SGPRPressure,
-                  VGPRPressure, IsBottomUp);
+                  VGPRPressure, AGPRPressure, IsBottomUp);
     // Pass SchedBoundary only when comparing nodes from the same boundary.
     SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr;
     tryCandidate(Cand, TryCand, ZoneArg);
@@ -1331,6 +1360,10 @@ void GCNSchedStage::checkScheduling() {
   unsigned MaxArchVGPRs = std::min(MaxVGPRs, ST.getAddressableNumArchVGPRs());
   unsigned MaxSGPRs = ST.getMaxNumSGPRs(MF);
 
+  unsigned MaxAGPRs = ST.getMaxNumAGPRs(MF, MaxArchVGPRs, WavesAfter);
+
+  assert(MaxAGPRs + MaxArchVGPRs == MaxVGPRs);
+
   if (PressureAfter.getVGPRNum(ST.hasGFX90AInsts()) > MaxVGPRs ||
       PressureAfter.getArchVGPRNum() > MaxArchVGPRs ||
       PressureAfter.getAGPRNum() > MaxArchVGPRs ||
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
index 94cd795bbc8f6..73938dc852a37 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
@@ -53,7 +53,8 @@ class GCNSchedStrategy : public GenericScheduler {
   void initCandidate(SchedCandidate &Cand, SUnit *SU, bool AtTop,
                      const RegPressureTracker &RPTracker,
                      const SIRegisterInfo *SRI, unsigned SGPRPressure,
-                     unsigned VGPRPressure, bool IsBottomUp);
+                     unsigned VGPRPressure, unsigned AGPRPressure,
+                     bool IsBottomUp);
 
   std::vector<unsigned> Pressure;
 
@@ -63,6 +64,8 @@ class GCNSchedStrategy : public GenericScheduler {
 
   unsigned VGPRExcessLimit;
 
+  unsigned AGPRExcessLimit;
+
   unsigned TargetOccupancy;
 
   MachineFunction *MF;
@@ -103,6 +106,8 @@ class GCNSchedStrategy : public GenericScheduler {
 
   unsigned VGPRCriticalLimit;
 
+  unsigned AGPRCriticalLimit;
+
   unsigned SGPRLimitBias = 0;
 
   unsigned VGPRLimitBias = 0;
@@ -183,8 +188,7 @@ class ScheduleMetrics {
 };
 
 inline raw_ostream &operator<<(raw_ostream &OS, const ScheduleMetrics &Sm) {
-  dbgs() << "\n Schedule Metric (scaled by "
-         << ScheduleMetrics::ScaleFactor
+  dbgs() << "\n Schedule Metric (scaled by " << ScheduleMetrics::ScaleFactor
          << " ) is: " << Sm.getMetric() << " [ " << Sm.getBubbles() << "/"
          << Sm.getLength() << " ]\n";
   return OS;
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.cpp b/llvm/lib/Target/AMDGPU/GCNSubtarget.cpp
index 7b8f0f44cbe2c..d24b47983bb06 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.cpp
@@ -533,6 +533,12 @@ unsigned GCNSubtarget::getMaxNumVGPRs(const Function &F) const {
           getMaxNumVGPRs(Waves.first, DynamicVGPRBlockSize)});
 }
 
+unsigned GCNSubtarget::getMaxNumAGPRs(const Function &F, unsigned ArchVGPRs,
+                                      unsigned WavesPerEU) const {
+  assert(WavesPerEU > 0);
+  return (getTotalNumVGPRs() - ArchVGPRs) / WavesPerEU;
+}
+
 unsigned GCNSubtarget::getMaxNumVGPRs(const MachineFunction &MF) const {
   return getMaxNumVGPRs(MF.getFunction());
 }
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index cbc517d3da680..898847d3eb379 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -1635,8 +1635,11 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
   /// unit requirement.
   unsigned getMaxNumVGPRs(const Function &F) const;
 
-  unsigned getMaxNumAGPRs(const Function &F) const {
-    return getMaxNumVGPRs(F);
+  unsigned getMaxNumAGPRs(const Function &F, unsigned ArchVGPRs,
+                          unsigned WavesPerEU) const;
+
+  unsigned getMaxNumAGPRs(unsigned WavesPerEU) const {
+    return AMDGPU::IsaInfo::getMaxNumAGPRs(this, WavesPerEU);
   }
 
   /// \returns Maximum number of VGPRs that meets number of waves per execution
@@ -1649,13 +1652,14 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
   /// unit requirement.
   unsigned getMaxNumVGPRs(const MachineFunction &MF) const;
 
-  bool isWave32() const {
-    return getWavefrontSize() == 32;
+  unsigned getMaxNumAGPRs(const MachineFunction &MF, unsigned ArchVGPRs,
+                          unsigned WavesPerEU) const {
+    return getMaxNumAGPRs(MF.getFunction(), ArchVGPRs, WavesPerEU);
   }
 
-  bool isWave64() const {
-    return getWavefrontSize() == 64;
-  }
+  bool isWave32() const { return getWavefrontSize() == 32; }
+
+  bool isWave64() const { return getWavefrontSize() == 64; }
 
   /// Returns if the wavesize of this subtarget is known reliable. This is false
   /// only for the a default target-cpu that does not have an explicit
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index b5b3cc97569ed..c66159111903c 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -1472,6 +1472,20 @@ unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
   return std::min(MaxNumVGPRs, AddressableNumVGPRs);
 }
 
+unsigned getMaxNumAGPRs(const MCSubtargetInfo *STI, unsigned int WavesPerEU) {
+  if (!STI->getFeatureBits().test(FeatureMAIInsts))
+    return 0;
+
+  assert(WavesPerEU != 0);
+
+  assert(!STI->getFeatureBits().test(FeatureDynamicVGPR));
+
+  unsigned MaxNumAGPRs =
+      alignTo(getTotalNumVGPRs(STI) / WavesPerEU, getVGPRAllocGranule(STI, 0));
+  unsigned AddressableNumAGPRs = getAddressableNumArchVGPRs(STI);
+  return std::min(MaxNumAGPRs, AddressableNumAGPRs);
+}
+
 unsigned getEncodedNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs,
                                  std::optional<bool> EnableWavefrontSize32) {
   return getGranulatedNumRegisterBlocks(
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index c09a9d694f3d8..c22bc84c5541f 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -352,6 +352,14 @@ unsigned getNumWavesPerEUWithNumVGPRs(unsigned NumVGPRs, unsigned Granule,
                                       unsigned MaxWaves,
                                       unsigned TotalNumVGPRs);
 
+// TODO: Is this accurate for all subtargets?
+// Remove constexpr later
+constexpr unsigned getTotalNumAGPRs() { return 256; }
+
+/// \returns Maximum number of AGPRs that meets given number of waves per
+/// execution unit requirement for given subtarget \p STI.
+unsigned getMaxNumAGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);
+
 /// \returns Occupancy for a given \p SGPRs usage, \p MaxWaves possible, and \p
 /// Gen.
 unsigned getOccupancyWithNumSGPRs(unsigned SGPRs, unsigned MaxWaves,

>From acb3768b2ca75d7373032fa9b19642f9d18f560a Mon Sep 17 00:00:00 2001
From: Nicholas Baron <nicholas.baron.ten at gmail.com>
Date: Wed, 23 Jul 2025 12:45:26 -0500
Subject: [PATCH 2/4] AMDGPU: Prioritize VGPRs over AGPRs over SGPRs

Track AGPRs only if we are not tracking VGPRs
Prioritize AGPR pressure over SGPR pressure
---
 llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index 0dbb492efa26b..e3b7093d462d5 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -286,9 +286,10 @@ void GCNSchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU,
 
   // FIXME: Better heuristics to determine whether to prefer SGPRs or VGPRs.
   static constexpr unsigned MaxVGPRPressureInc = 16;
-  bool ShouldTrackAGPRs = AGPRPressure >= AGPRExcessLimit;
   bool ShouldTrackVGPRs = VGPRPressure + MaxVGPRPressureInc >= VGPRExcessLimit;
-  bool ShouldTrackSGPRs = !ShouldTrackVGPRs && SGPRPressure >= SGPRExcessLimit;
+  bool ShouldTrackAGPRs = !ShouldTrackVGPRs && AGPRPressure >= AGPRExcessLimit;
+  bool ShouldTrackSGPRs =
+      !ShouldTrackVGPRs && !ShouldTrackAGPRs && SGPRPressure >= SGPRExcessLimit;
 
   // FIXME: We have to enter REG-EXCESS before we reach the actual threshold
   // to increase the likelihood we don't go over the limits.  We should improve
@@ -329,7 +330,7 @@ void GCNSchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU,
   if (SGPRDelta >= 0 || VGPRDelta >= 0 || AGPRDelta >= 0) {
     HasHighPressure = true;
     // Prioritize reducing the VGPRDelta if both are >= 0
-    if (SGPRDelta > VGPRDelta) {
+    if (SGPRDelta > VGPRDelta && SGPRDelta > AGPRDelta) {
       Cand.RPDelta.CriticalMax =
           PressureChange(AMDGPU::RegisterPressureSets::SReg_32);
       Cand.RPDelta.CriticalMax.setUnitInc(SGPRDelta);

>From d4fd1c1ab3346fef420733330b44f79c9216cfd3 Mon Sep 17 00:00:00 2001
From: Nicholas Baron <nicholas.baron.ten at gmail.com>
Date: Wed, 23 Jul 2025 12:50:00 -0500
Subject: [PATCH 3/4] Remove unneeded calculations and rename functions

---
 llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp     | 4 ----
 llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 2 ++
 llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h   | 7 +++----
 3 files changed, 5 insertions(+), 8 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index e3b7093d462d5..f7353520cf592 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -1361,10 +1361,6 @@ void GCNSchedStage::checkScheduling() {
   unsigned MaxArchVGPRs = std::min(MaxVGPRs, ST.getAddressableNumArchVGPRs());
   unsigned MaxSGPRs = ST.getMaxNumSGPRs(MF);
 
-  unsigned MaxAGPRs = ST.getMaxNumAGPRs(MF, MaxArchVGPRs, WavesAfter);
-
-  assert(MaxAGPRs + MaxArchVGPRs == MaxVGPRs);
-
   if (PressureAfter.getVGPRNum(ST.hasGFX90AInsts()) > MaxVGPRs ||
       PressureAfter.getArchVGPRNum() > MaxArchVGPRs ||
       PressureAfter.getAGPRNum() > MaxArchVGPRs ||
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index c66159111903c..de225511372e9 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -1486,6 +1486,8 @@ unsigned getMaxNumAGPRs(const MCSubtargetInfo *STI, unsigned int WavesPerEU) {
   return std::min(MaxNumAGPRs, AddressableNumAGPRs);
 }
 
+unsigned getAddressableNumAGPRs(const MCSubtargetInfo *STI) { return 256; }
+
 unsigned getEncodedNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs,
                                  std::optional<bool> EnableWavefrontSize32) {
   return getGranulatedNumRegisterBlocks(
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index c22bc84c5541f..8f4b8afb8d750 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -352,14 +352,13 @@ unsigned getNumWavesPerEUWithNumVGPRs(unsigned NumVGPRs, unsigned Granule,
                                       unsigned MaxWaves,
                                       unsigned TotalNumVGPRs);
 
-// TODO: Is this accurate for all subtargets?
-// Remove constexpr later
-constexpr unsigned getTotalNumAGPRs() { return 256; }
-
 /// \returns Maximum number of AGPRs that meets given number of waves per
 /// execution unit requirement for given subtarget \p STI.
 unsigned getMaxNumAGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);
 
+/// \returns Addressable number of AGPRs for a given subtarget \p STI.
+unsigned getAddressableNumAGPRs(const MCSubtargetInfo *STI);
+
 /// \returns Occupancy for a given \p SGPRs usage, \p MaxWaves possible, and \p
 /// Gen.
 unsigned getOccupancyWithNumSGPRs(unsigned SGPRs, unsigned MaxWaves,

>From fa490f377898200710da7dacd1a53fa0955e9741 Mon Sep 17 00:00:00 2001
From: Nicholas Baron <nicholas.baron.ten at gmail.com>
Date: Thu, 14 Aug 2025 17:57:41 -0500
Subject: [PATCH 4/4] AMDGPU: Remove unused helpers

---
 llvm/lib/Target/AMDGPU/GCNSubtarget.cpp | 6 ------
 llvm/lib/Target/AMDGPU/GCNSubtarget.h   | 8 --------
 2 files changed, 14 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.cpp b/llvm/lib/Target/AMDGPU/GCNSubtarget.cpp
index d24b47983bb06..7b8f0f44cbe2c 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.cpp
@@ -533,12 +533,6 @@ unsigned GCNSubtarget::getMaxNumVGPRs(const Function &F) const {
           getMaxNumVGPRs(Waves.first, DynamicVGPRBlockSize)});
 }
 
-unsigned GCNSubtarget::getMaxNumAGPRs(const Function &F, unsigned ArchVGPRs,
-                                      unsigned WavesPerEU) const {
-  assert(WavesPerEU > 0);
-  return (getTotalNumVGPRs() - ArchVGPRs) / WavesPerEU;
-}
-
 unsigned GCNSubtarget::getMaxNumVGPRs(const MachineFunction &MF) const {
   return getMaxNumVGPRs(MF.getFunction());
 }
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index 898847d3eb379..bc2468ccf3c55 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -1635,9 +1635,6 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
   /// unit requirement.
   unsigned getMaxNumVGPRs(const Function &F) const;
 
-  unsigned getMaxNumAGPRs(const Function &F, unsigned ArchVGPRs,
-                          unsigned WavesPerEU) const;
-
   unsigned getMaxNumAGPRs(unsigned WavesPerEU) const {
     return AMDGPU::IsaInfo::getMaxNumAGPRs(this, WavesPerEU);
   }
@@ -1652,11 +1649,6 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
   /// unit requirement.
   unsigned getMaxNumVGPRs(const MachineFunction &MF) const;
 
-  unsigned getMaxNumAGPRs(const MachineFunction &MF, unsigned ArchVGPRs,
-                          unsigned WavesPerEU) const {
-    return getMaxNumAGPRs(MF.getFunction(), ArchVGPRs, WavesPerEU);
-  }
-
   bool isWave32() const { return getWavefrontSize() == 32; }
 
   bool isWave64() const { return getWavefrontSize() == 64; }