[llvm] [AMDGPU] Replace dynamic VGPR feature with attribute (PR #133444)

via llvm-commits llvm-commits at lists.llvm.org
Fri Mar 28 07:01:53 PDT 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-amdgpu

Author: Diana Picus (rovka)

<details>
<summary>Changes</summary>

Use a function attribute (amdgpu-dynamic-vgpr) instead of a subtarget feature, as requested in #<!-- -->130030.

---

Patch is 99.47 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/133444.diff


30 Files Affected:

- (modified) llvm/docs/AMDGPUUsage.rst (+5-5) 
- (modified) llvm/lib/Target/AMDGPU/AMDGPU.td (-6) 
- (modified) llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp (+9-9) 
- (modified) llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp (+5-1) 
- (modified) llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp (+15-12) 
- (modified) llvm/lib/Target/AMDGPU/GCNNSAReassign.cpp (+1-1) 
- (modified) llvm/lib/Target/AMDGPU/GCNRegPressure.cpp (+8-6) 
- (modified) llvm/lib/Target/AMDGPU/GCNRegPressure.h (+6-6) 
- (modified) llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp (+20-18) 
- (modified) llvm/lib/Target/AMDGPU/GCNSubtarget.cpp (+14-6) 
- (modified) llvm/lib/Target/AMDGPU/GCNSubtarget.h (+9-12) 
- (modified) llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCExpr.cpp (+2-1) 
- (modified) llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCExpr.h (+1) 
- (modified) llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp (+1-1) 
- (modified) llvm/lib/Target/AMDGPU/SIFrameLowering.cpp (+3-3) 
- (modified) llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp (+4-4) 
- (modified) llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp (+4) 
- (modified) llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h (+8) 
- (modified) llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp (+1-1) 
- (modified) llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp (+17-17) 
- (modified) llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h (+6-6) 
- (modified) llvm/test/CodeGen/AMDGPU/dynamic-vgpr-reserve-stack-for-cwsr.ll (+5-5) 
- (modified) llvm/test/CodeGen/AMDGPU/machine-function-info-cwsr.ll (+2-2) 
- (added) llvm/test/CodeGen/AMDGPU/pal-metadata-3.0-callable-dvgpr.ll (+305) 
- (modified) llvm/test/CodeGen/AMDGPU/pal-metadata-3.0-callable.ll (-2) 
- (added) llvm/test/CodeGen/AMDGPU/pal-metadata-3.0-dvgpr.ll (+205) 
- (modified) llvm/test/CodeGen/AMDGPU/pal-metadata-3.0.ll (+4-9) 
- (added) llvm/test/CodeGen/AMDGPU/release-vgprs-gfx12-dvgpr.mir (+340) 
- (modified) llvm/test/CodeGen/AMDGPU/release-vgprs-gfx12.mir (+19-38) 
- (modified) llvm/unittests/Target/AMDGPU/AMDGPUUnitTests.cpp (+19-19) 


``````````diff
diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst
index ab507e3714ebb..daedc324c5d0a 100644
--- a/llvm/docs/AMDGPUUsage.rst
+++ b/llvm/docs/AMDGPUUsage.rst
@@ -759,11 +759,6 @@ For example:
                                                   performant than code generated for XNACK replay
                                                   disabled.
 
-     dynamic-vgpr    TODO                         Represents the "Dynamic VGPR" hardware mode, introduced in GFX12.
-                                                  Waves launched in this mode may allocate or deallocate the VGPRs
-                                                  using dedicated instructions, but may not send the DEALLOC_VGPRS
-                                                  message.
-
      =============== ============================ ==================================================
 
 .. _amdgpu-target-id:
@@ -1743,6 +1738,11 @@ The AMDGPU backend supports the following LLVM IR attributes.
 
      "amdgpu-promote-alloca-to-vector-vgpr-ratio"     Ratio of VGPRs to budget for promoting alloca to vectors.
 
+     "amdgpu-dynamic-vgpr"                            Represents the "Dynamic VGPR" hardware mode, introduced in GFX12.
+                                                      Waves launched in this mode may allocate or deallocate the VGPRs
+                                                      using dedicated instructions, but may not send the DEALLOC_VGPRS
+                                                      message.
+
      ================================================ ==========================================================
 
 Calling Conventions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index 84619dd656f35..8bd9fcd8e04e7 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -1257,12 +1257,6 @@ def FeatureXF32Insts : SubtargetFeature<"xf32-insts",
    "v_mfma_f32_16x16x8_xf32 and v_mfma_f32_32x32x4_xf32"
  >;
 
-def FeatureDynamicVGPR : SubtargetFeature <"dynamic-vgpr",
-  "DynamicVGPR",
-  "true",
-  "Enable dynamic VGPR mode"
->;
-
 def FeatureDynamicVGPRBlockSize32 : SubtargetFeature<"dynamic-vgpr-block-size-32",
   "DynamicVGPRBlockSize32",
   "true",
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index 800e2b9c0e657..8e3f5aef8bd05 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -452,13 +452,13 @@ void AMDGPUAsmPrinter::validateMCResourceInfo(Function &F) {
       uint64_t TotalNumVgpr =
           getTotalNumVGPRs(STM.hasGFX90AInsts(), NumAgpr, NumVgpr);
       uint64_t NumVGPRsForWavesPerEU = std::max(
-          {TotalNumVgpr, (uint64_t)1, (uint64_t)STM.getMinNumVGPRs(MaxWaves)});
+          {TotalNumVgpr, (uint64_t)1, (uint64_t)STM.getMinNumVGPRs(MaxWaves, MFI.isDynamicVGPREnabled())});
       uint64_t NumSGPRsForWavesPerEU = std::max(
           {NumSgpr, (uint64_t)1, (uint64_t)STM.getMinNumSGPRs(MaxWaves)});
       const MCExpr *OccupancyExpr = AMDGPUMCExpr::createOccupancy(
           STM.getOccupancyWithWorkGroupSizes(*MF).second,
           MCConstantExpr::create(NumSGPRsForWavesPerEU, OutContext),
-          MCConstantExpr::create(NumVGPRsForWavesPerEU, OutContext), STM,
+          MCConstantExpr::create(NumVGPRsForWavesPerEU, OutContext), MFI.isDynamicVGPREnabled(), STM,
           OutContext);
       uint64_t Occupancy;
 
@@ -1081,7 +1081,7 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
                               Ctx);
   ProgInfo.NumVGPRsForWavesPerEU =
       AMDGPUMCExpr::createMax({ProgInfo.NumVGPR, CreateExpr(1ul),
-                               CreateExpr(STM.getMinNumVGPRs(MaxWaves))},
+                               CreateExpr(STM.getMinNumVGPRs(MaxWaves, MFI->isDynamicVGPREnabled()))},
                               Ctx);
 
   if (STM.getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS ||
@@ -1255,7 +1255,7 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
 
   ProgInfo.Occupancy = AMDGPUMCExpr::createOccupancy(
       STM.computeOccupancy(F, ProgInfo.LDSSize).second,
-      ProgInfo.NumSGPRsForWavesPerEU, ProgInfo.NumVGPRsForWavesPerEU, STM, Ctx);
+      ProgInfo.NumSGPRsForWavesPerEU, ProgInfo.NumVGPRsForWavesPerEU, MFI->isDynamicVGPREnabled(), STM, Ctx);
 
   const auto [MinWEU, MaxWEU] =
       AMDGPU::getIntegerPairAttribute(F, "amdgpu-waves-per-eu", {0, 0}, true);
@@ -1404,7 +1404,7 @@ void AMDGPUAsmPrinter::EmitProgramInfoSI(const MachineFunction &MF,
 // Helper function to add common PAL Metadata 3.0+
 static void EmitPALMetadataCommon(AMDGPUPALMetadata *MD,
                                   const SIProgramInfo &CurrentProgramInfo,
-                                  CallingConv::ID CC, const GCNSubtarget &ST) {
+                                  CallingConv::ID CC, const GCNSubtarget &ST, bool IsDynamicVGPR) {
   if (ST.hasIEEEMode())
     MD->setHwStage(CC, ".ieee_mode", (bool)CurrentProgramInfo.IEEEMode);
 
@@ -1416,7 +1416,7 @@ static void EmitPALMetadataCommon(AMDGPUPALMetadata *MD,
                    (bool)CurrentProgramInfo.TrapHandlerEnable);
     MD->setHwStage(CC, ".excp_en", CurrentProgramInfo.EXCPEnable);
 
-    if (ST.isDynamicVGPREnabled())
+    if (IsDynamicVGPR)
       MD->setComputeRegisters(".dynamic_vgpr_en", true);
   }
 
@@ -1443,7 +1443,7 @@ void AMDGPUAsmPrinter::EmitPALMetadata(const MachineFunction &MF,
   // For targets that support dynamic VGPRs, set the number of saved dynamic
   // VGPRs (if any) in the PAL metadata.
   const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
-  if (STM.isDynamicVGPREnabled() &&
+  if (MFI->isDynamicVGPREnabled() &&
       MFI->getScratchReservedForDynamicVGPRs() > 0)
     MD->setHwStage(CC, ".dynamic_vgpr_saved_count",
                    MFI->getScratchReservedForDynamicVGPRs() / 4);
@@ -1469,7 +1469,7 @@ void AMDGPUAsmPrinter::EmitPALMetadata(const MachineFunction &MF,
     MD->setHwStage(CC, ".debug_mode", (bool)CurrentProgramInfo.DebugMode);
     MD->setHwStage(CC, ".scratch_en", msgpack::Type::Boolean,
                    CurrentProgramInfo.ScratchEnable);
-    EmitPALMetadataCommon(MD, CurrentProgramInfo, CC, STM);
+    EmitPALMetadataCommon(MD, CurrentProgramInfo, CC, STM, MFI->isDynamicVGPREnabled());
   }
 
   // ScratchSize is in bytes, 16 aligned.
@@ -1540,7 +1540,7 @@ void AMDGPUAsmPrinter::emitPALFunctionMetadata(const MachineFunction &MF) {
     MD->setRsrc2(CallingConv::AMDGPU_CS,
                  CurrentProgramInfo.getComputePGMRSrc2(Ctx), Ctx);
   } else {
-    EmitPALMetadataCommon(MD, CurrentProgramInfo, CallingConv::AMDGPU_CS, ST);
+    EmitPALMetadataCommon(MD, CurrentProgramInfo, CallingConv::AMDGPU_CS, ST, MF.getInfo<SIMachineFunctionInfo>()->isDynamicVGPREnabled());
   }
 
   // Set optional info
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
index 98a70c0dbb912..8f9a7aefb104e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
@@ -199,8 +199,12 @@ unsigned getMaxVGPRs(const TargetMachine &TM, const Function &F) {
   if (!TM.getTargetTriple().isAMDGCN())
     return 128;
 
+  bool IsDynamicVGPR = false;
+  if (F.hasFnAttribute("amdgpu-dynamic-vgpr"))
+    IsDynamicVGPR = F.getFnAttribute("amdgpu-dynamic-vgpr").getValueAsBool();
+
   const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
-  unsigned MaxVGPRs = ST.getMaxNumVGPRs(ST.getWavesPerEU(F).first);
+  unsigned MaxVGPRs = ST.getMaxNumVGPRs(ST.getWavesPerEU(F).first, IsDynamicVGPR);
 
   // A non-entry function has only 32 caller preserved registers.
   // Do not promote alloca which will force spilling unless we know the function
diff --git a/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp b/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
index da065e8d8cb6b..ded5c270e3cd9 100644
--- a/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
@@ -426,13 +426,14 @@ void GCNIterativeScheduler::sortRegionsByPressure(unsigned TargetOcc) {
 unsigned GCNIterativeScheduler::tryMaximizeOccupancy(unsigned TargetOcc) {
   // TODO: assert Regions are sorted descending by pressure
   const auto &ST = MF.getSubtarget<GCNSubtarget>();
-  const auto Occ = Regions.front()->MaxPressure.getOccupancy(ST);
+  const bool IsDynamicVGPR = MF.getInfo<SIMachineFunctionInfo>()->isDynamicVGPREnabled();
+  const auto Occ = Regions.front()->MaxPressure.getOccupancy(ST, IsDynamicVGPR);
   LLVM_DEBUG(dbgs() << "Trying to improve occupancy, target = " << TargetOcc
                     << ", current = " << Occ << '\n');
 
   auto NewOcc = TargetOcc;
   for (auto *R : Regions) {
-    if (R->MaxPressure.getOccupancy(ST) >= NewOcc)
+    if (R->MaxPressure.getOccupancy(ST, IsDynamicVGPR) >= NewOcc)
       break;
 
     LLVM_DEBUG(printRegion(dbgs(), R->Begin, R->End, LIS, 3);
@@ -444,7 +445,7 @@ unsigned GCNIterativeScheduler::tryMaximizeOccupancy(unsigned TargetOcc) {
     LLVM_DEBUG(dbgs() << "Occupancy improvement attempt:\n";
                printSchedRP(dbgs(), R->MaxPressure, MaxRP));
 
-    NewOcc = std::min(NewOcc, MaxRP.getOccupancy(ST));
+    NewOcc = std::min(NewOcc, MaxRP.getOccupancy(ST, IsDynamicVGPR));
     if (NewOcc <= Occ)
       break;
 
@@ -465,9 +466,10 @@ void GCNIterativeScheduler::scheduleLegacyMaxOccupancy(
   const auto &ST = MF.getSubtarget<GCNSubtarget>();
   SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
   auto TgtOcc = MFI->getMinAllowedOccupancy();
+  bool IsDynamicVGPR = MFI->isDynamicVGPREnabled();
 
   sortRegionsByPressure(TgtOcc);
-  auto Occ = Regions.front()->MaxPressure.getOccupancy(ST);
+  auto Occ = Regions.front()->MaxPressure.getOccupancy(ST, IsDynamicVGPR);
 
   if (TryMaximizeOccupancy && Occ < TgtOcc)
     Occ = tryMaximizeOccupancy(TgtOcc);
@@ -494,19 +496,19 @@ void GCNIterativeScheduler::scheduleLegacyMaxOccupancy(
       const auto RP = getRegionPressure(*R);
       LLVM_DEBUG(printSchedRP(dbgs(), R->MaxPressure, RP));
 
-      if (RP.getOccupancy(ST) < TgtOcc) {
+      if (RP.getOccupancy(ST, IsDynamicVGPR) < TgtOcc) {
         LLVM_DEBUG(dbgs() << "Didn't fit into target occupancy O" << TgtOcc);
         if (R->BestSchedule.get() &&
-            R->BestSchedule->MaxPressure.getOccupancy(ST) >= TgtOcc) {
+            R->BestSchedule->MaxPressure.getOccupancy(ST, IsDynamicVGPR) >= TgtOcc) {
           LLVM_DEBUG(dbgs() << ", scheduling minimal register\n");
           scheduleBest(*R);
         } else {
           LLVM_DEBUG(dbgs() << ", restoring\n");
           Ovr.restoreOrder();
-          assert(R->MaxPressure.getOccupancy(ST) >= TgtOcc);
+          assert(R->MaxPressure.getOccupancy(ST, IsDynamicVGPR) >= TgtOcc);
         }
       }
-      FinalOccupancy = std::min(FinalOccupancy, RP.getOccupancy(ST));
+      FinalOccupancy = std::min(FinalOccupancy, RP.getOccupancy(ST, IsDynamicVGPR));
     }
   }
   MFI->limitOccupancy(FinalOccupancy);
@@ -552,9 +554,10 @@ void GCNIterativeScheduler::scheduleILP(
   const auto &ST = MF.getSubtarget<GCNSubtarget>();
   SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
   auto TgtOcc = MFI->getMinAllowedOccupancy();
+  bool IsDynamicVGPR = MFI->isDynamicVGPREnabled();
 
   sortRegionsByPressure(TgtOcc);
-  auto Occ = Regions.front()->MaxPressure.getOccupancy(ST);
+  auto Occ = Regions.front()->MaxPressure.getOccupancy(ST, IsDynamicVGPR);
 
   if (TryMaximizeOccupancy && Occ < TgtOcc)
     Occ = tryMaximizeOccupancy(TgtOcc);
@@ -572,17 +575,17 @@ void GCNIterativeScheduler::scheduleILP(
     const auto RP = getSchedulePressure(*R, ILPSchedule);
     LLVM_DEBUG(printSchedRP(dbgs(), R->MaxPressure, RP));
 
-    if (RP.getOccupancy(ST) < TgtOcc) {
+    if (RP.getOccupancy(ST, IsDynamicVGPR) < TgtOcc) {
       LLVM_DEBUG(dbgs() << "Didn't fit into target occupancy O" << TgtOcc);
       if (R->BestSchedule.get() &&
-        R->BestSchedule->MaxPressure.getOccupancy(ST) >= TgtOcc) {
+        R->BestSchedule->MaxPressure.getOccupancy(ST, IsDynamicVGPR) >= TgtOcc) {
         LLVM_DEBUG(dbgs() << ", scheduling minimal register\n");
         scheduleBest(*R);
       }
     } else {
       scheduleRegion(*R, ILPSchedule, RP);
       LLVM_DEBUG(printSchedResult(dbgs(), R, RP));
-      FinalOccupancy = std::min(FinalOccupancy, RP.getOccupancy(ST));
+      FinalOccupancy = std::min(FinalOccupancy, RP.getOccupancy(ST, IsDynamicVGPR));
     }
   }
   MFI->limitOccupancy(FinalOccupancy);
diff --git a/llvm/lib/Target/AMDGPU/GCNNSAReassign.cpp b/llvm/lib/Target/AMDGPU/GCNNSAReassign.cpp
index 13eb0ca539a4c..f3fd6c8564693 100644
--- a/llvm/lib/Target/AMDGPU/GCNNSAReassign.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNNSAReassign.cpp
@@ -251,7 +251,7 @@ bool GCNNSAReassignImpl::run(MachineFunction &MF) {
 
   const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
   MaxNumVGPRs = ST->getMaxNumVGPRs(MF);
-  MaxNumVGPRs = std::min(ST->getMaxNumVGPRs(MFI->getOccupancy()), MaxNumVGPRs);
+  MaxNumVGPRs = std::min(ST->getMaxNumVGPRs(MFI->getOccupancy(), MFI->isDynamicVGPREnabled()), MaxNumVGPRs);
   CSRegs = MRI->getCalleeSavedRegs();
 
   using Candidate = std::pair<const MachineInstr*, bool>;
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
index f74d12cfab0c0..3088dd2f65a8c 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
@@ -13,6 +13,7 @@
 
 #include "GCNRegPressure.h"
 #include "AMDGPU.h"
+#include "SIMachineFunctionInfo.h"
 #include "llvm/CodeGen/RegisterPressure.h"
 
 using namespace llvm;
@@ -92,17 +93,18 @@ void GCNRegPressure::inc(unsigned Reg,
 bool GCNRegPressure::less(const MachineFunction &MF, const GCNRegPressure &O,
                           unsigned MaxOccupancy) const {
   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+  bool IsDynamicVGPR = MF.getInfo<SIMachineFunctionInfo>()->isDynamicVGPREnabled();
 
   const auto SGPROcc = std::min(MaxOccupancy,
                                 ST.getOccupancyWithNumSGPRs(getSGPRNum()));
   const auto VGPROcc =
     std::min(MaxOccupancy,
-             ST.getOccupancyWithNumVGPRs(getVGPRNum(ST.hasGFX90AInsts())));
+             ST.getOccupancyWithNumVGPRs(getVGPRNum(ST.hasGFX90AInsts()), IsDynamicVGPR));
   const auto OtherSGPROcc = std::min(MaxOccupancy,
                                 ST.getOccupancyWithNumSGPRs(O.getSGPRNum()));
   const auto OtherVGPROcc =
     std::min(MaxOccupancy,
-             ST.getOccupancyWithNumVGPRs(O.getVGPRNum(ST.hasGFX90AInsts())));
+             ST.getOccupancyWithNumVGPRs(O.getVGPRNum(ST.hasGFX90AInsts()), IsDynamicVGPR));
 
   const auto Occ = std::min(SGPROcc, VGPROcc);
   const auto OtherOcc = std::min(OtherSGPROcc, OtherVGPROcc);
@@ -224,13 +226,13 @@ bool GCNRegPressure::less(const MachineFunction &MF, const GCNRegPressure &O,
                           O.getVGPRNum(ST.hasGFX90AInsts()));
 }
 
-Printable llvm::print(const GCNRegPressure &RP, const GCNSubtarget *ST) {
-  return Printable([&RP, ST](raw_ostream &OS) {
+Printable llvm::print(const GCNRegPressure &RP, const GCNSubtarget *ST, bool IsDynamicVGPR) {
+  return Printable([&RP, ST, IsDynamicVGPR](raw_ostream &OS) {
     OS << "VGPRs: " << RP.Value[GCNRegPressure::VGPR32] << ' '
        << "AGPRs: " << RP.getAGPRNum();
     if (ST)
       OS << "(O"
-         << ST->getOccupancyWithNumVGPRs(RP.getVGPRNum(ST->hasGFX90AInsts()))
+         << ST->getOccupancyWithNumVGPRs(RP.getVGPRNum(ST->hasGFX90AInsts()), IsDynamicVGPR)
          << ')';
     OS << ", SGPRs: " << RP.getSGPRNum();
     if (ST)
@@ -238,7 +240,7 @@ Printable llvm::print(const GCNRegPressure &RP, const GCNSubtarget *ST) {
     OS << ", LVGPR WT: " << RP.getVGPRTuplesWeight()
        << ", LSGPR WT: " << RP.getSGPRTuplesWeight();
     if (ST)
-      OS << " -> Occ: " << RP.getOccupancy(*ST);
+      OS << " -> Occ: " << RP.getOccupancy(*ST, IsDynamicVGPR);
     OS << '\n';
   });
 }
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.h b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
index 7554b9f578fcb..15e86c733ff60 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.h
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
@@ -67,9 +67,9 @@ struct GCNRegPressure {
                                                          Value[AGPR_TUPLE]); }
   unsigned getSGPRTuplesWeight() const { return Value[SGPR_TUPLE]; }
 
-  unsigned getOccupancy(const GCNSubtarget &ST) const {
+  unsigned getOccupancy(const GCNSubtarget &ST, bool IsDynamicVGPR) const {
     return std::min(ST.getOccupancyWithNumSGPRs(getSGPRNum()),
-             ST.getOccupancyWithNumVGPRs(getVGPRNum(ST.hasGFX90AInsts())));
+             ST.getOccupancyWithNumVGPRs(getVGPRNum(ST.hasGFX90AInsts()), IsDynamicVGPR));
   }
 
   void inc(unsigned Reg,
@@ -77,8 +77,8 @@ struct GCNRegPressure {
            LaneBitmask NewMask,
            const MachineRegisterInfo &MRI);
 
-  bool higherOccupancy(const GCNSubtarget &ST, const GCNRegPressure& O) const {
-    return getOccupancy(ST) > O.getOccupancy(ST);
+  bool higherOccupancy(const GCNSubtarget &ST, const GCNRegPressure& O, bool IsDynamicVGPR) const {
+    return getOccupancy(ST, IsDynamicVGPR) > O.getOccupancy(ST, IsDynamicVGPR);
   }
 
   /// Compares \p this GCNRegpressure to \p O, returning true if \p this is
@@ -126,7 +126,7 @@ struct GCNRegPressure {
   friend GCNRegPressure max(const GCNRegPressure &P1,
                             const GCNRegPressure &P2);
 
-  friend Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST);
+  friend Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST, bool IsDynamicVGPR);
 };
 
 inline GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2) {
@@ -395,7 +395,7 @@ GCNRegPressure getRegPressure(const MachineRegisterInfo &MRI,
 bool isEqual(const GCNRPTracker::LiveRegSet &S1,
              const GCNRPTracker::LiveRegSet &S2);
 
-Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST = nullptr);
+Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST = nullptr, bool IsDynamicVGPR = false);
 
 Printable print(const GCNRPTracker::LiveRegSet &LiveRegs,
                 const MachineRegisterInfo &MRI);
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index ea9bc88bbe86b..f2900beb5234c 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -95,16 +95,16 @@ void GCNSchedStrategy::initialize(ScheduleDAGMI *DAG) {
 
   if (!KnownExcessRP) {
     VGPRCriticalLimit =
-        std::min(ST.getMaxNumVGPRs(TargetOccupancy), VGPRExcessLimit);
+        std::min(ST.getMaxNumVGPRs(TargetOccupancy, MFI.isDynamicVGPREnabled()), VGPRExcessLimit);
   } else {
     // This is similar to ST.getMaxNumVGPRs(TargetOccupancy) result except
     // returns a reasonably small number for targets with lots of VGPRs, such
     // as GFX10 and GFX11.
     LLVM_DEBUG(dbgs() << "Region is known to spill, use alternative "
                          "VGPRCriticalLimit calculation method.\n");
-
-    unsigned Granule = AMDGPU::IsaInfo::getVGPRAllocGranule(&ST);
-    unsigned Addressable = AMDGPU::IsaInfo::getAddressableNumVGPRs(&ST);
+    bool IsDynamicVGPR = MFI.isDynamicVGPREnabled();
+    unsigned Granule = AMDGPU::IsaInfo::getVGPRAllocGranule(&ST, IsDynamicVGPR);
+    unsigned Addressable = AMDGPU::IsaInfo::getAddressableNumVGPRs(&ST, IsDynamicVGPR);
     unsigned VGPRBudget = alignDown(Addressable / TargetOccupancy, Granule);
     VGPRBudget = std::max(VGPRBudget, Granule);
     VGPRCriticalLimit = std::min(VGPRBudget, VGPRExcessLimit);
@@ -1126,7 +1126,7 @@ void UnclusteredHighRPStage::finalizeGCNSchedStage() {
   if (DAG.MinOccupancy > InitialOccupancy) {
     for (unsigned IDX = 0; IDX < DAG.Pressure.size(); ++IDX)
       DAG.RegionsWithMinOcc[IDX] =
-          DAG.Pressure[IDX].getOccupancy(DAG.ST) == DAG.MinOccupancy;
+          DAG.Pressure[IDX].getOccupancy(DAG.ST, DAG.MFI.isDynamicVGPREnabled()) == DAG.MinOccupancy;
 
     LLVM_DEBUG(dbgs() << StageID
                       << " stage successfully increased occupancy to "
@@ -1266,11 +1266,13 @@ void GCNSchedStage::checkScheduling() {
   LLVM_DEBUG(dbgs() << "Pressure after scheduling: " << print(PressureAfter));
   LLVM_DEBUG(dbgs() << "Region: " << RegionIdx << ".\n");
 
+  bool IsDynamicVGPR = DAG.MFI.i...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/133444


More information about the llvm-commits mailing list