[PATCH] D134723: [AMDGPU] Set memory bound occupancy based on addressable VGPRs

Carl Ritson via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Tue Sep 27 02:06:19 PDT 2022


critson created this revision.
critson added reviewers: foad, rampitec, Joe_Nash.
Herald added subscribers: kosarev, kerbowa, javed.absar, hiraditya, t-tye, tpr, dstuttard, yaxunl, nhaehnle, jvesely, kzhuravl, arsenm.
Herald added a project: All.
critson requested review of this revision.
Herald added subscribers: llvm-commits, wdng.
Herald added a project: LLVM.

With GFX11, the minimum occupancy for memory-bound shaders should be
set based on the number of addressable VGPRs and the total number of
VGPRs; i.e., it should be higher than 4 in many cases to avoid spilling.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D134723

Files:
  llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
  llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
  llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp
  llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h


Index: llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
===================================================================
--- llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -959,10 +959,13 @@
     return Occupancy;
   }
 
-  unsigned getMinAllowedOccupancy() const {
+  unsigned getMinAllowedOccupancy(const GCNSubtarget &ST) const {
     if (!isMemoryBound() && !needsWaveLimiter())
       return Occupancy;
-    return (Occupancy < 4) ? Occupancy : 4;
+    unsigned spillThreshold = ST.getTotalNumVGPRs() / ST.getAddressableNumVGPRs();
+    if (spillThreshold < 4)
+      spillThreshold = 4;
+    return (Occupancy < spillThreshold) ? Occupancy : spillThreshold;
   }
 
   void limitOccupancy(const MachineFunction &MF);
Index: llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp
+++ llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp
@@ -205,7 +205,7 @@
   // operands due to the early clobber we will introduce. Third, the pressure
   // tracking does not account for the alignment requirements for SGPRs, or the
   // fragmentation of registers the allocator will need to satisfy.
-  if (Occupancy >= MFI->getMinAllowedOccupancy() &&
+  if (Occupancy >= MFI->getMinAllowedOccupancy(*ST) &&
       MaxPressure.getVGPRNum(ST->hasGFX90AInsts()) <= MaxVGPRs / 2 &&
       MaxPressure.getSGPRNum() <= MaxSGPRs / 2) {
     LastRecordedOccupancy = Occupancy;
Index: llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -892,9 +892,9 @@
   // Allow memory bound functions to drop to 4 waves if not limited by an
   // attribute.
   if (WavesAfter < WavesBefore && WavesAfter < DAG.MinOccupancy &&
-      WavesAfter >= MFI.getMinAllowedOccupancy()) {
+      WavesAfter >= MFI.getMinAllowedOccupancy(ST)) {
     LLVM_DEBUG(dbgs() << "Function is memory bound, allow occupancy drop up to "
-                      << MFI.getMinAllowedOccupancy() << " waves\n");
+                      << MFI.getMinAllowedOccupancy(ST) << " waves\n");
     NewOccupancy = WavesAfter;
   }
 
Index: llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
+++ llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
@@ -472,7 +472,7 @@
   bool TryMaximizeOccupancy) {
   const auto &ST = MF.getSubtarget<GCNSubtarget>();
   SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
-  auto TgtOcc = MFI->getMinAllowedOccupancy();
+  auto TgtOcc = MFI->getMinAllowedOccupancy(ST);
 
   sortRegionsByPressure(TgtOcc);
   auto Occ = Regions.front()->MaxPressure.getOccupancy(ST);
@@ -560,7 +560,7 @@
   bool TryMaximizeOccupancy) {
   const auto &ST = MF.getSubtarget<GCNSubtarget>();
   SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
-  auto TgtOcc = MFI->getMinAllowedOccupancy();
+  auto TgtOcc = MFI->getMinAllowedOccupancy(ST);
 
   sortRegionsByPressure(TgtOcc);
   auto Occ = Regions.front()->MaxPressure.getOccupancy(ST);


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D134723.463156.patch
Type: text/x-patch
Size: 3257 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20220927/31c61007/attachment.bin>


More information about the llvm-commits mailing list