[PATCH] D83674: [AMDGPU] Calculate minimum allowed occupancy based on threads per lane

Jay Foad via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Mon Jul 13 06:11:41 PDT 2020


foad created this revision.
foad added reviewers: rampitec, nhaehnle, arsenm.
Herald added subscribers: llvm-commits, kerbowa, javed.absar, hiraditya, t-tye, tpr, dstuttard, yaxunl, wdng, jvesely, kzhuravl.
Herald added a project: LLVM.

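Previously the minimum allowed occupancy (for functions that are
memory-bound or need the wave limiter) was a constant 4 waves per SIMD.
Now it is a constant 16 threads per SIMD lane, computed as the
subtarget's total number of VGPRs divided by 64, which better accounts
for the architectural changes in GFX10. This works out as 4 waves per
SIMD for GFX9 and below, 8 waves per SIMD for GFX10 wave64, and 16 waves
per SIMD for GFX10 wave32.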


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D83674

Files:
  llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
  llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
  llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp
  llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
  llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h


Index: llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
===================================================================
--- llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -36,6 +36,7 @@
 
 namespace llvm {
 
+class GCNSubtarget;
 class MachineFrameInfo;
 class MachineFunction;
 class TargetRegisterClass;
@@ -915,11 +916,7 @@
     return Occupancy;
   }
 
-  unsigned getMinAllowedOccupancy() const {
-    if (!isMemoryBound() && !needsWaveLimiter())
-      return Occupancy;
-    return (Occupancy < 4) ? Occupancy : 4;
-  }
+  unsigned getMinAllowedOccupancy(const GCNSubtarget &ST) const;
 
   void limitOccupancy(const MachineFunction &MF);
 
Index: llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -190,6 +190,17 @@
     S.consumeInteger(0, GDSSize);
 }
 
+unsigned SIMachineFunctionInfo::getMinAllowedOccupancy(const GCNSubtarget &ST) const {
+  if (!isMemoryBound() && !needsWaveLimiter())
+    return Occupancy;
+  // Allow a minimum of 16 threads per SIMD lane, which works out as:
+  // - 4 waves per SIMD for GFX9 and below
+  // - 8 waves per SIMD for GFX10 wave64
+  // - 16 waves per SIMD for GFX10 wave32
+  unsigned MinOccupancy = ST.getTotalNumVGPRs() / 64;
+  return std::min(Occupancy, MinOccupancy);
+}
+
 void SIMachineFunctionInfo::limitOccupancy(const MachineFunction &MF) {
   limitOccupancy(getMaxWavesPerEU());
   const GCNSubtarget& ST = MF.getSubtarget<GCNSubtarget>();
Index: llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp
+++ llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp
@@ -251,7 +251,7 @@
   RPT.advanceToNext();
   GCNRegPressure MaxPressure = RPT.moveMaxPressure();
   unsigned Occupancy = MaxPressure.getOccupancy(*ST);
-  if (Occupancy >= MFI->getMinAllowedOccupancy() &&
+  if (Occupancy >= MFI->getMinAllowedOccupancy(*ST) &&
       MaxPressure.getVGPRNum() <= MaxVGPRs &&
       MaxPressure.getSGPRNum() <= MaxSGPRs) {
     LastRecordedOccupancy = Occupancy;
Index: llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -358,9 +358,9 @@
   // Allow memory bound functions to drop to 4 waves if not limited by an
   // attribute.
   if (WavesAfter < WavesBefore && WavesAfter < MinOccupancy &&
-      WavesAfter >= MFI.getMinAllowedOccupancy()) {
+      WavesAfter >= MFI.getMinAllowedOccupancy(ST)) {
     LLVM_DEBUG(dbgs() << "Function is memory bound, allow occupancy drop up to "
-                      << MFI.getMinAllowedOccupancy() << " waves\n");
+                      << MFI.getMinAllowedOccupancy(ST) << " waves\n");
     NewOccupancy = WavesAfter;
   }
   if (NewOccupancy < MinOccupancy) {
Index: llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
+++ llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
@@ -493,7 +493,7 @@
   bool TryMaximizeOccupancy) {
   const auto &ST = MF.getSubtarget<GCNSubtarget>();
   SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
-  auto TgtOcc = MFI->getMinAllowedOccupancy();
+  auto TgtOcc = MFI->getMinAllowedOccupancy(ST);
 
   sortRegionsByPressure(TgtOcc);
   auto Occ = Regions.front()->MaxPressure.getOccupancy(ST);
@@ -581,7 +581,7 @@
   bool TryMaximizeOccupancy) {
   const auto &ST = MF.getSubtarget<GCNSubtarget>();
   SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
-  auto TgtOcc = MFI->getMinAllowedOccupancy();
+  auto TgtOcc = MFI->getMinAllowedOccupancy(ST);
 
   sortRegionsByPressure(TgtOcc);
   auto Occ = Regions.front()->MaxPressure.getOccupancy(ST);


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D83674.277402.patch
Type: text/x-patch
Size: 4021 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20200713/d8dc9c3e/attachment.bin>


More information about the llvm-commits mailing list