[PATCH] D83674: [AMDGPU] Calculate minimum allowed occupancy based on threads per lane
Jay Foad via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Mon Jul 13 06:11:41 PDT 2020
foad created this revision.
foad added reviewers: rampitec, nhaehnle, arsenm.
Herald added subscribers: llvm-commits, kerbowa, javed.absar, hiraditya, t-tye, tpr, dstuttard, yaxunl, wdng, jvesely, kzhuravl.
Herald added a project: LLVM.
Previously the minimum allowed occupancy (for memory-bound functions)
was a constant 4 waves per SIMD. Now it is a constant 16 threads per
SIMD lane, which better accounts for the architectural changes in GFX10.
Repository:
rG LLVM Github Monorepo
https://reviews.llvm.org/D83674
Files:
llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp
llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
Index: llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
===================================================================
--- llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -36,6 +36,7 @@
namespace llvm {
+class GCNSubtarget;
class MachineFrameInfo;
class MachineFunction;
class TargetRegisterClass;
@@ -915,11 +916,7 @@
return Occupancy;
}
- unsigned getMinAllowedOccupancy() const {
- if (!isMemoryBound() && !needsWaveLimiter())
- return Occupancy;
- return (Occupancy < 4) ? Occupancy : 4;
- }
+ unsigned getMinAllowedOccupancy(const GCNSubtarget &ST) const;
void limitOccupancy(const MachineFunction &MF);
Index: llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -190,6 +190,17 @@
S.consumeInteger(0, GDSSize);
}
+unsigned SIMachineFunctionInfo::getMinAllowedOccupancy(const GCNSubtarget &ST) const {
+ if (!isMemoryBound() && !needsWaveLimiter())
+ return Occupancy; // minimum equals the current occupancy in this case
+ // Floor of 16 threads per SIMD lane, computed as total VGPRs / 64, giving:
+ // - 4 waves per SIMD for GFX9 and below
+ // - 8 waves per SIMD for GFX10 wave64
+ // - 16 waves per SIMD for GFX10 wave32
+ unsigned MinOccupancy = ST.getTotalNumVGPRs() / 64;
+ return std::min(Occupancy, MinOccupancy); // never above current occupancy
+}
+
void SIMachineFunctionInfo::limitOccupancy(const MachineFunction &MF) {
limitOccupancy(getMaxWavesPerEU());
const GCNSubtarget& ST = MF.getSubtarget<GCNSubtarget>();
Index: llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp
+++ llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp
@@ -251,7 +251,7 @@
RPT.advanceToNext();
GCNRegPressure MaxPressure = RPT.moveMaxPressure();
unsigned Occupancy = MaxPressure.getOccupancy(*ST);
- if (Occupancy >= MFI->getMinAllowedOccupancy() &&
+ if (Occupancy >= MFI->getMinAllowedOccupancy(*ST) &&
MaxPressure.getVGPRNum() <= MaxVGPRs &&
MaxPressure.getSGPRNum() <= MaxSGPRs) {
LastRecordedOccupancy = Occupancy;
Index: llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -358,9 +358,9 @@
// Allow memory bound functions to drop to 4 waves if not limited by an
// attribute.
if (WavesAfter < WavesBefore && WavesAfter < MinOccupancy &&
- WavesAfter >= MFI.getMinAllowedOccupancy()) {
+ WavesAfter >= MFI.getMinAllowedOccupancy(ST)) {
LLVM_DEBUG(dbgs() << "Function is memory bound, allow occupancy drop up to "
- << MFI.getMinAllowedOccupancy() << " waves\n");
+ << MFI.getMinAllowedOccupancy(ST) << " waves\n");
NewOccupancy = WavesAfter;
}
if (NewOccupancy < MinOccupancy) {
Index: llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
+++ llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
@@ -493,7 +493,7 @@
bool TryMaximizeOccupancy) {
const auto &ST = MF.getSubtarget<GCNSubtarget>();
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
- auto TgtOcc = MFI->getMinAllowedOccupancy();
+ auto TgtOcc = MFI->getMinAllowedOccupancy(ST);
sortRegionsByPressure(TgtOcc);
auto Occ = Regions.front()->MaxPressure.getOccupancy(ST);
@@ -581,7 +581,7 @@
bool TryMaximizeOccupancy) {
const auto &ST = MF.getSubtarget<GCNSubtarget>();
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
- auto TgtOcc = MFI->getMinAllowedOccupancy();
+ auto TgtOcc = MFI->getMinAllowedOccupancy(ST);
sortRegionsByPressure(TgtOcc);
auto Occ = Regions.front()->MaxPressure.getOccupancy(ST);
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D83674.277402.patch
Type: text/x-patch
Size: 4021 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20200713/d8dc9c3e/attachment.bin>
More information about the llvm-commits mailing list