[llvm] b761137 - [AMDGPU] Use correct VGPR threshold for flagging ExcessRP regions in unified register file case (#85860)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 25 13:12:03 PDT 2024
Author: Jeffrey Byrnes
Date: 2024-03-25T13:11:58-07:00
New Revision: b7611370491873722e08e4ce9374312d0c936af1
URL: https://github.com/llvm/llvm-project/commit/b7611370491873722e08e4ce9374312d0c936af1
DIFF: https://github.com/llvm/llvm-project/commit/b7611370491873722e08e4ce9374312d0c936af1.diff
LOG: [AMDGPU] Use correct VGPR threshold for flagging ExcessRP regions in unified register file case (#85860)
`ST.getMaxNumVGPRs(MF)` lowers to `AMDGPUBaseInfo.cpp:getTotalNumVGPRs`
which returns 512 for gfx90a. This is subsequently limited by
`AMDGPUBaseInfo:getAddressableNumVGPRs()`, which also returns 512 for
gfx90a. The ISA states we can have a total of 512 registers, but a
maximum of only 256 of each of AGPR and VGPR (gfx90a 3.6.4).
Therefore, in unified register file case, `ST.getMaxNumVGPRs(MF)`
calculates the maximum number of combined VGPR + AGPR. But, it is
currently used as the limit for accvgpr and as the limit for archvgpr.
This patch uses it as the combined limit, and accounts for the maximum addressable arch/acc VGPRs when calculating the per RegClass limits.
It is not unreasonable to think other clients of getTotalNumVGPRs are
using it in the wrong way.
Added:
Modified:
llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.iglp.opt.single.2b.mir
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index 9f419a7fbf6834..94d93390d0916f 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -973,12 +973,17 @@ void GCNSchedStage::checkScheduling() {
LLVM_DEBUG(dbgs() << "Occupancy lowered for the function to "
<< DAG.MinOccupancy << ".\n");
}
-
+ // The maximum number of arch VGPR on non-unified register file, or the
+ // maximum VGPR + AGPR in the unified register file case.
unsigned MaxVGPRs = ST.getMaxNumVGPRs(MF);
+ // The maximum number of arch VGPR for both unified and non-unified register
+ // file.
+ unsigned MaxArchVGPRs = std::min(MaxVGPRs, ST.getAddressableNumArchVGPRs());
unsigned MaxSGPRs = ST.getMaxNumSGPRs(MF);
- if (PressureAfter.getVGPRNum(false) > MaxVGPRs ||
- PressureAfter.getAGPRNum() > MaxVGPRs ||
+ if (PressureAfter.getVGPRNum(ST.hasGFX90AInsts()) > MaxVGPRs ||
+ PressureAfter.getVGPRNum(false) > MaxArchVGPRs ||
+ PressureAfter.getAGPRNum() > MaxArchVGPRs ||
PressureAfter.getSGPRNum() > MaxSGPRs) {
DAG.RescheduleRegions[RegionIdx] = true;
DAG.RegionsWithHighRP[RegionIdx] = true;
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.iglp.opt.single.2b.mir b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.iglp.opt.single.2b.mir
index 091b29c23d60e2..e93595b9ef2735 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.iglp.opt.single.2b.mir
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.iglp.opt.single.2b.mir
@@ -4,6 +4,8 @@
--- |
define amdgpu_kernel void @single-wave-phase-2b(ptr addrspace(3) noalias %in0, ptr addrspace(3) noalias %in1, ptr addrspace(3) noalias %in2, ptr addrspace(3) noalias %in3, ptr addrspace(3) noalias %in4, ptr addrspace(3) noalias %in5, ptr addrspace(3) noalias %in6, ptr addrspace(3) noalias %in7, ptr addrspace(3) noalias %in8, ptr addrspace(3) noalias %in9, ptr addrspace(3) noalias %in10, ptr addrspace(3) noalias %in11, ptr addrspace(7) noalias %in12, ptr addrspace(7) noalias %in13, ptr addrspace(7) noalias %in14, ptr addrspace(7) noalias %in15, ptr addrspace(7) noalias %in16, ptr addrspace(7) noalias %in17, ptr addrspace(7) noalias %in18, ptr addrspace(7) noalias %in19, ptr addrspace(7) noalias %in20, ptr addrspace(7) noalias %in21, ptr addrspace(7) noalias %in22, ptr addrspace(7) noalias %in23, ptr addrspace(7) noalias %in24, ptr addrspace(7) noalias %in25, ptr addrspace(7) noalias %in26, ptr addrspace(7) noalias %in27, ptr addrspace(7) noalias %in28, ptr addrspace(7) noalias %in29) #0 { ret void }
+ attributes #0 = { nounwind "amdgpu-waves-per-eu"="1,1" "amdgpu-flat-work-group-size"="1,256" }
+
!0 = distinct !{!0}
!1 = !{!1, !0}
...
More information about the llvm-commits
mailing list