[llvm] [AMDGPU] Use correct VGPR threshold for flagging ExcessRP regions in unified register file case (PR #85860)
Jeffrey Byrnes via llvm-commits
llvm-commits at lists.llvm.org
Fri Mar 22 11:13:25 PDT 2024
https://github.com/jrbyrnes updated https://github.com/llvm/llvm-project/pull/85860
>From 5cba833e4bd21f3c0222af7fd605c5fe75804fbc Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Fri, 22 Mar 2024 11:00:10 -0700
Subject: [PATCH 1/2] [AMDGPU] Use correct VGPR threshold for flagging ExcessRP
regions in unified register file case
Change-Id: I5a4ddfbf7fcea931b47c18b0b5421cada7ea9541
---
llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp | 6 ++++--
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.iglp.opt.single.2b.mir | 2 ++
2 files changed, 6 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index 9f419a7fbf6834..60ae1ae4f07659 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -975,10 +975,12 @@ void GCNSchedStage::checkScheduling() {
}
unsigned MaxVGPRs = ST.getMaxNumVGPRs(MF);
+ unsigned MaxArchVGPRs = std::min(MaxVGPRs, ST.getAddressableNumArchVGPRs());
unsigned MaxSGPRs = ST.getMaxNumSGPRs(MF);
- if (PressureAfter.getVGPRNum(false) > MaxVGPRs ||
- PressureAfter.getAGPRNum() > MaxVGPRs ||
+ if (PressureAfter.getVGPRNum(ST.hasGFX90AInsts()) > MaxVGPRs ||
+ PressureAfter.getVGPRNum(false) > MaxArchVGPRs ||
+ PressureAfter.getAGPRNum() > MaxArchVGPRs ||
PressureAfter.getSGPRNum() > MaxSGPRs) {
DAG.RescheduleRegions[RegionIdx] = true;
DAG.RegionsWithHighRP[RegionIdx] = true;
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.iglp.opt.single.2b.mir b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.iglp.opt.single.2b.mir
index 091b29c23d60e2..e93595b9ef2735 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.iglp.opt.single.2b.mir
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.iglp.opt.single.2b.mir
@@ -4,6 +4,8 @@
--- |
define amdgpu_kernel void @single-wave-phase-2b(ptr addrspace(3) noalias %in0, ptr addrspace(3) noalias %in1, ptr addrspace(3) noalias %in2, ptr addrspace(3) noalias %in3, ptr addrspace(3) noalias %in4, ptr addrspace(3) noalias %in5, ptr addrspace(3) noalias %in6, ptr addrspace(3) noalias %in7, ptr addrspace(3) noalias %in8, ptr addrspace(3) noalias %in9, ptr addrspace(3) noalias %in10, ptr addrspace(3) noalias %in11, ptr addrspace(7) noalias %in12, ptr addrspace(7) noalias %in13, ptr addrspace(7) noalias %in14, ptr addrspace(7) noalias %in15, ptr addrspace(7) noalias %in16, ptr addrspace(7) noalias %in17, ptr addrspace(7) noalias %in18, ptr addrspace(7) noalias %in19, ptr addrspace(7) noalias %in20, ptr addrspace(7) noalias %in21, ptr addrspace(7) noalias %in22, ptr addrspace(7) noalias %in23, ptr addrspace(7) noalias %in24, ptr addrspace(7) noalias %in25, ptr addrspace(7) noalias %in26, ptr addrspace(7) noalias %in27, ptr addrspace(7) noalias %in28, ptr addrspace(7) noalias %in29) #0 { ret void }
+ attributes #0 = { nounwind "amdgpu-waves-per-eu"="1,1" "amdgpu-flat-work-group-size"="1,256" }
+
!0 = distinct !{!0}
!1 = !{!1, !0}
...
>From 00ab315157ce0e6877fadb3127e277f6ac5e9881 Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Fri, 22 Mar 2024 11:07:22 -0700
Subject: [PATCH 2/2] add comment explaining the conditions
Change-Id: Iadeff5844c3e7baece274b03838f0457b1523c3b
---
llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index 60ae1ae4f07659..94d93390d0916f 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -973,8 +973,11 @@ void GCNSchedStage::checkScheduling() {
LLVM_DEBUG(dbgs() << "Occupancy lowered for the function to "
<< DAG.MinOccupancy << ".\n");
}
-
+ // The maximum number of arch VGPR on non-unified register file, or the
+ // maximum VGPR + AGPR in the unified register file case.
unsigned MaxVGPRs = ST.getMaxNumVGPRs(MF);
+ // The maximum number of arch VGPR for both unified and non-unified register
+ // file.
unsigned MaxArchVGPRs = std::min(MaxVGPRs, ST.getAddressableNumArchVGPRs());
unsigned MaxSGPRs = ST.getMaxNumSGPRs(MF);
More information about the llvm-commits
mailing list