[llvm] [AMDGPU] Use correct VGPR threshold for flagging ExcessRP regions in unified register file case (PR #85860)
Jeffrey Byrnes via llvm-commits
llvm-commits at lists.llvm.org
Fri Mar 22 09:55:09 PDT 2024
https://github.com/jrbyrnes updated https://github.com/llvm/llvm-project/pull/85860
>From 188669688b279349002cd6851f12f4c29d50a1d5 Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Tue, 19 Mar 2024 12:57:52 -0700
Subject: [PATCH 1/6] [AMDGPU] Use correct VGPR threshold for flagging ExcessRP
regions in unified register file case
Change-Id: Ie09cd894f433b3bb43a031a6eab44d9dc2dab0c6
---
llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp | 18 +++++++++-------
llvm/lib/Target/AMDGPU/GCNRegPressure.cpp | 4 ++--
llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp | 8 ++++---
llvm/lib/Target/AMDGPU/GCNSubtarget.h | 21 ++++++++++++-------
.../Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 7 +++++--
llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h | 3 ++-
.../AMDGPU/llvm.amdgcn.iglp.opt.single.2b.mir | 2 ++
7 files changed, 41 insertions(+), 22 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index fa77b94fc22def..248713bca8a688 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -809,11 +809,13 @@ unsigned GCNSubtarget::getMaxNumSGPRs(const Function &F) const {
getReservedNumSGPRs(F));
}
-unsigned GCNSubtarget::getBaseMaxNumVGPRs(
- const Function &F, std::pair<unsigned, unsigned> WavesPerEU) const {
+unsigned
+GCNSubtarget::getBaseMaxNumVGPRs(const Function &F,
+ std::pair<unsigned, unsigned> WavesPerEU,
+ bool WholeRegisterFile) const {
// Compute maximum number of VGPRs function can use using default/requested
// minimum number of waves per execution unit.
- unsigned MaxNumVGPRs = getMaxNumVGPRs(WavesPerEU.first);
+ unsigned MaxNumVGPRs = getMaxNumVGPRs(WavesPerEU.first, WholeRegisterFile);
// Check if maximum number of VGPRs was explicitly requested using
// "amdgpu-num-vgpr" attribute.
@@ -839,14 +841,16 @@ unsigned GCNSubtarget::getBaseMaxNumVGPRs(
return MaxNumVGPRs;
}
-unsigned GCNSubtarget::getMaxNumVGPRs(const Function &F) const {
- return getBaseMaxNumVGPRs(F, getWavesPerEU(F));
+unsigned GCNSubtarget::getMaxNumVGPRs(const Function &F,
+ bool WholeRegisterFile) const {
+ return getBaseMaxNumVGPRs(F, getWavesPerEU(F), WholeRegisterFile);
}
-unsigned GCNSubtarget::getMaxNumVGPRs(const MachineFunction &MF) const {
+unsigned GCNSubtarget::getMaxNumVGPRs(const MachineFunction &MF,
+ bool WholeRegisterFile) const {
const Function &F = MF.getFunction();
const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
- return getBaseMaxNumVGPRs(F, MFI.getWavesPerEU());
+ return getBaseMaxNumVGPRs(F, MFI.getWavesPerEU(), WholeRegisterFile);
}
void GCNSubtarget::adjustSchedDependency(SUnit *Def, int DefOpIdx, SUnit *Use,
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
index 5c394e6d6296d0..b6fd5deb880f76 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
@@ -110,7 +110,7 @@ bool GCNRegPressure::less(const MachineFunction &MF, const GCNRegPressure &O,
if (Occ != OtherOcc)
return Occ > OtherOcc;
- unsigned MaxVGPRs = ST.getMaxNumVGPRs(MF);
+ unsigned MaxVGPRs = ST.getMaxNumVGPRs(MF, /*WholeRegisterFile*/ true);
unsigned MaxSGPRs = ST.getMaxNumSGPRs(MF);
// SGPR excess pressure conditions
@@ -124,7 +124,7 @@ bool GCNRegPressure::less(const MachineFunction &MF, const GCNRegPressure &O,
unsigned OtherVGPRForSGPRSpills =
(OtherExcessSGPR + (WaveSize - 1)) / WaveSize;
- unsigned MaxArchVGPRs = ST.getAddressableNumArchVGPRs();
+ unsigned MaxArchVGPRs = ST.getMaxNumVGPRs(MF, /*WholeRegisterFile*/ false);
// Unified excess pressure conditions, accounting for VGPRs used for SGPR
// spills
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index 9f419a7fbf6834..7e73f5ce49e9b8 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -974,11 +974,13 @@ void GCNSchedStage::checkScheduling() {
<< DAG.MinOccupancy << ".\n");
}
- unsigned MaxVGPRs = ST.getMaxNumVGPRs(MF);
+ unsigned MaxVGPRs = ST.getMaxNumVGPRs(MF, /*WholeRegisterFile*/ true);
+ unsigned MaxArchVGPRs = ST.getMaxNumVGPRs(MF, /*WholeRegisterFile*/ false);
unsigned MaxSGPRs = ST.getMaxNumSGPRs(MF);
- if (PressureAfter.getVGPRNum(false) > MaxVGPRs ||
- PressureAfter.getAGPRNum() > MaxVGPRs ||
+ if (PressureAfter.getVGPRNum(ST.hasGFX90AInsts()) > MaxVGPRs ||
+ PressureAfter.getVGPRNum(false) > MaxArchVGPRs ||
+ PressureAfter.getAGPRNum() > MaxArchVGPRs ||
PressureAfter.getSGPRNum() > MaxSGPRs) {
DAG.RescheduleRegions[RegionIdx] = true;
DAG.RegionsWithHighRP[RegionIdx] = true;
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index ca51da659c3311..fd3d124e257414 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -1414,26 +1414,32 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
/// \returns the maximum number of VGPRs that can be used and still achieved
/// at least the specified number of waves \p WavesPerEU.
- unsigned getMaxNumVGPRs(unsigned WavesPerEU) const {
- return AMDGPU::IsaInfo::getMaxNumVGPRs(this, WavesPerEU);
+ unsigned getMaxNumVGPRs(unsigned WavesPerEU,
+ bool WholeRegisterFile = true) const {
+ return AMDGPU::IsaInfo::getMaxNumVGPRs(this, WavesPerEU, WholeRegisterFile);
}
/// \returns max num VGPRs. This is the common utility function
/// called by MachineFunction and Function variants of getMaxNumVGPRs.
unsigned getBaseMaxNumVGPRs(const Function &F,
- std::pair<unsigned, unsigned> WavesPerEU) const;
+ std::pair<unsigned, unsigned> WavesPerEU,
+ bool WholeRegisterFile) const;
/// \returns Maximum number of VGPRs that meets number of waves per execution
/// unit requirement for function \p F, or number of VGPRs explicitly
/// requested using "amdgpu-num-vgpr" attribute attached to function \p F.
+ /// If \p WholeRegisterFile is false and our target has a unified register
+ /// file, getMaxNumVGPRs will instead \return the maximum number of ArchVGPRs.
///
/// \returns Value that meets number of waves per execution unit requirement
/// if explicitly requested value cannot be converted to integer, violates
/// subtarget's specifications, or does not meet number of waves per execution
/// unit requirement.
- unsigned getMaxNumVGPRs(const Function &F) const;
+ unsigned getMaxNumVGPRs(const Function &F,
+ bool WholeRegisterFile = true) const;
- unsigned getMaxNumAGPRs(const Function &F) const {
- return getMaxNumVGPRs(F);
+ unsigned getMaxNumAGPRs(const Function &F,
+ bool WholeRegisterFile = true) const {
+ return getMaxNumVGPRs(F, WholeRegisterFile);
}
/// \returns Maximum number of VGPRs that meets number of waves per execution
@@ -1444,7 +1450,8 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
/// if explicitly requested value cannot be converted to integer, violates
/// subtarget's specifications, or does not meet number of waves per execution
/// unit requirement.
- unsigned getMaxNumVGPRs(const MachineFunction &MF) const;
+ unsigned getMaxNumVGPRs(const MachineFunction &MF,
+ bool WholeRegisterFile = true) const;
void getPostRAMutations(
std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations)
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 6d53f68ace70df..a75b080b5850af 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -1155,12 +1155,15 @@ unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
return std::min(MinNumVGPRs, AddrsableNumVGPRs);
}
-unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
+unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
+ bool WholeRegisterFile) {
assert(WavesPerEU != 0);
unsigned MaxNumVGPRs = alignDown(getTotalNumVGPRs(STI) / WavesPerEU,
getVGPRAllocGranule(STI));
- unsigned AddressableNumVGPRs = getAddressableNumVGPRs(STI);
+ unsigned AddressableNumVGPRs = WholeRegisterFile
+ ? getAddressableNumVGPRs(STI)
+ : getAddressableNumArchVGPRs(STI);
return std::min(MaxNumVGPRs, AddressableNumVGPRs);
}
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index 29ac402d953513..f49bb26e060e37 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -308,7 +308,8 @@ unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);
/// \returns Maximum number of VGPRs that meets given number of waves per
/// execution unit requirement for given subtarget \p STI.
-unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);
+unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
+ bool WholeRegisterFile);
/// \returns Number of waves reachable for a given \p NumVGPRs usage for given
/// subtarget \p STI.
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.iglp.opt.single.2b.mir b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.iglp.opt.single.2b.mir
index 091b29c23d60e2..dc5df7f039b04c 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.iglp.opt.single.2b.mir
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.iglp.opt.single.2b.mir
@@ -4,6 +4,8 @@
--- |
define amdgpu_kernel void @single-wave-phase-2b(ptr addrspace(3) noalias %in0, ptr addrspace(3) noalias %in1, ptr addrspace(3) noalias %in2, ptr addrspace(3) noalias %in3, ptr addrspace(3) noalias %in4, ptr addrspace(3) noalias %in5, ptr addrspace(3) noalias %in6, ptr addrspace(3) noalias %in7, ptr addrspace(3) noalias %in8, ptr addrspace(3) noalias %in9, ptr addrspace(3) noalias %in10, ptr addrspace(3) noalias %in11, ptr addrspace(7) noalias %in12, ptr addrspace(7) noalias %in13, ptr addrspace(7) noalias %in14, ptr addrspace(7) noalias %in15, ptr addrspace(7) noalias %in16, ptr addrspace(7) noalias %in17, ptr addrspace(7) noalias %in18, ptr addrspace(7) noalias %in19, ptr addrspace(7) noalias %in20, ptr addrspace(7) noalias %in21, ptr addrspace(7) noalias %in22, ptr addrspace(7) noalias %in23, ptr addrspace(7) noalias %in24, ptr addrspace(7) noalias %in25, ptr addrspace(7) noalias %in26, ptr addrspace(7) noalias %in27, ptr addrspace(7) noalias %in28, ptr addrspace(7) noalias %in29) #0 { ret void }
+ attributes #0 = { nounwind "amdgpu-waves-per-eu"="1,1" "amdgpu-flat-work-group-size"="1,256" }
+
!0 = distinct !{!0}
!1 = !{!1, !0}
...
>From db3214c16372380e3789b8bf038d29b978c954ff Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Tue, 19 Mar 2024 15:22:14 -0700
Subject: [PATCH 2/6] fix logic/calculation bug
---
llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 9 +++++----
llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h | 3 ++-
2 files changed, 7 insertions(+), 5 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index a75b080b5850af..25afa07bf90142 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -1102,9 +1102,9 @@ unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI,
return IsWave32 ? 8 : 4;
}
-unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI) {
+unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI, bool WholeRegisterFile) {
if (STI->getFeatureBits().test(FeatureGFX90AInsts))
- return 512;
+ return WholeRegisterFile ? 512 : getAddressableNumArchVGPRs(STI);
if (!isGFX10Plus(*STI))
return 256;
bool IsWave32 = STI->getFeatureBits().test(FeatureWavefrontSize32);
@@ -1159,8 +1159,9 @@ unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
bool WholeRegisterFile) {
assert(WavesPerEU != 0);
- unsigned MaxNumVGPRs = alignDown(getTotalNumVGPRs(STI) / WavesPerEU,
- getVGPRAllocGranule(STI));
+ unsigned MaxNumVGPRs =
+ alignDown(getTotalNumVGPRs(STI, WholeRegisterFile) / WavesPerEU,
+ getVGPRAllocGranule(STI));
unsigned AddressableNumVGPRs = WholeRegisterFile
? getAddressableNumVGPRs(STI)
: getAddressableNumArchVGPRs(STI);
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index f49bb26e060e37..e56df21faa0ca0 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -293,7 +293,8 @@ unsigned getVGPREncodingGranule(
std::optional<bool> EnableWavefrontSize32 = std::nullopt);
/// \returns Total number of VGPRs for given subtarget \p STI.
-unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI);
+unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI,
+ bool WholeRegisterFile = true);
/// \returns Addressable number of architectural VGPRs for a given subtarget \p
/// STI.
>From dca10b1045e34325c0aad7a3f7f1d00065266bde Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Thu, 21 Mar 2024 10:38:45 -0700
Subject: [PATCH 3/6] clamp to TotalNumArch + disallow whole register file for
agpr
Change-Id: Ief7c8cd606934185968a914105b8795f669e204d
---
llvm/lib/Target/AMDGPU/GCNSubtarget.h | 5 ++---
.../Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 19 +++++++++++++++----
llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h | 8 ++++++--
3 files changed, 23 insertions(+), 9 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index fd3d124e257414..2b1e7570f9e96f 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -1437,9 +1437,8 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
unsigned getMaxNumVGPRs(const Function &F,
bool WholeRegisterFile = true) const;
- unsigned getMaxNumAGPRs(const Function &F,
- bool WholeRegisterFile = true) const {
- return getMaxNumVGPRs(F, WholeRegisterFile);
+ unsigned getMaxNumAGPRs(const Function &F) const {
+ return getMaxNumVGPRs(F, false);
}
/// \returns Maximum number of VGPRs that meets number of waves per execution
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 25afa07bf90142..1c91ff57c1f3dd 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -1102,9 +1102,9 @@ unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI,
return IsWave32 ? 8 : 4;
}
-unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI, bool WholeRegisterFile) {
+unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI) {
if (STI->getFeatureBits().test(FeatureGFX90AInsts))
- return WholeRegisterFile ? 512 : getAddressableNumArchVGPRs(STI);
+ return 512;
if (!isGFX10Plus(*STI))
return 256;
bool IsWave32 = STI->getFeatureBits().test(FeatureWavefrontSize32);
@@ -1113,6 +1113,12 @@ unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI, bool WholeRegisterFile) {
return IsWave32 ? 1024 : 512;
}
+unsigned getTotalNumArchVGPRs(const MCSubtargetInfo *STI) {
+ if (STI->getFeatureBits().test(FeatureGFX90AInsts))
+ return 256;
+ return getTotalNumVGPRs(STI);
+}
+
unsigned getAddressableNumArchVGPRs(const MCSubtargetInfo *STI) { return 256; }
unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI) {
@@ -1159,9 +1165,14 @@ unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
bool WholeRegisterFile) {
assert(WavesPerEU != 0);
+ unsigned VGPRsInRegFile =
+ WholeRegisterFile
+ ? getTotalNumVGPRs(STI)
+ : std::min(getTotalNumVGPRs(STI), getTotalNumArchVGPRs(STI));
+
unsigned MaxNumVGPRs =
- alignDown(getTotalNumVGPRs(STI, WholeRegisterFile) / WavesPerEU,
- getVGPRAllocGranule(STI));
+ alignDown(VGPRsInRegFile / WavesPerEU, getVGPRAllocGranule(STI));
+
unsigned AddressableNumVGPRs = WholeRegisterFile
? getAddressableNumVGPRs(STI)
: getAddressableNumArchVGPRs(STI);
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index e56df21faa0ca0..c367ec96daf029 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -293,8 +293,12 @@ unsigned getVGPREncodingGranule(
std::optional<bool> EnableWavefrontSize32 = std::nullopt);
/// \returns Total number of VGPRs for given subtarget \p STI.
-unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI,
- bool WholeRegisterFile = true);
+unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI);
+
+/// \returns Total number of Arch VGPRs for given subtarget \p STI. This is
+/// only different from getTotalNumVGPRs if the target has a unified register
+/// file.
+unsigned getTotalNumArchVGPRs(const MCSubtargetInfo *STI);
/// \returns Addressable number of architectural VGPRs for a given subtarget \p
/// STI.
>From 90a3ba63a5ce309140fa4407b461b74abb744218 Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Thu, 21 Mar 2024 13:59:03 -0700
Subject: [PATCH 4/6] Use getTotalNumVGPRs for maxVGPR calculation (do not
preclamp)
Change-Id: I151d9224db064da7004cc248118dc77bc70b3ea0
---
llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 7 +------
1 file changed, 1 insertion(+), 6 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 1c91ff57c1f3dd..27c4690bfb722b 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -1165,13 +1165,8 @@ unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
bool WholeRegisterFile) {
assert(WavesPerEU != 0);
- unsigned VGPRsInRegFile =
- WholeRegisterFile
- ? getTotalNumVGPRs(STI)
- : std::min(getTotalNumVGPRs(STI), getTotalNumArchVGPRs(STI));
-
unsigned MaxNumVGPRs =
- alignDown(VGPRsInRegFile / WavesPerEU, getVGPRAllocGranule(STI));
+ alignDown(getTotalNumVGPRs(STI) / WavesPerEU, getVGPRAllocGranule(STI));
unsigned AddressableNumVGPRs = WholeRegisterFile
? getAddressableNumVGPRs(STI)
>From e9eb07da83324af082dfc3035ed3911d9010f85f Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <jeffrey.byrnes at amd.com>
Date: Fri, 22 Mar 2024 09:54:50 -0700
Subject: [PATCH 5/6] Update llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
clang-format comment
Co-authored-by: Matt Arsenault <arsenm2 at gmail.com>
---
llvm/lib/Target/AMDGPU/GCNRegPressure.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
index b6fd5deb880f76..16ed919ffc86e9 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
@@ -124,7 +124,7 @@ bool GCNRegPressure::less(const MachineFunction &MF, const GCNRegPressure &O,
unsigned OtherVGPRForSGPRSpills =
(OtherExcessSGPR + (WaveSize - 1)) / WaveSize;
- unsigned MaxArchVGPRs = ST.getMaxNumVGPRs(MF, /*WholeRegisterFile*/ false);
+ unsigned MaxArchVGPRs = ST.getMaxNumVGPRs(MF, /*WholeRegisterFile=*/ false);
// Unified excess pressure conditions, accounting for VGPRs used for SGPR
// spills
>From becee75c6030859fe5e336193d8a7ed8d834202d Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <jeffrey.byrnes at amd.com>
Date: Fri, 22 Mar 2024 09:55:01 -0700
Subject: [PATCH 6/6] Update llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
clang-format comment
Co-authored-by: Matt Arsenault <arsenm2 at gmail.com>
---
llvm/lib/Target/AMDGPU/GCNRegPressure.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
index 16ed919ffc86e9..d0fe32552eb6a1 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
@@ -110,7 +110,7 @@ bool GCNRegPressure::less(const MachineFunction &MF, const GCNRegPressure &O,
if (Occ != OtherOcc)
return Occ > OtherOcc;
- unsigned MaxVGPRs = ST.getMaxNumVGPRs(MF, /*WholeRegisterFile*/ true);
+ unsigned MaxVGPRs = ST.getMaxNumVGPRs(MF, /*WholeRegisterFile=*/ true);
unsigned MaxSGPRs = ST.getMaxNumSGPRs(MF);
// SGPR excess pressure conditions
More information about the llvm-commits
mailing list