[llvm] [AMDGPU] Reduce size of SGPR arrays in SIInsertWaitcnts. NFC. (PR #130097)
Jay Foad via llvm-commits
llvm-commits at lists.llvm.org
Thu Mar 6 04:54:34 PST 2025
https://github.com/jayfoad created https://github.com/llvm/llvm-project/pull/130097
(No description was provided for this pull request.)
>From e23cc6d167869ba3197a8b8db5cc32d96fee191f Mon Sep 17 00:00:00 2001
From: Jay Foad <jay.foad at amd.com>
Date: Thu, 6 Mar 2025 12:27:23 +0000
Subject: [PATCH] [AMDGPU] Reduce size of SGPR arrays in SIInsertWaitcnts. NFC.
---
llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp | 13 ++++++-------
1 file changed, 6 insertions(+), 7 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index 14ee6ef24a082..54ec4a75fb2dc 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -132,7 +132,7 @@ enum WaitEventType {
enum RegisterMapping {
SQ_MAX_PGM_VGPRS = 512, // Maximum programmable VGPRs across all targets.
AGPR_OFFSET = 256, // Maximum programmable ArchVGPRs across all targets.
- SQ_MAX_PGM_SGPRS = 256, // Maximum programmable SGPRs across all targets.
+ SQ_MAX_PGM_SGPRS = 128, // Maximum programmable SGPRs across all targets.
NUM_EXTRA_VGPRS = 9, // Reserved slots for DS.
// Artificial register slots to track LDS writes into specific LDS locations
// if a location is known. When slots are exhausted or location is
@@ -757,14 +757,13 @@ RegInterval WaitcntBrackets::getRegInterval(const MachineInstr *MI,
if (TRI->isAGPR(*MRI, Op.getReg()))
Result.first += AGPR_OFFSET;
assert(Result.first >= 0 && Result.first < SQ_MAX_PGM_VGPRS);
- } else if (TRI->isSGPRReg(*MRI, Op.getReg())) {
- assert(Reg < SQ_MAX_PGM_SGPRS);
+ } else if (TRI->isSGPRReg(*MRI, Op.getReg()) && Reg < SQ_MAX_PGM_SGPRS) {
+ // SGPRs including VCC, TTMPs and EXEC but excluding read-only scalar
+ // sources like SRC_PRIVATE_BASE.
Result.first = Reg + NUM_ALL_VGPRS;
- }
- // TODO: Handle TTMP
- // else if (TRI->isTTMP(*MRI, Reg.getReg())) ...
- else
+ } else {
return {-1, -1};
+ }
const TargetRegisterClass *RC = TRI->getPhysRegBaseClass(Op.getReg());
unsigned Size = TRI->getRegSizeInBits(*RC);
More information about the llvm-commits mailing list