[llvm] [AMDGPU][True16][CodeGen] update waitcnt for true16 (PR #128927)
Brox Chen via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 10 10:59:13 PDT 2025
https://github.com/broxigarchen updated https://github.com/llvm/llvm-project/pull/128927
>From 45c8fe1b5c7001d18e50e50154d2c2592bbdf3f1 Mon Sep 17 00:00:00 2001
From: guochen2 <guochen2 at amd.com>
Date: Wed, 26 Feb 2025 13:09:03 -0500
Subject: [PATCH 1/2] insertwaitcnt pass update for true16
---
llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp | 30 ++++++++++++---------
llvm/lib/Target/AMDGPU/SIRegisterInfo.h | 13 +++++++++
llvm/test/CodeGen/AMDGPU/spillv16.ll | 2 +-
3 files changed, 32 insertions(+), 13 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index 13e01b53639d2..ffabaa6ba4107 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -130,10 +130,10 @@ enum WaitEventType {
// We reserve a fixed number of VGPR slots in the scoring tables for
// special tokens like SCMEM_LDS (needed for buffer load to LDS).
enum RegisterMapping {
- SQ_MAX_PGM_VGPRS = 512, // Maximum programmable VGPRs across all targets.
- AGPR_OFFSET = 256, // Maximum programmable ArchVGPRs across all targets.
- SQ_MAX_PGM_SGPRS = 128, // Maximum programmable SGPRs across all targets.
- NUM_EXTRA_VGPRS = 9, // Reserved slots for DS.
+ SQ_MAX_PGM_VGPRS = 1024, // Maximum 16-bit VGPR/AGPR slots across all targets.
+ AGPR_OFFSET = 512, // Maximum 16-bit ArchVGPR slots; AGPR slots start here.
+ SQ_MAX_PGM_SGPRS = 128, // Maximum programmable SGPRs across all targets.
+ NUM_EXTRA_VGPRS = 9, // Reserved slots for DS.
// Artificial register slots to track LDS writes into specific LDS locations
// if a location is known. When slots are exhausted or location is
// unknown use the first slot. The first slot is also always updated in
@@ -748,27 +748,33 @@ RegInterval WaitcntBrackets::getRegInterval(const MachineInstr *MI,
RegInterval Result;
- unsigned Reg = TRI->getEncodingValue(AMDGPU::getMCReg(Op.getReg(), *ST)) &
- AMDGPU::HWEncoding::REG_IDX_MASK;
+ MCRegister MCReg = AMDGPU::getMCReg(Op.getReg(), *ST);
+ unsigned RegIdx = TRI->getHWRegIndex(MCReg);
+ assert(isUInt<8>(RegIdx));
+ unsigned Reg = (RegIdx << 1) | (AMDGPU::isHi16Reg(MCReg, *TRI) ? 1 : 0);
+ const TargetRegisterClass *RC = TRI->getPhysRegBaseClass(Op.getReg());
+ unsigned Size = TRI->getRegSizeInBits(*RC);
+
+ // AGPRs/VGPRs are tracked every 16 bits, SGPRs by 32 bits
if (TRI->isVectorRegister(*MRI, Op.getReg())) {
assert(Reg <= SQ_MAX_PGM_VGPRS);
Result.first = Reg;
if (TRI->isAGPR(*MRI, Op.getReg()))
Result.first += AGPR_OFFSET;
assert(Result.first >= 0 && Result.first < SQ_MAX_PGM_VGPRS);
- } else if (TRI->isSGPRReg(*MRI, Op.getReg()) && Reg < SQ_MAX_PGM_SGPRS) {
+ assert(Size % 16 == 0);
+ Result.second = Result.first + (Size / 16);
+ } else if (TRI->isSGPRReg(*MRI, Op.getReg()) &&
+ (Reg >> 1) < SQ_MAX_PGM_SGPRS) {
// SGPRs including VCC, TTMPs and EXEC but excluding read-only scalar
// sources like SRC_PRIVATE_BASE.
- Result.first = Reg + NUM_ALL_VGPRS;
+ Result.first = (Reg >> 1) + NUM_ALL_VGPRS;
+ Result.second = Result.first + ((Size + 16) / 32);
} else {
return {-1, -1};
}
- const TargetRegisterClass *RC = TRI->getPhysRegBaseClass(Op.getReg());
- unsigned Size = TRI->getRegSizeInBits(*RC);
- Result.second = Result.first + ((Size + 16) / 32);
-
return Result;
}
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
index a64180daea2ad..785f019687b55 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
@@ -295,8 +295,21 @@ class SIRegisterInfo final : public AMDGPUGenRegisterInfo {
getRegClassForOperandReg(const MachineRegisterInfo &MRI,
const MachineOperand &MO) const;
+ bool isVGPR(MCRegister Reg) const {
+ const TargetRegisterClass *RC = getPhysRegBaseClass(Reg);
+ // Registers without classes are unaddressable, SGPR-like registers.
+ return RC && isVGPRClass(RC);
+ }
bool isVGPR(const MachineRegisterInfo &MRI, Register Reg) const;
+ bool isAGPR(MCRegister Reg) const {
+ const TargetRegisterClass *RC = getPhysRegBaseClass(Reg);
+ // Registers without classes are unaddressable, SGPR-like registers.
+ return RC && isAGPRClass(RC);
+ }
bool isAGPR(const MachineRegisterInfo &MRI, Register Reg) const;
+ bool isVectorRegister(MCRegister Reg) const {
+ return isVGPR(Reg) || isAGPR(Reg);
+ }
bool isVectorRegister(const MachineRegisterInfo &MRI, Register Reg) const {
return isVGPR(MRI, Reg) || isAGPR(MRI, Reg);
}
diff --git a/llvm/test/CodeGen/AMDGPU/spillv16.ll b/llvm/test/CodeGen/AMDGPU/spillv16.ll
index 0e45df223465d..3d21860e2af40 100644
--- a/llvm/test/CodeGen/AMDGPU/spillv16.ll
+++ b/llvm/test/CodeGen/AMDGPU/spillv16.ll
@@ -61,8 +61,8 @@ define void @spill_i16_alu_two_vals() {
; GCN-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 offset:4 glc dlc
; GCN-TRUE16-NEXT: s_waitcnt vmcnt(0)
; GCN-TRUE16-NEXT: scratch_load_d16_hi_b16 v0, off, s32 offset:6 ; 2-byte Folded Reload
-; GCN-TRUE16-NEXT: s_waitcnt vmcnt(0)
; GCN-TRUE16-NEXT: v_add_nc_u16 v0.l, 0x7b, v0.l
+; GCN-TRUE16-NEXT: s_waitcnt vmcnt(0)
; GCN-TRUE16-NEXT: scratch_store_d16_hi_b16 off, v0, s32 dlc
; GCN-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0
; GCN-TRUE16-NEXT: scratch_store_b16 off, v0, s32 offset:4 dlc
>From 3400d81446e9b307abcf08f820f783bf2ffb366a Mon Sep 17 00:00:00 2001
From: guochen2 <guochen2 at amd.com>
Date: Mon, 10 Mar 2025 13:11:18 -0400
Subject: [PATCH 2/2] address comment
---
llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp | 9 ++++-----
llvm/lib/Target/AMDGPU/SIRegisterInfo.h | 13 -------------
2 files changed, 4 insertions(+), 18 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index ffabaa6ba4107..e57270113bff9 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -751,13 +751,13 @@ RegInterval WaitcntBrackets::getRegInterval(const MachineInstr *MI,
MCRegister MCReg = AMDGPU::getMCReg(Op.getReg(), *ST);
unsigned RegIdx = TRI->getHWRegIndex(MCReg);
assert(isUInt<8>(RegIdx));
- unsigned Reg = (RegIdx << 1) | (AMDGPU::isHi16Reg(MCReg, *TRI) ? 1 : 0);
const TargetRegisterClass *RC = TRI->getPhysRegBaseClass(Op.getReg());
unsigned Size = TRI->getRegSizeInBits(*RC);
// AGPRs/VGPRs are tracked every 16 bits, SGPRs by 32 bits
if (TRI->isVectorRegister(*MRI, Op.getReg())) {
+ unsigned Reg = (RegIdx << 1) | (AMDGPU::isHi16Reg(MCReg, *TRI) ? 1 : 0);
assert(Reg <= SQ_MAX_PGM_VGPRS);
Result.first = Reg;
if (TRI->isAGPR(*MRI, Op.getReg()))
@@ -765,12 +765,11 @@ RegInterval WaitcntBrackets::getRegInterval(const MachineInstr *MI,
assert(Result.first >= 0 && Result.first < SQ_MAX_PGM_VGPRS);
assert(Size % 16 == 0);
Result.second = Result.first + (Size / 16);
- } else if (TRI->isSGPRReg(*MRI, Op.getReg()) &&
- (Reg >> 1) < SQ_MAX_PGM_SGPRS) {
+ } else if (TRI->isSGPRReg(*MRI, Op.getReg()) && RegIdx < SQ_MAX_PGM_SGPRS) {
// SGPRs including VCC, TTMPs and EXEC but excluding read-only scalar
// sources like SRC_PRIVATE_BASE.
- Result.first = (Reg >> 1) + NUM_ALL_VGPRS;
- Result.second = Result.first + ((Size + 16) / 32);
+ Result.first = RegIdx + NUM_ALL_VGPRS;
+ Result.second = Result.first + divideCeil(Size, 32);
} else {
return {-1, -1};
}
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
index 785f019687b55..a64180daea2ad 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
@@ -295,21 +295,8 @@ class SIRegisterInfo final : public AMDGPUGenRegisterInfo {
getRegClassForOperandReg(const MachineRegisterInfo &MRI,
const MachineOperand &MO) const;
- bool isVGPR(MCRegister Reg) const {
- const TargetRegisterClass *RC = getPhysRegBaseClass(Reg);
- // Registers without classes are unaddressable, SGPR-like registers.
- return RC && isVGPRClass(RC);
- }
bool isVGPR(const MachineRegisterInfo &MRI, Register Reg) const;
- bool isAGPR(MCRegister Reg) const {
- const TargetRegisterClass *RC = getPhysRegBaseClass(Reg);
- // Registers without classes are unaddressable, SGPR-like registers.
- return RC && isAGPRClass(RC);
- }
bool isAGPR(const MachineRegisterInfo &MRI, Register Reg) const;
- bool isVectorRegister(MCRegister Reg) const {
- return isVGPR(Reg) || isAGPR(Reg);
- }
bool isVectorRegister(const MachineRegisterInfo &MRI, Register Reg) const {
return isVGPR(MRI, Reg) || isAGPR(MRI, Reg);
}
More information about the llvm-commits
mailing list