[llvm] 220147d - [AMDGPU] Make aperture registers 64 bit
Pierre van Houtryve via llvm-commits
llvm-commits at lists.llvm.org
Tue Nov 22 01:18:05 PST 2022
Author: Pierre van Houtryve
Date: 2022-11-22T09:17:59Z
New Revision: 220147d536f35d06c7037185d976ecae9df8f32c
URL: https://github.com/llvm/llvm-project/commit/220147d536f35d06c7037185d976ecae9df8f32c
DIFF: https://github.com/llvm/llvm-project/commit/220147d536f35d06c7037185d976ecae9df8f32c.diff
LOG: [AMDGPU] Make aperture registers 64 bit
Makes the SRC_(SHARED|PRIVATE)_(BASE|LIMIT) registers 64 bit instead of 32.
They're still usable as 32 bit operands by using the _LO suffix.
Preparation for D137542
Reviewed By: arsenm
Differential Revision: https://reviews.llvm.org/D137767
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp
llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
llvm/lib/Target/AMDGPU/SIRegisterInfo.td
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp b/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp
index ede2b2b671c17..547ca652e57f5 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp
@@ -273,9 +273,13 @@ AMDGPUResourceUsageAnalysis::analyzeResourceUsage(
case AMDGPU::M0:
case AMDGPU::M0_LO16:
case AMDGPU::M0_HI16:
+ case AMDGPU::SRC_SHARED_BASE_LO:
case AMDGPU::SRC_SHARED_BASE:
+ case AMDGPU::SRC_SHARED_LIMIT_LO:
case AMDGPU::SRC_SHARED_LIMIT:
+ case AMDGPU::SRC_PRIVATE_BASE_LO:
case AMDGPU::SRC_PRIVATE_BASE:
+ case AMDGPU::SRC_PRIVATE_LIMIT_LO:
case AMDGPU::SRC_PRIVATE_LIMIT:
case AMDGPU::SGPR_NULL:
case AMDGPU::SGPR_NULL64:
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 1f7bb1b730d43..882cdf2dada0b 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -2338,9 +2338,13 @@ void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
static bool isInlineValue(unsigned Reg) {
switch (Reg) {
+ case AMDGPU::SRC_SHARED_BASE_LO:
case AMDGPU::SRC_SHARED_BASE:
+ case AMDGPU::SRC_SHARED_LIMIT_LO:
case AMDGPU::SRC_SHARED_LIMIT:
+ case AMDGPU::SRC_PRIVATE_BASE_LO:
case AMDGPU::SRC_PRIVATE_BASE:
+ case AMDGPU::SRC_PRIVATE_LIMIT_LO:
case AMDGPU::SRC_PRIVATE_LIMIT:
case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
return true;
@@ -5737,9 +5741,13 @@ bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
return hasSGPR104_SGPR105();
switch (RegNo) {
+ case AMDGPU::SRC_SHARED_BASE_LO:
case AMDGPU::SRC_SHARED_BASE:
+ case AMDGPU::SRC_SHARED_LIMIT_LO:
case AMDGPU::SRC_SHARED_LIMIT:
+ case AMDGPU::SRC_PRIVATE_BASE_LO:
case AMDGPU::SRC_PRIVATE_BASE:
+ case AMDGPU::SRC_PRIVATE_LIMIT_LO:
case AMDGPU::SRC_PRIVATE_LIMIT:
return isGFX9Plus();
case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index 9b9d405172e42..3e189e2cc2f38 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -1636,6 +1636,7 @@ MCOperand AMDGPUDisassembler::decodeSpecialReg32(unsigned Val) const {
using namespace AMDGPU;
switch (Val) {
+ // clang-format off
case 102: return createRegOperand(FLAT_SCR_LO);
case 103: return createRegOperand(FLAT_SCR_HI);
case 104: return createRegOperand(XNACK_MASK_LO);
@@ -1652,16 +1653,17 @@ MCOperand AMDGPUDisassembler::decodeSpecialReg32(unsigned Val) const {
return isGFX11Plus() ? createRegOperand(M0) : createRegOperand(SGPR_NULL);
case 126: return createRegOperand(EXEC_LO);
case 127: return createRegOperand(EXEC_HI);
- case 235: return createRegOperand(SRC_SHARED_BASE);
- case 236: return createRegOperand(SRC_SHARED_LIMIT);
- case 237: return createRegOperand(SRC_PRIVATE_BASE);
- case 238: return createRegOperand(SRC_PRIVATE_LIMIT);
+ case 235: return createRegOperand(SRC_SHARED_BASE_LO);
+ case 236: return createRegOperand(SRC_SHARED_LIMIT_LO);
+ case 237: return createRegOperand(SRC_PRIVATE_BASE_LO);
+ case 238: return createRegOperand(SRC_PRIVATE_LIMIT_LO);
case 239: return createRegOperand(SRC_POPS_EXITING_WAVE_ID);
case 251: return createRegOperand(SRC_VCCZ);
case 252: return createRegOperand(SRC_EXECZ);
case 253: return createRegOperand(SRC_SCC);
case 254: return createRegOperand(LDS_DIRECT);
default: break;
+ // clang-format on
}
return errOperand(Val, "unknown operand encoding " + Twine(Val));
}
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 354e40d4f5bc3..0f10aceb98885 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -563,7 +563,7 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
reserveRegisterTuples(Reserved, AMDGPU::SRC_EXECZ);
reserveRegisterTuples(Reserved, AMDGPU::SRC_SCC);
- // Reserve the memory aperture registers.
+ // Reserve the memory aperture registers
reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_BASE);
reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_LIMIT);
reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_BASE);
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
index fae76be2b1ddb..b1e8761fb05e9 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -232,12 +232,36 @@ def SGPR_NULL64 :
let isConstant = true;
}
-let isConstant = true in {
-defm SRC_SHARED_BASE : SIRegLoHi16<"src_shared_base", 235>;
-defm SRC_SHARED_LIMIT : SIRegLoHi16<"src_shared_limit", 236>;
-defm SRC_PRIVATE_BASE : SIRegLoHi16<"src_private_base", 237>;
-defm SRC_PRIVATE_LIMIT : SIRegLoHi16<"src_private_limit", 238>;
-} // isConstant = true
+// Aperture registers are 64 bit registers with a LO/HI 32 bit.
+// HI 32 bit cannot be used, and LO 32 is used by instructions
+// with 32 bit sources.
+//
+// Note that the low 32 bits are essentially useless as they
+// don't contain the lower 32 bits of the address - they are in
+// the high 32 bits. The lower 32 bits are always zero (for base) or
+// -1 (for limit). Since we cannot access the high 32 bits, when we
+// need them, we need to do a 64 bit load and extract the bits manually.
+multiclass ApertureRegister<string name, bits<16> regIdx> {
+ let isConstant = true in {
+ // FIXME: We shouldn't need to define subregisters for these (nor add them to any 16 bit
+ // register classes), but if we don't it seems to confuse the TableGen
+ // backend and we end up with a lot of weird register pressure sets and classes.
+ defm _LO : SIRegLoHi16 <name, regIdx>;
+ defm _HI : SIRegLoHi16 <"", regIdx>;
+
+ def "" : RegisterWithSubRegs<name, [!cast<Register>(NAME#_LO), !cast<Register>(NAME#_HI)]> {
+ let Namespace = "AMDGPU";
+ let SubRegIndices = [sub0, sub1];
+ let HWEncoding = !cast<Register>(NAME#_LO).HWEncoding;
+ }
+ } // isConstant = true
+}
+
+defm SRC_SHARED_BASE : ApertureRegister<"src_shared_base", 235>;
+defm SRC_SHARED_LIMIT : ApertureRegister<"src_shared_limit", 236>;
+defm SRC_PRIVATE_BASE : ApertureRegister<"src_private_base", 237>;
+defm SRC_PRIVATE_LIMIT : ApertureRegister<"src_private_limit", 238>;
+
defm SRC_POPS_EXITING_WAVE_ID : SIRegLoHi16<"src_pops_exiting_wave_id", 239>;
// Not addressable
@@ -664,8 +688,9 @@ let GeneratePressureSet = 0, HasSGPR = 1 in {
// See comments in SIInstructions.td for more info.
def SReg_32_XM0_XEXEC : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16, i1], 32,
(add SGPR_32, VCC_LO, VCC_HI, FLAT_SCR_LO, FLAT_SCR_HI, XNACK_MASK_LO, XNACK_MASK_HI,
- SGPR_NULL, SGPR_NULL_HI, TTMP_32, TMA_LO, TMA_HI, TBA_LO, TBA_HI, SRC_SHARED_BASE,
- SRC_SHARED_LIMIT, SRC_PRIVATE_BASE, SRC_PRIVATE_LIMIT, SRC_POPS_EXITING_WAVE_ID,
+ SGPR_NULL, SGPR_NULL_HI, TTMP_32, TMA_LO, TMA_HI, TBA_LO, TBA_HI, SRC_SHARED_BASE_LO,
+ SRC_SHARED_LIMIT_LO, SRC_PRIVATE_BASE_LO, SRC_PRIVATE_LIMIT_LO, SRC_SHARED_BASE_HI,
+ SRC_SHARED_LIMIT_HI, SRC_PRIVATE_BASE_HI, SRC_PRIVATE_LIMIT_HI, SRC_POPS_EXITING_WAVE_ID,
SRC_VCCZ, SRC_EXECZ, SRC_SCC)> {
let AllocationPriority = 0;
}
@@ -673,9 +698,11 @@ def SReg_32_XM0_XEXEC : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2
def SReg_LO16_XM0_XEXEC : SIRegisterClass<"AMDGPU", [i16, f16], 16,
(add SGPR_LO16, VCC_LO_LO16, VCC_HI_LO16, FLAT_SCR_LO_LO16, FLAT_SCR_HI_LO16,
XNACK_MASK_LO_LO16, XNACK_MASK_HI_LO16, SGPR_NULL_LO16, SGPR_NULL_HI_LO16, TTMP_LO16,
- TMA_LO_LO16, TMA_HI_LO16, TBA_LO_LO16, TBA_HI_LO16, SRC_SHARED_BASE_LO16,
- SRC_SHARED_LIMIT_LO16, SRC_PRIVATE_BASE_LO16, SRC_PRIVATE_LIMIT_LO16,
- SRC_POPS_EXITING_WAVE_ID_LO16, SRC_VCCZ_LO16, SRC_EXECZ_LO16, SRC_SCC_LO16)> {
+ TMA_LO_LO16, TMA_HI_LO16, TBA_LO_LO16, TBA_HI_LO16, SRC_SHARED_BASE_LO_LO16,
+ SRC_SHARED_LIMIT_LO_LO16, SRC_PRIVATE_BASE_LO_LO16, SRC_PRIVATE_LIMIT_LO_LO16,
+ SRC_SHARED_BASE_HI_LO16, SRC_SHARED_LIMIT_HI_LO16, SRC_PRIVATE_BASE_HI_LO16,
+ SRC_PRIVATE_LIMIT_HI_LO16, SRC_POPS_EXITING_WAVE_ID_LO16, SRC_VCCZ_LO16,
+ SRC_EXECZ_LO16, SRC_SCC_LO16)> {
let Size = 16;
let AllocationPriority = 0;
}
@@ -737,7 +764,8 @@ def TTMP_64 : SIRegisterClass<"AMDGPU", [v2i32, i64, f64, v4i16, v4f16], 32,
}
def SReg_64_XEXEC : SIRegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, i1, v4i16, v4f16], 32,
- (add SGPR_64, VCC, FLAT_SCR, XNACK_MASK, SGPR_NULL64, TTMP_64, TBA, TMA)> {
+ (add SGPR_64, VCC, FLAT_SCR, XNACK_MASK, SGPR_NULL64, SRC_SHARED_BASE,
+ SRC_SHARED_LIMIT, SRC_PRIVATE_BASE, SRC_PRIVATE_LIMIT, TTMP_64, TBA, TMA)> {
let CopyCost = 1;
let AllocationPriority = 1;
let HasSGPR = 1;
More information about the llvm-commits
mailing list