[llvm] d08c297 - [AMDGPU] Add MC support for new gfx1250 src_flat_scratch_base_lo/hi (#152203)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Aug 5 14:35:51 PDT 2025
Author: Stanislav Mekhanoshin
Date: 2025-08-05T14:35:48-07:00
New Revision: d08c2977e86fc7220b3b9c5cb83616705c10046e
URL: https://github.com/llvm/llvm-project/commit/d08c2977e86fc7220b3b9c5cb83616705c10046e
DIFF: https://github.com/llvm/llvm-project/commit/d08c2977e86fc7220b3b9c5cb83616705c10046e.diff
LOG: [AMDGPU] Add MC support for new gfx1250 src_flat_scratch_base_lo/hi (#152203)
Added:
llvm/test/MC/AMDGPU/gfx1250_asm_operands.s
llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_operands.txt
Modified:
llvm/lib/Target/AMDGPU/AMDGPU.td
llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
llvm/lib/Target/AMDGPU/GCNSubtarget.h
llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
llvm/lib/Target/AMDGPU/SIRegisterInfo.td
llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index 18f3c4761748a..d84f512f4976d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -1365,6 +1365,13 @@ def FeatureXF32Insts : SubtargetFeature<"xf32-insts",
"v_mfma_f32_16x16x8_xf32 and v_mfma_f32_32x32x4_xf32"
>;
+def FeatureGloballyAddressableScratch : SubtargetFeature<
+ "globally-addressable-scratch",
+ "HasGloballyAddressableScratch",
+ "true",
+ "FLAT instructions can access scratch memory for any thread in any wave"
+>;
+
// FIXME: Remove after all users are migrated to attribute.
def FeatureDynamicVGPR : SubtargetFeature <"dynamic-vgpr",
"DynamicVGPR",
@@ -2055,6 +2062,7 @@ def FeatureISAVersion12_50 : FeatureSet<
FeatureAtomicFMinFMaxF64FlatInsts,
FeatureFlatBufferGlobalAtomicFaddF64Inst,
FeatureMemoryAtomicFAddF32DenormalSupport,
+ FeatureGloballyAddressableScratch,
FeatureKernargPreload,
FeatureVmemPrefInsts,
FeatureLshlAddU64Inst,
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index d33765db9cc7d..ff8efd2debc21 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -1620,6 +1620,10 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
return getFeatureBits()[AMDGPU::FeaturePartialNSAEncoding];
}
+ bool hasGloballyAddressableScratch() const {
+ return getFeatureBits()[AMDGPU::FeatureGloballyAddressableScratch];
+ }
+
unsigned getNSAMaxSize(bool HasSampler = false) const {
return AMDGPU::getNSAMaxSize(getSTI(), HasSampler);
}
@@ -2759,46 +2763,48 @@ static int getRegClass(RegisterKind Is, unsigned RegWidth) {
static MCRegister getSpecialRegForName(StringRef RegName) {
return StringSwitch<unsigned>(RegName)
- .Case("exec", AMDGPU::EXEC)
- .Case("vcc", AMDGPU::VCC)
- .Case("flat_scratch", AMDGPU::FLAT_SCR)
- .Case("xnack_mask", AMDGPU::XNACK_MASK)
- .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
- .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
- .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
- .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
- .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
- .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
- .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
- .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
- .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
- .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
- .Case("lds_direct", AMDGPU::LDS_DIRECT)
- .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
- .Case("m0", AMDGPU::M0)
- .Case("vccz", AMDGPU::SRC_VCCZ)
- .Case("src_vccz", AMDGPU::SRC_VCCZ)
- .Case("execz", AMDGPU::SRC_EXECZ)
- .Case("src_execz", AMDGPU::SRC_EXECZ)
- .Case("scc", AMDGPU::SRC_SCC)
- .Case("src_scc", AMDGPU::SRC_SCC)
- .Case("tba", AMDGPU::TBA)
- .Case("tma", AMDGPU::TMA)
- .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
- .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
- .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
- .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
- .Case("vcc_lo", AMDGPU::VCC_LO)
- .Case("vcc_hi", AMDGPU::VCC_HI)
- .Case("exec_lo", AMDGPU::EXEC_LO)
- .Case("exec_hi", AMDGPU::EXEC_HI)
- .Case("tma_lo", AMDGPU::TMA_LO)
- .Case("tma_hi", AMDGPU::TMA_HI)
- .Case("tba_lo", AMDGPU::TBA_LO)
- .Case("tba_hi", AMDGPU::TBA_HI)
- .Case("pc", AMDGPU::PC_REG)
- .Case("null", AMDGPU::SGPR_NULL)
- .Default(AMDGPU::NoRegister);
+ .Case("exec", AMDGPU::EXEC)
+ .Case("vcc", AMDGPU::VCC)
+ .Case("flat_scratch", AMDGPU::FLAT_SCR)
+ .Case("xnack_mask", AMDGPU::XNACK_MASK)
+ .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
+ .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
+ .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
+ .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
+ .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
+ .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
+ .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
+ .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
+ .Case("src_flat_scratch_base_lo", AMDGPU::SRC_FLAT_SCRATCH_BASE_LO)
+ .Case("src_flat_scratch_base_hi", AMDGPU::SRC_FLAT_SCRATCH_BASE_HI)
+ .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
+ .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
+ .Case("lds_direct", AMDGPU::LDS_DIRECT)
+ .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
+ .Case("m0", AMDGPU::M0)
+ .Case("vccz", AMDGPU::SRC_VCCZ)
+ .Case("src_vccz", AMDGPU::SRC_VCCZ)
+ .Case("execz", AMDGPU::SRC_EXECZ)
+ .Case("src_execz", AMDGPU::SRC_EXECZ)
+ .Case("scc", AMDGPU::SRC_SCC)
+ .Case("src_scc", AMDGPU::SRC_SCC)
+ .Case("tba", AMDGPU::TBA)
+ .Case("tma", AMDGPU::TMA)
+ .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
+ .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
+ .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
+ .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
+ .Case("vcc_lo", AMDGPU::VCC_LO)
+ .Case("vcc_hi", AMDGPU::VCC_HI)
+ .Case("exec_lo", AMDGPU::EXEC_LO)
+ .Case("exec_hi", AMDGPU::EXEC_HI)
+ .Case("tma_lo", AMDGPU::TMA_LO)
+ .Case("tma_hi", AMDGPU::TMA_HI)
+ .Case("tba_lo", AMDGPU::TBA_LO)
+ .Case("tba_hi", AMDGPU::TBA_HI)
+ .Case("pc", AMDGPU::PC_REG)
+ .Case("null", AMDGPU::SGPR_NULL)
+ .Default(AMDGPU::NoRegister);
}
bool AMDGPUAsmParser::ParseRegister(MCRegister &RegNo, SMLoc &StartLoc,
@@ -6744,6 +6750,9 @@ bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
case SRC_PRIVATE_LIMIT_LO:
case SRC_PRIVATE_LIMIT:
return isGFX9Plus();
+ case SRC_FLAT_SCRATCH_BASE_LO:
+ case SRC_FLAT_SCRATCH_BASE_HI:
+ return hasGloballyAddressableScratch();
case SRC_POPS_EXITING_WAVE_ID:
return isGFX9Plus() && !isGFX11Plus();
case TBA:
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index fef0d7eb45a8c..fb7d634e62272 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -1914,6 +1914,8 @@ MCOperand AMDGPUDisassembler::decodeSpecialReg32(unsigned Val) const {
return isGFX11Plus() ? createRegOperand(M0) : createRegOperand(SGPR_NULL);
case 126: return createRegOperand(EXEC_LO);
case 127: return createRegOperand(EXEC_HI);
+ case 230: return createRegOperand(SRC_FLAT_SCRATCH_BASE_LO);
+ case 231: return createRegOperand(SRC_FLAT_SCRATCH_BASE_HI);
case 235: return createRegOperand(SRC_SHARED_BASE_LO);
case 236: return createRegOperand(SRC_SHARED_LIMIT_LO);
case 237: return createRegOperand(SRC_PRIVATE_BASE_LO);
@@ -1947,6 +1949,7 @@ MCOperand AMDGPUDisassembler::decodeSpecialReg64(unsigned Val) const {
return createRegOperand(SGPR_NULL);
break;
case 126: return createRegOperand(EXEC);
+ case 230: return createRegOperand(SRC_FLAT_SCRATCH_BASE_LO);
case 235: return createRegOperand(SRC_SHARED_BASE);
case 236: return createRegOperand(SRC_SHARED_LIMIT);
case 237: return createRegOperand(SRC_PRIVATE_BASE);
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index c84ba1a0a9d47..5530886831cae 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -281,6 +281,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
bool RequiresCOV6 = false;
bool UseBlockVGPROpsForCSR = false;
+ bool HasGloballyAddressableScratch = false;
// Dummy feature to use for assembler in tablegen.
bool FeatureDisable = false;
@@ -1325,6 +1326,10 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
bool useVGPRBlockOpsForCSR() const { return UseBlockVGPROpsForCSR; }
+ bool hasGloballyAddressableScratch() const {
+ return HasGloballyAddressableScratch;
+ }
+
bool hasVALUMaskWriteHazard() const { return getGeneration() == GFX11; }
bool hasVALUReadSGPRHazard() const { return GFX12Insts && !GFX1250Insts; }
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index f3acc5c2ea159..ae0f304ea3041 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -598,6 +598,8 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_LIMIT);
reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_BASE);
reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_LIMIT);
+ reserveRegisterTuples(Reserved, AMDGPU::SRC_FLAT_SCRATCH_BASE_LO);
+ reserveRegisterTuples(Reserved, AMDGPU::SRC_FLAT_SCRATCH_BASE_HI);
// Reserve async counters pseudo registers
reserveRegisterTuples(Reserved, AMDGPU::ASYNCcnt);
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
index 08d07c927e4c4..ed6b973580502 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -246,6 +246,22 @@ defm SRC_SHARED_LIMIT : ApertureRegister<"src_shared_limit", 236>;
defm SRC_PRIVATE_BASE : ApertureRegister<"src_private_base", 237>;
defm SRC_PRIVATE_LIMIT : ApertureRegister<"src_private_limit", 238>;
+let isConstant = true in {
+ defm SRC_FLAT_SCRATCH_BASE_LO : SIRegLoHi16<"src_flat_scratch_base_lo", 230>;
+ defm SRC_FLAT_SCRATCH_BASE_HI : SIRegLoHi16<"src_flat_scratch_base_hi", 231>;
+
+ // Using src_flat_scratch_base_lo in a 64-bit context gets the full 64-bit
+ // hi:lo value.
+ def SRC_FLAT_SCRATCH_BASE :
+ RegisterWithSubRegs<"src_flat_scratch_base_lo",
+ [SRC_FLAT_SCRATCH_BASE_LO,
+ SRC_FLAT_SCRATCH_BASE_HI]> {
+ let Namespace = "AMDGPU";
+ let SubRegIndices = [sub0, sub1];
+ let HWEncoding = SRC_FLAT_SCRATCH_BASE_LO.HWEncoding;
+ }
+}
+
defm SRC_POPS_EXITING_WAVE_ID : SIRegLoHi16<"src_pops_exiting_wave_id", 239>;
// Not addressable
@@ -765,7 +781,7 @@ def SReg_32_XM0_XEXEC : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, bf16, v2i
SGPR_NULL, SGPR_NULL_HI, TTMP_32, TMA_LO, TMA_HI, TBA_LO, TBA_HI, SRC_SHARED_BASE_LO,
SRC_SHARED_LIMIT_LO, SRC_PRIVATE_BASE_LO, SRC_PRIVATE_LIMIT_LO, SRC_SHARED_BASE_HI,
SRC_SHARED_LIMIT_HI, SRC_PRIVATE_BASE_HI, SRC_PRIVATE_LIMIT_HI, SRC_POPS_EXITING_WAVE_ID,
- SRC_VCCZ, SRC_EXECZ, SRC_SCC)> {
+ SRC_VCCZ, SRC_EXECZ, SRC_SCC, SRC_FLAT_SCRATCH_BASE_LO, SRC_FLAT_SCRATCH_BASE_HI)> {
let AllocationPriority = 0;
}
@@ -776,7 +792,8 @@ def SReg_LO16 : SIRegisterClass<"AMDGPU", [i16, f16, bf16], 16,
SRC_SHARED_LIMIT_LO_LO16, SRC_PRIVATE_BASE_LO_LO16, SRC_PRIVATE_LIMIT_LO_LO16,
SRC_SHARED_BASE_HI_LO16, SRC_SHARED_LIMIT_HI_LO16, SRC_PRIVATE_BASE_HI_LO16,
SRC_PRIVATE_LIMIT_HI_LO16, SRC_POPS_EXITING_WAVE_ID_LO16, SRC_VCCZ_LO16,
- SRC_EXECZ_LO16, SRC_SCC_LO16, EXEC_LO_LO16, EXEC_HI_LO16, M0_CLASS_LO16)> {
+ SRC_EXECZ_LO16, SRC_SCC_LO16, EXEC_LO_LO16, EXEC_HI_LO16, M0_CLASS_LO16,
+ SRC_FLAT_SCRATCH_BASE_LO_LO16, SRC_FLAT_SCRATCH_BASE_HI_LO16)> {
let Size = 16;
let isAllocatable = 0;
let BaseClassOrder = 16;
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 65fa0884b11c9..00dcb9b52d4bd 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -2654,6 +2654,8 @@ bool isInlineValue(unsigned Reg) {
case AMDGPU::SRC_PRIVATE_BASE:
case AMDGPU::SRC_PRIVATE_LIMIT_LO:
case AMDGPU::SRC_PRIVATE_LIMIT:
+ case AMDGPU::SRC_FLAT_SCRATCH_BASE_LO:
+ case AMDGPU::SRC_FLAT_SCRATCH_BASE_HI:
case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
return true;
case AMDGPU::SRC_VCCZ:
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_operands.s b/llvm/test/MC/AMDGPU/gfx1250_asm_operands.s
new file mode 100644
index 0000000000000..8b7465b5df574
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_operands.s
@@ -0,0 +1,29 @@
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -show-encoding %s 2>&1 | FileCheck --check-prefixes=GFX1200-ERR %s
+// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -show-encoding %s | FileCheck --check-prefix=GFX1250 %s
+
+s_mov_b32 s0, src_flat_scratch_base_lo
+// GFX1200-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: src_flat_scratch_base_lo register not available on this GPU
+// GFX1250: encoding: [0xe6,0x00,0x80,0xbe]
+
+s_mov_b32 s0, src_flat_scratch_base_hi
+// GFX1200-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: src_flat_scratch_base_hi register not available on this GPU
+// GFX1250: encoding: [0xe7,0x00,0x80,0xbe]
+
+s_mov_b64 s[0:1], src_flat_scratch_base_lo
+// GFX1200-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: src_flat_scratch_base_lo register not available on this GPU
+// GFX1250: encoding: [0xe6,0x01,0x80,0xbe]
+
+s_mov_b64 s[0:1], shared_base
+// GFX1250: encoding: [0xeb,0x01,0x80,0xbe]
+
+s_mov_b64 s[0:1], src_shared_base
+// GFX1250: encoding: [0xeb,0x01,0x80,0xbe]
+
+s_mov_b64 s[0:1], shared_limit
+// GFX1250: encoding: [0xec,0x01,0x80,0xbe]
+
+s_mov_b64 s[0:1], src_shared_limit
+// GFX1250: encoding: [0xec,0x01,0x80,0xbe]
+
+s_getreg_b32 s1, hwreg(33)
+// GFX1250: encoding: [0x21,0xf8,0x81,0xb8]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_operands.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_operands.txt
new file mode 100644
index 0000000000000..a3e7e570ab9b6
--- /dev/null
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_operands.txt
@@ -0,0 +1,22 @@
+# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -disassemble -show-encoding < %s | FileCheck -strict-whitespace -check-prefix=GFX1250 %s
+
+# GFX1250: s_mov_b32 s0, src_flat_scratch_base_lo ; encoding: [0xe6,0x00,0x80,0xbe]
+0xe6,0x00,0x80,0xbe
+
+# GFX1250: s_mov_b32 s0, src_flat_scratch_base_hi ; encoding: [0xe7,0x00,0x80,0xbe]
+0xe7,0x00,0x80,0xbe
+
+# GFX1250: s_mov_b64 s[0:1], src_flat_scratch_base_lo ; encoding: [0xe6,0x01,0x80,0xbe]
+0xe6,0x01,0x80,0xbe
+
+# GFX1250: s_mov_b64 s[0:1], src_shared_base ; encoding: [0xeb,0x01,0x80,0xbe]
+0xeb,0x01,0x80,0xbe
+
+# GFX1250: s_mov_b64 s[0:1], src_shared_base ; encoding: [0xeb,0x01,0x80,0xbe]
+0xeb,0x01,0x80,0xbe
+
+# GFX1250: s_mov_b64 s[0:1], src_shared_limit ; encoding: [0xec,0x01,0x80,0xbe]
+0xec,0x01,0x80,0xbe
+
+# GFX1250: s_mov_b64 s[0:1], src_shared_limit ; encoding: [0xec,0x01,0x80,0xbe]
+0xec,0x01,0x80,0xbe
More information about the llvm-commits
mailing list