[llvm] [AMDGPU] GFX12: Add s_prefetch_inst/data instructions (PR #74448)
Mariusz Sikora via llvm-commits
llvm-commits at lists.llvm.org
Tue Dec 5 02:43:36 PST 2023
https://github.com/mariusz-sikora-at-amd created https://github.com/llvm/llvm-project/pull/74448
(No pull request description was provided.)
>From be8312027c10a0850bbc9b201640693a0ef8a7c9 Mon Sep 17 00:00:00 2001
From: Stanislav Mekhanoshin <Stanislav.Mekhanoshin at amd.com>
Date: Mon, 27 Jun 2022 15:00:20 -0700
Subject: [PATCH] [AMDGPU] GFX12: Add s_prefetch_inst/data instructions
---
.../AMDGPU/AsmParser/AMDGPUAsmParser.cpp | 6 +-
.../Disassembler/AMDGPUDisassembler.cpp | 6 +-
llvm/lib/Target/AMDGPU/SMInstructions.td | 57 +++++++++++++++++++
.../Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 10 ++++
llvm/test/MC/AMDGPU/gfx12_asm_smem.s | 35 ++++++++++++
llvm/test/MC/AMDGPU/gfx12_err.s | 5 ++
.../Disassembler/AMDGPU/gfx12_dasm_smem.txt | 31 ++++++++++
7 files changed, 146 insertions(+), 4 deletions(-)
create mode 100644 llvm/test/MC/AMDGPU/gfx12_asm_smem.s
create mode 100644 llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_smem.txt
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 649ca9d3e7487..042108aa9888d 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -4199,8 +4199,10 @@ bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
return true;
Error(getSMEMOffsetLoc(Operands),
- (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" :
- "expected a 21-bit signed offset");
+ isGFX12Plus()
+ ? "expected a 24-bit signed offset"
+ : (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset"
+ : "expected a 21-bit signed offset");
return false;
}
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index 5be542cd46883..3175f6358a045 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -91,9 +91,11 @@ static DecodeStatus decodeSMEMOffset(MCInst &Inst, unsigned Imm, uint64_t Addr,
const MCDisassembler *Decoder) {
auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
int64_t Offset;
- if (DAsm->isVI()) { // VI supports 20-bit unsigned offsets.
+ if (DAsm->isGFX12Plus()) { // GFX12 supports 24-bit signed offsets.
+ Offset = SignExtend64<24>(Imm);
+ } else if (DAsm->isVI()) { // VI supports 20-bit unsigned offsets.
Offset = Imm & 0xFFFFF;
- } else { // GFX9+ supports 21-bit signed offsets.
+ } else { // GFX9+ supports 21-bit signed offsets.
Offset = SignExtend64<21>(Imm);
}
return addOperand(Inst, MCOperand::createImm(Offset));
diff --git a/llvm/lib/Target/AMDGPU/SMInstructions.td b/llvm/lib/Target/AMDGPU/SMInstructions.td
index 6235965b6e165..c18846483cf95 100644
--- a/llvm/lib/Target/AMDGPU/SMInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SMInstructions.td
@@ -211,6 +211,23 @@ class SM_WaveId_Pseudo<string opName, SDPatternOperator node> : SM_Pseudo<
let has_sbase = 0;
}
+class SM_Prefetch_Pseudo <string opName, RegisterClass baseClass, bit hasSBase>
+ : SM_Pseudo<opName, (outs), !con(!if(hasSBase, (ins baseClass:$sbase), (ins)),
+ (ins smem_offset:$offset, SReg_32:$soffset, i8imm:$sdata)),
+ !if(hasSBase, " $sbase,", "") # " $offset, $soffset, $sdata"> {
+ // Mark prefetches as both load and store to prevent reordering with loads and
+ // stores. It is also needed for the pattern to match the prefetch intrinsic.
+ let mayLoad = 1;
+ let mayStore = 1;
+ let has_glc = 0;
+ let LGKM_CNT = 0; // NOTE(review): presumably prefetches are not tracked by LGKM_CNT - confirm.
+ let has_sbase = hasSBase; // With hasSBase = 0 the $sbase operand is dropped from both ins and asm string.
+ let ScalarStore = 0;
+ let has_offset = 1; // Both the immediate $offset and the $soffset register are always present.
+ let has_soffset = 1;
+ let PseudoInstr = opName;
+}
+
//===----------------------------------------------------------------------===//
// Scalar Atomic Memory Classes
//===----------------------------------------------------------------------===//
@@ -415,6 +432,16 @@ defm S_DCACHE_DISCARD : SM_Pseudo_Discards;
defm S_DCACHE_DISCARD_X2 : SM_Pseudo_Discards;
}
+let SubtargetPredicate = isGFX12Plus in { // Scalar prefetch pseudos, available on GFX12+ only.
+def S_PREFETCH_INST : SM_Prefetch_Pseudo <"s_prefetch_inst", SReg_64, 1>;
+def S_PREFETCH_INST_PC_REL : SM_Prefetch_Pseudo <"s_prefetch_inst_pc_rel", SReg_64, 0>; // hasSBase = 0: no $sbase operand.
+def S_PREFETCH_DATA : SM_Prefetch_Pseudo <"s_prefetch_data", SReg_64, 1>;
+def S_PREFETCH_DATA_PC_REL : SM_Prefetch_Pseudo <"s_prefetch_data_pc_rel", SReg_64, 0>; // hasSBase = 0: no $sbase operand.
+def S_BUFFER_PREFETCH_DATA : SM_Prefetch_Pseudo <"s_buffer_prefetch_data", SReg_128, 1> { // Base is an SReg_128 tuple.
+ let is_buffer = 1;
+}
+} // end let SubtargetPredicate = isGFX12Plus
+
//===----------------------------------------------------------------------===//
// Targets
//===----------------------------------------------------------------------===//
@@ -1203,3 +1230,33 @@ multiclass SM_Real_Probe_gfx11<bits<8> op> {
defm S_ATC_PROBE : SM_Real_Probe_gfx11 <0x22>;
defm S_ATC_PROBE_BUFFER : SM_Real_Probe_gfx11 <0x23>;
+
+//===----------------------------------------------------------------------===//
+// GFX12.
+//===----------------------------------------------------------------------===//
+
+class SMEM_Real_gfx12<bits<8> op, SM_Pseudo ps, string opName = ps.Mnemonic> :
+ SMEM_Real_10Plus_common<op, ps, opName, SIEncodingFamily.GFX12,
+ SGPR_NULL_gfx11plus> {
+ let AssemblerPredicate = isGFX12Plus;
+ let DecoderNamespace = "GFX12";
+ let Inst{18-13} = op{5-0}; // Only the low 6 bits of the 8-bit op are encoded.
+ let Inst{19} = !if(ps.has_dlc, cpol{CPolBit.DLC}, 0); // DLC bit, or 0 when the pseudo has no dlc.
+ let Inst{24-20} = ?; // TODO-GFX12: Add new bits {24-20}: TH, Scope, NV
+ let Inst{25} = !if(ps.has_glc, cpol{CPolBit.GLC}, 0); // GLC bit, or 0 when the pseudo has no glc.
+ let Inst{55-32} = offset{23-0}; // 24-bit immediate offset field.
+}
+
+class SMEM_Real_Prefetch_gfx12 <bits<8> op, SM_Pseudo ps> : SMEM_Real_gfx12<op, ps> {
+ bits<7> sdata; // Only 5 bits of sdata are supported.
+
+ let sdst = ?; // sdst is left unset here; bits {12-6} are assigned explicitly below.
+ let Inst{12-11} = 0; // Unused sdata bits.
+ let Inst{10-6} = !if(ps.has_sdst, sdata{4-0}, ?); // Low 5 bits of sdata when ps.has_sdst is set.
+}
+
+def S_PREFETCH_INST_gfx12 : SMEM_Real_Prefetch_gfx12<0x24, S_PREFETCH_INST>; // Real GFX12 encodings, listed in opcode order (0x24-0x28).
+def S_PREFETCH_INST_PC_REL_gfx12 : SMEM_Real_Prefetch_gfx12<0x25, S_PREFETCH_INST_PC_REL>;
+def S_PREFETCH_DATA_gfx12 : SMEM_Real_Prefetch_gfx12<0x26, S_PREFETCH_DATA>;
+def S_BUFFER_PREFETCH_DATA_gfx12 : SMEM_Real_Prefetch_gfx12<0x27, S_BUFFER_PREFETCH_DATA>;
+def S_PREFETCH_DATA_PC_REL_gfx12 : SMEM_Real_Prefetch_gfx12<0x28, S_PREFETCH_DATA_PC_REL>;
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 3aacde50523f4..68d561a0d9f78 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -2604,6 +2604,9 @@ static bool hasSMRDSignedImmOffset(const MCSubtargetInfo &ST) {
bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST,
int64_t EncodedOffset) {
+ if (isGFX12Plus(ST))
+ return isUInt<23>(EncodedOffset); // 23 unsigned bits == non-negative half of a 24-bit signed range.
+
return hasSMEMByteOffset(ST) ? isUInt<20>(EncodedOffset)
: isUInt<8>(EncodedOffset);
}
@@ -2611,6 +2614,9 @@ bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST,
bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST,
int64_t EncodedOffset,
bool IsBuffer) {
+ if (isGFX12Plus(ST))
+ return isInt<24>(EncodedOffset);
+
return !IsBuffer &&
hasSMRDSignedImmOffset(ST) &&
isInt<21>(EncodedOffset);
@@ -2631,6 +2637,10 @@ uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST,
std::optional<int64_t> getSMRDEncodedOffset(const MCSubtargetInfo &ST,
int64_t ByteOffset, bool IsBuffer) {
+ if (isGFX12Plus(ST)) // 24 bit signed offsets
+ return isInt<24>(ByteOffset) ? std::optional<int64_t>(ByteOffset)
+ : std::nullopt;
+
// The signed version is always a byte offset.
if (!IsBuffer && hasSMRDSignedImmOffset(ST)) {
assert(hasSMEMByteOffset(ST));
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_smem.s b/llvm/test/MC/AMDGPU/gfx12_asm_smem.s
new file mode 100644
index 0000000000000..ed7ad5bb0c4e8
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_smem.s
@@ -0,0 +1,35 @@
+// RUN: llvm-mc -arch=amdgcn -show-encoding -mcpu=gfx1200 %s | FileCheck --check-prefix=GFX12 %s
+
+//===----------------------------------------------------------------------===//
+// ENC_SMEM.
+//===----------------------------------------------------------------------===//
+
+s_prefetch_inst s[12:13], 16, s4, 2
+// GFX12: s_prefetch_inst s[12:13], 0x10, s4, 2 ; encoding: [0x86,0x80,0x04,0xf4,0x10,0x00,0x00,0x08]
+
+s_prefetch_inst s[14:15], 0, m0, 7
+// GFX12: s_prefetch_inst s[14:15], 0x0, m0, 7 ; encoding: [0xc7,0x81,0x04,0xf4,0x00,0x00,0x00,0xfa]
+
+s_prefetch_inst s[14:15], 0x7fffff, m0, 7
+// GFX12: s_prefetch_inst s[14:15], 0x7fffff, m0, 7 ; encoding: [0xc7,0x81,0x04,0xf4,0xff,0xff,0x7f,0xfa]
+
+s_prefetch_inst s[14:15], -1, m0, 7
+// GFX12: s_prefetch_inst s[14:15], -0x1, m0, 7 ; encoding: [0xc7,0x81,0x04,0xf4,0xff,0xff,0xff,0xfa]
+
+s_prefetch_inst s[14:15], 100, m0, 31
+// GFX12: s_prefetch_inst s[14:15], 0x64, m0, 31 ; encoding: [0xc7,0x87,0x04,0xf4,0x64,0x00,0x00,0xfa]
+
+s_prefetch_inst_pc_rel 100, s10, 7
+// GFX12: s_prefetch_inst_pc_rel 0x64, s10, 7 ; encoding: [0xc0,0xa1,0x04,0xf4,0x64,0x00,0x00,0x14]
+
+s_prefetch_data s[18:19], 100, s10, 7
+// GFX12: s_prefetch_data s[18:19], 0x64, s10, 7 ; encoding: [0xc9,0xc1,0x04,0xf4,0x64,0x00,0x00,0x14]
+
+s_prefetch_data_pc_rel 100, s10, 7
+// GFX12: s_prefetch_data_pc_rel 0x64, s10, 7 ; encoding: [0xc0,0x01,0x05,0xf4,0x64,0x00,0x00,0x14]
+
+s_buffer_prefetch_data s[20:23], 100, s10, 7
+// GFX12: s_buffer_prefetch_data s[20:23], 0x64, s10, 7 ; encoding: [0xca,0xe1,0x04,0xf4,0x64,0x00,0x00,0x14]
+
+s_buffer_prefetch_data s[20:23], 100, null, 7
+// GFX12: s_buffer_prefetch_data s[20:23], 0x64, null, 7 ; encoding: [0xca,0xe1,0x04,0xf4,0x64,0x00,0x00,0xf8]
diff --git a/llvm/test/MC/AMDGPU/gfx12_err.s b/llvm/test/MC/AMDGPU/gfx12_err.s
index 93e3cc1498d26..b23e60082ffef 100644
--- a/llvm/test/MC/AMDGPU/gfx12_err.s
+++ b/llvm/test/MC/AMDGPU/gfx12_err.s
@@ -41,3 +41,8 @@ image_load v0, v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D th:TH_LOAD_HT scope:SCOPE
image_load v0, v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D scope:SCOPE_SE th:TH_LOAD_HT scope:SCOPE_SE
// GFX12-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand
+
+s_prefetch_inst s[14:15], 0xffffff, m0, 7
+// GFX12-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: expected a 24-bit signed offset
+// GFX12-ERR: s_prefetch_inst s[14:15], 0xffffff, m0, 7
+// GFX12-ERR: ^
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_smem.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_smem.txt
new file mode 100644
index 0000000000000..7843905797a62
--- /dev/null
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_smem.txt
@@ -0,0 +1,31 @@
+# RUN: llvm-mc -arch=amdgcn -mcpu=gfx1200 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12 %s
+
+# GFX12: s_prefetch_inst s[12:13], 0x10, s4, 2 ; encoding: [0x86,0x80,0x04,0xf4,0x10,0x00,0x00,0x08]
+0x86,0x80,0x04,0xf4,0x10,0x00,0x00,0x08
+
+# GFX12: s_prefetch_inst s[14:15], 0x0, m0, 7 ; encoding: [0xc7,0x81,0x04,0xf4,0x00,0x00,0x00,0xfa]
+0xc7,0x81,0x04,0xf4,0x00,0x00,0x00,0xfa
+
+# GFX12: s_prefetch_inst s[14:15], 0x7fffff, m0, 7 ; encoding: [0xc7,0x81,0x04,0xf4,0xff,0xff,0x7f,0xfa]
+0xc7,0x81,0x04,0xf4,0xff,0xff,0x7f,0xfa
+
+# GFX12: s_prefetch_inst s[14:15], -0x1, m0, 7 ; encoding: [0xc7,0x81,0x04,0xf4,0xff,0xff,0xff,0xfa]
+0xc7,0x81,0x04,0xf4,0xff,0xff,0xff,0xfa
+
+# GFX12: s_prefetch_inst s[14:15], 0x64, m0, 31 ; encoding: [0xc7,0x87,0x04,0xf4,0x64,0x00,0x00,0xfa]
+0xc7,0x87,0x04,0xf4,0x64,0x00,0x00,0xfa
+
+# GFX12: s_prefetch_inst_pc_rel 0x64, s10, 7 ; encoding: [0xc0,0xa1,0x04,0xf4,0x64,0x00,0x00,0x14]
+0xc0,0xa1,0x04,0xf4,0x64,0x00,0x00,0x14
+
+# GFX12: s_prefetch_data s[18:19], 0x64, s10, 7 ; encoding: [0xc9,0xc1,0x04,0xf4,0x64,0x00,0x00,0x14]
+0xc9,0xc1,0x04,0xf4,0x64,0x00,0x00,0x14
+
+# GFX12: s_prefetch_data_pc_rel 0x64, s10, 7 ; encoding: [0xc0,0x01,0x05,0xf4,0x64,0x00,0x00,0x14]
+0xc0,0x01,0x05,0xf4,0x64,0x00,0x00,0x14
+
+# GFX12: s_buffer_prefetch_data s[20:23], 0x64, s10, 7 ; encoding: [0xca,0xe1,0x04,0xf4,0x64,0x00,0x00,0x14]
+0xca,0xe1,0x04,0xf4,0x64,0x00,0x00,0x14
+
+# GFX12: s_buffer_prefetch_data s[20:23], 0x64, null, 7 ; encoding: [0xca,0xe1,0x04,0xf4,0x64,0x00,0x00,0xf8]
+0xca,0xe1,0x04,0xf4,0x64,0x00,0x00,0xf8
More information about the llvm-commits
mailing list