[llvm] cb67b2c - [AMDGPU][GFX10] Support base+soffset+offset SMEM stores.
Ivan Kosarev via llvm-commits
llvm-commits at lists.llvm.org
Thu May 12 00:49:06 PDT 2022
Author: Ivan Kosarev
Date: 2022-05-12T08:48:05+01:00
New Revision: cb67b2ccc4eb07dfa670b10a53ce96b82efb7b67
URL: https://github.com/llvm/llvm-project/commit/cb67b2ccc4eb07dfa670b10a53ce96b82efb7b67
DIFF: https://github.com/llvm/llvm-project/commit/cb67b2ccc4eb07dfa670b10a53ce96b82efb7b67.diff
LOG: [AMDGPU][GFX10] Support base+soffset+offset SMEM stores.
Also makes another step towards resolving https://github.com/llvm/llvm-project/issues/38652.
Reviewed By: foad, dp
Differential Revision: https://reviews.llvm.org/D125380
Added:
Modified:
llvm/lib/Target/AMDGPU/SMInstructions.td
llvm/test/MC/AMDGPU/gfx10_asm_smem.s
llvm/test/MC/Disassembler/AMDGPU/gfx10_dasm_all.txt
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SMInstructions.td b/llvm/lib/Target/AMDGPU/SMInstructions.td
index 69df8df2765db..4d1d18e22df2b 100644
--- a/llvm/lib/Target/AMDGPU/SMInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SMInstructions.td
@@ -110,10 +110,11 @@ class SM_Load_Pseudo <string opName, dag outs, dag ins, string asmOps, list<dag>
let has_dlc = 1;
}
-class SM_Store_Pseudo <string opName, dag ins, string asmOps, list<dag> pattern = []>
- : SM_Pseudo<opName, (outs), ins, asmOps, pattern> {
- RegisterClass BaseClass;
- RegisterClass SrcClass;
+class SM_Store_Pseudo <string opName, RegisterClass baseClass,
+ RegisterClass srcClass, dag ins, string asmOps>
+ : SM_Pseudo<opName, (outs), ins, asmOps, []> {
+ RegisterClass BaseClass = baseClass;
+ RegisterClass SrcClass = srcClass;
let mayLoad = 0;
let mayStore = 1;
let has_glc = 1;
@@ -177,23 +178,28 @@ multiclass SM_Pseudo_Loads<string opName,
multiclass SM_Pseudo_Stores<string opName,
RegisterClass baseClass,
RegisterClass srcClass> {
- def _IMM : SM_Store_Pseudo <opName,
+ def _IMM : SM_Store_Pseudo <opName, baseClass, srcClass,
(ins srcClass:$sdata, baseClass:$sbase, i32imm:$offset, CPol:$cpol),
- " $sdata, $sbase, $offset$cpol", []> {
+ " $sdata, $sbase, $offset$cpol"> {
let has_offset = 1;
- let BaseClass = baseClass;
- let SrcClass = srcClass;
let PseudoInstr = opName # "_IMM";
}
- def _SGPR : SM_Store_Pseudo <opName,
+ def _SGPR : SM_Store_Pseudo <opName, baseClass, srcClass,
(ins srcClass:$sdata, baseClass:$sbase, SReg_32:$soffset, CPol:$cpol),
- " $sdata, $sbase, $soffset$cpol", []> {
+ " $sdata, $sbase, $soffset$cpol"> {
let has_soffset = 1;
- let BaseClass = baseClass;
- let SrcClass = srcClass;
let PseudoInstr = opName # "_SGPR";
}
+
+ def _SGPR_IMM : SM_Store_Pseudo <opName, baseClass, srcClass,
+ (ins srcClass:$sdata, baseClass:$sbase, SReg_32:$soffset, i32imm:$offset,
+ CPol:$cpol),
+ " $sdata, $sbase, $soffset$offset$cpol"> {
+ let has_offset = 1;
+ let has_soffset = 1;
+ let PseudoInstr = opName # "_SGPR_IMM";
+ }
}
multiclass SM_Pseudo_Discards<string opName> {
@@ -948,6 +954,11 @@ multiclass SM_Real_Stores_gfx10<bits<8> op, string ps,
def _SGPR_gfx10 : SMEM_Real_Store_gfx10 <op, sgprPs> {
let InOperandList = (ins sgprPs.SrcClass:$sdata, sgprPs.BaseClass:$sbase, SReg_32:$soffset, CPol:$cpol);
}
+
+ def _SGPR_IMM_gfx10 : SMEM_Real_Store_gfx10 <op, !cast<SM_Store_Pseudo>(ps#_SGPR_IMM)> {
+ let InOperandList = (ins sgprPs.SrcClass:$sdata, sgprPs.BaseClass:$sbase,
+ SReg_32:$soffset, smem_offset_mod:$offset, CPol:$cpol);
+ }
}
defm S_LOAD_DWORD : SM_Real_Loads_gfx10<0x000, "S_LOAD_DWORD">;
diff --git a/llvm/test/MC/AMDGPU/gfx10_asm_smem.s b/llvm/test/MC/AMDGPU/gfx10_asm_smem.s
index a4963e5c34e88..beb72ecbe0992 100644
--- a/llvm/test/MC/AMDGPU/gfx10_asm_smem.s
+++ b/llvm/test/MC/AMDGPU/gfx10_asm_smem.s
@@ -581,6 +581,9 @@ s_store_dword s1, s[4:5], null
s_store_dword s1, s[4:5], 0x0
// GFX10: encoding: [0x42,0x00,0x40,0xf4,0x00,0x00,0x00,0xfa]
+s_store_dword s1, s[4:5], s0 offset:0x12345
+// GFX10: encoding: [0x42,0x00,0x40,0xf4,0x45,0x23,0x01,0x00]
+
s_store_dword s1, s[4:5], s0 glc
// GFX10: encoding: [0x42,0x00,0x41,0xf4,0x00,0x00,0x00,0x00]
@@ -593,6 +596,9 @@ s_store_dword s1, s[4:5], s0 glc dlc
s_store_dword s1, s[4:5], 0x1234 glc dlc
// GFX10: encoding: [0x42,0x40,0x41,0xf4,0x34,0x12,0x00,0xfa]
+s_store_dword s1, s[4:5], s0 offset:0x12345 glc dlc
+// GFX10: encoding: [0x42,0x40,0x41,0xf4,0x45,0x23,0x01,0x00]
+
s_store_dwordx2 s[2:3], s[4:5], s0
// GFX10: encoding: [0x82,0x00,0x44,0xf4,0x00,0x00,0x00,0x00]
@@ -632,6 +638,9 @@ s_store_dwordx2 s[2:3], s[4:5], null
s_store_dwordx2 s[2:3], s[4:5], 0x0
// GFX10: encoding: [0x82,0x00,0x44,0xf4,0x00,0x00,0x00,0xfa]
+s_store_dwordx2 s[2:3], s[4:5], s0 offset:0x12345
+// GFX10: encoding: [0x82,0x00,0x44,0xf4,0x45,0x23,0x01,0x00]
+
s_store_dwordx2 s[2:3], s[4:5], s0 glc
// GFX10: encoding: [0x82,0x00,0x45,0xf4,0x00,0x00,0x00,0x00]
@@ -644,6 +653,9 @@ s_store_dwordx2 s[2:3], s[4:5], s0 glc dlc
s_store_dwordx2 s[2:3], s[4:5], 0x1234 glc dlc
// GFX10: encoding: [0x82,0x40,0x45,0xf4,0x34,0x12,0x00,0xfa]
+s_store_dwordx2 s[2:3], s[4:5], s0 offset:0x12345 glc dlc
+// GFX10: encoding: [0x82,0x40,0x45,0xf4,0x45,0x23,0x01,0x00]
+
s_store_dwordx4 s[4:7], s[4:5], s0
// GFX10: encoding: [0x02,0x01,0x48,0xf4,0x00,0x00,0x00,0x00]
@@ -680,6 +692,9 @@ s_store_dwordx4 s[4:7], s[4:5], null
s_store_dwordx4 s[4:7], s[4:5], 0x0
// GFX10: encoding: [0x02,0x01,0x48,0xf4,0x00,0x00,0x00,0xfa]
+s_store_dwordx4 s[4:7], s[4:5], s0 offset:0x12345
+// GFX10: encoding: [0x02,0x01,0x48,0xf4,0x45,0x23,0x01,0x00]
+
s_store_dwordx4 s[4:7], s[4:5], s0 glc
// GFX10: encoding: [0x02,0x01,0x49,0xf4,0x00,0x00,0x00,0x00]
@@ -692,6 +707,9 @@ s_store_dwordx4 s[4:7], s[4:5], s0 glc dlc
s_store_dwordx4 s[4:7], s[4:5], 0x1234 glc dlc
// GFX10: encoding: [0x02,0x41,0x49,0xf4,0x34,0x12,0x00,0xfa]
+s_store_dwordx4 s[4:7], s[4:5], s0 offset:0x12345 glc dlc
+// GFX10: encoding: [0x02,0x41,0x49,0xf4,0x45,0x23,0x01,0x00]
+
s_buffer_store_dword s1, s[8:11], s0
// GFX10: encoding: [0x44,0x00,0x60,0xf4,0x00,0x00,0x00,0x00]
@@ -728,6 +746,9 @@ s_buffer_store_dword s1, s[8:11], null
s_buffer_store_dword s1, s[8:11], 0x0
// GFX10: encoding: [0x44,0x00,0x60,0xf4,0x00,0x00,0x00,0xfa]
+s_buffer_store_dword s1, s[8:11], s0 offset:0x12345
+// GFX10: encoding: [0x44,0x00,0x60,0xf4,0x45,0x23,0x01,0x00]
+
s_buffer_store_dword s1, s[8:11], s0 glc
// GFX10: encoding: [0x44,0x00,0x61,0xf4,0x00,0x00,0x00,0x00]
@@ -740,6 +761,9 @@ s_buffer_store_dword s1, s[8:11], s0 glc dlc
s_buffer_store_dword s1, s[8:11], 0x1234 glc dlc
// GFX10: encoding: [0x44,0x40,0x61,0xf4,0x34,0x12,0x00,0xfa]
+s_buffer_store_dword s1, s[8:11], s0 offset:0x12345 glc dlc
+// GFX10: encoding: [0x44,0x40,0x61,0xf4,0x45,0x23,0x01,0x00]
+
s_buffer_store_dwordx2 s[2:3], s[8:11], s0
// GFX10: encoding: [0x84,0x00,0x64,0xf4,0x00,0x00,0x00,0x00]
@@ -776,6 +800,9 @@ s_buffer_store_dwordx2 s[2:3], s[8:11], null
s_buffer_store_dwordx2 s[2:3], s[8:11], 0x0
// GFX10: encoding: [0x84,0x00,0x64,0xf4,0x00,0x00,0x00,0xfa]
+s_buffer_store_dwordx2 s[2:3], s[8:11], s0 offset:0x12345
+// GFX10: encoding: [0x84,0x00,0x64,0xf4,0x45,0x23,0x01,0x00]
+
s_buffer_store_dwordx2 s[2:3], s[8:11], s0 glc
// GFX10: encoding: [0x84,0x00,0x65,0xf4,0x00,0x00,0x00,0x00]
@@ -788,6 +815,9 @@ s_buffer_store_dwordx2 s[2:3], s[8:11], s0 glc dlc
s_buffer_store_dwordx2 s[2:3], s[8:11], 0x1234 glc dlc
// GFX10: encoding: [0x84,0x40,0x65,0xf4,0x34,0x12,0x00,0xfa]
+s_buffer_store_dwordx2 s[2:3], s[8:11], s0 offset:0x12345 glc dlc
+// GFX10: encoding: [0x84,0x40,0x65,0xf4,0x45,0x23,0x01,0x00]
+
s_buffer_store_dwordx4 s[4:7], s[8:11], s0
// GFX10: encoding: [0x04,0x01,0x68,0xf4,0x00,0x00,0x00,0x00]
@@ -821,6 +851,9 @@ s_buffer_store_dwordx4 s[4:7], s[8:11], null
s_buffer_store_dwordx4 s[4:7], s[8:11], 0x0
// GFX10: encoding: [0x04,0x01,0x68,0xf4,0x00,0x00,0x00,0xfa]
+s_buffer_store_dwordx4 s[4:7], s[8:11], s0 offset:0x12345
+// GFX10: encoding: [0x04,0x01,0x68,0xf4,0x45,0x23,0x01,0x00]
+
s_buffer_store_dwordx4 s[4:7], s[8:11], s0 glc
// GFX10: encoding: [0x04,0x01,0x69,0xf4,0x00,0x00,0x00,0x00]
@@ -833,6 +866,9 @@ s_buffer_store_dwordx4 s[4:7], s[8:11], s0 glc dlc
s_buffer_store_dwordx4 s[4:7], s[8:11], 0x1234 glc dlc
// GFX10: encoding: [0x04,0x41,0x69,0xf4,0x34,0x12,0x00,0xfa]
+s_buffer_store_dwordx4 s[4:7], s[8:11], s0 offset:0x12345 glc dlc
+// GFX10: encoding: [0x04,0x41,0x69,0xf4,0x45,0x23,0x01,0x00]
+
s_memrealtime s[10:11]
// GFX10: encoding: [0x80,0x02,0x94,0xf4,0x00,0x00,0x00,0x00]
@@ -893,6 +929,9 @@ s_scratch_store_dword s101, s[4:5], s0
s_scratch_store_dword s1, s[4:5], 0x123 glc
// GFX10: encoding: [0x42,0x00,0x55,0xf4,0x23,0x01,0x00,0xfa]
+s_scratch_store_dword s1, s[4:5], s0 offset:0x12345 glc
+// GFX10: encoding: [0x42,0x00,0x55,0xf4,0x45,0x23,0x01,0x00]
+
s_scratch_store_dwordx2 s[2:3], s[4:5], s101 glc
// GFX10: encoding: [0x82,0x00,0x59,0xf4,0x00,0x00,0x00,0xca]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx10_dasm_all.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx10_dasm_all.txt
index a6ec6a9aea692..4d57f305d7084 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx10_dasm_all.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx10_dasm_all.txt
@@ -11591,6 +11591,9 @@
# GFX10: s_buffer_store_dword s1, s[8:11], 0x1234 glc dlc ; encoding: [0x44,0x40,0x61,0xf4,0x34,0x12,0x00,0xfa]
0x44,0x40,0x61,0xf4,0x34,0x12,0x00,0xfa
+# GFX10: s_buffer_store_dword s1, s[8:11], s0 offset:0x12345 glc dlc ; encoding: [0x44,0x40,0x61,0xf4,0x45,0x23,0x01,0x00]
+0x44,0x40,0x61,0xf4,0x45,0x23,0x01,0x00
+
# GFX10: s_buffer_store_dword s1, s[8:11], m0 ; encoding: [0x44,0x00,0x60,0xf4,0x00,0x00,0x00,0xf8]
0x44,0x00,0x60,0xf4,0x00,0x00,0x00,0xf8
@@ -11639,6 +11642,9 @@
# GFX10: s_buffer_store_dwordx2 s[2:3], s[8:11], 0x1234 glc dlc ; encoding: [0x84,0x40,0x65,0xf4,0x34,0x12,0x00,0xfa]
0x84,0x40,0x65,0xf4,0x34,0x12,0x00,0xfa
+# GFX10: s_buffer_store_dwordx2 s[2:3], s[8:11], s0 offset:0x12345 glc dlc ; encoding: [0x84,0x40,0x65,0xf4,0x45,0x23,0x01,0x00]
+0x84,0x40,0x65,0xf4,0x45,0x23,0x01,0x00
+
# GFX10: s_buffer_store_dwordx2 s[2:3], s[8:11], m0 ; encoding: [0x84,0x00,0x64,0xf4,0x00,0x00,0x00,0xf8]
0x84,0x00,0x64,0xf4,0x00,0x00,0x00,0xf8
@@ -11681,6 +11687,9 @@
# GFX10: s_buffer_store_dwordx4 s[4:7], s[8:11], 0x1234 glc dlc ; encoding: [0x04,0x41,0x69,0xf4,0x34,0x12,0x00,0xfa]
0x04,0x41,0x69,0xf4,0x34,0x12,0x00,0xfa
+# GFX10: s_buffer_store_dwordx4 s[4:7], s[8:11], s0 offset:0x12345 glc dlc ; encoding: [0x04,0x41,0x69,0xf4,0x45,0x23,0x01,0x00]
+0x04,0x41,0x69,0xf4,0x45,0x23,0x01,0x00
+
# GFX10: s_buffer_store_dwordx4 s[4:7], s[8:11], m0 ; encoding: [0x04,0x01,0x68,0xf4,0x00,0x00,0x00,0xf8]
0x04,0x01,0x68,0xf4,0x00,0x00,0x00,0xf8
@@ -18086,6 +18095,9 @@
# GFX10: s_scratch_store_dword s1, s[4:5], 0x123 glc ; encoding: [0x42,0x00,0x55,0xf4,0x23,0x01,0x00,0xfa]
0x42,0x00,0x55,0xf4,0x23,0x01,0x00,0xfa
+# GFX10: s_scratch_store_dword s1, s[4:5], s0 offset:0x12345 glc ; encoding: [0x42,0x00,0x55,0xf4,0x45,0x23,0x01,0x00]
+0x42,0x00,0x55,0xf4,0x45,0x23,0x01,0x00
+
# GFX10: s_scratch_store_dword s101, s[4:5], s0 ; encoding: [0x42,0x19,0x54,0xf4,0x00,0x00,0x00,0x00]
0x42,0x19,0x54,0xf4,0x00,0x00,0x00,0x00
@@ -18323,6 +18335,9 @@
# GFX10: s_store_dword s1, s[4:5], 0x1234 glc dlc ; encoding: [0x42,0x40,0x41,0xf4,0x34,0x12,0x00,0xfa]
0x42,0x40,0x41,0xf4,0x34,0x12,0x00,0xfa
+# GFX10: s_store_dword s1, s[4:5], s0 offset:0x12345 glc dlc ; encoding: [0x42,0x40,0x41,0xf4,0x45,0x23,0x01,0x00]
+0x42,0x40,0x41,0xf4,0x45,0x23,0x01,0x00
+
# GFX10: s_store_dword s1, s[4:5], m0 ; encoding: [0x42,0x00,0x40,0xf4,0x00,0x00,0x00,0xf8]
0x42,0x00,0x40,0xf4,0x00,0x00,0x00,0xf8
@@ -18374,6 +18389,9 @@
# GFX10: s_store_dwordx2 s[2:3], s[4:5], 0x1234 glc dlc ; encoding: [0x82,0x40,0x45,0xf4,0x34,0x12,0x00,0xfa]
0x82,0x40,0x45,0xf4,0x34,0x12,0x00,0xfa
+# GFX10: s_store_dwordx2 s[2:3], s[4:5], s0 offset:0x12345 glc dlc ; encoding: [0x82,0x40,0x45,0xf4,0x45,0x23,0x01,0x00]
+0x82,0x40,0x45,0xf4,0x45,0x23,0x01,0x00
+
# GFX10: s_store_dwordx2 s[2:3], s[4:5], m0 ; encoding: [0x82,0x00,0x44,0xf4,0x00,0x00,0x00,0xf8]
0x82,0x00,0x44,0xf4,0x00,0x00,0x00,0xf8
@@ -18419,6 +18437,9 @@
# GFX10: s_store_dwordx4 s[4:7], s[4:5], 0x1234 glc dlc ; encoding: [0x02,0x41,0x49,0xf4,0x34,0x12,0x00,0xfa]
0x02,0x41,0x49,0xf4,0x34,0x12,0x00,0xfa
+# GFX10: s_store_dwordx4 s[4:7], s[4:5], s0 offset:0x12345 glc dlc ; encoding: [0x02,0x41,0x49,0xf4,0x45,0x23,0x01,0x00]
+0x02,0x41,0x49,0xf4,0x45,0x23,0x01,0x00
+
# GFX10: s_store_dwordx4 s[4:7], s[4:5], m0 ; encoding: [0x02,0x01,0x48,0xf4,0x00,0x00,0x00,0xf8]
0x02,0x01,0x48,0xf4,0x00,0x00,0x00,0xf8
More information about the llvm-commits mailing list