[llvm] cb67b2c - [AMDGPU][GFX10] Support base+soffset+offset SMEM stores.

Ivan Kosarev via llvm-commits llvm-commits at lists.llvm.org
Thu May 12 00:49:06 PDT 2022


Author: Ivan Kosarev
Date: 2022-05-12T08:48:05+01:00
New Revision: cb67b2ccc4eb07dfa670b10a53ce96b82efb7b67

URL: https://github.com/llvm/llvm-project/commit/cb67b2ccc4eb07dfa670b10a53ce96b82efb7b67
DIFF: https://github.com/llvm/llvm-project/commit/cb67b2ccc4eb07dfa670b10a53ce96b82efb7b67.diff

LOG: [AMDGPU][GFX10] Support base+soffset+offset SMEM stores.

Also makes another step towards resolving
https://github.com/llvm/llvm-project/issues/38652

Reviewed By: foad, dp

Differential Revision: https://reviews.llvm.org/D125380

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/SMInstructions.td
    llvm/test/MC/AMDGPU/gfx10_asm_smem.s
    llvm/test/MC/Disassembler/AMDGPU/gfx10_dasm_all.txt

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/SMInstructions.td b/llvm/lib/Target/AMDGPU/SMInstructions.td
index 69df8df2765db..4d1d18e22df2b 100644
--- a/llvm/lib/Target/AMDGPU/SMInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SMInstructions.td
@@ -110,10 +110,11 @@ class SM_Load_Pseudo <string opName, dag outs, dag ins, string asmOps, list<dag>
   let has_dlc = 1;
 }
 
-class SM_Store_Pseudo <string opName, dag ins, string asmOps, list<dag> pattern = []>
-  : SM_Pseudo<opName, (outs), ins, asmOps, pattern> {
-  RegisterClass BaseClass;
-  RegisterClass SrcClass;
+class SM_Store_Pseudo <string opName, RegisterClass baseClass,
+                       RegisterClass srcClass, dag ins, string asmOps>
+  : SM_Pseudo<opName, (outs), ins, asmOps, []> {
+  RegisterClass BaseClass = baseClass;
+  RegisterClass SrcClass = srcClass;
   let mayLoad = 0;
   let mayStore = 1;
   let has_glc = 1;
@@ -177,23 +178,28 @@ multiclass SM_Pseudo_Loads<string opName,
 multiclass SM_Pseudo_Stores<string opName,
                            RegisterClass baseClass,
                            RegisterClass srcClass> {
-  def _IMM  : SM_Store_Pseudo <opName,
+  def _IMM : SM_Store_Pseudo <opName, baseClass, srcClass,
     (ins srcClass:$sdata, baseClass:$sbase, i32imm:$offset, CPol:$cpol),
-    " $sdata, $sbase, $offset$cpol", []> {
+    " $sdata, $sbase, $offset$cpol"> {
     let has_offset = 1;
-    let BaseClass = baseClass;
-    let SrcClass = srcClass;
     let PseudoInstr = opName # "_IMM";
   }
 
-  def _SGPR  : SM_Store_Pseudo <opName,
+  def _SGPR : SM_Store_Pseudo <opName, baseClass, srcClass,
     (ins srcClass:$sdata, baseClass:$sbase, SReg_32:$soffset, CPol:$cpol),
-    " $sdata, $sbase, $soffset$cpol", []> {
+    " $sdata, $sbase, $soffset$cpol"> {
     let has_soffset = 1;
-    let BaseClass = baseClass;
-    let SrcClass = srcClass;
     let PseudoInstr = opName # "_SGPR";
   }
+
+  def _SGPR_IMM : SM_Store_Pseudo <opName, baseClass, srcClass,
+    (ins srcClass:$sdata, baseClass:$sbase, SReg_32:$soffset, i32imm:$offset,
+     CPol:$cpol),
+    " $sdata, $sbase, $soffset$offset$cpol"> {
+    let has_offset = 1;
+    let has_soffset = 1;
+    let PseudoInstr = opName # "_SGPR_IMM";
+  }
 }
 
 multiclass SM_Pseudo_Discards<string opName> {
@@ -948,6 +954,11 @@ multiclass SM_Real_Stores_gfx10<bits<8> op, string ps,
   def _SGPR_gfx10 : SMEM_Real_Store_gfx10 <op, sgprPs> {
     let InOperandList = (ins sgprPs.SrcClass:$sdata, sgprPs.BaseClass:$sbase, SReg_32:$soffset, CPol:$cpol);
   }
+
+  def _SGPR_IMM_gfx10 : SMEM_Real_Store_gfx10 <op, !cast<SM_Store_Pseudo>(ps#_SGPR_IMM)> {
+    let InOperandList = (ins sgprPs.SrcClass:$sdata, sgprPs.BaseClass:$sbase,
+                             SReg_32:$soffset, smem_offset_mod:$offset, CPol:$cpol);
+  }
 }
 
 defm S_LOAD_DWORD            : SM_Real_Loads_gfx10<0x000, "S_LOAD_DWORD">;

diff  --git a/llvm/test/MC/AMDGPU/gfx10_asm_smem.s b/llvm/test/MC/AMDGPU/gfx10_asm_smem.s
index a4963e5c34e88..beb72ecbe0992 100644
--- a/llvm/test/MC/AMDGPU/gfx10_asm_smem.s
+++ b/llvm/test/MC/AMDGPU/gfx10_asm_smem.s
@@ -581,6 +581,9 @@ s_store_dword s1, s[4:5], null
 s_store_dword s1, s[4:5], 0x0
 // GFX10: encoding: [0x42,0x00,0x40,0xf4,0x00,0x00,0x00,0xfa]
 
+s_store_dword s1, s[4:5], s0 offset:0x12345
+// GFX10: encoding: [0x42,0x00,0x40,0xf4,0x45,0x23,0x01,0x00]
+
 s_store_dword s1, s[4:5], s0 glc
 // GFX10: encoding: [0x42,0x00,0x41,0xf4,0x00,0x00,0x00,0x00]
 
@@ -593,6 +596,9 @@ s_store_dword s1, s[4:5], s0 glc dlc
 s_store_dword s1, s[4:5], 0x1234 glc dlc
 // GFX10: encoding: [0x42,0x40,0x41,0xf4,0x34,0x12,0x00,0xfa]
 
+s_store_dword s1, s[4:5], s0 offset:0x12345 glc dlc
+// GFX10: encoding: [0x42,0x40,0x41,0xf4,0x45,0x23,0x01,0x00]
+
 s_store_dwordx2 s[2:3], s[4:5], s0
 // GFX10: encoding: [0x82,0x00,0x44,0xf4,0x00,0x00,0x00,0x00]
 
@@ -632,6 +638,9 @@ s_store_dwordx2 s[2:3], s[4:5], null
 s_store_dwordx2 s[2:3], s[4:5], 0x0
 // GFX10: encoding: [0x82,0x00,0x44,0xf4,0x00,0x00,0x00,0xfa]
 
+s_store_dwordx2 s[2:3], s[4:5], s0 offset:0x12345
+// GFX10: encoding: [0x82,0x00,0x44,0xf4,0x45,0x23,0x01,0x00]
+
 s_store_dwordx2 s[2:3], s[4:5], s0 glc
 // GFX10: encoding: [0x82,0x00,0x45,0xf4,0x00,0x00,0x00,0x00]
 
@@ -644,6 +653,9 @@ s_store_dwordx2 s[2:3], s[4:5], s0 glc dlc
 s_store_dwordx2 s[2:3], s[4:5], 0x1234 glc dlc
 // GFX10: encoding: [0x82,0x40,0x45,0xf4,0x34,0x12,0x00,0xfa]
 
+s_store_dwordx2 s[2:3], s[4:5], s0 offset:0x12345 glc dlc
+// GFX10: encoding: [0x82,0x40,0x45,0xf4,0x45,0x23,0x01,0x00]
+
 s_store_dwordx4 s[4:7], s[4:5], s0
 // GFX10: encoding: [0x02,0x01,0x48,0xf4,0x00,0x00,0x00,0x00]
 
@@ -680,6 +692,9 @@ s_store_dwordx4 s[4:7], s[4:5], null
 s_store_dwordx4 s[4:7], s[4:5], 0x0
 // GFX10: encoding: [0x02,0x01,0x48,0xf4,0x00,0x00,0x00,0xfa]
 
+s_store_dwordx4 s[4:7], s[4:5], s0 offset:0x12345
+// GFX10: encoding: [0x02,0x01,0x48,0xf4,0x45,0x23,0x01,0x00]
+
 s_store_dwordx4 s[4:7], s[4:5], s0 glc
 // GFX10: encoding: [0x02,0x01,0x49,0xf4,0x00,0x00,0x00,0x00]
 
@@ -692,6 +707,9 @@ s_store_dwordx4 s[4:7], s[4:5], s0 glc dlc
 s_store_dwordx4 s[4:7], s[4:5], 0x1234 glc dlc
 // GFX10: encoding: [0x02,0x41,0x49,0xf4,0x34,0x12,0x00,0xfa]
 
+s_store_dwordx4 s[4:7], s[4:5], s0 offset:0x12345 glc dlc
+// GFX10: encoding: [0x02,0x41,0x49,0xf4,0x45,0x23,0x01,0x00]
+
 s_buffer_store_dword s1, s[8:11], s0
 // GFX10: encoding: [0x44,0x00,0x60,0xf4,0x00,0x00,0x00,0x00]
 
@@ -728,6 +746,9 @@ s_buffer_store_dword s1, s[8:11], null
 s_buffer_store_dword s1, s[8:11], 0x0
 // GFX10: encoding: [0x44,0x00,0x60,0xf4,0x00,0x00,0x00,0xfa]
 
+s_buffer_store_dword s1, s[8:11], s0 offset:0x12345
+// GFX10: encoding: [0x44,0x00,0x60,0xf4,0x45,0x23,0x01,0x00]
+
 s_buffer_store_dword s1, s[8:11], s0 glc
 // GFX10: encoding: [0x44,0x00,0x61,0xf4,0x00,0x00,0x00,0x00]
 
@@ -740,6 +761,9 @@ s_buffer_store_dword s1, s[8:11], s0 glc dlc
 s_buffer_store_dword s1, s[8:11], 0x1234 glc dlc
 // GFX10: encoding: [0x44,0x40,0x61,0xf4,0x34,0x12,0x00,0xfa]
 
+s_buffer_store_dword s1, s[8:11], s0 offset:0x12345 glc dlc
+// GFX10: encoding: [0x44,0x40,0x61,0xf4,0x45,0x23,0x01,0x00]
+
 s_buffer_store_dwordx2 s[2:3], s[8:11], s0
 // GFX10: encoding: [0x84,0x00,0x64,0xf4,0x00,0x00,0x00,0x00]
 
@@ -776,6 +800,9 @@ s_buffer_store_dwordx2 s[2:3], s[8:11], null
 s_buffer_store_dwordx2 s[2:3], s[8:11], 0x0
 // GFX10: encoding: [0x84,0x00,0x64,0xf4,0x00,0x00,0x00,0xfa]
 
+s_buffer_store_dwordx2 s[2:3], s[8:11], s0 offset:0x12345
+// GFX10: encoding: [0x84,0x00,0x64,0xf4,0x45,0x23,0x01,0x00]
+
 s_buffer_store_dwordx2 s[2:3], s[8:11], s0 glc
 // GFX10: encoding: [0x84,0x00,0x65,0xf4,0x00,0x00,0x00,0x00]
 
@@ -788,6 +815,9 @@ s_buffer_store_dwordx2 s[2:3], s[8:11], s0 glc dlc
 s_buffer_store_dwordx2 s[2:3], s[8:11], 0x1234 glc dlc
 // GFX10: encoding: [0x84,0x40,0x65,0xf4,0x34,0x12,0x00,0xfa]
 
+s_buffer_store_dwordx2 s[2:3], s[8:11], s0 offset:0x12345 glc dlc
+// GFX10: encoding: [0x84,0x40,0x65,0xf4,0x45,0x23,0x01,0x00]
+
 s_buffer_store_dwordx4 s[4:7], s[8:11], s0
 // GFX10: encoding: [0x04,0x01,0x68,0xf4,0x00,0x00,0x00,0x00]
 
@@ -821,6 +851,9 @@ s_buffer_store_dwordx4 s[4:7], s[8:11], null
 s_buffer_store_dwordx4 s[4:7], s[8:11], 0x0
 // GFX10: encoding: [0x04,0x01,0x68,0xf4,0x00,0x00,0x00,0xfa]
 
+s_buffer_store_dwordx4 s[4:7], s[8:11], s0 offset:0x12345
+// GFX10: encoding: [0x04,0x01,0x68,0xf4,0x45,0x23,0x01,0x00]
+
 s_buffer_store_dwordx4 s[4:7], s[8:11], s0 glc
 // GFX10: encoding: [0x04,0x01,0x69,0xf4,0x00,0x00,0x00,0x00]
 
@@ -833,6 +866,9 @@ s_buffer_store_dwordx4 s[4:7], s[8:11], s0 glc dlc
 s_buffer_store_dwordx4 s[4:7], s[8:11], 0x1234 glc dlc
 // GFX10: encoding: [0x04,0x41,0x69,0xf4,0x34,0x12,0x00,0xfa]
 
+s_buffer_store_dwordx4 s[4:7], s[8:11], s0 offset:0x12345 glc dlc
+// GFX10: encoding: [0x04,0x41,0x69,0xf4,0x45,0x23,0x01,0x00]
+
 s_memrealtime s[10:11]
 // GFX10: encoding: [0x80,0x02,0x94,0xf4,0x00,0x00,0x00,0x00]
 
@@ -893,6 +929,9 @@ s_scratch_store_dword s101, s[4:5], s0
 s_scratch_store_dword s1, s[4:5], 0x123 glc
 // GFX10: encoding: [0x42,0x00,0x55,0xf4,0x23,0x01,0x00,0xfa]
 
+s_scratch_store_dword s1, s[4:5], s0 offset:0x12345 glc
+// GFX10: encoding: [0x42,0x00,0x55,0xf4,0x45,0x23,0x01,0x00]
+
 s_scratch_store_dwordx2 s[2:3], s[4:5], s101 glc
 // GFX10: encoding: [0x82,0x00,0x59,0xf4,0x00,0x00,0x00,0xca]
 

diff  --git a/llvm/test/MC/Disassembler/AMDGPU/gfx10_dasm_all.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx10_dasm_all.txt
index a6ec6a9aea692..4d57f305d7084 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx10_dasm_all.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx10_dasm_all.txt
@@ -11591,6 +11591,9 @@
 # GFX10: s_buffer_store_dword s1, s[8:11], 0x1234 glc dlc ; encoding: [0x44,0x40,0x61,0xf4,0x34,0x12,0x00,0xfa]
 0x44,0x40,0x61,0xf4,0x34,0x12,0x00,0xfa
 
+# GFX10: s_buffer_store_dword s1, s[8:11], s0 offset:0x12345 glc dlc ; encoding: [0x44,0x40,0x61,0xf4,0x45,0x23,0x01,0x00]
+0x44,0x40,0x61,0xf4,0x45,0x23,0x01,0x00
+
 # GFX10: s_buffer_store_dword s1, s[8:11], m0    ; encoding: [0x44,0x00,0x60,0xf4,0x00,0x00,0x00,0xf8]
 0x44,0x00,0x60,0xf4,0x00,0x00,0x00,0xf8
 
@@ -11639,6 +11642,9 @@
 # GFX10: s_buffer_store_dwordx2 s[2:3], s[8:11], 0x1234 glc dlc ; encoding: [0x84,0x40,0x65,0xf4,0x34,0x12,0x00,0xfa]
 0x84,0x40,0x65,0xf4,0x34,0x12,0x00,0xfa
 
+# GFX10: s_buffer_store_dwordx2 s[2:3], s[8:11], s0 offset:0x12345 glc dlc ; encoding: [0x84,0x40,0x65,0xf4,0x45,0x23,0x01,0x00]
+0x84,0x40,0x65,0xf4,0x45,0x23,0x01,0x00
+
 # GFX10: s_buffer_store_dwordx2 s[2:3], s[8:11], m0 ; encoding: [0x84,0x00,0x64,0xf4,0x00,0x00,0x00,0xf8]
 0x84,0x00,0x64,0xf4,0x00,0x00,0x00,0xf8
 
@@ -11681,6 +11687,9 @@
 # GFX10: s_buffer_store_dwordx4 s[4:7], s[8:11], 0x1234 glc dlc ; encoding: [0x04,0x41,0x69,0xf4,0x34,0x12,0x00,0xfa]
 0x04,0x41,0x69,0xf4,0x34,0x12,0x00,0xfa
 
+# GFX10: s_buffer_store_dwordx4 s[4:7], s[8:11], s0 offset:0x12345 glc dlc ; encoding: [0x04,0x41,0x69,0xf4,0x45,0x23,0x01,0x00]
+0x04,0x41,0x69,0xf4,0x45,0x23,0x01,0x00
+
 # GFX10: s_buffer_store_dwordx4 s[4:7], s[8:11], m0 ; encoding: [0x04,0x01,0x68,0xf4,0x00,0x00,0x00,0xf8]
 0x04,0x01,0x68,0xf4,0x00,0x00,0x00,0xf8
 
@@ -18086,6 +18095,9 @@
 # GFX10: s_scratch_store_dword s1, s[4:5], 0x123 glc ; encoding: [0x42,0x00,0x55,0xf4,0x23,0x01,0x00,0xfa]
 0x42,0x00,0x55,0xf4,0x23,0x01,0x00,0xfa
 
+# GFX10: s_scratch_store_dword s1, s[4:5], s0 offset:0x12345 glc ; encoding: [0x42,0x00,0x55,0xf4,0x45,0x23,0x01,0x00]
+0x42,0x00,0x55,0xf4,0x45,0x23,0x01,0x00
+
 # GFX10: s_scratch_store_dword s101, s[4:5], s0  ; encoding: [0x42,0x19,0x54,0xf4,0x00,0x00,0x00,0x00]
 0x42,0x19,0x54,0xf4,0x00,0x00,0x00,0x00
 
@@ -18323,6 +18335,9 @@
 # GFX10: s_store_dword s1, s[4:5], 0x1234 glc dlc ; encoding: [0x42,0x40,0x41,0xf4,0x34,0x12,0x00,0xfa]
 0x42,0x40,0x41,0xf4,0x34,0x12,0x00,0xfa
 
+# GFX10: s_store_dword s1, s[4:5], s0 offset:0x12345 glc dlc ; encoding: [0x42,0x40,0x41,0xf4,0x45,0x23,0x01,0x00]
+0x42,0x40,0x41,0xf4,0x45,0x23,0x01,0x00
+
 # GFX10: s_store_dword s1, s[4:5], m0            ; encoding: [0x42,0x00,0x40,0xf4,0x00,0x00,0x00,0xf8]
 0x42,0x00,0x40,0xf4,0x00,0x00,0x00,0xf8
 
@@ -18374,6 +18389,9 @@
 # GFX10: s_store_dwordx2 s[2:3], s[4:5], 0x1234 glc dlc ; encoding: [0x82,0x40,0x45,0xf4,0x34,0x12,0x00,0xfa]
 0x82,0x40,0x45,0xf4,0x34,0x12,0x00,0xfa
 
+# GFX10: s_store_dwordx2 s[2:3], s[4:5], s0 offset:0x12345 glc dlc ; encoding: [0x82,0x40,0x45,0xf4,0x45,0x23,0x01,0x00]
+0x82,0x40,0x45,0xf4,0x45,0x23,0x01,0x00
+
 # GFX10: s_store_dwordx2 s[2:3], s[4:5], m0      ; encoding: [0x82,0x00,0x44,0xf4,0x00,0x00,0x00,0xf8]
 0x82,0x00,0x44,0xf4,0x00,0x00,0x00,0xf8
 
@@ -18419,6 +18437,9 @@
 # GFX10: s_store_dwordx4 s[4:7], s[4:5], 0x1234 glc dlc ; encoding: [0x02,0x41,0x49,0xf4,0x34,0x12,0x00,0xfa]
 0x02,0x41,0x49,0xf4,0x34,0x12,0x00,0xfa
 
+# GFX10: s_store_dwordx4 s[4:7], s[4:5], s0 offset:0x12345 glc dlc ; encoding: [0x02,0x41,0x49,0xf4,0x45,0x23,0x01,0x00]
+0x02,0x41,0x49,0xf4,0x45,0x23,0x01,0x00
+
 # GFX10: s_store_dwordx4 s[4:7], s[4:5], m0      ; encoding: [0x02,0x01,0x48,0xf4,0x00,0x00,0x00,0xf8]
 0x02,0x01,0x48,0xf4,0x00,0x00,0x00,0xf8
 


        


More information about the llvm-commits mailing list