[llvm] 60d6fbb - [AMDGPU][GFX9][GFX10] Support base+soffset+offset SMEM atomics.

Ivan Kosarev via llvm-commits llvm-commits at lists.llvm.org
Fri Jun 10 05:23:26 PDT 2022


Author: Ivan Kosarev
Date: 2022-06-10T13:22:41+01:00
New Revision: 60d6fbb62110f673477291e135c6c6b2c721f2ee

URL: https://github.com/llvm/llvm-project/commit/60d6fbb62110f673477291e135c6c6b2c721f2ee
DIFF: https://github.com/llvm/llvm-project/commit/60d6fbb62110f673477291e135c6c6b2c721f2ee.diff

LOG: [AMDGPU][GFX9][GFX10] Support base+soffset+offset SMEM atomics.

Resolves a part of
https://github.com/llvm/llvm-project/issues/38652

Reviewed By: dp

Differential Revision: https://reviews.llvm.org/D127314

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/SMInstructions.td
    llvm/test/MC/AMDGPU/gfx10_asm_smem.s
    llvm/test/MC/AMDGPU/gfx9_asm_smem.s
    llvm/test/MC/Disassembler/AMDGPU/gfx10_dasm_all.txt
    llvm/test/MC/Disassembler/AMDGPU/smem_gfx9.txt

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/SMInstructions.td b/llvm/lib/Target/AMDGPU/SMInstructions.td
index 530835bc22c8b..882d13402a192 100644
--- a/llvm/lib/Target/AMDGPU/SMInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SMInstructions.td
@@ -87,6 +87,21 @@ class SM_Real <SM_Pseudo ps, string opName = ps.Mnemonic>
   bits<5> cpol;
 }
 
+class OffsetMode<bit hasOffset, bit hasSOffset, string variant,
+                 dag ins, string asm> {
+  bit HasOffset = hasOffset;
+  bit HasSOffset = hasSOffset;
+  string Variant = variant;
+  dag Ins = ins;
+  string Asm = asm;
+}
+
+def IMM_Offset : OffsetMode<1, 0, "_IMM", (ins smem_offset:$offset), "$offset">;
+def SGPR_Offset : OffsetMode<0, 1, "_SGPR", (ins SReg_32:$soffset), "$soffset">;
+def SGPR_IMM_Offset : OffsetMode<1, 1, "_SGPR_IMM",
+                                 (ins SReg_32:$soffset, smem_offset_mod:$offset),
+                                 "$soffset$offset">;
+
 class SM_Probe_Pseudo <string opName, string variant, RegisterClass baseClass,
                        dag offsets, string asmOffsets,
                        bit hasOffset, bit hasSOffset>
@@ -282,23 +297,21 @@ class SM_Atomic_Pseudo <string opName,
 class SM_Pseudo_Atomic<string opName,
                        RegisterClass baseClass,
                        RegisterClass dataClass,
-                       bit isImm,
+                       OffsetMode offsets,
                        bit isRet,
-                       string opNameWithSuffix = opName # !if(isImm,
-                                 !if(isRet, "_IMM_RTN", "_IMM"),
-                                 !if(isRet, "_SGPR_RTN", "_SGPR")),
+                       string opNameWithSuffix =
+                         opName # offsets.Variant # !if(isRet, "_RTN", ""),
                        Operand CPolTy = !if(isRet, CPol_GLC1, CPol)> :
   SM_Atomic_Pseudo<opName,
                    !if(isRet, (outs dataClass:$sdst), (outs)),
-                   !if(isImm,
-                       (ins dataClass:$sdata, baseClass:$sbase, smem_offset:$offset, CPolTy:$cpol),
-                       (ins dataClass:$sdata, baseClass:$sbase, SReg_32:$soffset, CPolTy:$cpol)),
-                   !if(isRet, " $sdst", " $sdata") # ", $sbase, " #
-                     !if(isImm, "$offset", "$soffset") # "$cpol",
+                   !con((ins dataClass:$sdata, baseClass:$sbase), offsets.Ins,
+                        (ins CPolTy:$cpol)),
+                   !if(isRet, " $sdst", " $sdata") #
+                     ", $sbase, " # offsets.Asm # "$cpol",
                    isRet>,
   AtomicNoRet <opNameWithSuffix, isRet> {
-  let has_offset = isImm;
-  let has_soffset = !not(isImm);
+  let has_offset = offsets.HasOffset;
+  let has_soffset = offsets.HasSOffset;
   let PseudoInstr = opNameWithSuffix;
 
   let Constraints = !if(isRet, "$sdst = $sdata", "");
@@ -308,10 +321,12 @@ class SM_Pseudo_Atomic<string opName,
 multiclass SM_Pseudo_Atomics<string opName,
                              RegisterClass baseClass,
                              RegisterClass dataClass> {
-  def _IMM      : SM_Pseudo_Atomic <opName, baseClass, dataClass, 1, 0>;
-  def _SGPR     : SM_Pseudo_Atomic <opName, baseClass, dataClass, 0, 0>;
-  def _IMM_RTN  : SM_Pseudo_Atomic <opName, baseClass, dataClass, 1, 1>;
-  def _SGPR_RTN : SM_Pseudo_Atomic <opName, baseClass, dataClass, 0, 1>;
+  def _IMM      : SM_Pseudo_Atomic <opName, baseClass, dataClass, IMM_Offset, 0>;
+  def _SGPR     : SM_Pseudo_Atomic <opName, baseClass, dataClass, SGPR_Offset, 0>;
+  def _SGPR_IMM : SM_Pseudo_Atomic <opName, baseClass, dataClass, SGPR_IMM_Offset, 0>;
+  def _IMM_RTN  : SM_Pseudo_Atomic <opName, baseClass, dataClass, IMM_Offset, 1>;
+  def _SGPR_RTN : SM_Pseudo_Atomic <opName, baseClass, dataClass, SGPR_Offset, 1>;
+  def _SGPR_IMM_RTN : SM_Pseudo_Atomic <opName, baseClass, dataClass, SGPR_IMM_Offset, 1>;
 }
 
 //===----------------------------------------------------------------------===//
@@ -705,8 +720,20 @@ class SMEM_Atomic_Real_vi <bits<8> op, SM_Atomic_Pseudo ps>
 multiclass SM_Real_Atomics_vi<bits<8> op, string ps> {
   def _IMM_vi       : SMEM_Atomic_Real_vi <op, !cast<SM_Atomic_Pseudo>(ps#_IMM)>;
   def _SGPR_vi      : SMEM_Atomic_Real_vi <op, !cast<SM_Atomic_Pseudo>(ps#_SGPR)>;
+  def _SGPR_alt_gfx9
+    : SMEM_Atomic_Real_vi <op, !cast<SM_Atomic_Pseudo>(ps#_SGPR)>,
+      SMEM_Real_SGPR_alt_gfx9;
+  let IsGFX9SpecificEncoding = true in
+  def _SGPR_IMM_gfx9
+    : SMEM_Atomic_Real_vi <op, !cast<SM_Atomic_Pseudo>(ps#_SGPR_IMM)>;
   def _IMM_RTN_vi   : SMEM_Atomic_Real_vi <op, !cast<SM_Atomic_Pseudo>(ps#_IMM_RTN)>;
   def _SGPR_RTN_vi  : SMEM_Atomic_Real_vi <op, !cast<SM_Atomic_Pseudo>(ps#_SGPR_RTN)>;
+  def _SGPR_RTN_alt_gfx9
+    : SMEM_Atomic_Real_vi <op, !cast<SM_Atomic_Pseudo>(ps#_SGPR_RTN)>,
+      SMEM_Real_SGPR_alt_gfx9;
+  let IsGFX9SpecificEncoding = true in
+  def _SGPR_IMM_RTN_gfx9
+    : SMEM_Atomic_Real_vi <op, !cast<SM_Atomic_Pseudo>(ps#_SGPR_IMM_RTN)>;
 }
 
 defm S_BUFFER_ATOMIC_SWAP         : SM_Real_Atomics_vi <0x40, "S_BUFFER_ATOMIC_SWAP">;
@@ -1103,8 +1130,10 @@ class SMEM_Atomic_Real_gfx10 <bits<8> op, SM_Atomic_Pseudo ps>
 multiclass SM_Real_Atomics_gfx10<bits<8> op, string ps> {
   def _IMM_gfx10       : SMEM_Atomic_Real_gfx10 <op, !cast<SM_Atomic_Pseudo>(ps#_IMM)>;
   def _SGPR_gfx10      : SMEM_Atomic_Real_gfx10 <op, !cast<SM_Atomic_Pseudo>(ps#_SGPR)>;
+  def _SGPR_IMM_gfx10  : SMEM_Atomic_Real_gfx10 <op, !cast<SM_Atomic_Pseudo>(ps#_SGPR_IMM)>;
   def _IMM_RTN_gfx10   : SMEM_Atomic_Real_gfx10 <op, !cast<SM_Atomic_Pseudo>(ps#_IMM_RTN)>;
   def _SGPR_RTN_gfx10  : SMEM_Atomic_Real_gfx10 <op, !cast<SM_Atomic_Pseudo>(ps#_SGPR_RTN)>;
+  def _SGPR_IMM_RTN_gfx10 : SMEM_Atomic_Real_gfx10 <op, !cast<SM_Atomic_Pseudo>(ps#_SGPR_IMM_RTN)>;
 }
 
 let SubtargetPredicate = HasScalarAtomics in {

diff  --git a/llvm/test/MC/AMDGPU/gfx10_asm_smem.s b/llvm/test/MC/AMDGPU/gfx10_asm_smem.s
index ef20f198a66db..ef4b460f67ab6 100644
--- a/llvm/test/MC/AMDGPU/gfx10_asm_smem.s
+++ b/llvm/test/MC/AMDGPU/gfx10_asm_smem.s
@@ -983,6 +983,9 @@ s_atomic_add s5, s[2:3], s101
 s_atomic_add s5, s[2:3], 0x64
 // GFX10: encoding: [0x41,0x01,0x08,0xf6,0x64,0x00,0x00,0xfa]
 
+s_atomic_add s5, s[2:3], s7 offset:0x64
+// GFX10: encoding: [0x41,0x01,0x08,0xf6,0x64,0x00,0x00,0x0e]
+
 s_atomic_add_x2 s[10:11], s[2:3], s101
 // GFX10: encoding: [0x81,0x02,0x88,0xf6,0x00,0x00,0x00,0xca]
 
@@ -1109,6 +1112,9 @@ s_atomic_add s5, s[2:3], s101 glc
 s_atomic_add s5, s[2:3], 0x64 glc
 // GFX10: encoding: [0x41,0x01,0x09,0xf6,0x64,0x00,0x00,0xfa]
 
+s_atomic_add s5, s[2:3], s7 offset:0x64 glc
+// GFX10: encoding: [0x41,0x01,0x09,0xf6,0x64,0x00,0x00,0x0e]
+
 s_atomic_add_x2 s[10:11], s[2:3], s101 glc
 // GFX10: encoding: [0x81,0x02,0x89,0xf6,0x00,0x00,0x00,0xca]
 

diff  --git a/llvm/test/MC/AMDGPU/gfx9_asm_smem.s b/llvm/test/MC/AMDGPU/gfx9_asm_smem.s
index 371fe084fdcba..1fe4ec3f4cc6f 100644
--- a/llvm/test/MC/AMDGPU/gfx9_asm_smem.s
+++ b/llvm/test/MC/AMDGPU/gfx9_asm_smem.s
@@ -3123,9 +3123,15 @@ s_atomic_add s5, s[2:3], m0
 s_atomic_add s5, s[2:3], 0x0
 // CHECK: [0x41,0x01,0x0a,0xc2,0x00,0x00,0x00,0x00]
 
+s_atomic_add s5, s[2:3], s7 offset:0x12345
+// CHECK: [0x41,0x41,0x0a,0xc2,0x45,0x23,0x01,0x0e]
+
 s_atomic_add s5, s[2:3], s0 glc
 // CHECK: [0x41,0x01,0x09,0xc2,0x00,0x00,0x00,0x00]
 
+s_atomic_add s5, s[2:3], s7 offset:0x12345 glc
+// CHECK: [0x41,0x41,0x0b,0xc2,0x45,0x23,0x01,0x0e]
+
 s_atomic_sub s5, s[2:3], s0
 // CHECK: [0x41,0x01,0x0c,0xc2,0x00,0x00,0x00,0x00]
 

diff  --git a/llvm/test/MC/Disassembler/AMDGPU/gfx10_dasm_all.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx10_dasm_all.txt
index 4d810c699c7cd..460a8c2326ce3 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx10_dasm_all.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx10_dasm_all.txt
@@ -13253,12 +13253,18 @@
 # GFX10: s_atomic_add s5, s[2:3], 0x64           ; encoding: [0x41,0x01,0x08,0xf6,0x64,0x00,0x00,0xfa]
 0x41,0x01,0x08,0xf6,0x64,0x00,0x00,0xfa
 
+# GFX10: s_atomic_add s5, s[2:3], s7 offset:0x64 ; encoding: [0x41,0x01,0x08,0xf6,0x64,0x00,0x00,0x0e]
+0x41,0x01,0x08,0xf6,0x64,0x00,0x00,0x0e
+
 # GFX10: s_atomic_add s5, s[2:3], 0x64 dlc       ; encoding: [0x41,0x41,0x08,0xf6,0x64,0x00,0x00,0xfa]
 0x41,0x41,0x08,0xf6,0x64,0x00,0x00,0xfa
 
 # GFX10: s_atomic_add s5, s[2:3], 0x64 glc       ; encoding: [0x41,0x01,0x09,0xf6,0x64,0x00,0x00,0xfa]
 0x41,0x01,0x09,0xf6,0x64,0x00,0x00,0xfa
 
+# GFX10: s_atomic_add s5, s[2:3], s7 offset:0x64 glc ; encoding: [0x41,0x01,0x09,0xf6,0x64,0x00,0x00,0x0e]
+0x41,0x01,0x09,0xf6,0x64,0x00,0x00,0x0e
+
 # GFX10: s_atomic_add s5, s[2:3], s101           ; encoding: [0x41,0x01,0x08,0xf6,0x00,0x00,0x00,0xca]
 0x41,0x01,0x08,0xf6,0x00,0x00,0x00,0xca
 

diff  --git a/llvm/test/MC/Disassembler/AMDGPU/smem_gfx9.txt b/llvm/test/MC/Disassembler/AMDGPU/smem_gfx9.txt
index b561bf15af0ce..c2573060a1e9d 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/smem_gfx9.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/smem_gfx9.txt
@@ -66,9 +66,27 @@
 # GFX9: s_atomic_add s5, s[2:3], s101    ; encoding: [0x41,0x01,0x08,0xc2,0x65,0x00,0x00,0x00]
 0x41,0x01,0x08,0xc2,0x65,0x00,0x00,0x00
 
+# The SGPR variants can alternatively be encoded with imm=0, soffset_en=1
+# and the offset register encoded in the soffset field with the offset
+# field being disregarded.
+# GFX9: s_atomic_add s5, s[2:3], s101    ; encoding: [0x41,0x41,0x08,0xc2,0x00,0x00,0x00,0xca]
+0x41,0x41,0x08,0xc2,0x2e,0x00,0x00,0xca
+
+# The SGPR variants can alternatively be encoded with imm=0, soffset_en=1
+# and the offset register encoded in the soffset field with the offset
+# field being disregarded.
+# GFX9: s_atomic_add s5, s[2:3], s101 glc ; encoding: [0x41,0x41,0x09,0xc2,0x00,0x00,0x00,0xca]
+0x41,0x41,0x09,0xc2,0x2e,0x00,0x00,0xca
+
 # GFX9: s_atomic_add_x2 s[10:11], s[2:3], s101    ; encoding: [0x81,0x02,0x88,0xc2,0x65,0x00,0x00,0x00]
 0x81,0x02,0x88,0xc2,0x65,0x00,0x00,0x00
 
+# GFX9: s_atomic_add s5, s[2:3], s7 offset:0x12345 ; encoding: [0x41,0x41,0x0a,0xc2,0x45,0x23,0x01,0x0e]
+0x41,0x41,0x0a,0xc2,0x45,0x23,0x01,0x0e
+
+# GFX9: s_atomic_add s5, s[2:3], s7 offset:0x12345 glc ; encoding: [0x41,0x41,0x0b,0xc2,0x45,0x23,0x01,0x0e]
+0x41,0x41,0x0b,0xc2,0x45,0x23,0x01,0x0e
+
 # GFX9: s_atomic_and s101, s[2:3], s0    ; encoding: [0x41,0x19,0x20,0xc2,0x00,0x00,0x00,0x00]
 0x41,0x19,0x20,0xc2,0x00,0x00,0x00,0x00
 


        


More information about the llvm-commits mailing list