[llvm] 60d6fbb - [AMDGPU][GFX9][GFX10] Support base+soffset+offset SMEM atomics.
Ivan Kosarev via llvm-commits
llvm-commits at lists.llvm.org
Fri Jun 10 05:23:26 PDT 2022
Author: Ivan Kosarev
Date: 2022-06-10T13:22:41+01:00
New Revision: 60d6fbb62110f673477291e135c6c6b2c721f2ee
URL: https://github.com/llvm/llvm-project/commit/60d6fbb62110f673477291e135c6c6b2c721f2ee
DIFF: https://github.com/llvm/llvm-project/commit/60d6fbb62110f673477291e135c6c6b2c721f2ee.diff
LOG: [AMDGPU][GFX9][GFX10] Support base+soffset+offset SMEM atomics.
Resolves a part of
https://github.com/llvm/llvm-project/issues/38652
Reviewed By: dp
Differential Revision: https://reviews.llvm.org/D127314
Added:
Modified:
llvm/lib/Target/AMDGPU/SMInstructions.td
llvm/test/MC/AMDGPU/gfx10_asm_smem.s
llvm/test/MC/AMDGPU/gfx9_asm_smem.s
llvm/test/MC/Disassembler/AMDGPU/gfx10_dasm_all.txt
llvm/test/MC/Disassembler/AMDGPU/smem_gfx9.txt
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SMInstructions.td b/llvm/lib/Target/AMDGPU/SMInstructions.td
index 530835bc22c8b..882d13402a192 100644
--- a/llvm/lib/Target/AMDGPU/SMInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SMInstructions.td
@@ -87,6 +87,21 @@ class SM_Real <SM_Pseudo ps, string opName = ps.Mnemonic>
bits<5> cpol;
}
+class OffsetMode<bit hasOffset, bit hasSOffset, string variant,
+ dag ins, string asm> {
+ bit HasOffset = hasOffset;
+ bit HasSOffset = hasSOffset;
+ string Variant = variant;
+ dag Ins = ins;
+ string Asm = asm;
+}
+
+def IMM_Offset : OffsetMode<1, 0, "_IMM", (ins smem_offset:$offset), "$offset">;
+def SGPR_Offset : OffsetMode<0, 1, "_SGPR", (ins SReg_32:$soffset), "$soffset">;
+def SGPR_IMM_Offset : OffsetMode<1, 1, "_SGPR_IMM",
+ (ins SReg_32:$soffset, smem_offset_mod:$offset),
+ "$soffset$offset">;
+
class SM_Probe_Pseudo <string opName, string variant, RegisterClass baseClass,
dag offsets, string asmOffsets,
bit hasOffset, bit hasSOffset>
@@ -282,23 +297,21 @@ class SM_Atomic_Pseudo <string opName,
class SM_Pseudo_Atomic<string opName,
RegisterClass baseClass,
RegisterClass dataClass,
- bit isImm,
+ OffsetMode offsets,
bit isRet,
- string opNameWithSuffix = opName # !if(isImm,
- !if(isRet, "_IMM_RTN", "_IMM"),
- !if(isRet, "_SGPR_RTN", "_SGPR")),
+ string opNameWithSuffix =
+ opName # offsets.Variant # !if(isRet, "_RTN", ""),
Operand CPolTy = !if(isRet, CPol_GLC1, CPol)> :
SM_Atomic_Pseudo<opName,
!if(isRet, (outs dataClass:$sdst), (outs)),
- !if(isImm,
- (ins dataClass:$sdata, baseClass:$sbase, smem_offset:$offset, CPolTy:$cpol),
- (ins dataClass:$sdata, baseClass:$sbase, SReg_32:$soffset, CPolTy:$cpol)),
- !if(isRet, " $sdst", " $sdata") # ", $sbase, " #
- !if(isImm, "$offset", "$soffset") # "$cpol",
+ !con((ins dataClass:$sdata, baseClass:$sbase), offsets.Ins,
+ (ins CPolTy:$cpol)),
+ !if(isRet, " $sdst", " $sdata") #
+ ", $sbase, " # offsets.Asm # "$cpol",
isRet>,
AtomicNoRet <opNameWithSuffix, isRet> {
- let has_offset = isImm;
- let has_soffset = !not(isImm);
+ let has_offset = offsets.HasOffset;
+ let has_soffset = offsets.HasSOffset;
let PseudoInstr = opNameWithSuffix;
let Constraints = !if(isRet, "$sdst = $sdata", "");
@@ -308,10 +321,12 @@ class SM_Pseudo_Atomic<string opName,
multiclass SM_Pseudo_Atomics<string opName,
RegisterClass baseClass,
RegisterClass dataClass> {
- def _IMM : SM_Pseudo_Atomic <opName, baseClass, dataClass, 1, 0>;
- def _SGPR : SM_Pseudo_Atomic <opName, baseClass, dataClass, 0, 0>;
- def _IMM_RTN : SM_Pseudo_Atomic <opName, baseClass, dataClass, 1, 1>;
- def _SGPR_RTN : SM_Pseudo_Atomic <opName, baseClass, dataClass, 0, 1>;
+ def _IMM : SM_Pseudo_Atomic <opName, baseClass, dataClass, IMM_Offset, 0>;
+ def _SGPR : SM_Pseudo_Atomic <opName, baseClass, dataClass, SGPR_Offset, 0>;
+ def _SGPR_IMM : SM_Pseudo_Atomic <opName, baseClass, dataClass, SGPR_IMM_Offset, 0>;
+ def _IMM_RTN : SM_Pseudo_Atomic <opName, baseClass, dataClass, IMM_Offset, 1>;
+ def _SGPR_RTN : SM_Pseudo_Atomic <opName, baseClass, dataClass, SGPR_Offset, 1>;
+ def _SGPR_IMM_RTN : SM_Pseudo_Atomic <opName, baseClass, dataClass, SGPR_IMM_Offset, 1>;
}
//===----------------------------------------------------------------------===//
@@ -705,8 +720,20 @@ class SMEM_Atomic_Real_vi <bits<8> op, SM_Atomic_Pseudo ps>
multiclass SM_Real_Atomics_vi<bits<8> op, string ps> {
def _IMM_vi : SMEM_Atomic_Real_vi <op, !cast<SM_Atomic_Pseudo>(ps#_IMM)>;
def _SGPR_vi : SMEM_Atomic_Real_vi <op, !cast<SM_Atomic_Pseudo>(ps#_SGPR)>;
+ def _SGPR_alt_gfx9
+ : SMEM_Atomic_Real_vi <op, !cast<SM_Atomic_Pseudo>(ps#_SGPR)>,
+ SMEM_Real_SGPR_alt_gfx9;
+ let IsGFX9SpecificEncoding = true in
+ def _SGPR_IMM_gfx9
+ : SMEM_Atomic_Real_vi <op, !cast<SM_Atomic_Pseudo>(ps#_SGPR_IMM)>;
def _IMM_RTN_vi : SMEM_Atomic_Real_vi <op, !cast<SM_Atomic_Pseudo>(ps#_IMM_RTN)>;
def _SGPR_RTN_vi : SMEM_Atomic_Real_vi <op, !cast<SM_Atomic_Pseudo>(ps#_SGPR_RTN)>;
+ def _SGPR_RTN_alt_gfx9
+ : SMEM_Atomic_Real_vi <op, !cast<SM_Atomic_Pseudo>(ps#_SGPR_RTN)>,
+ SMEM_Real_SGPR_alt_gfx9;
+ let IsGFX9SpecificEncoding = true in
+ def _SGPR_IMM_RTN_gfx9
+ : SMEM_Atomic_Real_vi <op, !cast<SM_Atomic_Pseudo>(ps#_SGPR_IMM_RTN)>;
}
defm S_BUFFER_ATOMIC_SWAP : SM_Real_Atomics_vi <0x40, "S_BUFFER_ATOMIC_SWAP">;
@@ -1103,8 +1130,10 @@ class SMEM_Atomic_Real_gfx10 <bits<8> op, SM_Atomic_Pseudo ps>
multiclass SM_Real_Atomics_gfx10<bits<8> op, string ps> {
def _IMM_gfx10 : SMEM_Atomic_Real_gfx10 <op, !cast<SM_Atomic_Pseudo>(ps#_IMM)>;
def _SGPR_gfx10 : SMEM_Atomic_Real_gfx10 <op, !cast<SM_Atomic_Pseudo>(ps#_SGPR)>;
+ def _SGPR_IMM_gfx10 : SMEM_Atomic_Real_gfx10 <op, !cast<SM_Atomic_Pseudo>(ps#_SGPR_IMM)>;
def _IMM_RTN_gfx10 : SMEM_Atomic_Real_gfx10 <op, !cast<SM_Atomic_Pseudo>(ps#_IMM_RTN)>;
def _SGPR_RTN_gfx10 : SMEM_Atomic_Real_gfx10 <op, !cast<SM_Atomic_Pseudo>(ps#_SGPR_RTN)>;
+ def _SGPR_IMM_RTN_gfx10 : SMEM_Atomic_Real_gfx10 <op, !cast<SM_Atomic_Pseudo>(ps#_SGPR_IMM_RTN)>;
}
let SubtargetPredicate = HasScalarAtomics in {
diff --git a/llvm/test/MC/AMDGPU/gfx10_asm_smem.s b/llvm/test/MC/AMDGPU/gfx10_asm_smem.s
index ef20f198a66db..ef4b460f67ab6 100644
--- a/llvm/test/MC/AMDGPU/gfx10_asm_smem.s
+++ b/llvm/test/MC/AMDGPU/gfx10_asm_smem.s
@@ -983,6 +983,9 @@ s_atomic_add s5, s[2:3], s101
s_atomic_add s5, s[2:3], 0x64
// GFX10: encoding: [0x41,0x01,0x08,0xf6,0x64,0x00,0x00,0xfa]
+s_atomic_add s5, s[2:3], s7 offset:0x64
+// GFX10: encoding: [0x41,0x01,0x08,0xf6,0x64,0x00,0x00,0x0e]
+
s_atomic_add_x2 s[10:11], s[2:3], s101
// GFX10: encoding: [0x81,0x02,0x88,0xf6,0x00,0x00,0x00,0xca]
@@ -1109,6 +1112,9 @@ s_atomic_add s5, s[2:3], s101 glc
s_atomic_add s5, s[2:3], 0x64 glc
// GFX10: encoding: [0x41,0x01,0x09,0xf6,0x64,0x00,0x00,0xfa]
+s_atomic_add s5, s[2:3], s7 offset:0x64 glc
+// GFX10: encoding: [0x41,0x01,0x09,0xf6,0x64,0x00,0x00,0x0e]
+
s_atomic_add_x2 s[10:11], s[2:3], s101 glc
// GFX10: encoding: [0x81,0x02,0x89,0xf6,0x00,0x00,0x00,0xca]
diff --git a/llvm/test/MC/AMDGPU/gfx9_asm_smem.s b/llvm/test/MC/AMDGPU/gfx9_asm_smem.s
index 371fe084fdcba..1fe4ec3f4cc6f 100644
--- a/llvm/test/MC/AMDGPU/gfx9_asm_smem.s
+++ b/llvm/test/MC/AMDGPU/gfx9_asm_smem.s
@@ -3123,9 +3123,15 @@ s_atomic_add s5, s[2:3], m0
s_atomic_add s5, s[2:3], 0x0
// CHECK: [0x41,0x01,0x0a,0xc2,0x00,0x00,0x00,0x00]
+s_atomic_add s5, s[2:3], s7 offset:0x12345
+// CHECK: [0x41,0x41,0x0a,0xc2,0x45,0x23,0x01,0x0e]
+
s_atomic_add s5, s[2:3], s0 glc
// CHECK: [0x41,0x01,0x09,0xc2,0x00,0x00,0x00,0x00]
+s_atomic_add s5, s[2:3], s7 offset:0x12345 glc
+// CHECK: [0x41,0x41,0x0b,0xc2,0x45,0x23,0x01,0x0e]
+
s_atomic_sub s5, s[2:3], s0
// CHECK: [0x41,0x01,0x0c,0xc2,0x00,0x00,0x00,0x00]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx10_dasm_all.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx10_dasm_all.txt
index 4d810c699c7cd..460a8c2326ce3 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx10_dasm_all.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx10_dasm_all.txt
@@ -13253,12 +13253,18 @@
# GFX10: s_atomic_add s5, s[2:3], 0x64 ; encoding: [0x41,0x01,0x08,0xf6,0x64,0x00,0x00,0xfa]
0x41,0x01,0x08,0xf6,0x64,0x00,0x00,0xfa
+# GFX10: s_atomic_add s5, s[2:3], s7 offset:0x64 ; encoding: [0x41,0x01,0x08,0xf6,0x64,0x00,0x00,0x0e]
+0x41,0x01,0x08,0xf6,0x64,0x00,0x00,0x0e
+
# GFX10: s_atomic_add s5, s[2:3], 0x64 dlc ; encoding: [0x41,0x41,0x08,0xf6,0x64,0x00,0x00,0xfa]
0x41,0x41,0x08,0xf6,0x64,0x00,0x00,0xfa
# GFX10: s_atomic_add s5, s[2:3], 0x64 glc ; encoding: [0x41,0x01,0x09,0xf6,0x64,0x00,0x00,0xfa]
0x41,0x01,0x09,0xf6,0x64,0x00,0x00,0xfa
+# GFX10: s_atomic_add s5, s[2:3], s7 offset:0x64 glc ; encoding: [0x41,0x01,0x09,0xf6,0x64,0x00,0x00,0x0e]
+0x41,0x01,0x09,0xf6,0x64,0x00,0x00,0x0e
+
# GFX10: s_atomic_add s5, s[2:3], s101 ; encoding: [0x41,0x01,0x08,0xf6,0x00,0x00,0x00,0xca]
0x41,0x01,0x08,0xf6,0x00,0x00,0x00,0xca
diff --git a/llvm/test/MC/Disassembler/AMDGPU/smem_gfx9.txt b/llvm/test/MC/Disassembler/AMDGPU/smem_gfx9.txt
index b561bf15af0ce..c2573060a1e9d 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/smem_gfx9.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/smem_gfx9.txt
@@ -66,9 +66,27 @@
# GFX9: s_atomic_add s5, s[2:3], s101 ; encoding: [0x41,0x01,0x08,0xc2,0x65,0x00,0x00,0x00]
0x41,0x01,0x08,0xc2,0x65,0x00,0x00,0x00
+# The SGPR variants can alternatively be encoded with imm=0, soffset_en=1
+# and the offset register encoded in the soffset field with the offset
+# field being disregarded.
+# GFX9: s_atomic_add s5, s[2:3], s101 ; encoding: [0x41,0x41,0x08,0xc2,0x00,0x00,0x00,0xca]
+0x41,0x41,0x08,0xc2,0x2e,0x00,0x00,0xca
+
+# The SGPR variants can alternatively be encoded with imm=0, soffset_en=1
+# and the offset register encoded in the soffset field with the offset
+# field being disregarded.
+# GFX9: s_atomic_add s5, s[2:3], s101 glc ; encoding: [0x41,0x41,0x09,0xc2,0x00,0x00,0x00,0xca]
+0x41,0x41,0x09,0xc2,0x2e,0x00,0x00,0xca
+
# GFX9: s_atomic_add_x2 s[10:11], s[2:3], s101 ; encoding: [0x81,0x02,0x88,0xc2,0x65,0x00,0x00,0x00]
0x81,0x02,0x88,0xc2,0x65,0x00,0x00,0x00
+# GFX9: s_atomic_add s5, s[2:3], s7 offset:0x12345 ; encoding: [0x41,0x41,0x0a,0xc2,0x45,0x23,0x01,0x0e]
+0x41,0x41,0x0a,0xc2,0x45,0x23,0x01,0x0e
+
+# GFX9: s_atomic_add s5, s[2:3], s7 offset:0x12345 glc ; encoding: [0x41,0x41,0x0b,0xc2,0x45,0x23,0x01,0x0e]
+0x41,0x41,0x0b,0xc2,0x45,0x23,0x01,0x0e
+
# GFX9: s_atomic_and s101, s[2:3], s0 ; encoding: [0x41,0x19,0x20,0xc2,0x00,0x00,0x00,0x00]
0x41,0x19,0x20,0xc2,0x00,0x00,0x00,0x00
More information about the llvm-commits
mailing list