[llvm] 2349910 - [AMDGPU] Support for gfx940 flat lds opcodes

Stanislav Mekhanoshin via llvm-commits llvm-commits at lists.llvm.org
Mon Mar 14 15:59:27 PDT 2022


Author: Stanislav Mekhanoshin
Date: 2022-03-14T15:46:19-07:00
New Revision: 23499103f77180977ddeb31b680cb66159141d61

URL: https://github.com/llvm/llvm-project/commit/23499103f77180977ddeb31b680cb66159141d61
DIFF: https://github.com/llvm/llvm-project/commit/23499103f77180977ddeb31b680cb66159141d61.diff

LOG: [AMDGPU] Support for gfx940 flat lds opcodes

Differential Revision: https://reviews.llvm.org/D121414

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/FLATInstructions.td
    llvm/test/MC/AMDGPU/gfx940_asm_features.s
    llvm/test/MC/Disassembler/AMDGPU/gfx940_dasm_features.txt

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td
index 9f086a29d16f5..a2a9c3d7788b5 100644
--- a/llvm/lib/Target/AMDGPU/FLATInstructions.td
+++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td
@@ -243,6 +243,33 @@ multiclass FLAT_Global_Store_Pseudo<string opName, RegisterClass regClass> {
   }
 }
 
+class FLAT_Global_Load_LDS_Pseudo <string opName, bit EnableSaddr = 0> : FLAT_Pseudo< // Pseudo for a global_load_lds_* op; EnableSaddr selects the form with an SGPR $saddr operand.
+  opName,
+  (outs ), // no output operands
+  !con(
+      !if(EnableSaddr, (ins SReg_64:$saddr, VGPR_32:$vaddr), (ins VReg_64:$vaddr)), // saddr form: SReg_64 $saddr + VGPR_32 $vaddr; otherwise a single VReg_64 $vaddr
+      (ins flat_offset:$offset, CPol_0:$cpol)), // $offset and $cpol are appended in both forms
+  " $vaddr"#!if(EnableSaddr, ", $saddr", ", off")#"$offset$cpol"> { // asm string prints "off" in the $saddr position when EnableSaddr = 0
+  let LGKM_CNT = 1;
+  let is_flat_global = 1;
+  let has_data = 0;
+  let has_vdst = 0;
+  let mayLoad = 1;
+  let mayStore = 1; // NOTE(review): presumably set because the loaded value is written to LDS - confirm
+  let has_saddr = 1;
+  let enabled_saddr = EnableSaddr;
+  let PseudoInstr = opName#!if(EnableSaddr, "_SADDR", ""); // saddr variant gets a "_SADDR" suffix
+  let Uses = [M0, EXEC]; // NOTE(review): M0 presumably supplies the LDS destination - confirm against the ISA documentation
+  let SchedRW = [WriteVMEM, WriteLDS];
+}
+
+multiclass FLAT_Global_Load_LDS_Pseudo<string opName> { // Defines two pseudos per opName: the unsuffixed one and the _SADDR one.
+  def ""     : FLAT_Global_Load_LDS_Pseudo<opName>, // EnableSaddr left at its default of 0
+    GlobalSaddrTable<0, opName>;
+  def _SADDR : FLAT_Global_Load_LDS_Pseudo<opName, 1>, // EnableSaddr = 1
+    GlobalSaddrTable<1, opName>;
+}
+
 class FLAT_Global_Store_AddTid_Pseudo <string opName, RegisterClass vdataClass,
   bit EnableSaddr = 0> : FLAT_Pseudo<
   opName,
@@ -366,6 +393,47 @@ multiclass FLAT_Scratch_Store_Pseudo<string opName, RegisterClass regClass> {
   }
 }
 
+class FLAT_Scratch_Load_LDS_Pseudo <string opName, bit EnableSaddr = 0, // Pseudo for a scratch_load_lds_* op; the three bits select which address operands exist.
+  bit EnableSVE = 0,
+  bit EnableVaddr = !or(EnableSVE, !not(EnableSaddr))> : FLAT_Pseudo< // default: vaddr is present if EnableSVE is set or EnableSaddr is clear
+  opName,
+  (outs ), // no output operands
+  !if(EnableSVE,
+    (ins VGPR_32:$vaddr, SReg_32_XEXEC_HI:$saddr, flat_offset:$offset, CPol:$cpol), // EnableSVE: both $vaddr and $saddr
+    !if(EnableSaddr,
+      (ins SReg_32_XEXEC_HI:$saddr, flat_offset:$offset, CPol:$cpol), // EnableSaddr only: $saddr, no $vaddr
+      !if(EnableVaddr,
+        (ins VGPR_32:$vaddr, flat_offset:$offset, CPol:$cpol), // EnableVaddr only: $vaddr, no $saddr
+        (ins flat_offset:$offset, CPol:$cpol)))), // none set: only $offset and $cpol
+  " "#!if(EnableVaddr, "$vaddr, ", "off, ")#!if(EnableSaddr, "$saddr", "off")#"$offset$cpol"> { // asm string prints "off" for each absent address operand
+
+  let LGKM_CNT = 1;
+  let is_flat_scratch = 1;
+  let has_data = 0;
+  let has_vdst = 0;
+  let mayLoad = 1;
+  let mayStore = 1; // NOTE(review): presumably set because the loaded value is written to LDS - confirm
+  let has_saddr = 1;
+  let enabled_saddr = EnableSaddr;
+  let has_vaddr = EnableVaddr;
+  let has_sve = EnableSVE;
+  let sve = EnableVaddr; // set from EnableVaddr (not EnableSVE), so it always equals has_vaddr
+  let PseudoInstr = opName#!if(EnableSVE, "_SVS", !if(EnableSaddr, "_SADDR", !if(EnableVaddr, "", "_ST"))); // suffix priority: _SVS, then _SADDR, then none (vaddr), else _ST
+  let Uses = [M0, EXEC]; // NOTE(review): M0 presumably supplies the LDS destination - confirm against the ISA documentation
+  let SchedRW = [WriteVMEM, WriteLDS];
+}
+
+multiclass FLAT_Scratch_Load_LDS_Pseudo<string opName> { // Defines four pseudos per opName, one per addressing form.
+  def ""     : FLAT_Scratch_Load_LDS_Pseudo<opName>, // all defaults: EnableSaddr = 0, EnableSVE = 0, so EnableVaddr evaluates to 1
+               FlatScratchInst<opName, "SV">;
+  def _SADDR : FLAT_Scratch_Load_LDS_Pseudo<opName, 1>, // EnableSaddr = 1, so the default EnableVaddr evaluates to 0
+               FlatScratchInst<opName, "SS">;
+  def _SVS   : FLAT_Scratch_Load_LDS_Pseudo<opName, 1, 1>, // EnableSaddr = 1, EnableSVE = 1: both address operands
+               FlatScratchInst<opName, "SVS">;
+  def _ST    : FLAT_Scratch_Load_LDS_Pseudo<opName, 0, 0, 0>, // EnableVaddr forced to 0: neither address operand
+               FlatScratchInst<opName, "ST">;
+}
+
 class FLAT_AtomicNoRet_Pseudo<string opName, dag outs, dag ins,
                                string asm, list<dag> pattern = []> :
   FLAT_Pseudo<opName, outs, ins, asm, pattern> {
@@ -772,6 +840,16 @@ defm GLOBAL_ATOMIC_DEC_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_dec_x2",
 let SubtargetPredicate = HasGFX10_BEncoding in
 defm GLOBAL_ATOMIC_CSUB : FLAT_Global_Atomic_Pseudo_RTN <"global_atomic_csub",
                               VGPR_32, i32, int_amdgcn_global_atomic_csub>;
+
+let SubtargetPredicate = isGFX940Plus in { // each defm below expands FLAT_Global_Load_LDS_Pseudo into an unsuffixed and a _SADDR pseudo
+
+defm GLOBAL_LOAD_LDS_UBYTE  : FLAT_Global_Load_LDS_Pseudo <"global_load_lds_ubyte">;
+defm GLOBAL_LOAD_LDS_SBYTE  : FLAT_Global_Load_LDS_Pseudo <"global_load_lds_sbyte">;
+defm GLOBAL_LOAD_LDS_USHORT : FLAT_Global_Load_LDS_Pseudo <"global_load_lds_ushort">;
+defm GLOBAL_LOAD_LDS_SSHORT : FLAT_Global_Load_LDS_Pseudo <"global_load_lds_sshort">;
+defm GLOBAL_LOAD_LDS_DWORD  : FLAT_Global_Load_LDS_Pseudo <"global_load_lds_dword">;
+
+} // End let SubtargetPredicate = isGFX940Plus
 } // End is_flat_global = 1
 
 
@@ -803,6 +881,16 @@ defm SCRATCH_STORE_DWORDX4 : FLAT_Scratch_Store_Pseudo <"scratch_store_dwordx4",
 defm SCRATCH_STORE_BYTE_D16_HI : FLAT_Scratch_Store_Pseudo <"scratch_store_byte_d16_hi", VGPR_32>;
 defm SCRATCH_STORE_SHORT_D16_HI : FLAT_Scratch_Store_Pseudo <"scratch_store_short_d16_hi", VGPR_32>;
 
+let SubtargetPredicate = isGFX940Plus in { // each defm below expands FLAT_Scratch_Load_LDS_Pseudo into unsuffixed, _SADDR, _SVS and _ST pseudos
+
+defm SCRATCH_LOAD_LDS_UBYTE  : FLAT_Scratch_Load_LDS_Pseudo <"scratch_load_lds_ubyte">;
+defm SCRATCH_LOAD_LDS_SBYTE  : FLAT_Scratch_Load_LDS_Pseudo <"scratch_load_lds_sbyte">;
+defm SCRATCH_LOAD_LDS_USHORT : FLAT_Scratch_Load_LDS_Pseudo <"scratch_load_lds_ushort">;
+defm SCRATCH_LOAD_LDS_SSHORT : FLAT_Scratch_Load_LDS_Pseudo <"scratch_load_lds_sshort">;
+defm SCRATCH_LOAD_LDS_DWORD  : FLAT_Scratch_Load_LDS_Pseudo <"scratch_load_lds_dword">;
+
+} // End let SubtargetPredicate = isGFX940Plus
+
 } // End SubtargetPredicate = HasFlatScratchInsts
 
 let SubtargetPredicate = isGFX10Plus, is_flat_global = 1 in {
@@ -1613,6 +1701,13 @@ defm GLOBAL_STORE_DWORDX2 : FLAT_Real_AllAddr_vi <0x1d>;
 defm GLOBAL_STORE_DWORDX3 : FLAT_Real_AllAddr_vi <0x1e>;
 defm GLOBAL_STORE_DWORDX4 : FLAT_Real_AllAddr_vi <0x1f>;
 
+let AssemblerPredicate = isGFX940Plus in { // NOTE(review): the hex argument is presumably the opcode; FLAT_Real_AllAddr_vi is defined outside this patch
+defm GLOBAL_LOAD_LDS_UBYTE  : FLAT_Real_AllAddr_vi <0x026>;
+defm GLOBAL_LOAD_LDS_SBYTE  : FLAT_Real_AllAddr_vi <0x027>;
+defm GLOBAL_LOAD_LDS_USHORT : FLAT_Real_AllAddr_vi <0x028>;
+defm GLOBAL_LOAD_LDS_SSHORT : FLAT_Real_AllAddr_vi <0x029>;
+defm GLOBAL_LOAD_LDS_DWORD  : FLAT_Real_AllAddr_vi <0x02a>;
+} // End let AssemblerPredicate = isGFX940Plus
 
 defm GLOBAL_ATOMIC_SWAP       : FLAT_Global_Real_Atomics_vi <0x40>;
 defm GLOBAL_ATOMIC_CMPSWAP    : FLAT_Global_Real_Atomics_vi <0x41>;
@@ -1641,6 +1736,14 @@ defm GLOBAL_ATOMIC_XOR_X2     : FLAT_Global_Real_Atomics_vi <0x6a>;
 defm GLOBAL_ATOMIC_INC_X2     : FLAT_Global_Real_Atomics_vi <0x6b>;
 defm GLOBAL_ATOMIC_DEC_X2     : FLAT_Global_Real_Atomics_vi <0x6c>;
 
+let AssemblerPredicate = isGFX940Plus in { // uses the same values 0x026-0x02a as the GLOBAL_LOAD_LDS_* real definitions; FLAT_Real_AllAddr_SVE_vi is defined outside this patch
+defm SCRATCH_LOAD_LDS_UBYTE  : FLAT_Real_AllAddr_SVE_vi <0x026>;
+defm SCRATCH_LOAD_LDS_SBYTE  : FLAT_Real_AllAddr_SVE_vi <0x027>;
+defm SCRATCH_LOAD_LDS_USHORT : FLAT_Real_AllAddr_SVE_vi <0x028>;
+defm SCRATCH_LOAD_LDS_SSHORT : FLAT_Real_AllAddr_SVE_vi <0x029>;
+defm SCRATCH_LOAD_LDS_DWORD  : FLAT_Real_AllAddr_SVE_vi <0x02a>;
+} // End let AssemblerPredicate = isGFX940Plus
+
 defm SCRATCH_LOAD_UBYTE         : FLAT_Real_AllAddr_SVE_vi <0x10>;
 defm SCRATCH_LOAD_SBYTE         : FLAT_Real_AllAddr_SVE_vi <0x11>;
 defm SCRATCH_LOAD_USHORT        : FLAT_Real_AllAddr_SVE_vi <0x12>;

diff  --git a/llvm/test/MC/AMDGPU/gfx940_asm_features.s b/llvm/test/MC/AMDGPU/gfx940_asm_features.s
index c1c7c0dc32281..56d25976bfbef 100644
--- a/llvm/test/MC/AMDGPU/gfx940_asm_features.s
+++ b/llvm/test/MC/AMDGPU/gfx940_asm_features.s
@@ -129,6 +129,74 @@ ds_pk_add_rtn_bf16  v3, v2, v1
 // GFX940: ds_pk_add_rtn_bf16 a3, v2, a1           ; encoding: [0x00,0x00,0x70,0xdb,0x02,0x01,0x00,0x03]
 ds_pk_add_rtn_bf16  a3, v2, a1
 
+// NOT-GFX940: error: instruction not supported on this GPU
+// GFX940: global_load_lds_dword v[2:3], off       ; encoding: [0x00,0x80,0xa8,0xdc,0x02,0x00,0x7f,0x00]
+global_load_lds_dword v[2:3], off
+
+// NOT-GFX940: error:
+// GFX940: global_load_lds_dword v[2:3], off sc0 nt sc1 ; encoding: [0x00,0x80,0xab,0xde,0x02,0x00,0x7f,0x00]
+global_load_lds_dword v[2:3], off sc0 nt sc1
+
+// NOT-GFX940: error:
+// GFX940: global_load_lds_dword v[2:3], off offset:4 ; encoding: [0x04,0x80,0xa8,0xdc,0x02,0x00,0x7f,0x00]
+global_load_lds_dword v[2:3], off offset:4
+
+// NOT-GFX940: error:
+// GFX940: global_load_lds_dword v2, s[4:5] offset:4 ; encoding: [0x04,0x80,0xa8,0xdc,0x02,0x00,0x04,0x00]
+global_load_lds_dword v2, s[4:5] offset:4
+
+// NOT-GFX940: error: instruction not supported on this GPU
+// GFX940: global_load_lds_ubyte v[2:3], off       ; encoding: [0x00,0x80,0x98,0xdc,0x02,0x00,0x7f,0x00]
+global_load_lds_ubyte v[2:3], off
+
+// NOT-GFX940: error: instruction not supported on this GPU
+// GFX940: global_load_lds_sbyte v[2:3], off       ; encoding: [0x00,0x80,0x9c,0xdc,0x02,0x00,0x7f,0x00]
+global_load_lds_sbyte v[2:3], off
+
+// NOT-GFX940: error: instruction not supported on this GPU
+// GFX940: global_load_lds_sshort v[2:3], off      ; encoding: [0x00,0x80,0xa4,0xdc,0x02,0x00,0x7f,0x00]
+global_load_lds_sshort v[2:3], off
+
+// NOT-GFX940: error: instruction not supported on this GPU
+// GFX940: global_load_lds_ushort v[2:3], off      ; encoding: [0x00,0x80,0xa0,0xdc,0x02,0x00,0x7f,0x00]
+global_load_lds_ushort v[2:3], off
+
+// NOT-GFX940: error: instruction not supported on this GPU
+// GFX940: scratch_load_lds_dword v2, off          ; encoding: [0x00,0x60,0xa8,0xdc,0x02,0x00,0x7f,0x00]
+scratch_load_lds_dword v2, off
+
+// NOT-GFX940: error: instruction not supported on this GPU
+// GFX940: scratch_load_lds_dword v2, s4           ; encoding: [0x00,0x60,0xa8,0xdc,0x02,0x00,0x04,0x00]
+scratch_load_lds_dword v2, s4
+
+// NOT-GFX940: error:
+// GFX940: scratch_load_lds_dword v2, s4 offset:4  ; encoding: [0x04,0x60,0xa8,0xdc,0x02,0x00,0x04,0x00]
+scratch_load_lds_dword v2, s4 offset:4
+
+// NOT-GFX940: error:
+// GFX940: scratch_load_lds_dword off, s4 offset:4 ; encoding: [0x04,0x40,0xa8,0xdc,0x00,0x00,0x04,0x00]
+scratch_load_lds_dword off, s4 offset:4
+
+// NOT-GFX940: error:
+// GFX940: scratch_load_lds_dword off, off offset:4 ; encoding: [0x04,0x40,0xa8,0xdc,0x00,0x00,0x7f,0x00]
+scratch_load_lds_dword off, off offset:4
+
+// NOT-GFX940: error: instruction not supported on this GPU
+// GFX940: scratch_load_lds_ubyte v2, off          ; encoding: [0x00,0x60,0x98,0xdc,0x02,0x00,0x7f,0x00]
+scratch_load_lds_ubyte v2, off
+
+// NOT-GFX940: error: instruction not supported on this GPU
+// GFX940: scratch_load_lds_sbyte v2, off          ; encoding: [0x00,0x60,0x9c,0xdc,0x02,0x00,0x7f,0x00]
+scratch_load_lds_sbyte v2, off
+
+// NOT-GFX940: error: instruction not supported on this GPU
+// GFX940: scratch_load_lds_ushort v2, off         ; encoding: [0x00,0x60,0xa0,0xdc,0x02,0x00,0x7f,0x00]
+scratch_load_lds_ushort v2, off
+
+// NOT-GFX940: error: instruction not supported on this GPU
+// GFX940: scratch_load_lds_sshort v2, off         ; encoding: [0x00,0x60,0xa4,0xdc,0x02,0x00,0x7f,0x00]
+scratch_load_lds_sshort v2, off
+
 // NOT-GFX940: error: instruction not supported on this GPU
 // GFX940: v_mov_b64_e32 v[2:3], v[4:5]            ; encoding: [0x04,0x71,0x04,0x7e]
 v_mov_b64 v[2:3], v[4:5]

diff  --git a/llvm/test/MC/Disassembler/AMDGPU/gfx940_dasm_features.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx940_dasm_features.txt
index 0922536d90aff..39dcf53452a4b 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx940_dasm_features.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx940_dasm_features.txt
@@ -87,6 +87,60 @@
 # GFX940: ds_pk_add_rtn_bf16 a3, v2, a1           ; encoding: [0x00,0x00,0x70,0xdb,0x02,0x01,0x00,0x03]
 0x00,0x00,0x70,0xdb,0x02,0x01,0x00,0x03
 
+# GFX940: global_load_lds_dword v[2:3], off       ; encoding: [0x00,0x80,0xa8,0xdc,0x02,0x00,0x7f,0x00]
+0x00,0x80,0xa8,0xdc,0x02,0x00,0x7f,0x00
+
+# GFX940: global_load_lds_dword v[2:3], off sc0 nt sc1 ; encoding: [0x00,0x80,0xab,0xde,0x02,0x00,0x7f,0x00]
+0x00,0x80,0xab,0xde,0x02,0x00,0x7f,0x00
+
+# GFX940: global_load_lds_dword v[2:3], off offset:4 ; encoding: [0x04,0x80,0xa8,0xdc,0x02,0x00,0x7f,0x00]
+0x04,0x80,0xa8,0xdc,0x02,0x00,0x7f,0x00
+
+# GFX940: global_load_lds_dword v2, s[4:5] offset:4 ; encoding: [0x04,0x80,0xa8,0xdc,0x02,0x00,0x04,0x00]
+0x04,0x80,0xa8,0xdc,0x02,0x00,0x04,0x00
+
+# GFX940: global_load_lds_ubyte v[2:3], off       ; encoding: [0x00,0x80,0x98,0xdc,0x02,0x00,0x7f,0x00]
+0x00,0x80,0x98,0xdc,0x02,0x00,0x7f,0x00
+
+# GFX940: global_load_lds_sbyte v[2:3], off       ; encoding: [0x00,0x80,0x9c,0xdc,0x02,0x00,0x7f,0x00]
+0x00,0x80,0x9c,0xdc,0x02,0x00,0x7f,0x00
+
+# GFX940: global_load_lds_sshort v[2:3], off      ; encoding: [0x00,0x80,0xa4,0xdc,0x02,0x00,0x7f,0x00]
+0x00,0x80,0xa4,0xdc,0x02,0x00,0x7f,0x00
+
+# GFX940: global_load_lds_ushort v[2:3], off      ; encoding: [0x00,0x80,0xa0,0xdc,0x02,0x00,0x7f,0x00]
+0x00,0x80,0xa0,0xdc,0x02,0x00,0x7f,0x00
+
+# GFX940: scratch_load_lds_dword v2, off          ; encoding: [0x00,0x60,0xa8,0xdc,0x02,0x00,0x7f,0x00]
+0x00,0x60,0xa8,0xdc,0x02,0x00,0x7f,0x00
+
+# GFX940: scratch_load_lds_dword v2, s4           ; encoding: [0x00,0x60,0xa8,0xdc,0x02,0x00,0x04,0x00]
+0x00,0x60,0xa8,0xdc,0x02,0x00,0x04,0x00
+
+# GFX940: scratch_load_lds_dword v2, s4 offset:4  ; encoding: [0x04,0x60,0xa8,0xdc,0x02,0x00,0x04,0x00]
+0x04,0x60,0xa8,0xdc,0x02,0x00,0x04,0x00
+
+# GFX940: scratch_load_lds_dword off, s4 offset:4 ; encoding: [0x04,0x40,0xa8,0xdc,0x00,0x00,0x04,0x00]
+0x04,0x40,0xa8,0xdc,0x00,0x00,0x04,0x00
+
+# GFX940: scratch_load_lds_dword off, off offset:4 ; encoding: [0x04,0x40,0xa8,0xdc,0x00,0x00,0x7f,0x00]
+0x04,0x40,0xa8,0xdc,0x00,0x00,0x7f,0x00
+
+# GFX940: scratch_load_lds_dword off, off offset:4 ; encoding: [0x04,0x40,0xa8,0xdc,0x00,0x00,0x7f,0x00]
+0x04,0x40,0xa8,0xdc,0x00,0x00,0x7f,0x00
+
+# GFX940: scratch_load_lds_ubyte v2, off          ; encoding: [0x00,0x60,0x98,0xdc,0x02,0x00,0x7f,0x00]
+0x00,0x60,0x98,0xdc,0x02,0x00,0x7f,0x00
+
+# GFX940: scratch_load_lds_sbyte v2, off          ; encoding: [0x00,0x60,0x9c,0xdc,0x02,0x00,0x7f,0x00]
+0x00,0x60,0x9c,0xdc,0x02,0x00,0x7f,0x00
+
+# GFX940: scratch_load_lds_ushort v2, off         ; encoding: [0x00,0x60,0xa0,0xdc,0x02,0x00,0x7f,0x00]
+0x00,0x60,0xa0,0xdc,0x02,0x00,0x7f,0x00
+
+# GFX940: scratch_load_lds_sshort v2, off         ; encoding: [0x00,0x60,0xa4,0xdc,0x02,0x00,0x7f,0x00]
+0x00,0x60,0xa4,0xdc,0x02,0x00,0x7f,0x00
+
 # GFX940: v_mov_b64_e32 v[2:3], v[4:5]            ; encoding: [0x04,0x71,0x04,0x7e]
 0x04,0x71,0x04,0x7e
 


        


More information about the llvm-commits mailing list