[llvm] 2349910 - [AMDGPU] Support for gfx940 flat lds opcodes
Stanislav Mekhanoshin via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 14 15:59:27 PDT 2022
Author: Stanislav Mekhanoshin
Date: 2022-03-14T15:46:19-07:00
New Revision: 23499103f77180977ddeb31b680cb66159141d61
URL: https://github.com/llvm/llvm-project/commit/23499103f77180977ddeb31b680cb66159141d61
DIFF: https://github.com/llvm/llvm-project/commit/23499103f77180977ddeb31b680cb66159141d61.diff
LOG: [AMDGPU] Support for gfx940 flat lds opcodes
Differential Revision: https://reviews.llvm.org/D121414
Added:
Modified:
llvm/lib/Target/AMDGPU/FLATInstructions.td
llvm/test/MC/AMDGPU/gfx940_asm_features.s
llvm/test/MC/Disassembler/AMDGPU/gfx940_dasm_features.txt
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td
index 9f086a29d16f5..a2a9c3d7788b5 100644
--- a/llvm/lib/Target/AMDGPU/FLATInstructions.td
+++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td
@@ -243,6 +243,33 @@ multiclass FLAT_Global_Store_Pseudo<string opName, RegisterClass regClass> {
}
}
+class FLAT_Global_Load_LDS_Pseudo <string opName, bit EnableSaddr = 0> : FLAT_Pseudo< // Pseudo record for one global_load_lds_* mnemonic; EnableSaddr selects the $saddr addressing form.
+ opName,
+ (outs ), // empty outs list: the record defines no register results
+ !con(
+ !if(EnableSaddr, (ins SReg_64:$saddr, VGPR_32:$vaddr), (ins VReg_64:$vaddr)), // SADDR form: SReg_64 $saddr + VGPR_32 $vaddr; otherwise a single VReg_64 $vaddr
+ (ins flat_offset:$offset, CPol_0:$cpol)), // $offset and $cpol are appended to whichever address operands were chosen
+ " $vaddr"#!if(EnableSaddr, ", $saddr", ", off")#"$offset$cpol"> { // asm string prints the literal "off" in the saddr position when EnableSaddr is 0
+ let LGKM_CNT = 1; // NOTE(review): presumably makes the op tracked by the LGKM counter because it targets LDS - confirm
+ let is_flat_global = 1;
+ let has_data = 0;
+ let has_vdst = 0;
+ let mayLoad = 1; // flagged as both a load and a store (see mayStore on the next line)
+ let mayStore = 1;
+ let has_saddr = 1; // set unconditionally; whether $saddr is actually an operand is carried by enabled_saddr
+ let enabled_saddr = EnableSaddr;
+ let PseudoInstr = opName#!if(EnableSaddr, "_SADDR", ""); // pseudo name gets a "_SADDR" suffix only for the SADDR form
+ let Uses = [M0, EXEC]; // implicit register uses: M0 and EXEC
+ let SchedRW = [WriteVMEM, WriteLDS]; // scheduled against both the WriteVMEM and WriteLDS resources
+}
+
+multiclass FLAT_Global_Load_LDS_Pseudo<string opName> { // Emits the two addressing variants of one global_load_lds_* op: unsuffixed and _SADDR.
+ def "" : FLAT_Global_Load_LDS_Pseudo<opName>, // EnableSaddr defaults to 0: VReg_64 $vaddr only, asm prints "off" for saddr
+ GlobalSaddrTable<0, opName>; // tagged 0 in GlobalSaddrTable under the shared opName
+ def _SADDR : FLAT_Global_Load_LDS_Pseudo<opName, 1>, // EnableSaddr = 1: SReg_64 $saddr plus VGPR_32 $vaddr
+ GlobalSaddrTable<1, opName>; // tagged 1 in GlobalSaddrTable under the same opName
+}
+
class FLAT_Global_Store_AddTid_Pseudo <string opName, RegisterClass vdataClass,
bit EnableSaddr = 0> : FLAT_Pseudo<
opName,
@@ -366,6 +393,47 @@ multiclass FLAT_Scratch_Store_Pseudo<string opName, RegisterClass regClass> {
}
}
+class FLAT_Scratch_Load_LDS_Pseudo <string opName, bit EnableSaddr = 0, // Pseudo record for one scratch_load_lds_* mnemonic; the three bits pick the addressing form.
+ bit EnableSVE = 0,
+ bit EnableVaddr = !or(EnableSVE, !not(EnableSaddr))> : FLAT_Pseudo< // default: $vaddr is present for SVE forms and whenever $saddr is absent
+ opName,
+ (outs ), // empty outs list: the record defines no register results
+ !if(EnableSVE, // operand list is chosen by priority: SVE first, then SADDR, then VADDR, else neither
+ (ins VGPR_32:$vaddr, SReg_32_XEXEC_HI:$saddr, flat_offset:$offset, CPol:$cpol), // EnableSVE: both $vaddr and $saddr
+ !if(EnableSaddr,
+ (ins SReg_32_XEXEC_HI:$saddr, flat_offset:$offset, CPol:$cpol), // EnableSaddr without SVE: $saddr only
+ !if(EnableVaddr,
+ (ins VGPR_32:$vaddr, flat_offset:$offset, CPol:$cpol), // EnableVaddr only: $vaddr only
+ (ins flat_offset:$offset, CPol:$cpol)))), // no address register at all: just $offset and $cpol
+ " "#!if(EnableVaddr, "$vaddr, ", "off, ")#!if(EnableSaddr, "$saddr", "off")#"$offset$cpol"> { // asm string prints the literal "off" for each address operand that is disabled
+
+ let LGKM_CNT = 1; // NOTE(review): presumably makes the op tracked by the LGKM counter because it targets LDS - confirm
+ let is_flat_scratch = 1;
+ let has_data = 0;
+ let has_vdst = 0;
+ let mayLoad = 1; // flagged as both a load and a store (see mayStore on the next line)
+ let mayStore = 1;
+ let has_saddr = 1; // set unconditionally; whether $saddr is actually an operand is carried by enabled_saddr
+ let enabled_saddr = EnableSaddr;
+ let has_vaddr = EnableVaddr;
+ let has_sve = EnableSVE;
+ let sve = EnableVaddr; // NOTE(review): sve follows EnableVaddr while has_sve follows EnableSVE; the tests in this patch encode "v2, off" with byte 1 = 0x60 and "off, s4" with 0x40, which looks consistent - confirm intended
+ let PseudoInstr = opName#!if(EnableSVE, "_SVS", !if(EnableSaddr, "_SADDR", !if(EnableVaddr, "", "_ST"))); // suffix: _SVS (both), _SADDR (saddr only), none (vaddr only), _ST (neither)
+ let Uses = [M0, EXEC]; // implicit register uses: M0 and EXEC
+ let SchedRW = [WriteVMEM, WriteLDS]; // scheduled against both the WriteVMEM and WriteLDS resources
+}
+
+multiclass FLAT_Scratch_Load_LDS_Pseudo<string opName> { // Emits the four addressing variants of one scratch_load_lds_* op.
+ def "" : FLAT_Scratch_Load_LDS_Pseudo<opName>, // defaults: EnableSaddr=0, EnableSVE=0, so EnableVaddr resolves to 1 ($vaddr only)
+ FlatScratchInst<opName, "SV">;
+ def _SADDR : FLAT_Scratch_Load_LDS_Pseudo<opName, 1>, // EnableSaddr=1, EnableSVE=0, so EnableVaddr resolves to 0 ($saddr only)
+ FlatScratchInst<opName, "SS">;
+ def _SVS : FLAT_Scratch_Load_LDS_Pseudo<opName, 1, 1>, // EnableSaddr=1, EnableSVE=1, so EnableVaddr resolves to 1 ($vaddr and $saddr)
+ FlatScratchInst<opName, "SVS">;
+ def _ST : FLAT_Scratch_Load_LDS_Pseudo<opName, 0, 0, 0>, // all three bits passed explicitly as 0: neither $vaddr nor $saddr
+ FlatScratchInst<opName, "ST">;
+}
+
class FLAT_AtomicNoRet_Pseudo<string opName, dag outs, dag ins,
string asm, list<dag> pattern = []> :
FLAT_Pseudo<opName, outs, ins, asm, pattern> {
@@ -772,6 +840,16 @@ defm GLOBAL_ATOMIC_DEC_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_dec_x2",
let SubtargetPredicate = HasGFX10_BEncoding in
defm GLOBAL_ATOMIC_CSUB : FLAT_Global_Atomic_Pseudo_RTN <"global_atomic_csub",
VGPR_32, i32, int_amdgcn_global_atomic_csub>;
+
+let SubtargetPredicate = isGFX940Plus in {
+
+defm GLOBAL_LOAD_LDS_UBYTE : FLAT_Global_Load_LDS_Pseudo <"global_load_lds_ubyte">;
+defm GLOBAL_LOAD_LDS_SBYTE : FLAT_Global_Load_LDS_Pseudo <"global_load_lds_sbyte">;
+defm GLOBAL_LOAD_LDS_USHORT : FLAT_Global_Load_LDS_Pseudo <"global_load_lds_ushort">;
+defm GLOBAL_LOAD_LDS_SSHORT : FLAT_Global_Load_LDS_Pseudo <"global_load_lds_sshort">;
+defm GLOBAL_LOAD_LDS_DWORD : FLAT_Global_Load_LDS_Pseudo <"global_load_lds_dword">;
+
+} // End let SubtargetPredicate = isGFX940Plus
} // End is_flat_global = 1
@@ -803,6 +881,16 @@ defm SCRATCH_STORE_DWORDX4 : FLAT_Scratch_Store_Pseudo <"scratch_store_dwordx4",
defm SCRATCH_STORE_BYTE_D16_HI : FLAT_Scratch_Store_Pseudo <"scratch_store_byte_d16_hi", VGPR_32>;
defm SCRATCH_STORE_SHORT_D16_HI : FLAT_Scratch_Store_Pseudo <"scratch_store_short_d16_hi", VGPR_32>;
+let SubtargetPredicate = isGFX940Plus in {
+
+defm SCRATCH_LOAD_LDS_UBYTE : FLAT_Scratch_Load_LDS_Pseudo <"scratch_load_lds_ubyte">;
+defm SCRATCH_LOAD_LDS_SBYTE : FLAT_Scratch_Load_LDS_Pseudo <"scratch_load_lds_sbyte">;
+defm SCRATCH_LOAD_LDS_USHORT : FLAT_Scratch_Load_LDS_Pseudo <"scratch_load_lds_ushort">;
+defm SCRATCH_LOAD_LDS_SSHORT : FLAT_Scratch_Load_LDS_Pseudo <"scratch_load_lds_sshort">;
+defm SCRATCH_LOAD_LDS_DWORD : FLAT_Scratch_Load_LDS_Pseudo <"scratch_load_lds_dword">;
+
+} // End let SubtargetPredicate = isGFX940Plus
+
} // End SubtargetPredicate = HasFlatScratchInsts
let SubtargetPredicate = isGFX10Plus, is_flat_global = 1 in {
@@ -1613,6 +1701,13 @@ defm GLOBAL_STORE_DWORDX2 : FLAT_Real_AllAddr_vi <0x1d>;
defm GLOBAL_STORE_DWORDX3 : FLAT_Real_AllAddr_vi <0x1e>;
defm GLOBAL_STORE_DWORDX4 : FLAT_Real_AllAddr_vi <0x1f>;
+let AssemblerPredicate = isGFX940Plus in {
+defm GLOBAL_LOAD_LDS_UBYTE : FLAT_Real_AllAddr_vi <0x026>;
+defm GLOBAL_LOAD_LDS_SBYTE : FLAT_Real_AllAddr_vi <0x027>;
+defm GLOBAL_LOAD_LDS_USHORT : FLAT_Real_AllAddr_vi <0x028>;
+defm GLOBAL_LOAD_LDS_SSHORT : FLAT_Real_AllAddr_vi <0x029>;
+defm GLOBAL_LOAD_LDS_DWORD : FLAT_Real_AllAddr_vi <0x02a>;
+} // End let AssemblerPredicate = isGFX940Plus
defm GLOBAL_ATOMIC_SWAP : FLAT_Global_Real_Atomics_vi <0x40>;
defm GLOBAL_ATOMIC_CMPSWAP : FLAT_Global_Real_Atomics_vi <0x41>;
@@ -1641,6 +1736,14 @@ defm GLOBAL_ATOMIC_XOR_X2 : FLAT_Global_Real_Atomics_vi <0x6a>;
defm GLOBAL_ATOMIC_INC_X2 : FLAT_Global_Real_Atomics_vi <0x6b>;
defm GLOBAL_ATOMIC_DEC_X2 : FLAT_Global_Real_Atomics_vi <0x6c>;
+let AssemblerPredicate = isGFX940Plus in {
+defm SCRATCH_LOAD_LDS_UBYTE : FLAT_Real_AllAddr_SVE_vi <0x026>;
+defm SCRATCH_LOAD_LDS_SBYTE : FLAT_Real_AllAddr_SVE_vi <0x027>;
+defm SCRATCH_LOAD_LDS_USHORT : FLAT_Real_AllAddr_SVE_vi <0x028>;
+defm SCRATCH_LOAD_LDS_SSHORT : FLAT_Real_AllAddr_SVE_vi <0x029>;
+defm SCRATCH_LOAD_LDS_DWORD : FLAT_Real_AllAddr_SVE_vi <0x02a>;
+} // End let AssemblerPredicate = isGFX940Plus
+
defm SCRATCH_LOAD_UBYTE : FLAT_Real_AllAddr_SVE_vi <0x10>;
defm SCRATCH_LOAD_SBYTE : FLAT_Real_AllAddr_SVE_vi <0x11>;
defm SCRATCH_LOAD_USHORT : FLAT_Real_AllAddr_SVE_vi <0x12>;
diff --git a/llvm/test/MC/AMDGPU/gfx940_asm_features.s b/llvm/test/MC/AMDGPU/gfx940_asm_features.s
index c1c7c0dc32281..56d25976bfbef 100644
--- a/llvm/test/MC/AMDGPU/gfx940_asm_features.s
+++ b/llvm/test/MC/AMDGPU/gfx940_asm_features.s
@@ -129,6 +129,74 @@ ds_pk_add_rtn_bf16 v3, v2, v1
// GFX940: ds_pk_add_rtn_bf16 a3, v2, a1 ; encoding: [0x00,0x00,0x70,0xdb,0x02,0x01,0x00,0x03]
ds_pk_add_rtn_bf16 a3, v2, a1
+// NOT-GFX940: error: instruction not supported on this GPU
+// GFX940: global_load_lds_dword v[2:3], off ; encoding: [0x00,0x80,0xa8,0xdc,0x02,0x00,0x7f,0x00]
+global_load_lds_dword v[2:3], off
+
+// NOT-GFX940: error:
+// GFX940: global_load_lds_dword v[2:3], off sc0 nt sc1 ; encoding: [0x00,0x80,0xab,0xde,0x02,0x00,0x7f,0x00]
+global_load_lds_dword v[2:3], off sc0 nt sc1
+
+// NOT-GFX940: error:
+// GFX940: global_load_lds_dword v[2:3], off offset:4 ; encoding: [0x04,0x80,0xa8,0xdc,0x02,0x00,0x7f,0x00]
+global_load_lds_dword v[2:3], off offset:4
+
+// NOT-GFX940: error:
+// GFX940: global_load_lds_dword v2, s[4:5] offset:4 ; encoding: [0x04,0x80,0xa8,0xdc,0x02,0x00,0x04,0x00]
+global_load_lds_dword v2, s[4:5] offset:4
+
+// NOT-GFX940: error: instruction not supported on this GPU
+// GFX940: global_load_lds_ubyte v[2:3], off ; encoding: [0x00,0x80,0x98,0xdc,0x02,0x00,0x7f,0x00]
+global_load_lds_ubyte v[2:3], off
+
+// NOT-GFX940: error: instruction not supported on this GPU
+// GFX940: global_load_lds_sbyte v[2:3], off ; encoding: [0x00,0x80,0x9c,0xdc,0x02,0x00,0x7f,0x00]
+global_load_lds_sbyte v[2:3], off
+
+// NOT-GFX940: error: instruction not supported on this GPU
+// GFX940: global_load_lds_sshort v[2:3], off ; encoding: [0x00,0x80,0xa4,0xdc,0x02,0x00,0x7f,0x00]
+global_load_lds_sshort v[2:3], off
+
+// NOT-GFX940: error: instruction not supported on this GPU
+// GFX940: global_load_lds_ushort v[2:3], off ; encoding: [0x00,0x80,0xa0,0xdc,0x02,0x00,0x7f,0x00]
+global_load_lds_ushort v[2:3], off
+
+// NOT-GFX940: error: instruction not supported on this GPU
+// GFX940: scratch_load_lds_dword v2, off ; encoding: [0x00,0x60,0xa8,0xdc,0x02,0x00,0x7f,0x00]
+scratch_load_lds_dword v2, off
+
+// NOT-GFX940: error: instruction not supported on this GPU
+// GFX940: scratch_load_lds_dword v2, s4 ; encoding: [0x00,0x60,0xa8,0xdc,0x02,0x00,0x04,0x00]
+scratch_load_lds_dword v2, s4
+
+// NOT-GFX940: error:
+// GFX940: scratch_load_lds_dword v2, s4 offset:4 ; encoding: [0x04,0x60,0xa8,0xdc,0x02,0x00,0x04,0x00]
+scratch_load_lds_dword v2, s4 offset:4
+
+// NOT-GFX940: error:
+// GFX940: scratch_load_lds_dword off, s4 offset:4 ; encoding: [0x04,0x40,0xa8,0xdc,0x00,0x00,0x04,0x00]
+scratch_load_lds_dword off, s4 offset:4
+
+// NOT-GFX940: error:
+// GFX940: scratch_load_lds_dword off, off offset:4 ; encoding: [0x04,0x40,0xa8,0xdc,0x00,0x00,0x7f,0x00]
+scratch_load_lds_dword off, off offset:4
+
+// NOT-GFX940: error: instruction not supported on this GPU
+// GFX940: scratch_load_lds_ubyte v2, off ; encoding: [0x00,0x60,0x98,0xdc,0x02,0x00,0x7f,0x00]
+scratch_load_lds_ubyte v2, off
+
+// NOT-GFX940: error: instruction not supported on this GPU
+// GFX940: scratch_load_lds_sbyte v2, off ; encoding: [0x00,0x60,0x9c,0xdc,0x02,0x00,0x7f,0x00]
+scratch_load_lds_sbyte v2, off
+
+// NOT-GFX940: error: instruction not supported on this GPU
+// GFX940: scratch_load_lds_ushort v2, off ; encoding: [0x00,0x60,0xa0,0xdc,0x02,0x00,0x7f,0x00]
+scratch_load_lds_ushort v2, off
+
+// NOT-GFX940: error: instruction not supported on this GPU
+// GFX940: scratch_load_lds_sshort v2, off ; encoding: [0x00,0x60,0xa4,0xdc,0x02,0x00,0x7f,0x00]
+scratch_load_lds_sshort v2, off
+
// NOT-GFX940: error: instruction not supported on this GPU
// GFX940: v_mov_b64_e32 v[2:3], v[4:5] ; encoding: [0x04,0x71,0x04,0x7e]
v_mov_b64 v[2:3], v[4:5]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx940_dasm_features.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx940_dasm_features.txt
index 0922536d90aff..39dcf53452a4b 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx940_dasm_features.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx940_dasm_features.txt
@@ -87,6 +87,60 @@
# GFX940: ds_pk_add_rtn_bf16 a3, v2, a1 ; encoding: [0x00,0x00,0x70,0xdb,0x02,0x01,0x00,0x03]
0x00,0x00,0x70,0xdb,0x02,0x01,0x00,0x03
+# GFX940: global_load_lds_dword v[2:3], off ; encoding: [0x00,0x80,0xa8,0xdc,0x02,0x00,0x7f,0x00]
+0x00,0x80,0xa8,0xdc,0x02,0x00,0x7f,0x00
+
+# GFX940: global_load_lds_dword v[2:3], off sc0 nt sc1 ; encoding: [0x00,0x80,0xab,0xde,0x02,0x00,0x7f,0x00]
+0x00,0x80,0xab,0xde,0x02,0x00,0x7f,0x00
+
+# GFX940: global_load_lds_dword v[2:3], off offset:4 ; encoding: [0x04,0x80,0xa8,0xdc,0x02,0x00,0x7f,0x00]
+0x04,0x80,0xa8,0xdc,0x02,0x00,0x7f,0x00
+
+# GFX940: global_load_lds_dword v2, s[4:5] offset:4 ; encoding: [0x04,0x80,0xa8,0xdc,0x02,0x00,0x04,0x00]
+0x04,0x80,0xa8,0xdc,0x02,0x00,0x04,0x00
+
+# GFX940: global_load_lds_ubyte v[2:3], off ; encoding: [0x00,0x80,0x98,0xdc,0x02,0x00,0x7f,0x00]
+0x00,0x80,0x98,0xdc,0x02,0x00,0x7f,0x00
+
+# GFX940: global_load_lds_sbyte v[2:3], off ; encoding: [0x00,0x80,0x9c,0xdc,0x02,0x00,0x7f,0x00]
+0x00,0x80,0x9c,0xdc,0x02,0x00,0x7f,0x00
+
+# GFX940: global_load_lds_sshort v[2:3], off ; encoding: [0x00,0x80,0xa4,0xdc,0x02,0x00,0x7f,0x00]
+0x00,0x80,0xa4,0xdc,0x02,0x00,0x7f,0x00
+
+# GFX940: global_load_lds_ushort v[2:3], off ; encoding: [0x00,0x80,0xa0,0xdc,0x02,0x00,0x7f,0x00]
+0x00,0x80,0xa0,0xdc,0x02,0x00,0x7f,0x00
+
+# GFX940: scratch_load_lds_dword v2, off ; encoding: [0x00,0x60,0xa8,0xdc,0x02,0x00,0x7f,0x00]
+0x00,0x60,0xa8,0xdc,0x02,0x00,0x7f,0x00
+
+# GFX940: scratch_load_lds_dword v2, s4 ; encoding: [0x00,0x60,0xa8,0xdc,0x02,0x00,0x04,0x00]
+0x00,0x60,0xa8,0xdc,0x02,0x00,0x04,0x00
+
+# GFX940: scratch_load_lds_dword v2, s4 offset:4 ; encoding: [0x04,0x60,0xa8,0xdc,0x02,0x00,0x04,0x00]
+0x04,0x60,0xa8,0xdc,0x02,0x00,0x04,0x00
+
+# GFX940: scratch_load_lds_dword off, s4 offset:4 ; encoding: [0x04,0x40,0xa8,0xdc,0x00,0x00,0x04,0x00]
+0x04,0x40,0xa8,0xdc,0x00,0x00,0x04,0x00
+
+# GFX940: scratch_load_lds_dword off, off offset:4 ; encoding: [0x04,0x40,0xa8,0xdc,0x00,0x00,0x7f,0x00]
+0x04,0x40,0xa8,0xdc,0x00,0x00,0x7f,0x00
+
+# GFX940: scratch_load_lds_dword off, off offset:4 ; encoding: [0x04,0x40,0xa8,0xdc,0x00,0x00,0x7f,0x00]
+0x04,0x40,0xa8,0xdc,0x00,0x00,0x7f,0x00
+
+# GFX940: scratch_load_lds_ubyte v2, off ; encoding: [0x00,0x60,0x98,0xdc,0x02,0x00,0x7f,0x00]
+0x00,0x60,0x98,0xdc,0x02,0x00,0x7f,0x00
+
+# GFX940: scratch_load_lds_sbyte v2, off ; encoding: [0x00,0x60,0x9c,0xdc,0x02,0x00,0x7f,0x00]
+0x00,0x60,0x9c,0xdc,0x02,0x00,0x7f,0x00
+
+# GFX940: scratch_load_lds_ushort v2, off ; encoding: [0x00,0x60,0xa0,0xdc,0x02,0x00,0x7f,0x00]
+0x00,0x60,0xa0,0xdc,0x02,0x00,0x7f,0x00
+
+# GFX940: scratch_load_lds_sshort v2, off ; encoding: [0x00,0x60,0xa4,0xdc,0x02,0x00,0x7f,0x00]
+0x00,0x60,0xa4,0xdc,0x02,0x00,0x7f,0x00
+
# GFX940: v_mov_b64_e32 v[2:3], v[4:5] ; encoding: [0x04,0x71,0x04,0x7e]
0x04,0x71,0x04,0x7e
More information about the llvm-commits mailing list