[llvm] fbf0ca6 - [AMDGPU][GFX12] Add support for new block ls instructions (#96273)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Jun 21 11:12:21 PDT 2024
Author: Mariusz Sikora
Date: 2024-06-21T20:12:18+02:00
New Revision: fbf0ca64182baa5fac73fa012ed1fb6805fa3581
URL: https://github.com/llvm/llvm-project/commit/fbf0ca64182baa5fac73fa012ed1fb6805fa3581
DIFF: https://github.com/llvm/llvm-project/commit/fbf0ca64182baa5fac73fa012ed1fb6805fa3581.diff
LOG: [AMDGPU][GFX12] Add support for new block ls instructions (#96273)
Add MC layer support for new instructions:
GLOBAL_LOAD_BLOCK
GLOBAL_STORE_BLOCK
SCRATCH_LOAD_BLOCK
SCRATCH_STORE_BLOCK
Co-authored-by: Piotr Sobczak <piotr.sobczak at amd.com>
Added:
Modified:
llvm/lib/Target/AMDGPU/FLATInstructions.td
llvm/lib/Target/AMDGPU/SIInstrInfo.td
llvm/lib/Target/AMDGPU/SIRegisterInfo.td
llvm/test/MC/AMDGPU/gfx12_asm_vflat.s
llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vflat.txt
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td
index 3d2a74adab965..8ecbd62903a24 100644
--- a/llvm/lib/Target/AMDGPU/FLATInstructions.td
+++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td
@@ -966,6 +966,15 @@ defm SCRATCH_LOAD_LDS_SSHORT : FLAT_Scratch_Load_LDS_Pseudo <"scratch_load_lds_s
defm SCRATCH_LOAD_LDS_DWORD : FLAT_Scratch_Load_LDS_Pseudo <"scratch_load_lds_dword">;
let SubtargetPredicate = isGFX12Plus in {
+ let Uses = [EXEC, M0] in {
+ defm GLOBAL_LOAD_BLOCK : FLAT_Global_Load_Pseudo <"global_load_block", VReg_1024>;
+ defm GLOBAL_STORE_BLOCK : FLAT_Global_Store_Pseudo <"global_store_block", VReg_1024>;
+ }
+ let Uses = [EXEC, FLAT_SCR, M0] in {
+ defm SCRATCH_LOAD_BLOCK : FLAT_Scratch_Load_Pseudo <"scratch_load_block", VReg_1024>;
+ defm SCRATCH_STORE_BLOCK : FLAT_Scratch_Store_Pseudo <"scratch_store_block", VReg_1024>;
+ }
+
let WaveSizePredicate = isWave32 in {
let Mnemonic = "global_load_tr_b128" in
defm GLOBAL_LOAD_TR_B128_w32 : FLAT_Global_Load_Pseudo <"global_load_tr_b128_w32", VReg_128>;
@@ -2658,6 +2667,8 @@ defm GLOBAL_STORE_BYTE_D16_HI : VGLOBAL_Real_AllAddr_gfx12<0x024, "global_s
defm GLOBAL_STORE_SHORT_D16_HI : VGLOBAL_Real_AllAddr_gfx12<0x025, "global_store_d16_hi_b16">;
defm GLOBAL_LOAD_DWORD_ADDTID : VGLOBAL_Real_AllAddr_gfx12<0x028, "global_load_addtid_b32">;
defm GLOBAL_STORE_DWORD_ADDTID : VGLOBAL_Real_AllAddr_gfx12<0x029, "global_store_addtid_b32">;
+defm GLOBAL_LOAD_BLOCK : VGLOBAL_Real_AllAddr_gfx12<0x053>;
+defm GLOBAL_STORE_BLOCK : VGLOBAL_Real_AllAddr_gfx12<0x054>;
defm GLOBAL_ATOMIC_SWAP : VGLOBAL_Real_Atomics_gfx12<0x033, "global_atomic_swap_b32">;
defm GLOBAL_ATOMIC_CMPSWAP : VGLOBAL_Real_Atomics_gfx12<0x034, "global_atomic_cmpswap_b32">;
@@ -2728,3 +2739,6 @@ defm SCRATCH_LOAD_SBYTE_D16_HI : VSCRATCH_Real_AllAddr_gfx12<0x22, "scratch_
defm SCRATCH_LOAD_SHORT_D16_HI : VSCRATCH_Real_AllAddr_gfx12<0x23, "scratch_load_d16_hi_b16">;
defm SCRATCH_STORE_BYTE_D16_HI : VSCRATCH_Real_AllAddr_gfx12<0x24, "scratch_store_d16_hi_b8">;
defm SCRATCH_STORE_SHORT_D16_HI : VSCRATCH_Real_AllAddr_gfx12<0x25, "scratch_store_d16_hi_b16">;
+
+defm SCRATCH_LOAD_BLOCK : VSCRATCH_Real_AllAddr_gfx12<0x53>;
+defm SCRATCH_STORE_BLOCK : VSCRATCH_Real_AllAddr_gfx12<0x54>;
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index 80c623514bda1..c64b3a7c356f2 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -2230,7 +2230,8 @@ class getLdStRegisterOperand<RegisterClass RC> {
!eq(RC.Size, 64) : AVLdSt_64,
!eq(RC.Size, 96) : AVLdSt_96,
!eq(RC.Size, 128) : AVLdSt_128,
- !eq(RC.Size, 160) : AVLdSt_160);
+ !eq(RC.Size, 160) : AVLdSt_160,
+ !eq(RC.Size, 1024) : AVLdSt_1024);
}
class getHasVOP3DPP <ValueType DstVT = i32, ValueType Src0VT = i32,
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
index a8efe2b2ba35e..f1d9aec163635 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -1344,6 +1344,7 @@ def AVLdSt_64 : AVLdStOperand<AV_64, "OPW64">;
def AVLdSt_96 : AVLdStOperand<AV_96, "OPW96">;
def AVLdSt_128 : AVLdStOperand<AV_128, "OPW128">;
def AVLdSt_160 : AVLdStOperand<AV_160, "OPW160">;
+def AVLdSt_1024 : AVLdStOperand<AV_1024, "OPW1024">;
//===----------------------------------------------------------------------===//
// ACSrc_* Operands with an AGPR or an inline constant
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vflat.s b/llvm/test/MC/AMDGPU/gfx12_asm_vflat.s
index 575bc1a8255a6..30bfaff8f17aa 100644
--- a/llvm/test/MC/AMDGPU/gfx12_asm_vflat.s
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_vflat.s
@@ -1896,6 +1896,27 @@ global_load_u8 v1, v[0:1], off offset:64
global_load_u8 v1, v[3:4], off
// GFX12: encoding: [0x7c,0x00,0x04,0xee,0x01,0x00,0x00,0x00,0x03,0x00,0x00,0x00]
+global_load_block v[9:40], v0, s[0:1] offset:-64
+// GFX12: encoding: [0x00,0xc0,0x14,0xee,0x09,0x00,0x00,0x00,0x00,0xc0,0xff,0xff]
+
+global_load_block v[9:40], v0, s[0:1] offset:64
+// GFX12: encoding: [0x00,0xc0,0x14,0xee,0x09,0x00,0x00,0x00,0x00,0x40,0x00,0x00]
+
+global_load_block v[9:40], v5, s[2:3]
+// GFX12: encoding: [0x02,0xc0,0x14,0xee,0x09,0x00,0x00,0x00,0x05,0x00,0x00,0x00]
+
+global_load_block v[9:40], v[0:1], off offset:-64
+// GFX12: encoding: [0x7c,0xc0,0x14,0xee,0x09,0x00,0x00,0x00,0x00,0xc0,0xff,0xff]
+
+global_load_block v[9:40], v[0:1], off offset:64
+// GFX12: encoding: [0x7c,0xc0,0x14,0xee,0x09,0x00,0x00,0x00,0x00,0x40,0x00,0x00]
+
+global_load_block v[9:40], v[5:6], off
+// GFX12: encoding: [0x7c,0xc0,0x14,0xee,0x09,0x00,0x00,0x00,0x05,0x00,0x00,0x00]
+
+global_load_block v[9:40], v[5:6], off th:TH_LOAD_HT scope:SCOPE_SE
+// GFX12: encoding: [0x7c,0xc0,0x14,0xee,0x09,0x00,0x24,0x00,0x05,0x00,0x00,0x00]
+
global_store_addtid_b32 v2, off offset:-64
// GFX12: encoding: [0x7c,0x40,0x0a,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff]
@@ -2058,6 +2079,27 @@ global_store_d16_hi_b8 v[0:1], v2, off offset:64
global_store_d16_hi_b8 v[3:4], v1, off
// GFX12: encoding: [0x7c,0x00,0x09,0xee,0x00,0x00,0x80,0x00,0x03,0x00,0x00,0x00]
+global_store_block v0, v[2:33], s[0:1] offset:-64
+// GFX12: encoding: [0x00,0x00,0x15,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff]
+
+global_store_block v0, v[2:33], s[0:1] offset:64
+// GFX12: encoding: [0x00,0x00,0x15,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00]
+
+global_store_block v1, v[3:34], s[2:3]
+// GFX12: encoding: [0x02,0x00,0x15,0xee,0x00,0x00,0x80,0x01,0x01,0x00,0x00,0x00]
+
+global_store_block v[0:1], v[2:33], off offset:-64
+// GFX12: encoding: [0x7c,0x00,0x15,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff]
+
+global_store_block v[0:1], v[2:33], off offset:64
+// GFX12: encoding: [0x7c,0x00,0x15,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00]
+
+global_store_block v[1:2], v[3:34], off
+// GFX12: encoding: [0x7c,0x00,0x15,0xee,0x00,0x00,0x80,0x01,0x01,0x00,0x00,0x00]
+
+global_store_block v[1:2], v[3:34], off th:TH_STORE_HT scope:SCOPE_SE
+// GFX12: encoding: [0x7c,0x00,0x15,0xee,0x00,0x00,0xa4,0x01,0x01,0x00,0x00,0x00]
+
global_inv
// GFX12: encoding: [0x7c,0xc0,0x0a,0xee,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00]
@@ -2490,6 +2532,36 @@ scratch_load_u8 v1, v0, s0 offset:64
scratch_load_u8 v1, v2, s1
// GFX12: encoding: [0x01,0x00,0x04,0xed,0x01,0x00,0x02,0x00,0x02,0x00,0x00,0x00]
+scratch_load_block v[3:34], off, off offset:-64
+// GFX12: encoding: [0x7c,0xc0,0x14,0xed,0x03,0x00,0x00,0x00,0x00,0xc0,0xff,0xff]
+
+scratch_load_block v[3:34], off, off offset:64
+// GFX12: encoding: [0x7c,0xc0,0x14,0xed,0x03,0x00,0x00,0x00,0x00,0x40,0x00,0x00]
+
+scratch_load_block v[3:34], off, s0 offset:-64
+// GFX12: encoding: [0x00,0xc0,0x14,0xed,0x03,0x00,0x00,0x00,0x00,0xc0,0xff,0xff]
+
+scratch_load_block v[3:34], off, s0 offset:64
+// GFX12: encoding: [0x00,0xc0,0x14,0xed,0x03,0x00,0x00,0x00,0x00,0x40,0x00,0x00]
+
+scratch_load_block v[3:34], v0, off offset:-64
+// GFX12: encoding: [0x7c,0xc0,0x14,0xed,0x03,0x00,0x02,0x00,0x00,0xc0,0xff,0xff]
+
+scratch_load_block v[3:34], v0, off offset:64
+// GFX12: encoding: [0x7c,0xc0,0x14,0xed,0x03,0x00,0x02,0x00,0x00,0x40,0x00,0x00]
+
+scratch_load_block v[3:34], v0, s0 offset:-64
+// GFX12: encoding: [0x00,0xc0,0x14,0xed,0x03,0x00,0x02,0x00,0x00,0xc0,0xff,0xff]
+
+scratch_load_block v[3:34], v0, s0 offset:64
+// GFX12: encoding: [0x00,0xc0,0x14,0xed,0x03,0x00,0x02,0x00,0x00,0x40,0x00,0x00]
+
+scratch_load_block v[3:34], v2, s1
+// GFX12: encoding: [0x01,0xc0,0x14,0xed,0x03,0x00,0x02,0x00,0x02,0x00,0x00,0x00]
+
+scratch_load_block v[3:34], v2, s1 th:TH_LOAD_HT scope:SCOPE_SE
+// GFX12: encoding: [0x01,0xc0,0x14,0xed,0x03,0x00,0x26,0x00,0x02,0x00,0x00,0x00]
+
scratch_store_b128 off, v[2:5], off offset:-64
// GFX12: encoding: [0x7c,0x40,0x07,0xed,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff]
@@ -2732,3 +2804,33 @@ scratch_store_d16_hi_b8 v0, v2, s0 offset:64
scratch_store_d16_hi_b8 v1, v2, s3
// GFX12: encoding: [0x03,0x00,0x09,0xed,0x00,0x00,0x02,0x01,0x01,0x00,0x00,0x00]
+
+scratch_store_block off, v[2:33], off offset:-64
+// GFX12: encoding: [0x7c,0x00,0x15,0xed,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff]
+
+scratch_store_block off, v[2:33], off offset:64
+// GFX12: encoding: [0x7c,0x00,0x15,0xed,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00]
+
+scratch_store_block off, v[2:33], s0 offset:-64
+// GFX12: encoding: [0x00,0x00,0x15,0xed,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff]
+
+scratch_store_block off, v[2:33], s0 offset:64
+// GFX12: encoding: [0x00,0x00,0x15,0xed,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00]
+
+scratch_store_block v0, v[2:33], off offset:-64
+// GFX12: encoding: [0x7c,0x00,0x15,0xed,0x00,0x00,0x02,0x01,0x00,0xc0,0xff,0xff]
+
+scratch_store_block v0, v[2:33], off offset:64
+// GFX12: encoding: [0x7c,0x00,0x15,0xed,0x00,0x00,0x02,0x01,0x00,0x40,0x00,0x00]
+
+scratch_store_block v0, v[2:33], s0 offset:-64
+// GFX12: encoding: [0x00,0x00,0x15,0xed,0x00,0x00,0x02,0x01,0x00,0xc0,0xff,0xff]
+
+scratch_store_block v0, v[2:33], s0 offset:64
+// GFX12: encoding: [0x00,0x00,0x15,0xed,0x00,0x00,0x02,0x01,0x00,0x40,0x00,0x00]
+
+scratch_store_block v1, v[2:33], s3
+// GFX12: encoding: [0x03,0x00,0x15,0xed,0x00,0x00,0x02,0x01,0x01,0x00,0x00,0x00]
+
+scratch_store_block v1, v[2:33], s3 th:TH_STORE_HT scope:SCOPE_SE
+// GFX12: encoding: [0x03,0x00,0x15,0xed,0x00,0x00,0x26,0x01,0x01,0x00,0x00,0x00]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vflat.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vflat.txt
index e0b658b1fda3b..7953e0eb67c1b 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vflat.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vflat.txt
@@ -1155,6 +1155,27 @@
# GFX12: global_load_u8 v1, v[3:4], off ; encoding: [0x7c,0x00,0x04,0xee,0x01,0x00,0x00,0x00,0x03,0x00,0x00,0x00]
0x7c,0x00,0x04,0xee,0x01,0x00,0x00,0x00,0x03,0x00,0x00,0x00
+# GFX12: global_load_block v[9:40], v0, s[0:1] offset:-64 ; encoding: [0x00,0xc0,0x14,0xee,0x09,0x00,0x00,0x00,0x00,0xc0,0xff,0xff]
+0x00,0xc0,0x14,0xee,0x09,0x00,0x00,0x00,0x00,0xc0,0xff,0xff
+
+# GFX12: global_load_block v[9:40], v0, s[0:1] offset:64 ; encoding: [0x00,0xc0,0x14,0xee,0x09,0x00,0x00,0x00,0x00,0x40,0x00,0x00]
+0x00,0xc0,0x14,0xee,0x09,0x00,0x00,0x00,0x00,0x40,0x00,0x00
+
+# GFX12: global_load_block v[9:40], v5, s[2:3] ; encoding: [0x02,0xc0,0x14,0xee,0x09,0x00,0x00,0x00,0x05,0x00,0x00,0x00]
+0x02,0xc0,0x14,0xee,0x09,0x00,0x00,0x00,0x05,0x00,0x00,0x00
+
+# GFX12: global_load_block v[9:40], v[0:1], off offset:-64 ; encoding: [0x7c,0xc0,0x14,0xee,0x09,0x00,0x00,0x00,0x00,0xc0,0xff,0xff]
+0x7c,0xc0,0x14,0xee,0x09,0x00,0x00,0x00,0x00,0xc0,0xff,0xff
+
+# GFX12: global_load_block v[9:40], v[0:1], off offset:64 ; encoding: [0x7c,0xc0,0x14,0xee,0x09,0x00,0x00,0x00,0x00,0x40,0x00,0x00]
+0x7c,0xc0,0x14,0xee,0x09,0x00,0x00,0x00,0x00,0x40,0x00,0x00
+
+# GFX12: global_load_block v[9:40], v[5:6], off ; encoding: [0x7c,0xc0,0x14,0xee,0x09,0x00,0x00,0x00,0x05,0x00,0x00,0x00]
+0x7c,0xc0,0x14,0xee,0x09,0x00,0x00,0x00,0x05,0x00,0x00,0x00
+
+# GFX12: global_load_block v[9:40], v[5:6], off th:TH_LOAD_HT scope:SCOPE_SE ; encoding: [0x7c,0xc0,0x14,0xee,0x09,0x00,0x24,0x00,0x05,0x00,0x00,0x00]
+0x7c,0xc0,0x14,0xee,0x09,0x00,0x24,0x00,0x05,0x00,0x00,0x00
+
# GFX12: global_store_addtid_b32 v2, off offset:64 ; encoding: [0x7c,0x40,0x0a,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00]
0x7c,0x40,0x0a,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00
@@ -1257,6 +1278,27 @@
# GFX12: global_store_d16_hi_b8 v[3:4], v1, off ; encoding: [0x7c,0x00,0x09,0xee,0x00,0x00,0x80,0x00,0x03,0x00,0x00,0x00]
0x7c,0x00,0x09,0xee,0x00,0x00,0x80,0x00,0x03,0x00,0x00,0x00
+# GFX12: global_store_block v0, v[2:33], s[0:1] offset:-64 ; encoding: [0x00,0x00,0x15,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff]
+0x00,0x00,0x15,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff
+
+# GFX12: global_store_block v0, v[2:33], s[0:1] offset:64 ; encoding: [0x00,0x00,0x15,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00]
+0x00,0x00,0x15,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00
+
+# GFX12: global_store_block v1, v[3:34], s[2:3] ; encoding: [0x02,0x00,0x15,0xee,0x00,0x00,0x80,0x01,0x01,0x00,0x00,0x00]
+0x02,0x00,0x15,0xee,0x00,0x00,0x80,0x01,0x01,0x00,0x00,0x00
+
+# GFX12: global_store_block v[0:1], v[2:33], off offset:-64 ; encoding: [0x7c,0x00,0x15,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff]
+0x7c,0x00,0x15,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff
+
+# GFX12: global_store_block v[0:1], v[2:33], off offset:64 ; encoding: [0x7c,0x00,0x15,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00]
+0x7c,0x00,0x15,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00
+
+# GFX12: global_store_block v[1:2], v[3:34], off ; encoding: [0x7c,0x00,0x15,0xee,0x00,0x00,0x80,0x01,0x01,0x00,0x00,0x00]
+0x7c,0x00,0x15,0xee,0x00,0x00,0x80,0x01,0x01,0x00,0x00,0x00
+
+# GFX12: global_store_block v[1:2], v[3:34], off th:TH_STORE_HT scope:SCOPE_SE ; encoding: [0x7c,0x00,0x15,0xee,0x00,0x00,0xa4,0x01,0x01,0x00,0x00,0x00]
+0x7c,0x00,0x15,0xee,0x00,0x00,0xa4,0x01,0x01,0x00,0x00,0x00
+
# GFX12: global_inv ; encoding: [0x7c,0xc0,0x0a,0xee,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00]
0x7c,0xc0,0x0a,0xee,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
@@ -1518,6 +1560,36 @@
# GFX12: scratch_load_u8 v1, v2, s1 ; encoding: [0x01,0x00,0x04,0xed,0x01,0x00,0x02,0x00,0x02,0x00,0x00,0x00]
0x01,0x00,0x04,0xed,0x01,0x00,0x02,0x00,0x02,0x00,0x00,0x00
+# GFX12: scratch_load_block v[3:34], off, off offset:-64 ; encoding: [0x7c,0xc0,0x14,0xed,0x03,0x00,0x00,0x00,0x00,0xc0,0xff,0xff]
+0x7c,0xc0,0x14,0xed,0x03,0x00,0x00,0x00,0x00,0xc0,0xff,0xff
+
+# GFX12: scratch_load_block v[3:34], off, off offset:64 ; encoding: [0x7c,0xc0,0x14,0xed,0x03,0x00,0x00,0x00,0x00,0x40,0x00,0x00]
+0x7c,0xc0,0x14,0xed,0x03,0x00,0x00,0x00,0x00,0x40,0x00,0x00
+
+# GFX12: scratch_load_block v[3:34], off, s0 offset:-64 ; encoding: [0x00,0xc0,0x14,0xed,0x03,0x00,0x00,0x00,0x00,0xc0,0xff,0xff]
+0x00,0xc0,0x14,0xed,0x03,0x00,0x00,0x00,0x00,0xc0,0xff,0xff
+
+# GFX12: scratch_load_block v[3:34], off, s0 offset:64 ; encoding: [0x00,0xc0,0x14,0xed,0x03,0x00,0x00,0x00,0x00,0x40,0x00,0x00]
+0x00,0xc0,0x14,0xed,0x03,0x00,0x00,0x00,0x00,0x40,0x00,0x00
+
+# GFX12: scratch_load_block v[3:34], v0, off offset:-64 ; encoding: [0x7c,0xc0,0x14,0xed,0x03,0x00,0x02,0x00,0x00,0xc0,0xff,0xff]
+0x7c,0xc0,0x14,0xed,0x03,0x00,0x02,0x00,0x00,0xc0,0xff,0xff
+
+# GFX12: scratch_load_block v[3:34], v0, off offset:64 ; encoding: [0x7c,0xc0,0x14,0xed,0x03,0x00,0x02,0x00,0x00,0x40,0x00,0x00]
+0x7c,0xc0,0x14,0xed,0x03,0x00,0x02,0x00,0x00,0x40,0x00,0x00
+
+# GFX12: scratch_load_block v[3:34], v0, s0 offset:-64 ; encoding: [0x00,0xc0,0x14,0xed,0x03,0x00,0x02,0x00,0x00,0xc0,0xff,0xff]
+0x00,0xc0,0x14,0xed,0x03,0x00,0x02,0x00,0x00,0xc0,0xff,0xff
+
+# GFX12: scratch_load_block v[3:34], v0, s0 offset:64 ; encoding: [0x00,0xc0,0x14,0xed,0x03,0x00,0x02,0x00,0x00,0x40,0x00,0x00]
+0x00,0xc0,0x14,0xed,0x03,0x00,0x02,0x00,0x00,0x40,0x00,0x00
+
+# GFX12: scratch_load_block v[3:34], v2, s1 ; encoding: [0x01,0xc0,0x14,0xed,0x03,0x00,0x02,0x00,0x02,0x00,0x00,0x00]
+0x01,0xc0,0x14,0xed,0x03,0x00,0x02,0x00,0x02,0x00,0x00,0x00
+
+# GFX12: scratch_load_block v[3:34], v2, s1 th:TH_LOAD_HT scope:SCOPE_SE ; encoding: [0x01,0xc0,0x14,0xed,0x03,0x00,0x26,0x00,0x02,0x00,0x00,0x00]
+0x01,0xc0,0x14,0xed,0x03,0x00,0x26,0x00,0x02,0x00,0x00,0x00
+
# GFX12: scratch_store_b128 off, v[2:5], off offset:64 ; encoding: [0x7c,0x40,0x07,0xed,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00]
0x7c,0x40,0x07,0xed,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00
@@ -1658,3 +1730,33 @@
# GFX12: scratch_store_d16_hi_b8 v1, v2, s3 ; encoding: [0x03,0x00,0x09,0xed,0x00,0x00,0x02,0x01,0x01,0x00,0x00,0x00]
0x03,0x00,0x09,0xed,0x00,0x00,0x02,0x01,0x01,0x00,0x00,0x00
+
+# GFX12: scratch_store_block off, v[2:33], off offset:-64 ; encoding: [0x7c,0x00,0x15,0xed,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff]
+0x7c,0x00,0x15,0xed,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff
+
+# GFX12: scratch_store_block off, v[2:33], off offset:64 ; encoding: [0x7c,0x00,0x15,0xed,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00]
+0x7c,0x00,0x15,0xed,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00
+
+# GFX12: scratch_store_block off, v[2:33], s0 offset:-64 ; encoding: [0x00,0x00,0x15,0xed,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff]
+0x00,0x00,0x15,0xed,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff
+
+# GFX12: scratch_store_block off, v[2:33], s0 offset:64 ; encoding: [0x00,0x00,0x15,0xed,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00]
+0x00,0x00,0x15,0xed,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00
+
+# GFX12: scratch_store_block v0, v[2:33], off offset:-64 ; encoding: [0x7c,0x00,0x15,0xed,0x00,0x00,0x02,0x01,0x00,0xc0,0xff,0xff]
+0x7c,0x00,0x15,0xed,0x00,0x00,0x02,0x01,0x00,0xc0,0xff,0xff
+
+# GFX12: scratch_store_block v0, v[2:33], off offset:64 ; encoding: [0x7c,0x00,0x15,0xed,0x00,0x00,0x02,0x01,0x00,0x40,0x00,0x00]
+0x7c,0x00,0x15,0xed,0x00,0x00,0x02,0x01,0x00,0x40,0x00,0x00
+
+# GFX12: scratch_store_block v0, v[2:33], s0 offset:-64 ; encoding: [0x00,0x00,0x15,0xed,0x00,0x00,0x02,0x01,0x00,0xc0,0xff,0xff]
+0x00,0x00,0x15,0xed,0x00,0x00,0x02,0x01,0x00,0xc0,0xff,0xff
+
+# GFX12: scratch_store_block v0, v[2:33], s0 offset:64 ; encoding: [0x00,0x00,0x15,0xed,0x00,0x00,0x02,0x01,0x00,0x40,0x00,0x00]
+0x00,0x00,0x15,0xed,0x00,0x00,0x02,0x01,0x00,0x40,0x00,0x00
+
+# GFX12: scratch_store_block v1, v[2:33], s3 ; encoding: [0x03,0x00,0x15,0xed,0x00,0x00,0x02,0x01,0x01,0x00,0x00,0x00]
+0x03,0x00,0x15,0xed,0x00,0x00,0x02,0x01,0x01,0x00,0x00,0x00
+
+# GFX12: scratch_store_block v1, v[2:33], s3 th:TH_STORE_HT scope:SCOPE_SE ; encoding: [0x03,0x00,0x15,0xed,0x00,0x00,0x26,0x01,0x01,0x00,0x00,0x00]
+0x03,0x00,0x15,0xed,0x00,0x00,0x26,0x01,0x01,0x00,0x00,0x00
More information about the llvm-commits
mailing list