[llvm] fbf0ca6 - [AMDGPU][GFX12] Add support for new block ls instructions (#96273)

via llvm-commits llvm-commits at lists.llvm.org
Fri Jun 21 11:12:21 PDT 2024


Author: Mariusz Sikora
Date: 2024-06-21T20:12:18+02:00
New Revision: fbf0ca64182baa5fac73fa012ed1fb6805fa3581

URL: https://github.com/llvm/llvm-project/commit/fbf0ca64182baa5fac73fa012ed1fb6805fa3581
DIFF: https://github.com/llvm/llvm-project/commit/fbf0ca64182baa5fac73fa012ed1fb6805fa3581.diff

LOG: [AMDGPU][GFX12] Add support for new block ls (load/store) instructions (#96273)

Add MC layer (assembler and disassembler) support for the following new instructions:

GLOBAL_LOAD_BLOCK
GLOBAL_STORE_BLOCK
SCRATCH_LOAD_BLOCK
SCRATCH_STORE_BLOCK

Co-authored-by: Piotr Sobczak <piotr.sobczak at amd.com>

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/FLATInstructions.td
    llvm/lib/Target/AMDGPU/SIInstrInfo.td
    llvm/lib/Target/AMDGPU/SIRegisterInfo.td
    llvm/test/MC/AMDGPU/gfx12_asm_vflat.s
    llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vflat.txt

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td
index 3d2a74adab965..8ecbd62903a24 100644
--- a/llvm/lib/Target/AMDGPU/FLATInstructions.td
+++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td
@@ -966,6 +966,15 @@ defm SCRATCH_LOAD_LDS_SSHORT : FLAT_Scratch_Load_LDS_Pseudo <"scratch_load_lds_s
 defm SCRATCH_LOAD_LDS_DWORD  : FLAT_Scratch_Load_LDS_Pseudo <"scratch_load_lds_dword">;
 
 let SubtargetPredicate = isGFX12Plus in {
+  let Uses = [EXEC, M0] in {
+    defm GLOBAL_LOAD_BLOCK  : FLAT_Global_Load_Pseudo <"global_load_block", VReg_1024>;
+    defm GLOBAL_STORE_BLOCK  : FLAT_Global_Store_Pseudo <"global_store_block", VReg_1024>;
+  }
+  let Uses = [EXEC, FLAT_SCR, M0] in {
+    defm SCRATCH_LOAD_BLOCK : FLAT_Scratch_Load_Pseudo <"scratch_load_block", VReg_1024>;
+    defm SCRATCH_STORE_BLOCK : FLAT_Scratch_Store_Pseudo <"scratch_store_block", VReg_1024>;
+  }
+
   let WaveSizePredicate = isWave32 in {
     let Mnemonic = "global_load_tr_b128" in
     defm GLOBAL_LOAD_TR_B128_w32  : FLAT_Global_Load_Pseudo <"global_load_tr_b128_w32", VReg_128>;
@@ -2658,6 +2667,8 @@ defm GLOBAL_STORE_BYTE_D16_HI      : VGLOBAL_Real_AllAddr_gfx12<0x024, "global_s
 defm GLOBAL_STORE_SHORT_D16_HI     : VGLOBAL_Real_AllAddr_gfx12<0x025, "global_store_d16_hi_b16">;
 defm GLOBAL_LOAD_DWORD_ADDTID      : VGLOBAL_Real_AllAddr_gfx12<0x028, "global_load_addtid_b32">;
 defm GLOBAL_STORE_DWORD_ADDTID     : VGLOBAL_Real_AllAddr_gfx12<0x029, "global_store_addtid_b32">;
+defm GLOBAL_LOAD_BLOCK             : VGLOBAL_Real_AllAddr_gfx12<0x053>;
+defm GLOBAL_STORE_BLOCK            : VGLOBAL_Real_AllAddr_gfx12<0x054>;
 
 defm GLOBAL_ATOMIC_SWAP            : VGLOBAL_Real_Atomics_gfx12<0x033, "global_atomic_swap_b32">;
 defm GLOBAL_ATOMIC_CMPSWAP         : VGLOBAL_Real_Atomics_gfx12<0x034, "global_atomic_cmpswap_b32">;
@@ -2728,3 +2739,6 @@ defm SCRATCH_LOAD_SBYTE_D16_HI     : VSCRATCH_Real_AllAddr_gfx12<0x22, "scratch_
 defm SCRATCH_LOAD_SHORT_D16_HI     : VSCRATCH_Real_AllAddr_gfx12<0x23, "scratch_load_d16_hi_b16">;
 defm SCRATCH_STORE_BYTE_D16_HI     : VSCRATCH_Real_AllAddr_gfx12<0x24, "scratch_store_d16_hi_b8">;
 defm SCRATCH_STORE_SHORT_D16_HI    : VSCRATCH_Real_AllAddr_gfx12<0x25, "scratch_store_d16_hi_b16">;
+
+defm SCRATCH_LOAD_BLOCK            : VSCRATCH_Real_AllAddr_gfx12<0x53>;
+defm SCRATCH_STORE_BLOCK           : VSCRATCH_Real_AllAddr_gfx12<0x54>;

diff  --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index 80c623514bda1..c64b3a7c356f2 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -2230,7 +2230,8 @@ class getLdStRegisterOperand<RegisterClass RC> {
           !eq(RC.Size, 64)   : AVLdSt_64,
           !eq(RC.Size, 96)   : AVLdSt_96,
           !eq(RC.Size, 128)  : AVLdSt_128,
-          !eq(RC.Size, 160)  : AVLdSt_160);
+          !eq(RC.Size, 160)  : AVLdSt_160,
+          !eq(RC.Size, 1024) : AVLdSt_1024);
 }
 
 class getHasVOP3DPP <ValueType DstVT = i32, ValueType Src0VT = i32,

diff  --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
index a8efe2b2ba35e..f1d9aec163635 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -1344,6 +1344,7 @@ def AVLdSt_64 : AVLdStOperand<AV_64, "OPW64">;
 def AVLdSt_96 : AVLdStOperand<AV_96, "OPW96">;
 def AVLdSt_128 : AVLdStOperand<AV_128, "OPW128">;
 def AVLdSt_160 : AVLdStOperand<AV_160, "OPW160">;
+def AVLdSt_1024 : AVLdStOperand<AV_1024, "OPW1024">;
 
 //===----------------------------------------------------------------------===//
 //  ACSrc_* Operands with an AGPR or an inline constant

diff  --git a/llvm/test/MC/AMDGPU/gfx12_asm_vflat.s b/llvm/test/MC/AMDGPU/gfx12_asm_vflat.s
index 575bc1a8255a6..30bfaff8f17aa 100644
--- a/llvm/test/MC/AMDGPU/gfx12_asm_vflat.s
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_vflat.s
@@ -1896,6 +1896,27 @@ global_load_u8 v1, v[0:1], off offset:64
 global_load_u8 v1, v[3:4], off
 // GFX12: encoding: [0x7c,0x00,0x04,0xee,0x01,0x00,0x00,0x00,0x03,0x00,0x00,0x00]
 
+global_load_block v[9:40], v0, s[0:1] offset:-64
+// GFX12: encoding: [0x00,0xc0,0x14,0xee,0x09,0x00,0x00,0x00,0x00,0xc0,0xff,0xff]
+
+global_load_block v[9:40], v0, s[0:1] offset:64
+// GFX12: encoding: [0x00,0xc0,0x14,0xee,0x09,0x00,0x00,0x00,0x00,0x40,0x00,0x00]
+
+global_load_block v[9:40], v5, s[2:3]
+// GFX12: encoding: [0x02,0xc0,0x14,0xee,0x09,0x00,0x00,0x00,0x05,0x00,0x00,0x00]
+
+global_load_block v[9:40], v[0:1], off offset:-64
+// GFX12: encoding: [0x7c,0xc0,0x14,0xee,0x09,0x00,0x00,0x00,0x00,0xc0,0xff,0xff]
+
+global_load_block v[9:40], v[0:1], off offset:64
+// GFX12: encoding: [0x7c,0xc0,0x14,0xee,0x09,0x00,0x00,0x00,0x00,0x40,0x00,0x00]
+
+global_load_block v[9:40], v[5:6], off
+// GFX12: encoding: [0x7c,0xc0,0x14,0xee,0x09,0x00,0x00,0x00,0x05,0x00,0x00,0x00]
+
+global_load_block v[9:40], v[5:6], off th:TH_LOAD_HT scope:SCOPE_SE
+// GFX12: encoding: [0x7c,0xc0,0x14,0xee,0x09,0x00,0x24,0x00,0x05,0x00,0x00,0x00]
+
 global_store_addtid_b32 v2, off offset:-64
 // GFX12: encoding: [0x7c,0x40,0x0a,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff]
 
@@ -2058,6 +2079,27 @@ global_store_d16_hi_b8 v[0:1], v2, off offset:64
 global_store_d16_hi_b8 v[3:4], v1, off
 // GFX12: encoding: [0x7c,0x00,0x09,0xee,0x00,0x00,0x80,0x00,0x03,0x00,0x00,0x00]
 
+global_store_block v0, v[2:33], s[0:1] offset:-64
+// GFX12: encoding: [0x00,0x00,0x15,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff]
+
+global_store_block v0, v[2:33], s[0:1] offset:64
+// GFX12: encoding: [0x00,0x00,0x15,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00]
+
+global_store_block v1, v[3:34], s[2:3]
+// GFX12: encoding: [0x02,0x00,0x15,0xee,0x00,0x00,0x80,0x01,0x01,0x00,0x00,0x00]
+
+global_store_block v[0:1], v[2:33], off offset:-64
+// GFX12: encoding: [0x7c,0x00,0x15,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff]
+
+global_store_block v[0:1], v[2:33], off offset:64
+// GFX12: encoding: [0x7c,0x00,0x15,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00]
+
+global_store_block v[1:2], v[3:34], off
+// GFX12: encoding: [0x7c,0x00,0x15,0xee,0x00,0x00,0x80,0x01,0x01,0x00,0x00,0x00]
+
+global_store_block v[1:2], v[3:34], off th:TH_STORE_HT scope:SCOPE_SE
+// GFX12: encoding: [0x7c,0x00,0x15,0xee,0x00,0x00,0xa4,0x01,0x01,0x00,0x00,0x00]
+
 global_inv
 // GFX12: encoding: [0x7c,0xc0,0x0a,0xee,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00]
 
@@ -2490,6 +2532,36 @@ scratch_load_u8 v1, v0, s0 offset:64
 scratch_load_u8 v1, v2, s1
 // GFX12: encoding: [0x01,0x00,0x04,0xed,0x01,0x00,0x02,0x00,0x02,0x00,0x00,0x00]
 
+scratch_load_block v[3:34], off, off offset:-64
+// GFX12: encoding: [0x7c,0xc0,0x14,0xed,0x03,0x00,0x00,0x00,0x00,0xc0,0xff,0xff]
+
+scratch_load_block v[3:34], off, off offset:64
+// GFX12: encoding: [0x7c,0xc0,0x14,0xed,0x03,0x00,0x00,0x00,0x00,0x40,0x00,0x00]
+
+scratch_load_block v[3:34], off, s0 offset:-64
+// GFX12: encoding: [0x00,0xc0,0x14,0xed,0x03,0x00,0x00,0x00,0x00,0xc0,0xff,0xff]
+
+scratch_load_block v[3:34], off, s0 offset:64
+// GFX12: encoding: [0x00,0xc0,0x14,0xed,0x03,0x00,0x00,0x00,0x00,0x40,0x00,0x00]
+
+scratch_load_block v[3:34], v0, off offset:-64
+// GFX12: encoding: [0x7c,0xc0,0x14,0xed,0x03,0x00,0x02,0x00,0x00,0xc0,0xff,0xff]
+
+scratch_load_block v[3:34], v0, off offset:64
+// GFX12: encoding: [0x7c,0xc0,0x14,0xed,0x03,0x00,0x02,0x00,0x00,0x40,0x00,0x00]
+
+scratch_load_block v[3:34], v0, s0 offset:-64
+// GFX12: encoding: [0x00,0xc0,0x14,0xed,0x03,0x00,0x02,0x00,0x00,0xc0,0xff,0xff]
+
+scratch_load_block v[3:34], v0, s0 offset:64
+// GFX12: encoding: [0x00,0xc0,0x14,0xed,0x03,0x00,0x02,0x00,0x00,0x40,0x00,0x00]
+
+scratch_load_block v[3:34], v2, s1
+// GFX12: encoding: [0x01,0xc0,0x14,0xed,0x03,0x00,0x02,0x00,0x02,0x00,0x00,0x00]
+
+scratch_load_block v[3:34], v2, s1 th:TH_LOAD_HT scope:SCOPE_SE
+// GFX12: encoding: [0x01,0xc0,0x14,0xed,0x03,0x00,0x26,0x00,0x02,0x00,0x00,0x00]
+
 scratch_store_b128 off, v[2:5], off offset:-64
 // GFX12: encoding: [0x7c,0x40,0x07,0xed,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff]
 
@@ -2732,3 +2804,33 @@ scratch_store_d16_hi_b8 v0, v2, s0 offset:64
 
 scratch_store_d16_hi_b8 v1, v2, s3
 // GFX12: encoding: [0x03,0x00,0x09,0xed,0x00,0x00,0x02,0x01,0x01,0x00,0x00,0x00]
+
+scratch_store_block off, v[2:33], off offset:-64
+// GFX12: encoding: [0x7c,0x00,0x15,0xed,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff]
+
+scratch_store_block off, v[2:33], off offset:64
+// GFX12: encoding: [0x7c,0x00,0x15,0xed,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00]
+
+scratch_store_block off, v[2:33], s0 offset:-64
+// GFX12: encoding: [0x00,0x00,0x15,0xed,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff]
+
+scratch_store_block off, v[2:33], s0 offset:64
+// GFX12: encoding: [0x00,0x00,0x15,0xed,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00]
+
+scratch_store_block v0, v[2:33], off offset:-64
+// GFX12: encoding: [0x7c,0x00,0x15,0xed,0x00,0x00,0x02,0x01,0x00,0xc0,0xff,0xff]
+
+scratch_store_block v0, v[2:33], off offset:64
+// GFX12: encoding: [0x7c,0x00,0x15,0xed,0x00,0x00,0x02,0x01,0x00,0x40,0x00,0x00]
+
+scratch_store_block v0, v[2:33], s0 offset:-64
+// GFX12: encoding: [0x00,0x00,0x15,0xed,0x00,0x00,0x02,0x01,0x00,0xc0,0xff,0xff]
+
+scratch_store_block v0, v[2:33], s0 offset:64
+// GFX12: encoding: [0x00,0x00,0x15,0xed,0x00,0x00,0x02,0x01,0x00,0x40,0x00,0x00]
+
+scratch_store_block v1, v[2:33], s3
+// GFX12: encoding: [0x03,0x00,0x15,0xed,0x00,0x00,0x02,0x01,0x01,0x00,0x00,0x00]
+
+scratch_store_block v1, v[2:33], s3 th:TH_STORE_HT scope:SCOPE_SE
+// GFX12: encoding: [0x03,0x00,0x15,0xed,0x00,0x00,0x26,0x01,0x01,0x00,0x00,0x00]

diff  --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vflat.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vflat.txt
index e0b658b1fda3b..7953e0eb67c1b 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vflat.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vflat.txt
@@ -1155,6 +1155,27 @@
 # GFX12: global_load_u8 v1, v[3:4], off          ; encoding: [0x7c,0x00,0x04,0xee,0x01,0x00,0x00,0x00,0x03,0x00,0x00,0x00]
 0x7c,0x00,0x04,0xee,0x01,0x00,0x00,0x00,0x03,0x00,0x00,0x00
 
+# GFX12: global_load_block v[9:40], v0, s[0:1] offset:-64 ; encoding: [0x00,0xc0,0x14,0xee,0x09,0x00,0x00,0x00,0x00,0xc0,0xff,0xff]
+0x00,0xc0,0x14,0xee,0x09,0x00,0x00,0x00,0x00,0xc0,0xff,0xff
+
+# GFX12: global_load_block v[9:40], v0, s[0:1] offset:64 ; encoding: [0x00,0xc0,0x14,0xee,0x09,0x00,0x00,0x00,0x00,0x40,0x00,0x00]
+0x00,0xc0,0x14,0xee,0x09,0x00,0x00,0x00,0x00,0x40,0x00,0x00
+
+# GFX12: global_load_block v[9:40], v5, s[2:3] ; encoding: [0x02,0xc0,0x14,0xee,0x09,0x00,0x00,0x00,0x05,0x00,0x00,0x00]
+0x02,0xc0,0x14,0xee,0x09,0x00,0x00,0x00,0x05,0x00,0x00,0x00
+
+# GFX12: global_load_block v[9:40], v[0:1], off offset:-64 ; encoding: [0x7c,0xc0,0x14,0xee,0x09,0x00,0x00,0x00,0x00,0xc0,0xff,0xff]
+0x7c,0xc0,0x14,0xee,0x09,0x00,0x00,0x00,0x00,0xc0,0xff,0xff
+
+# GFX12: global_load_block v[9:40], v[0:1], off offset:64 ; encoding: [0x7c,0xc0,0x14,0xee,0x09,0x00,0x00,0x00,0x00,0x40,0x00,0x00]
+0x7c,0xc0,0x14,0xee,0x09,0x00,0x00,0x00,0x00,0x40,0x00,0x00
+
+# GFX12: global_load_block v[9:40], v[5:6], off ; encoding: [0x7c,0xc0,0x14,0xee,0x09,0x00,0x00,0x00,0x05,0x00,0x00,0x00]
+0x7c,0xc0,0x14,0xee,0x09,0x00,0x00,0x00,0x05,0x00,0x00,0x00
+
+# GFX12: global_load_block v[9:40], v[5:6], off th:TH_LOAD_HT scope:SCOPE_SE ; encoding: [0x7c,0xc0,0x14,0xee,0x09,0x00,0x24,0x00,0x05,0x00,0x00,0x00]
+0x7c,0xc0,0x14,0xee,0x09,0x00,0x24,0x00,0x05,0x00,0x00,0x00
+
 # GFX12: global_store_addtid_b32 v2, off offset:64 ; encoding: [0x7c,0x40,0x0a,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00]
 0x7c,0x40,0x0a,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00
 
@@ -1257,6 +1278,27 @@
 # GFX12: global_store_d16_hi_b8 v[3:4], v1, off  ; encoding: [0x7c,0x00,0x09,0xee,0x00,0x00,0x80,0x00,0x03,0x00,0x00,0x00]
 0x7c,0x00,0x09,0xee,0x00,0x00,0x80,0x00,0x03,0x00,0x00,0x00
 
+# GFX12: global_store_block v0, v[2:33], s[0:1] offset:-64 ; encoding: [0x00,0x00,0x15,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff]
+0x00,0x00,0x15,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff
+
+# GFX12: global_store_block v0, v[2:33], s[0:1] offset:64 ; encoding: [0x00,0x00,0x15,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00]
+0x00,0x00,0x15,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00
+
+# GFX12: global_store_block v1, v[3:34], s[2:3] ; encoding: [0x02,0x00,0x15,0xee,0x00,0x00,0x80,0x01,0x01,0x00,0x00,0x00]
+0x02,0x00,0x15,0xee,0x00,0x00,0x80,0x01,0x01,0x00,0x00,0x00
+
+# GFX12: global_store_block v[0:1], v[2:33], off offset:-64 ; encoding: [0x7c,0x00,0x15,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff]
+0x7c,0x00,0x15,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff
+
+# GFX12: global_store_block v[0:1], v[2:33], off offset:64 ; encoding: [0x7c,0x00,0x15,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00]
+0x7c,0x00,0x15,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00
+
+# GFX12: global_store_block v[1:2], v[3:34], off ; encoding: [0x7c,0x00,0x15,0xee,0x00,0x00,0x80,0x01,0x01,0x00,0x00,0x00]
+0x7c,0x00,0x15,0xee,0x00,0x00,0x80,0x01,0x01,0x00,0x00,0x00
+
+# GFX12: global_store_block v[1:2], v[3:34], off th:TH_STORE_HT scope:SCOPE_SE ; encoding: [0x7c,0x00,0x15,0xee,0x00,0x00,0xa4,0x01,0x01,0x00,0x00,0x00]
+0x7c,0x00,0x15,0xee,0x00,0x00,0xa4,0x01,0x01,0x00,0x00,0x00
+
 # GFX12: global_inv ; encoding: [0x7c,0xc0,0x0a,0xee,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00]
 0x7c,0xc0,0x0a,0xee,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
 
@@ -1518,6 +1560,36 @@
 # GFX12: scratch_load_u8 v1, v2, s1              ; encoding: [0x01,0x00,0x04,0xed,0x01,0x00,0x02,0x00,0x02,0x00,0x00,0x00]
 0x01,0x00,0x04,0xed,0x01,0x00,0x02,0x00,0x02,0x00,0x00,0x00
 
+# GFX12: scratch_load_block v[3:34], off, off offset:-64 ; encoding: [0x7c,0xc0,0x14,0xed,0x03,0x00,0x00,0x00,0x00,0xc0,0xff,0xff]
+0x7c,0xc0,0x14,0xed,0x03,0x00,0x00,0x00,0x00,0xc0,0xff,0xff
+
+# GFX12: scratch_load_block v[3:34], off, off offset:64 ; encoding: [0x7c,0xc0,0x14,0xed,0x03,0x00,0x00,0x00,0x00,0x40,0x00,0x00]
+0x7c,0xc0,0x14,0xed,0x03,0x00,0x00,0x00,0x00,0x40,0x00,0x00
+
+# GFX12: scratch_load_block v[3:34], off, s0 offset:-64 ; encoding: [0x00,0xc0,0x14,0xed,0x03,0x00,0x00,0x00,0x00,0xc0,0xff,0xff]
+0x00,0xc0,0x14,0xed,0x03,0x00,0x00,0x00,0x00,0xc0,0xff,0xff
+
+# GFX12: scratch_load_block v[3:34], off, s0 offset:64 ; encoding: [0x00,0xc0,0x14,0xed,0x03,0x00,0x00,0x00,0x00,0x40,0x00,0x00]
+0x00,0xc0,0x14,0xed,0x03,0x00,0x00,0x00,0x00,0x40,0x00,0x00
+
+# GFX12: scratch_load_block v[3:34], v0, off offset:-64 ; encoding: [0x7c,0xc0,0x14,0xed,0x03,0x00,0x02,0x00,0x00,0xc0,0xff,0xff]
+0x7c,0xc0,0x14,0xed,0x03,0x00,0x02,0x00,0x00,0xc0,0xff,0xff
+
+# GFX12: scratch_load_block v[3:34], v0, off offset:64 ; encoding: [0x7c,0xc0,0x14,0xed,0x03,0x00,0x02,0x00,0x00,0x40,0x00,0x00]
+0x7c,0xc0,0x14,0xed,0x03,0x00,0x02,0x00,0x00,0x40,0x00,0x00
+
+# GFX12: scratch_load_block v[3:34], v0, s0 offset:-64 ; encoding: [0x00,0xc0,0x14,0xed,0x03,0x00,0x02,0x00,0x00,0xc0,0xff,0xff]
+0x00,0xc0,0x14,0xed,0x03,0x00,0x02,0x00,0x00,0xc0,0xff,0xff
+
+# GFX12: scratch_load_block v[3:34], v0, s0 offset:64 ; encoding: [0x00,0xc0,0x14,0xed,0x03,0x00,0x02,0x00,0x00,0x40,0x00,0x00]
+0x00,0xc0,0x14,0xed,0x03,0x00,0x02,0x00,0x00,0x40,0x00,0x00
+
+# GFX12: scratch_load_block v[3:34], v2, s1 ; encoding: [0x01,0xc0,0x14,0xed,0x03,0x00,0x02,0x00,0x02,0x00,0x00,0x00]
+0x01,0xc0,0x14,0xed,0x03,0x00,0x02,0x00,0x02,0x00,0x00,0x00
+
+# GFX12: scratch_load_block v[3:34], v2, s1 th:TH_LOAD_HT scope:SCOPE_SE ; encoding: [0x01,0xc0,0x14,0xed,0x03,0x00,0x26,0x00,0x02,0x00,0x00,0x00]
+0x01,0xc0,0x14,0xed,0x03,0x00,0x26,0x00,0x02,0x00,0x00,0x00
+
 # GFX12: scratch_store_b128 off, v[2:5], off offset:64 ; encoding: [0x7c,0x40,0x07,0xed,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00]
 0x7c,0x40,0x07,0xed,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00
 
@@ -1658,3 +1730,33 @@
 
 # GFX12: scratch_store_d16_hi_b8 v1, v2, s3      ; encoding: [0x03,0x00,0x09,0xed,0x00,0x00,0x02,0x01,0x01,0x00,0x00,0x00]
 0x03,0x00,0x09,0xed,0x00,0x00,0x02,0x01,0x01,0x00,0x00,0x00
+
+# GFX12: scratch_store_block off, v[2:33], off offset:-64 ; encoding: [0x7c,0x00,0x15,0xed,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff]
+0x7c,0x00,0x15,0xed,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff
+
+# GFX12: scratch_store_block off, v[2:33], off offset:64 ; encoding: [0x7c,0x00,0x15,0xed,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00]
+0x7c,0x00,0x15,0xed,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00
+
+# GFX12: scratch_store_block off, v[2:33], s0 offset:-64 ; encoding: [0x00,0x00,0x15,0xed,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff]
+0x00,0x00,0x15,0xed,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff
+
+# GFX12: scratch_store_block off, v[2:33], s0 offset:64 ; encoding: [0x00,0x00,0x15,0xed,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00]
+0x00,0x00,0x15,0xed,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00
+
+# GFX12: scratch_store_block v0, v[2:33], off offset:-64 ; encoding: [0x7c,0x00,0x15,0xed,0x00,0x00,0x02,0x01,0x00,0xc0,0xff,0xff]
+0x7c,0x00,0x15,0xed,0x00,0x00,0x02,0x01,0x00,0xc0,0xff,0xff
+
+# GFX12: scratch_store_block v0, v[2:33], off offset:64 ; encoding: [0x7c,0x00,0x15,0xed,0x00,0x00,0x02,0x01,0x00,0x40,0x00,0x00]
+0x7c,0x00,0x15,0xed,0x00,0x00,0x02,0x01,0x00,0x40,0x00,0x00
+
+# GFX12: scratch_store_block v0, v[2:33], s0 offset:-64 ; encoding: [0x00,0x00,0x15,0xed,0x00,0x00,0x02,0x01,0x00,0xc0,0xff,0xff]
+0x00,0x00,0x15,0xed,0x00,0x00,0x02,0x01,0x00,0xc0,0xff,0xff
+
+# GFX12: scratch_store_block v0, v[2:33], s0 offset:64 ; encoding: [0x00,0x00,0x15,0xed,0x00,0x00,0x02,0x01,0x00,0x40,0x00,0x00]
+0x00,0x00,0x15,0xed,0x00,0x00,0x02,0x01,0x00,0x40,0x00,0x00
+
+# GFX12: scratch_store_block v1, v[2:33], s3 ; encoding: [0x03,0x00,0x15,0xed,0x00,0x00,0x02,0x01,0x01,0x00,0x00,0x00]
+0x03,0x00,0x15,0xed,0x00,0x00,0x02,0x01,0x01,0x00,0x00,0x00
+
+# GFX12: scratch_store_block v1, v[2:33], s3 th:TH_STORE_HT scope:SCOPE_SE ; encoding: [0x03,0x00,0x15,0xed,0x00,0x00,0x26,0x01,0x01,0x00,0x00,0x00]
+0x03,0x00,0x15,0xed,0x00,0x00,0x26,0x01,0x01,0x00,0x00,0x00


        


More information about the llvm-commits mailing list