[llvm] [AMDGPU] Add GFX12 8- and 16-bit SMEM loads (PR #76966)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Jan 4 06:59:27 PST 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-mc
Author: Jay Foad (jayfoad)
<details>
<summary>Changes</summary>
---
Patch is 57.58 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/76966.diff
3 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/SMInstructions.td (+18)
- (modified) llvm/test/MC/AMDGPU/gfx12_asm_smem.s (+564)
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_smem.txt (+513)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/SMInstructions.td b/llvm/lib/Target/AMDGPU/SMInstructions.td
index be21cf0140fc85..087ee65aa03fd3 100644
--- a/llvm/lib/Target/AMDGPU/SMInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SMInstructions.td
@@ -305,6 +305,10 @@ let SubtargetPredicate = HasScalarDwordx3Loads in
defm S_LOAD_DWORDX4 : SM_Pseudo_Loads <SReg_64, SReg_128>;
defm S_LOAD_DWORDX8 : SM_Pseudo_Loads <SReg_64, SReg_256>;
defm S_LOAD_DWORDX16 : SM_Pseudo_Loads <SReg_64, SReg_512>;
+defm S_LOAD_I8 : SM_Pseudo_Loads <SReg_64, SReg_32_XM0_XEXEC>;
+defm S_LOAD_U8 : SM_Pseudo_Loads <SReg_64, SReg_32_XM0_XEXEC>;
+defm S_LOAD_I16 : SM_Pseudo_Loads <SReg_64, SReg_32_XM0_XEXEC>;
+defm S_LOAD_U16 : SM_Pseudo_Loads <SReg_64, SReg_32_XM0_XEXEC>;
let is_buffer = 1 in {
defm S_BUFFER_LOAD_DWORD : SM_Pseudo_Loads <SReg_128, SReg_32_XM0_XEXEC>;
@@ -316,6 +320,10 @@ let SubtargetPredicate = HasScalarDwordx3Loads in
defm S_BUFFER_LOAD_DWORDX4 : SM_Pseudo_Loads <SReg_128, SReg_128>;
defm S_BUFFER_LOAD_DWORDX8 : SM_Pseudo_Loads <SReg_128, SReg_256>;
defm S_BUFFER_LOAD_DWORDX16 : SM_Pseudo_Loads <SReg_128, SReg_512>;
+defm S_BUFFER_LOAD_I8 : SM_Pseudo_Loads <SReg_128, SReg_32_XM0_XEXEC>;
+defm S_BUFFER_LOAD_U8 : SM_Pseudo_Loads <SReg_128, SReg_32_XM0_XEXEC>;
+defm S_BUFFER_LOAD_I16 : SM_Pseudo_Loads <SReg_128, SReg_32_XM0_XEXEC>;
+defm S_BUFFER_LOAD_U16 : SM_Pseudo_Loads <SReg_128, SReg_32_XM0_XEXEC>;
}
let SubtargetPredicate = HasScalarStores in {
@@ -1336,6 +1344,11 @@ defm S_LOAD_B128 : SM_Real_Loads_gfx12<0x02, "S_LOAD_DWORDX4">;
defm S_LOAD_B256 : SM_Real_Loads_gfx12<0x03, "S_LOAD_DWORDX8">;
defm S_LOAD_B512 : SM_Real_Loads_gfx12<0x04, "S_LOAD_DWORDX16">;
+defm S_LOAD_I8 : SM_Real_Loads_gfx12<0x08>;
+defm S_LOAD_U8 : SM_Real_Loads_gfx12<0x09>;
+defm S_LOAD_I16 : SM_Real_Loads_gfx12<0x0a>;
+defm S_LOAD_U16 : SM_Real_Loads_gfx12<0x0b>;
+
defm S_BUFFER_LOAD_B32 : SM_Real_Loads_gfx12<0x10, "S_BUFFER_LOAD_DWORD">;
defm S_BUFFER_LOAD_B64 : SM_Real_Loads_gfx12<0x11, "S_BUFFER_LOAD_DWORDX2">;
defm S_BUFFER_LOAD_B96 : SM_Real_Loads_gfx12<0x15, "S_BUFFER_LOAD_DWORDX3">;
@@ -1343,6 +1356,11 @@ defm S_BUFFER_LOAD_B128 : SM_Real_Loads_gfx12<0x12, "S_BUFFER_LOAD_DWORDX4">;
defm S_BUFFER_LOAD_B256 : SM_Real_Loads_gfx12<0x13, "S_BUFFER_LOAD_DWORDX8">;
defm S_BUFFER_LOAD_B512 : SM_Real_Loads_gfx12<0x14, "S_BUFFER_LOAD_DWORDX16">;
+defm S_BUFFER_LOAD_I8 : SM_Real_Loads_gfx12<0x18>;
+defm S_BUFFER_LOAD_U8 : SM_Real_Loads_gfx12<0x19>;
+defm S_BUFFER_LOAD_I16 : SM_Real_Loads_gfx12<0x1a>;
+defm S_BUFFER_LOAD_U16 : SM_Real_Loads_gfx12<0x1b>;
+
def S_DCACHE_INV_gfx12 : SMEM_Real_gfx12<0x021, S_DCACHE_INV>;
def S_PREFETCH_INST_gfx12 : SMEM_Real_Prefetch_gfx12<0x24, S_PREFETCH_INST>;
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_smem.s b/llvm/test/MC/AMDGPU/gfx12_asm_smem.s
index 1566b9c04e3494..eb59607755da0e 100644
--- a/llvm/test/MC/AMDGPU/gfx12_asm_smem.s
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_smem.s
@@ -34,6 +34,306 @@ s_buffer_prefetch_data s[20:23], 100, s10, 7
s_buffer_prefetch_data s[20:23], 100, null, 7
// GFX12: s_buffer_prefetch_data s[20:23], 0x64, null, 7 ; encoding: [0xca,0xe1,0x04,0xf4,0x64,0x00,0x00,0xf8]
+s_load_i8 s5, s[2:3], s0
+// GFX12: encoding: [0x41,0x01,0x01,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_i8 s101, s[2:3], s0
+// GFX12: encoding: [0x41,0x19,0x01,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_i8 vcc_lo, s[2:3], s0
+// GFX12: encoding: [0x81,0x1a,0x01,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_i8 vcc_hi, s[2:3], s0
+// GFX12: encoding: [0xc1,0x1a,0x01,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_i8 s5, s[4:5], s0
+// GFX12: encoding: [0x42,0x01,0x01,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_i8 s5, s[100:101], s0
+// GFX12: encoding: [0x72,0x01,0x01,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_i8 s5, vcc, s0
+// GFX12: encoding: [0x75,0x01,0x01,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_i8 s5, s[2:3], s101
+// GFX12: encoding: [0x41,0x01,0x01,0xf4,0x00,0x00,0x00,0xca]
+
+s_load_i8 s5, s[2:3], vcc_lo
+// GFX12: encoding: [0x41,0x01,0x01,0xf4,0x00,0x00,0x00,0xd4]
+
+s_load_i8 s5, s[2:3], vcc_hi
+// GFX12: encoding: [0x41,0x01,0x01,0xf4,0x00,0x00,0x00,0xd6]
+
+s_load_i8 s5, s[2:3], m0
+// GFX12: encoding: [0x41,0x01,0x01,0xf4,0x00,0x00,0x00,0xfa]
+
+s_load_i8 s5, s[2:3], 0x0
+// GFX12: encoding: [0x41,0x01,0x01,0xf4,0x00,0x00,0x00,0xf8]
+
+s_load_i8 s5, s[2:3], s7 offset:0x12345
+// GFX12: encoding: [0x41,0x01,0x01,0xf4,0x45,0x23,0x01,0x0e]
+
+s_load_u8 s5, s[2:3], s0
+// GFX12: encoding: [0x41,0x21,0x01,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_u8 s101, s[2:3], s0
+// GFX12: encoding: [0x41,0x39,0x01,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_u8 vcc_lo, s[2:3], s0
+// GFX12: encoding: [0x81,0x3a,0x01,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_u8 vcc_hi, s[2:3], s0
+// GFX12: encoding: [0xc1,0x3a,0x01,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_u8 s5, s[4:5], s0
+// GFX12: encoding: [0x42,0x21,0x01,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_u8 s5, s[100:101], s0
+// GFX12: encoding: [0x72,0x21,0x01,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_u8 s5, vcc, s0
+// GFX12: encoding: [0x75,0x21,0x01,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_u8 s5, s[2:3], s101
+// GFX12: encoding: [0x41,0x21,0x01,0xf4,0x00,0x00,0x00,0xca]
+
+s_load_u8 s5, s[2:3], vcc_lo
+// GFX12: encoding: [0x41,0x21,0x01,0xf4,0x00,0x00,0x00,0xd4]
+
+s_load_u8 s5, s[2:3], vcc_hi
+// GFX12: encoding: [0x41,0x21,0x01,0xf4,0x00,0x00,0x00,0xd6]
+
+s_load_u8 s5, s[2:3], m0
+// GFX12: encoding: [0x41,0x21,0x01,0xf4,0x00,0x00,0x00,0xfa]
+
+s_load_u8 s5, s[2:3], 0x0
+// GFX12: encoding: [0x41,0x21,0x01,0xf4,0x00,0x00,0x00,0xf8]
+
+s_load_u8 s5, s[2:3], s7 offset:0x12345
+// GFX12: encoding: [0x41,0x21,0x01,0xf4,0x45,0x23,0x01,0x0e]
+
+s_buffer_load_i8 s5, s[4:7], s0
+// GFX12: encoding: [0x42,0x01,0x03,0xf4,0x00,0x00,0x00,0x00]
+
+s_buffer_load_i8 s101, s[4:7], s0
+// GFX12: encoding: [0x42,0x19,0x03,0xf4,0x00,0x00,0x00,0x00]
+
+s_buffer_load_i8 vcc_lo, s[4:7], s0
+// GFX12: encoding: [0x82,0x1a,0x03,0xf4,0x00,0x00,0x00,0x00]
+
+s_buffer_load_i8 vcc_hi, s[4:7], s0
+// GFX12: encoding: [0xc2,0x1a,0x03,0xf4,0x00,0x00,0x00,0x00]
+
+s_buffer_load_i8 s5, s[8:11], s0
+// GFX12: encoding: [0x44,0x01,0x03,0xf4,0x00,0x00,0x00,0x00]
+
+s_buffer_load_i8 s5, s[96:99], s0
+// GFX12: encoding: [0x70,0x01,0x03,0xf4,0x00,0x00,0x00,0x00]
+
+s_buffer_load_i8 s5, s[4:7], s101
+// GFX12: encoding: [0x42,0x01,0x03,0xf4,0x00,0x00,0x00,0xca]
+
+s_buffer_load_i8 s5, s[4:7], vcc_lo
+// GFX12: encoding: [0x42,0x01,0x03,0xf4,0x00,0x00,0x00,0xd4]
+
+s_buffer_load_i8 s5, s[4:7], vcc_hi
+// GFX12: encoding: [0x42,0x01,0x03,0xf4,0x00,0x00,0x00,0xd6]
+
+s_buffer_load_i8 s5, s[4:7], m0
+// GFX12: encoding: [0x42,0x01,0x03,0xf4,0x00,0x00,0x00,0xfa]
+
+s_buffer_load_i8 s5, s[4:7], 0x0
+// GFX12: encoding: [0x42,0x01,0x03,0xf4,0x00,0x00,0x00,0xf8]
+
+s_buffer_load_i8 s5, s[4:7], s0 offset:0x12345
+// GFX12: encoding: [0x42,0x01,0x03,0xf4,0x45,0x23,0x01,0x00]
+
+s_buffer_load_u8 s5, s[4:7], s0
+// GFX12: encoding: [0x42,0x21,0x03,0xf4,0x00,0x00,0x00,0x00]
+
+s_buffer_load_u8 s101, s[4:7], s0
+// GFX12: encoding: [0x42,0x39,0x03,0xf4,0x00,0x00,0x00,0x00]
+
+s_buffer_load_u8 vcc_lo, s[4:7], s0
+// GFX12: encoding: [0x82,0x3a,0x03,0xf4,0x00,0x00,0x00,0x00]
+
+s_buffer_load_u8 vcc_hi, s[4:7], s0
+// GFX12: encoding: [0xc2,0x3a,0x03,0xf4,0x00,0x00,0x00,0x00]
+
+s_buffer_load_u8 s5, s[8:11], s0
+// GFX12: encoding: [0x44,0x21,0x03,0xf4,0x00,0x00,0x00,0x00]
+
+s_buffer_load_u8 s5, s[96:99], s0
+// GFX12: encoding: [0x70,0x21,0x03,0xf4,0x00,0x00,0x00,0x00]
+
+s_buffer_load_u8 s5, s[4:7], s101
+// GFX12: encoding: [0x42,0x21,0x03,0xf4,0x00,0x00,0x00,0xca]
+
+s_buffer_load_u8 s5, s[4:7], vcc_lo
+// GFX12: encoding: [0x42,0x21,0x03,0xf4,0x00,0x00,0x00,0xd4]
+
+s_buffer_load_u8 s5, s[4:7], vcc_hi
+// GFX12: encoding: [0x42,0x21,0x03,0xf4,0x00,0x00,0x00,0xd6]
+
+s_buffer_load_u8 s5, s[4:7], m0
+// GFX12: encoding: [0x42,0x21,0x03,0xf4,0x00,0x00,0x00,0xfa]
+
+s_buffer_load_u8 s5, s[4:7], 0x0
+// GFX12: encoding: [0x42,0x21,0x03,0xf4,0x00,0x00,0x00,0xf8]
+
+s_buffer_load_u8 s5, s[4:7], s0 offset:0x12345
+// GFX12: encoding: [0x42,0x21,0x03,0xf4,0x45,0x23,0x01,0x00]
+
+s_load_i16 s5, s[2:3], s0
+// GFX12: encoding: [0x41,0x41,0x01,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_i16 s101, s[2:3], s0
+// GFX12: encoding: [0x41,0x59,0x01,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_i16 vcc_lo, s[2:3], s0
+// GFX12: encoding: [0x81,0x5a,0x01,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_i16 vcc_hi, s[2:3], s0
+// GFX12: encoding: [0xc1,0x5a,0x01,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_i16 s5, s[4:5], s0
+// GFX12: encoding: [0x42,0x41,0x01,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_i16 s5, s[100:101], s0
+// GFX12: encoding: [0x72,0x41,0x01,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_i16 s5, vcc, s0
+// GFX12: encoding: [0x75,0x41,0x01,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_i16 s5, s[2:3], s101
+// GFX12: encoding: [0x41,0x41,0x01,0xf4,0x00,0x00,0x00,0xca]
+
+s_load_i16 s5, s[2:3], vcc_lo
+// GFX12: encoding: [0x41,0x41,0x01,0xf4,0x00,0x00,0x00,0xd4]
+
+s_load_i16 s5, s[2:3], vcc_hi
+// GFX12: encoding: [0x41,0x41,0x01,0xf4,0x00,0x00,0x00,0xd6]
+
+s_load_i16 s5, s[2:3], m0
+// GFX12: encoding: [0x41,0x41,0x01,0xf4,0x00,0x00,0x00,0xfa]
+
+s_load_i16 s5, s[2:3], 0x0
+// GFX12: encoding: [0x41,0x41,0x01,0xf4,0x00,0x00,0x00,0xf8]
+
+s_load_i16 s5, s[2:3], s7 offset:0x12345
+// GFX12: encoding: [0x41,0x41,0x01,0xf4,0x45,0x23,0x01,0x0e]
+
+s_load_u16 s5, s[2:3], s0
+// GFX12: encoding: [0x41,0x61,0x01,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_u16 s101, s[2:3], s0
+// GFX12: encoding: [0x41,0x79,0x01,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_u16 vcc_lo, s[2:3], s0
+// GFX12: encoding: [0x81,0x7a,0x01,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_u16 vcc_hi, s[2:3], s0
+// GFX12: encoding: [0xc1,0x7a,0x01,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_u16 s5, s[4:5], s0
+// GFX12: encoding: [0x42,0x61,0x01,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_u16 s5, s[100:101], s0
+// GFX12: encoding: [0x72,0x61,0x01,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_u16 s5, vcc, s0
+// GFX12: encoding: [0x75,0x61,0x01,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_u16 s5, s[2:3], s101
+// GFX12: encoding: [0x41,0x61,0x01,0xf4,0x00,0x00,0x00,0xca]
+
+s_load_u16 s5, s[2:3], vcc_lo
+// GFX12: encoding: [0x41,0x61,0x01,0xf4,0x00,0x00,0x00,0xd4]
+
+s_load_u16 s5, s[2:3], vcc_hi
+// GFX12: encoding: [0x41,0x61,0x01,0xf4,0x00,0x00,0x00,0xd6]
+
+s_load_u16 s5, s[2:3], m0
+// GFX12: encoding: [0x41,0x61,0x01,0xf4,0x00,0x00,0x00,0xfa]
+
+s_load_u16 s5, s[2:3], 0x0
+// GFX12: encoding: [0x41,0x61,0x01,0xf4,0x00,0x00,0x00,0xf8]
+
+s_load_u16 s5, s[2:3], s7 offset:0x12345
+// GFX12: encoding: [0x41,0x61,0x01,0xf4,0x45,0x23,0x01,0x0e]
+
+s_buffer_load_i16 s5, s[4:7], s0
+// GFX12: encoding: [0x42,0x41,0x03,0xf4,0x00,0x00,0x00,0x00]
+
+s_buffer_load_i16 s101, s[4:7], s0
+// GFX12: encoding: [0x42,0x59,0x03,0xf4,0x00,0x00,0x00,0x00]
+
+s_buffer_load_i16 vcc_lo, s[4:7], s0
+// GFX12: encoding: [0x82,0x5a,0x03,0xf4,0x00,0x00,0x00,0x00]
+
+s_buffer_load_i16 vcc_hi, s[4:7], s0
+// GFX12: encoding: [0xc2,0x5a,0x03,0xf4,0x00,0x00,0x00,0x00]
+
+s_buffer_load_i16 s5, s[8:11], s0
+// GFX12: encoding: [0x44,0x41,0x03,0xf4,0x00,0x00,0x00,0x00]
+
+s_buffer_load_i16 s5, s[96:99], s0
+// GFX12: encoding: [0x70,0x41,0x03,0xf4,0x00,0x00,0x00,0x00]
+
+s_buffer_load_i16 s5, s[4:7], s101
+// GFX12: encoding: [0x42,0x41,0x03,0xf4,0x00,0x00,0x00,0xca]
+
+s_buffer_load_i16 s5, s[4:7], vcc_lo
+// GFX12: encoding: [0x42,0x41,0x03,0xf4,0x00,0x00,0x00,0xd4]
+
+s_buffer_load_i16 s5, s[4:7], vcc_hi
+// GFX12: encoding: [0x42,0x41,0x03,0xf4,0x00,0x00,0x00,0xd6]
+
+s_buffer_load_i16 s5, s[4:7], m0
+// GFX12: encoding: [0x42,0x41,0x03,0xf4,0x00,0x00,0x00,0xfa]
+
+s_buffer_load_i16 s5, s[4:7], 0x0
+// GFX12: encoding: [0x42,0x41,0x03,0xf4,0x00,0x00,0x00,0xf8]
+
+s_buffer_load_i16 s5, s[4:7], s0 offset:0x12345
+// GFX12: encoding: [0x42,0x41,0x03,0xf4,0x45,0x23,0x01,0x00]
+
+s_buffer_load_u16 s5, s[4:7], s0
+// GFX12: encoding: [0x42,0x61,0x03,0xf4,0x00,0x00,0x00,0x00]
+
+s_buffer_load_u16 s101, s[4:7], s0
+// GFX12: encoding: [0x42,0x79,0x03,0xf4,0x00,0x00,0x00,0x00]
+
+s_buffer_load_u16 vcc_lo, s[4:7], s0
+// GFX12: encoding: [0x82,0x7a,0x03,0xf4,0x00,0x00,0x00,0x00]
+
+s_buffer_load_u16 vcc_hi, s[4:7], s0
+// GFX12: encoding: [0xc2,0x7a,0x03,0xf4,0x00,0x00,0x00,0x00]
+
+s_buffer_load_u16 s5, s[8:11], s0
+// GFX12: encoding: [0x44,0x61,0x03,0xf4,0x00,0x00,0x00,0x00]
+
+s_buffer_load_u16 s5, s[96:99], s0
+// GFX12: encoding: [0x70,0x61,0x03,0xf4,0x00,0x00,0x00,0x00]
+
+s_buffer_load_u16 s5, s[4:7], s101
+// GFX12: encoding: [0x42,0x61,0x03,0xf4,0x00,0x00,0x00,0xca]
+
+s_buffer_load_u16 s5, s[4:7], vcc_lo
+// GFX12: encoding: [0x42,0x61,0x03,0xf4,0x00,0x00,0x00,0xd4]
+
+s_buffer_load_u16 s5, s[4:7], vcc_hi
+// GFX12: encoding: [0x42,0x61,0x03,0xf4,0x00,0x00,0x00,0xd6]
+
+s_buffer_load_u16 s5, s[4:7], m0
+// GFX12: encoding: [0x42,0x61,0x03,0xf4,0x00,0x00,0x00,0xfa]
+
+s_buffer_load_u16 s5, s[4:7], 0x0
+// GFX12: encoding: [0x42,0x61,0x03,0xf4,0x00,0x00,0x00,0xf8]
+
+s_buffer_load_u16 s5, s[4:7], s0 offset:0x12345
+// GFX12: encoding: [0x42,0x61,0x03,0xf4,0x45,0x23,0x01,0x00]
+
s_load_b32 s5, s[2:3], s0
// GFX12: s_load_b32 s5, s[2:3], s0 offset:0x0 ; encoding: [0x41,0x01,0x00,0xf4,0x00,0x00,0x00,0x00]
@@ -646,6 +946,138 @@ s_load_b512 s[20:35], s[4:5], s0 offset:0x0 th:TH_LOAD_LU scope:SCOPE_SE
s_load_b512 s[20:35], s[4:5], s0 offset:0x0 th:TH_LOAD_HT scope:SCOPE_SYS
// GFX12: s_load_b512 s[20:35], s[4:5], s0 offset:0x0 th:TH_LOAD_HT scope:SCOPE_SYS ; encoding: [0x02,0x85,0x60,0xf5,0x00,0x00,0x00,0x00]
+s_load_i8 s5, s[2:3], s0 offset:0x0 th:TH_LOAD_RT
+// GFX12: s_load_i8 s5, s[2:3], s0 offset:0x0 ; encoding: [0x41,0x01,0x01,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_i8 s5, s[2:3], s0 offset:0x0 th:TH_LOAD_NT
+// GFX12: s_load_i8 s5, s[2:3], s0 offset:0x0 th:TH_LOAD_NT ; encoding: [0x41,0x01,0x81,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_i8 s5, s[2:3], s0 offset:0x0 th:TH_LOAD_HT
+// GFX12: s_load_i8 s5, s[2:3], s0 offset:0x0 th:TH_LOAD_HT ; encoding: [0x41,0x01,0x01,0xf5,0x00,0x00,0x00,0x00]
+
+s_load_i8 s5, s[2:3], s0 offset:0x0 th:TH_LOAD_LU
+// GFX12: s_load_i8 s5, s[2:3], s0 offset:0x0 th:TH_LOAD_LU ; encoding: [0x41,0x01,0x81,0xf5,0x00,0x00,0x00,0x00]
+
+s_load_i8 s5, s[2:3], s0 offset:0x0 scope:SCOPE_CU
+// GFX12: s_load_i8 s5, s[2:3], s0 offset:0x0 ; encoding: [0x41,0x01,0x01,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_i8 s5, s[2:3], s0 offset:0x0 scope:SCOPE_SE
+// GFX12: s_load_i8 s5, s[2:3], s0 offset:0x0 scope:SCOPE_SE ; encoding: [0x41,0x01,0x21,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_i8 s5, s[2:3], s0 offset:0x0 scope:SCOPE_DEV
+// GFX12: s_load_i8 s5, s[2:3], s0 offset:0x0 scope:SCOPE_DEV ; encoding: [0x41,0x01,0x41,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_i8 s5, s[2:3], s0 offset:0x0 scope:SCOPE_SYS
+// GFX12: s_load_i8 s5, s[2:3], s0 offset:0x0 scope:SCOPE_SYS ; encoding: [0x41,0x01,0x61,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_i8 s5, s[2:3], s0 offset:0x0 th:TH_LOAD_HT scope:SCOPE_SE
+// GFX12: s_load_i8 s5, s[2:3], s0 offset:0x0 th:TH_LOAD_HT scope:SCOPE_SE ; encoding: [0x41,0x01,0x21,0xf5,0x00,0x00,0x00,0x00]
+
+s_load_i8 s5, s[2:3], s0 offset:0x0 th:TH_LOAD_LU scope:SCOPE_SE
+// GFX12: s_load_i8 s5, s[2:3], s0 offset:0x0 th:TH_LOAD_LU scope:SCOPE_SE ; encoding: [0x41,0x01,0xa1,0xf5,0x00,0x00,0x00,0x00]
+
+s_load_i8 s5, s[2:3], s0 offset:0x0 th:TH_LOAD_HT scope:SCOPE_SYS
+// GFX12: s_load_i8 s5, s[2:3], s0 offset:0x0 th:TH_LOAD_HT scope:SCOPE_SYS ; encoding: [0x41,0x01,0x61,0xf5,0x00,0x00,0x00,0x00]
+
+s_load_u8 s101, s[2:3], s0 offset:0x0 th:TH_LOAD_RT
+// GFX12: s_load_u8 s101, s[2:3], s0 offset:0x0 ; encoding: [0x41,0x39,0x01,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_u8 s101, s[2:3], s0 offset:0x0 th:TH_LOAD_NT
+// GFX12: s_load_u8 s101, s[2:3], s0 offset:0x0 th:TH_LOAD_NT ; encoding: [0x41,0x39,0x81,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_u8 s101, s[2:3], s0 offset:0x0 th:TH_LOAD_HT
+// GFX12: s_load_u8 s101, s[2:3], s0 offset:0x0 th:TH_LOAD_HT ; encoding: [0x41,0x39,0x01,0xf5,0x00,0x00,0x00,0x00]
+
+s_load_u8 s101, s[2:3], s0 offset:0x0 th:TH_LOAD_LU
+// GFX12: s_load_u8 s101, s[2:3], s0 offset:0x0 th:TH_LOAD_LU ; encoding: [0x41,0x39,0x81,0xf5,0x00,0x00,0x00,0x00]
+
+s_load_u8 s101, s[2:3], s0 offset:0x0 scope:SCOPE_CU
+// GFX12: s_load_u8 s101, s[2:3], s0 offset:0x0 ; encoding: [0x41,0x39,0x01,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_u8 s101, s[2:3], s0 offset:0x0 scope:SCOPE_SE
+// GFX12: s_load_u8 s101, s[2:3], s0 offset:0x0 scope:SCOPE_SE ; encoding: [0x41,0x39,0x21,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_u8 s101, s[2:3], s0 offset:0x0 scope:SCOPE_DEV
+// GFX12: s_load_u8 s101, s[2:3], s0 offset:0x0 scope:SCOPE_DEV ; encoding: [0x41,0x39,0x41,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_u8 s101, s[2:3], s0 offset:0x0 scope:SCOPE_SYS
+// GFX12: s_load_u8 s101, s[2:3], s0 offset:0x0 scope:SCOPE_SYS ; encoding: [0x41,0x39,0x61,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_u8 s101, s[2:3], s0 offset:0x0 th:TH_LOAD_HT scope:SCOPE_SE
+// GFX12: s_load_u8 s101, s[2:3], s0 offset:0x0 th:TH_LOAD_HT scope:SCOPE_SE ; encoding: [0x41,0x39,0x21,0xf5,0x00,0x00,0x00,0x00]
+
+s_load_u8 s101, s[2:3], s0 offset:0x0 th:TH_LOAD_LU scope:SCOPE_SE
+// GFX12: s_load_u8 s101, s[2:3], s0 offset:0x0 th:TH_LOAD_LU scope:SCOPE_SE ; encoding: [0x41,0x39,0xa1,0xf5,0x00,0x00,0x00,0x00]
+
+s_load_u8 s101, s[2:3], s0 offset:0x0 th:TH_LOAD_HT scope:SCOPE_SYS
+// GFX12: s_load_u8 s101, s[2:3], s0 offset:0x0 th:TH_LOAD_HT scope:SCOPE_SYS ; encoding: [0x41,0x39,0x61,0xf5,0x00,0x00,0x00,0x00]
+
+s_load_i16 s5, s[100:101], s0 offset:0x0 th:TH_LOAD_RT
+// GFX12: s_load_i16 s5, s[100:101], s0 offset:0x0 ; encoding: [0x72,0x41,0x01,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_i16 s5, s[100:101], s0 offset:0x0 th:TH_LOAD_NT
+// GFX12: s_load_i16 s5, s[100:101], s0 offset:0x0 th:TH_LOAD_NT ; encoding: [0x72,0x41,0x81,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_i16 s5, s[100:101], s0 offset:0x0 th:TH_LOAD_HT
+// GFX12: s_load_i16 s5, s[100:101], s0 offset:0x0 th:TH_LOAD_HT ; encoding: [0x72,0x41,0x01,0xf5,0x00,0x00,0x00,0x00]
+
+s_load_i16 s5, s[100:101], s0 offset:0x0 th:TH_LOAD_LU
+// GFX12: s_load_i16 s5, s[100:101], s0 offset:0x0 th:TH_LOAD_LU ; encoding: [0x72,0x41,0x81,0xf5,0x00,0x00,0x00,0x00]
+
+s_load_i16 s5, s[100:101], s0 offset:0x0 scope:SCOPE_CU
+// GFX12: s_load_i16 s5, s[100:101], s0 offset:0x0 ; encoding: [0x72,0x41,0x01,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_i16 s5, s[100:101], s0 offset:0x0 scope:SCOPE_SE
+// GFX12: s_load_i16 s5, s[100:101], s0 offset:0x0 scope:SCOPE_SE ; encoding: [0x72,0x41,0x21,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_i16 s5, s[100:101], s0 offset:0x0 scope:SCOPE_DEV
+// GFX12: s_load_i16 s5, s[100:101], s0 offset:0x0 scope:SCOPE_DEV ; encoding: [0x72,0x41,0x41,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_i16 s5, s[100:101], s0 offset:0x0 scope:SCOPE_SYS
+// GFX12: s_load_i16 s5, s[100:101], s0 offset:0x0 scope:SCOPE_SYS ; encoding: [0x72,0x41,0x61,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_i16 s5, s[100:101], s0 offset:0x0 th:TH_LOAD_HT scope:SCOPE_SE
+// GFX12: s_load_i16 s5, s[100:101], s0 offset:0x0 th:TH_LOAD_HT scope:SCOPE_SE ; encoding: [0x72,0x41,0x21,0xf5,0x00,0x00,0x00,0x00]
+
+s_load_i16 s5, s[100:101], s0 offset:0x0 th:TH_LOAD_LU scope:SCOPE_SE
+// GFX12: s_load_i16 s5, s[100:101], s0 offset:0x0 th:TH_LOAD_LU scope:SCOPE_SE ; encoding: [0x72,0x41,0xa1,0xf5,0x00,0x00,0x00,0x00]
+
+s_load_i16 s5, s[100:101], s0 offset:0x0 th:TH_LOAD_HT scope:SCOPE_SYS
+// GFX12: s_load_i16 s5, s[100:101], s0 offset:0x0 th:TH_LOAD_HT scope:SCOPE_SYS ; encoding: [0x72,0x41,0x61,0xf5,0x00,0x00,0x00,0x00]
+
+s_load_u16 s5, vcc, s0 offset:0x0 th:TH_LOAD_RT
+// GFX12: s_load_u16 s5, vcc, s0 offset:0x0 ; encoding: [0x75,0x61,0x01,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_u16 s5, vcc, s0 offset:0x0 th:TH_LOAD_NT
+// GFX12: s_load_u16 s5, vcc, s0 offset:0x0 th:TH_LOAD_NT ; encoding: [0x75,0x61,0x81,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_u16 s5, vcc, s0 offset:0x0 th:TH_LOAD_HT
+// GFX12: s_load_u16 s5, vcc, s0 offset:0x0 th:TH_LOAD_HT ; encoding: [0x75,0x61,0x01,0xf5,0x00,0x00,0x00,0x00]
+
+s_load_u16 s5, vcc, s0 offset:0x0 th:TH_LOAD_LU
+// GFX12: s_load_u16 s5, vcc, s0 offset:0x0 th:TH_LOAD_LU ; encoding: [0x75,0x61,0x81,0xf5,0x00,0x00,0x00,0x00]
+
+s_load_u16 s5, vcc, s0 offset:0x0 scope:SCOPE_CU
+// GFX12: s_load_u16 s5, vcc, s0 offset:0x0 ; encoding: [0x75,0x61,0x01,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_u16 s5, vcc, s0 offset:0x0 scope:SCOPE_SE
+// GFX12: s_load_u16 s5, vcc, s0 offset:0x0 scope:SCOPE_SE ; encoding: [0x75,0x61,0x21,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_u16 s5, vcc, s0 offset:0x0 scope:SCOPE_DEV
+// GFX12: s_load_u16 s5, vcc, s0 offset:0x0 scope:SCOPE_DEV ; encoding: [0x75,0x61...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/76966
More information about the llvm-commits mailing list