[llvm] [AMDGPU][MC] Add GFX12 SMEM encoding (PR #75215)

Mirko Brkušanin via llvm-commits llvm-commits at lists.llvm.org
Thu Dec 14 08:27:48 PST 2023


https://github.com/mbrkusanin updated https://github.com/llvm/llvm-project/pull/75215

>From b9534de3062fb69f2198680c1dcfbd518b776cdb Mon Sep 17 00:00:00 2001
From: Mirko Brkusanin <Mirko.Brkusanin at amd.com>
Date: Tue, 12 Dec 2023 18:00:30 +0100
Subject: [PATCH] [AMDGPU][MC] Add GFX12 SMEM encoding

---
 llvm/lib/Target/AMDGPU/AMDGPU.td              |  11 +-
 .../AMDGPU/AsmParser/AMDGPUAsmParser.cpp      |   2 +-
 .../Disassembler/AMDGPUDisassembler.cpp       |   3 +
 llvm/lib/Target/AMDGPU/GCNSubtarget.h         |   3 +
 llvm/lib/Target/AMDGPU/SIRegisterInfo.td      |   2 +-
 llvm/lib/Target/AMDGPU/SMInstructions.td      |  77 +-
 llvm/test/MC/AMDGPU/gfx11_asm_err.s           |   6 +
 llvm/test/MC/AMDGPU/gfx12_asm_smem.s          | 810 ++++++++++++++++++
 llvm/test/MC/AMDGPU/gfx12_err.s               |   9 +
 .../Disassembler/AMDGPU/gfx12_dasm_smem.txt   | 735 ++++++++++++++++
 10 files changed, 1644 insertions(+), 14 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index 799e102d56174d..920cf784858768 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -411,6 +411,12 @@ def FeatureVGPRIndexMode : SubtargetFeature<"vgpr-index-mode",
   "Has VGPR mode register indexing"
 >;
 
+def FeatureScalarDwordx3Loads : SubtargetFeature<"scalar-dwordx3-loads",
+  "HasScalarDwordx3Loads",
+  "true",
+  "Has 96-bit scalar load instructions"
+>;
+
 def FeatureScalarStores : SubtargetFeature<"scalar-stores",
   "HasScalarStores",
   "true",
@@ -1462,7 +1468,8 @@ def FeatureISAVersion12 : FeatureSet<
    FeatureVcmpxPermlaneHazard,
    FeatureSALUFloatInsts,
    FeatureVGPRSingleUseHintInsts,
-   FeatureMADIntraFwdBug]>;
+   FeatureMADIntraFwdBug,
+   FeatureScalarDwordx3Loads]>;
 
 //===----------------------------------------------------------------------===//
 
@@ -2011,6 +2018,8 @@ def HasNoCvtFP8VOP1Bug : Predicate<"!Subtarget->hasCvtFP8VOP1Bug()">;
 
 def HasAtomicCSubNoRtnInsts : Predicate<"Subtarget->hasAtomicCSubNoRtnInsts()">;
 
+def HasScalarDwordx3Loads : Predicate<"Subtarget->hasScalarDwordx3Loads()">;
+
 // Include AMDGPU TD files
 include "SISchedule.td"
 include "GCNProcessors.td"
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 2f1e1809b6cd73..3b69a37728ea1c 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -2635,7 +2635,7 @@ AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,
   if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
     // SGPR and TTMP registers must be aligned.
     // Max required alignment is 4 dwords.
-    AlignSize = std::min(RegWidth / 32, 4u);
+    AlignSize = std::min(llvm::bit_ceil(RegWidth / 32), 4u);
   }
 
   if (RegNum % AlignSize != 0) {
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index 1cfc5af571c1f9..392bc626167cf6 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -213,6 +213,7 @@ DECODE_OPERAND_REG_7(SReg_32_XM0_XEXEC, OPW32)
 DECODE_OPERAND_REG_7(SReg_32_XEXEC_HI, OPW32)
 DECODE_OPERAND_REG_7(SReg_64, OPW64)
 DECODE_OPERAND_REG_7(SReg_64_XEXEC, OPW64)
+DECODE_OPERAND_REG_7(SReg_96, OPW96)
 DECODE_OPERAND_REG_7(SReg_128, OPW128)
 DECODE_OPERAND_REG_7(SReg_256, OPW256)
 DECODE_OPERAND_REG_7(SReg_512, OPW512)
@@ -1239,6 +1240,8 @@ MCOperand AMDGPUDisassembler::createSRegOperand(unsigned SRegClassID,
   case AMDGPU::TTMP_64RegClassID:
     shift = 1;
     break;
+  case AMDGPU::SGPR_96RegClassID:
+  case AMDGPU::TTMP_96RegClassID:
   case AMDGPU::SGPR_128RegClassID:
   case AMDGPU::TTMP_128RegClassID:
   // ToDo: unclear if s[100:104] is available on VI. Can we use VCC as SGPR in
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index 8e1350be8b45fc..e59b74835b42aa 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -119,6 +119,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
   bool HasFmaMixInsts = false;
   bool HasMovrel = false;
   bool HasVGPRIndexMode = false;
+  bool HasScalarDwordx3Loads = false;
   bool HasScalarStores = false;
   bool HasScalarAtomics = false;
   bool HasSDWAOmod = false;
@@ -886,6 +887,8 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
     return getGeneration() >= VOLCANIC_ISLANDS;
   }
 
+  bool hasScalarDwordx3Loads() const { return HasScalarDwordx3Loads; }
+
   bool hasScalarStores() const {
     return HasScalarStores;
   }
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
index 7ea2280c474b05..981da13fe08952 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -414,7 +414,7 @@ def SGPR_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
 // SGPR 64-bit registers
 def SGPR_64Regs : SIRegisterTuples<getSubRegs<2>.ret, SGPR_32, 105, 2, 2, "s">;
 
-// SGPR 96-bit registers. No operations use these, but for symmetry with 96-bit VGPRs.
+// SGPR 96-bit registers.
 def SGPR_96Regs : SIRegisterTuples<getSubRegs<3>.ret, SGPR_32, 105, 4, 3, "s">;
 
 // SGPR 128-bit registers
diff --git a/llvm/lib/Target/AMDGPU/SMInstructions.td b/llvm/lib/Target/AMDGPU/SMInstructions.td
index c18846483cf95a..d24bfd535d4ddc 100644
--- a/llvm/lib/Target/AMDGPU/SMInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SMInstructions.td
@@ -74,7 +74,7 @@ class SM_Real <SM_Pseudo ps, string opName = ps.Mnemonic>
   bits<7>  sdst;
   bits<32> offset;
   bits<8>  soffset;
-  bits<5> cpol;
+  bits<5>  cpol;
 }
 
 class OffsetMode<bit hasOffset, bit hasSOffset, string variant,
@@ -300,6 +300,8 @@ multiclass SM_Pseudo_Atomics<RegisterClass baseClass,
 // does sdst for SMRD on SI/CI?
 defm S_LOAD_DWORD    : SM_Pseudo_Loads <SReg_64, SReg_32_XM0_XEXEC>;
 defm S_LOAD_DWORDX2  : SM_Pseudo_Loads <SReg_64, SReg_64_XEXEC>;
+let SubtargetPredicate = HasScalarDwordx3Loads in
+  defm S_LOAD_DWORDX3  : SM_Pseudo_Loads <SReg_64, SReg_96>;
 defm S_LOAD_DWORDX4  : SM_Pseudo_Loads <SReg_64, SReg_128>;
 defm S_LOAD_DWORDX8  : SM_Pseudo_Loads <SReg_64, SReg_256>;
 defm S_LOAD_DWORDX16 : SM_Pseudo_Loads <SReg_64, SReg_512>;
@@ -309,6 +311,8 @@ defm S_BUFFER_LOAD_DWORD : SM_Pseudo_Loads <SReg_128, SReg_32_XM0_XEXEC>;
 // FIXME: exec_lo/exec_hi appear to be allowed for SMRD loads on
 // SI/CI, bit disallowed for SMEM on VI.
 defm S_BUFFER_LOAD_DWORDX2 : SM_Pseudo_Loads <SReg_128, SReg_64_XEXEC>;
+let SubtargetPredicate = HasScalarDwordx3Loads in
+  defm S_BUFFER_LOAD_DWORDX3 : SM_Pseudo_Loads <SReg_128, SReg_96>;
 defm S_BUFFER_LOAD_DWORDX4 : SM_Pseudo_Loads <SReg_128, SReg_128>;
 defm S_BUFFER_LOAD_DWORDX8 : SM_Pseudo_Loads <SReg_128, SReg_256>;
 defm S_BUFFER_LOAD_DWORDX16 : SM_Pseudo_Loads <SReg_128, SReg_512>;
@@ -1179,7 +1183,7 @@ def SMInfoTable : GenericTable {
 class SMEM_Real_gfx11<bits<8> op, SM_Pseudo ps, string opName = ps.Mnemonic> :
     SMEM_Real_10Plus_common<op, ps, opName, SIEncodingFamily.GFX11,
                             SGPR_NULL_gfx11plus> {
-  let AssemblerPredicate = isGFX11Plus;
+  let AssemblerPredicate = isGFX11Only;
   let DecoderNamespace = "GFX11";
   let Inst{13}    = !if(ps.has_dlc, cpol{CPolBit.DLC}, 0);
   let Inst{14}    = !if(ps.has_glc, cpol{CPolBit.GLC}, 0);
@@ -1235,19 +1239,30 @@ defm S_ATC_PROBE_BUFFER : SM_Real_Probe_gfx11 <0x23>;
 // GFX12.
 //===----------------------------------------------------------------------===//
 
-class SMEM_Real_gfx12<bits<8> op, SM_Pseudo ps, string opName = ps.Mnemonic> :
-    SMEM_Real_10Plus_common<op, ps, opName, SIEncodingFamily.GFX12,
-                            SGPR_NULL_gfx11plus> {
+class SMEM_Real_gfx12Plus<bits<6> op, SM_Pseudo ps, string opName,
+                          int subtarget, RegisterWithSubRegs sgpr_null> :
+    SM_Real<ps, opName>, SIMCInstr<ps.PseudoInstr, subtarget>, Enc64 {
+
+  let Inst{18-13} = op;
+  let Inst{31-26} = 0x3d;
+
+  let Inst{55-32} = !if(ps.has_offset, offset{23-0}, !if(ps.has_soffset, 0, ?));
+  let Inst{63-57} = !if(ps.has_soffset, soffset{6-0},
+                        !if(ps.has_offset, sgpr_null.HWEncoding{6-0}, ?));
+}
+
+class SMEM_Real_gfx12<bits<6> op, SM_Pseudo ps, string opName = ps.Mnemonic> :
+    SMEM_Real_gfx12Plus<op, ps, opName, SIEncodingFamily.GFX12,
+                        SGPR_NULL_gfx11plus> {
   let AssemblerPredicate = isGFX12Plus;
   let DecoderNamespace = "GFX12";
-  let Inst{18-13} = op{5-0};
-  let Inst{19}    = !if(ps.has_dlc, cpol{CPolBit.DLC}, 0);
-  let Inst{24-20} = ?; // TODO-GFX12: Add new bits {24-20}: TH, Scope, NV
-  let Inst{25}    = !if(ps.has_glc, cpol{CPolBit.GLC}, 0);
-  let Inst{55-32} = offset{23-0};
+
+  let Inst{5-0}   = !if(ps.has_sbase, sbase{6-1}, ?);
+  let Inst{12-6}  = !if(ps.has_sdst, sdst{6-0}, ?);
 }
 
-class SMEM_Real_Prefetch_gfx12 <bits<8> op, SM_Pseudo ps> : SMEM_Real_gfx12<op, ps> {
+class SMEM_Real_Prefetch_gfx12<bits<6> op, SM_Pseudo ps> :
+    SMEM_Real_gfx12<op, ps> {
   bits<7> sdata; // Only 5 bits of sdata are supported.
 
   let sdst = ?;
@@ -1255,8 +1270,48 @@ class SMEM_Real_Prefetch_gfx12 <bits<8> op, SM_Pseudo ps> : SMEM_Real_gfx12<op,
   let Inst{10-6}  = !if(ps.has_sdst, sdata{4-0}, ?);
 }
 
+class SMEM_Real_Load_gfx12<bits<6> op, string ps, string opName, OffsetMode offsets> :
+    SMEM_Real_gfx12<op, !cast<SM_Pseudo>(ps # offsets.Variant), opName> {
+  RegisterClass BaseClass = !cast<SM_Load_Pseudo>(ps # offsets.Variant).BaseClass;
+  let InOperandList = !con((ins BaseClass:$sbase), offsets.Ins, (ins CPol:$cpol));
+
+  let Inst{22-21} = cpol{4-3}; // scope
+  let Inst{24-23} = cpol{1-0}; // th - only lower 2 bits are supported
+}
+
+multiclass SM_Real_Loads_gfx12<bits<6> op, string ps = NAME> {
+  defvar opName = !tolower(NAME);
+  def _IMM_gfx12 : SMEM_Real_Load_gfx12<op, ps, opName, IMM_Offset>;
+  def _SGPR_IMM_gfx12 : SMEM_Real_Load_gfx12<op, ps, opName, SGPR_IMM_Offset>;
+}
+
+defm S_LOAD_B32  : SM_Real_Loads_gfx12<0x00, "S_LOAD_DWORD">;
+defm S_LOAD_B64  : SM_Real_Loads_gfx12<0x01, "S_LOAD_DWORDX2">;
+defm S_LOAD_B96  : SM_Real_Loads_gfx12<0x05, "S_LOAD_DWORDX3">;
+defm S_LOAD_B128 : SM_Real_Loads_gfx12<0x02, "S_LOAD_DWORDX4">;
+defm S_LOAD_B256 : SM_Real_Loads_gfx12<0x03, "S_LOAD_DWORDX8">;
+defm S_LOAD_B512 : SM_Real_Loads_gfx12<0x04, "S_LOAD_DWORDX16">;
+
+defm S_BUFFER_LOAD_B32  : SM_Real_Loads_gfx12<0x10, "S_BUFFER_LOAD_DWORD">;
+defm S_BUFFER_LOAD_B64  : SM_Real_Loads_gfx12<0x11, "S_BUFFER_LOAD_DWORDX2">;
+defm S_BUFFER_LOAD_B96  : SM_Real_Loads_gfx12<0x15, "S_BUFFER_LOAD_DWORDX3">;
+defm S_BUFFER_LOAD_B128 : SM_Real_Loads_gfx12<0x12, "S_BUFFER_LOAD_DWORDX4">;
+defm S_BUFFER_LOAD_B256 : SM_Real_Loads_gfx12<0x13, "S_BUFFER_LOAD_DWORDX8">;
+defm S_BUFFER_LOAD_B512 : SM_Real_Loads_gfx12<0x14, "S_BUFFER_LOAD_DWORDX16">;
+
+def S_DCACHE_INV_gfx12 : SMEM_Real_gfx12<0x021, S_DCACHE_INV>;
+
 def S_PREFETCH_INST_gfx12        : SMEM_Real_Prefetch_gfx12<0x24, S_PREFETCH_INST>;
 def S_PREFETCH_INST_PC_REL_gfx12 : SMEM_Real_Prefetch_gfx12<0x25, S_PREFETCH_INST_PC_REL>;
 def S_PREFETCH_DATA_gfx12        : SMEM_Real_Prefetch_gfx12<0x26, S_PREFETCH_DATA>;
 def S_BUFFER_PREFETCH_DATA_gfx12 : SMEM_Real_Prefetch_gfx12<0x27, S_BUFFER_PREFETCH_DATA>;
 def S_PREFETCH_DATA_PC_REL_gfx12 : SMEM_Real_Prefetch_gfx12<0x28, S_PREFETCH_DATA_PC_REL>;
+
+multiclass SMEM_Real_Probe_gfx12<bits<6> op> {
+  defvar ps = NAME;
+  def _IMM_gfx12      : SMEM_Real_Prefetch_gfx12<op, !cast<SM_Probe_Pseudo>(ps#_IMM)>;
+  def _SGPR_IMM_gfx12 : SMEM_Real_Prefetch_gfx12<op, !cast<SM_Probe_Pseudo>(ps#_SGPR_IMM)>;
+}
+
+defm S_ATC_PROBE        : SMEM_Real_Probe_gfx12<0x22>;
+defm S_ATC_PROBE_BUFFER : SMEM_Real_Probe_gfx12<0x23>;
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_err.s b/llvm/test/MC/AMDGPU/gfx11_asm_err.s
index 9726dea77f98ef..088ee416692b88 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_err.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_err.s
@@ -158,3 +158,9 @@ scratch_store_b128 off, v[2:5], s0 offset:8000000
 
 flat_atomic_add_f32 v1, v[0:1], v2 offset:-1
 // GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: expected a 12-bit unsigned offset
+
+s_load_b96 s[20:22], s[2:3], s0
+// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+s_buffer_load_b96 s[20:22], s[4:7], s0
+// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_smem.s b/llvm/test/MC/AMDGPU/gfx12_asm_smem.s
index ed7ad5bb0c4e82..1566b9c04e3494 100644
--- a/llvm/test/MC/AMDGPU/gfx12_asm_smem.s
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_smem.s
@@ -33,3 +33,813 @@ s_buffer_prefetch_data s[20:23], 100, s10, 7
 
 s_buffer_prefetch_data s[20:23], 100, null, 7
 // GFX12: s_buffer_prefetch_data s[20:23], 0x64, null, 7 ; encoding: [0xca,0xe1,0x04,0xf4,0x64,0x00,0x00,0xf8]
+
+s_load_b32 s5, s[2:3], s0
+// GFX12: s_load_b32 s5, s[2:3], s0 offset:0x0    ; encoding: [0x41,0x01,0x00,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_b32 s101, s[2:3], s0
+// GFX12: s_load_b32 s101, s[2:3], s0 offset:0x0  ; encoding: [0x41,0x19,0x00,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_b32 vcc_lo, s[2:3], s0
+// GFX12: s_load_b32 vcc_lo, s[2:3], s0 offset:0x0 ; encoding: [0x81,0x1a,0x00,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_b32 vcc_hi, s[2:3], s0
+// GFX12: s_load_b32 vcc_hi, s[2:3], s0 offset:0x0 ; encoding: [0xc1,0x1a,0x00,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_b32 s5, s[4:5], s0
+// GFX12: s_load_b32 s5, s[4:5], s0 offset:0x0    ; encoding: [0x42,0x01,0x00,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_b32 s5, s[100:101], s0
+// GFX12: s_load_b32 s5, s[100:101], s0 offset:0x0 ; encoding: [0x72,0x01,0x00,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_b32 s5, vcc, s0
+// GFX12: s_load_b32 s5, vcc, s0 offset:0x0       ; encoding: [0x75,0x01,0x00,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_b32 s5, s[2:3], s101
+// GFX12: s_load_b32 s5, s[2:3], s101 offset:0x0  ; encoding: [0x41,0x01,0x00,0xf4,0x00,0x00,0x00,0xca]
+
+s_load_b32 s5, s[2:3], vcc_lo
+// GFX12: s_load_b32 s5, s[2:3], vcc_lo offset:0x0 ; encoding: [0x41,0x01,0x00,0xf4,0x00,0x00,0x00,0xd4]
+
+s_load_b32 s5, s[2:3], vcc_hi
+// GFX12: s_load_b32 s5, s[2:3], vcc_hi offset:0x0 ; encoding: [0x41,0x01,0x00,0xf4,0x00,0x00,0x00,0xd6]
+
+s_load_b32 s5, s[2:3], m0
+// GFX12: s_load_b32 s5, s[2:3], m0 offset:0x0    ; encoding: [0x41,0x01,0x00,0xf4,0x00,0x00,0x00,0xfa]
+
+s_load_b32 s5, s[2:3], 0x0
+// GFX12: s_load_b32 s5, s[2:3], 0x0              ; encoding: [0x41,0x01,0x00,0xf4,0x00,0x00,0x00,0xf8]
+
+s_load_b32 s5, s[2:3], s7 offset:0x12345
+// GFX12: s_load_b32 s5, s[2:3], s7 offset:0x12345 ; encoding: [0x41,0x01,0x00,0xf4,0x45,0x23,0x01,0x0e]
+
+s_load_b64 s[10:11], s[2:3], s0
+// GFX12: s_load_b64 s[10:11], s[2:3], s0 offset:0x0 ; encoding: [0x81,0x22,0x00,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_b64 s[12:13], s[2:3], s0
+// GFX12: s_load_b64 s[12:13], s[2:3], s0 offset:0x0 ; encoding: [0x01,0x23,0x00,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_b64 s[100:101], s[2:3], s0
+// GFX12: s_load_b64 s[100:101], s[2:3], s0 offset:0x0 ; encoding: [0x01,0x39,0x00,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_b64 vcc, s[2:3], s0
+// GFX12: s_load_b64 vcc, s[2:3], s0 offset:0x0   ; encoding: [0x81,0x3a,0x00,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_b64 s[10:11], s[4:5], s0
+// GFX12: s_load_b64 s[10:11], s[4:5], s0 offset:0x0 ; encoding: [0x82,0x22,0x00,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_b64 s[10:11], s[100:101], s0
+// GFX12: s_load_b64 s[10:11], s[100:101], s0 offset:0x0 ; encoding: [0xb2,0x22,0x00,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_b64 s[10:11], vcc, s0
+// GFX12: s_load_b64 s[10:11], vcc, s0 offset:0x0 ; encoding: [0xb5,0x22,0x00,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_b64 s[10:11], s[2:3], s101
+// GFX12: s_load_b64 s[10:11], s[2:3], s101 offset:0x0 ; encoding: [0x81,0x22,0x00,0xf4,0x00,0x00,0x00,0xca]
+
+s_load_b64 s[10:11], s[2:3], vcc_lo
+// GFX12: s_load_b64 s[10:11], s[2:3], vcc_lo offset:0x0 ; encoding: [0x81,0x22,0x00,0xf4,0x00,0x00,0x00,0xd4]
+
+s_load_b64 s[10:11], s[2:3], vcc_hi
+// GFX12: s_load_b64 s[10:11], s[2:3], vcc_hi offset:0x0 ; encoding: [0x81,0x22,0x00,0xf4,0x00,0x00,0x00,0xd6]
+
+s_load_b64 s[10:11], s[2:3], m0
+// GFX12: s_load_b64 s[10:11], s[2:3], m0 offset:0x0 ; encoding: [0x81,0x22,0x00,0xf4,0x00,0x00,0x00,0xfa]
+
+s_load_b64 s[10:11], s[2:3], 0x0
+// GFX12: s_load_b64 s[10:11], s[2:3], 0x0        ; encoding: [0x81,0x22,0x00,0xf4,0x00,0x00,0x00,0xf8]
+
+s_load_b96 s[20:22], s[2:3], s0
+// GFX12: s_load_b96 s[20:22], s[2:3], s0 offset:0x0 ; encoding: [0x01,0xa5,0x00,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_b96 s[24:26], s[2:3], s0
+// GFX12: s_load_b96 s[24:26], s[2:3], s0 offset:0x0 ; encoding: [0x01,0xa6,0x00,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_b96 s[96:98], s[2:3], s0
+// GFX12: s_load_b96 s[96:98], s[2:3], s0 offset:0x0 ; encoding: [0x01,0xb8,0x00,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_b96 s[20:22], s[4:5], s0
+// GFX12: s_load_b96 s[20:22], s[4:5], s0 offset:0x0 ; encoding: [0x02,0xa5,0x00,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_b96 s[20:22], s[100:101], s0
+// GFX12: s_load_b96 s[20:22], s[100:101], s0 offset:0x0 ; encoding: [0x32,0xa5,0x00,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_b96 s[20:22], vcc, s0
+// GFX12: s_load_b96 s[20:22], vcc, s0 offset:0x0 ; encoding: [0x35,0xa5,0x00,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_b96 s[20:22], s[2:3], s101
+// GFX12: s_load_b96 s[20:22], s[2:3], s101 offset:0x0 ; encoding: [0x01,0xa5,0x00,0xf4,0x00,0x00,0x00,0xca]
+
+s_load_b96 s[20:22], s[2:3], vcc_lo
+// GFX12: s_load_b96 s[20:22], s[2:3], vcc_lo offset:0x0 ; encoding: [0x01,0xa5,0x00,0xf4,0x00,0x00,0x00,0xd4]
+
+s_load_b96 s[20:22], s[2:3], vcc_hi
+// GFX12: s_load_b96 s[20:22], s[2:3], vcc_hi offset:0x0 ; encoding: [0x01,0xa5,0x00,0xf4,0x00,0x00,0x00,0xd6]
+
+s_load_b96 s[20:22], s[2:3], m0
+// GFX12: s_load_b96 s[20:22], s[2:3], m0 offset:0x0 ; encoding: [0x01,0xa5,0x00,0xf4,0x00,0x00,0x00,0xfa]
+
+s_load_b96 s[20:22], s[2:3], 0x0
+// GFX12: s_load_b96 s[20:22], s[2:3], 0x0       ; encoding: [0x01,0xa5,0x00,0xf4,0x00,0x00,0x00,0xf8]
+
+s_load_b128 s[20:23], s[2:3], s0
+// GFX12: s_load_b128 s[20:23], s[2:3], s0 offset:0x0 ; encoding: [0x01,0x45,0x00,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_b128 s[24:27], s[2:3], s0
+// GFX12: s_load_b128 s[24:27], s[2:3], s0 offset:0x0 ; encoding: [0x01,0x46,0x00,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_b128 s[96:99], s[2:3], s0
+// GFX12: s_load_b128 s[96:99], s[2:3], s0 offset:0x0 ; encoding: [0x01,0x58,0x00,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_b128 s[20:23], s[4:5], s0
+// GFX12: s_load_b128 s[20:23], s[4:5], s0 offset:0x0 ; encoding: [0x02,0x45,0x00,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_b128 s[20:23], s[100:101], s0
+// GFX12: s_load_b128 s[20:23], s[100:101], s0 offset:0x0 ; encoding: [0x32,0x45,0x00,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_b128 s[20:23], vcc, s0
+// GFX12: s_load_b128 s[20:23], vcc, s0 offset:0x0 ; encoding: [0x35,0x45,0x00,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_b128 s[20:23], s[2:3], s101
+// GFX12: s_load_b128 s[20:23], s[2:3], s101 offset:0x0 ; encoding: [0x01,0x45,0x00,0xf4,0x00,0x00,0x00,0xca]
+
+s_load_b128 s[20:23], s[2:3], vcc_lo
+// GFX12: s_load_b128 s[20:23], s[2:3], vcc_lo offset:0x0 ; encoding: [0x01,0x45,0x00,0xf4,0x00,0x00,0x00,0xd4]
+
+s_load_b128 s[20:23], s[2:3], vcc_hi
+// GFX12: s_load_b128 s[20:23], s[2:3], vcc_hi offset:0x0 ; encoding: [0x01,0x45,0x00,0xf4,0x00,0x00,0x00,0xd6]
+
+s_load_b128 s[20:23], s[2:3], m0
+// GFX12: s_load_b128 s[20:23], s[2:3], m0 offset:0x0 ; encoding: [0x01,0x45,0x00,0xf4,0x00,0x00,0x00,0xfa]
+
+s_load_b128 s[20:23], s[2:3], 0x0
+// GFX12: s_load_b128 s[20:23], s[2:3], 0x0       ; encoding: [0x01,0x45,0x00,0xf4,0x00,0x00,0x00,0xf8]
+
+s_load_b256 s[20:27], s[2:3], s0
+// GFX12: s_load_b256 s[20:27], s[2:3], s0 offset:0x0 ; encoding: [0x01,0x65,0x00,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_b256 s[24:31], s[2:3], s0
+// GFX12: s_load_b256 s[24:31], s[2:3], s0 offset:0x0 ; encoding: [0x01,0x66,0x00,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_b256 s[92:99], s[2:3], s0
+// GFX12: s_load_b256 s[92:99], s[2:3], s0 offset:0x0 ; encoding: [0x01,0x77,0x00,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_b256 s[20:27], s[4:5], s0
+// GFX12: s_load_b256 s[20:27], s[4:5], s0 offset:0x0 ; encoding: [0x02,0x65,0x00,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_b256 s[20:27], s[100:101], s0
+// GFX12: s_load_b256 s[20:27], s[100:101], s0 offset:0x0 ; encoding: [0x32,0x65,0x00,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_b256 s[20:27], vcc, s0
+// GFX12: s_load_b256 s[20:27], vcc, s0 offset:0x0 ; encoding: [0x35,0x65,0x00,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_b256 s[20:27], s[2:3], s101
+// GFX12: s_load_b256 s[20:27], s[2:3], s101 offset:0x0 ; encoding: [0x01,0x65,0x00,0xf4,0x00,0x00,0x00,0xca]
+
+s_load_b256 s[20:27], s[2:3], vcc_lo
+// GFX12: s_load_b256 s[20:27], s[2:3], vcc_lo offset:0x0 ; encoding: [0x01,0x65,0x00,0xf4,0x00,0x00,0x00,0xd4]
+
+s_load_b256 s[20:27], s[2:3], vcc_hi
+// GFX12: s_load_b256 s[20:27], s[2:3], vcc_hi offset:0x0 ; encoding: [0x01,0x65,0x00,0xf4,0x00,0x00,0x00,0xd6]
+
+s_load_b256 s[20:27], s[2:3], m0
+// GFX12: s_load_b256 s[20:27], s[2:3], m0 offset:0x0 ; encoding: [0x01,0x65,0x00,0xf4,0x00,0x00,0x00,0xfa]
+
+s_load_b256 s[20:27], s[2:3], 0x0
+// GFX12: s_load_b256 s[20:27], s[2:3], 0x0       ; encoding: [0x01,0x65,0x00,0xf4,0x00,0x00,0x00,0xf8]
+
+s_load_b512 s[20:35], s[2:3], s0
+// GFX12: s_load_b512 s[20:35], s[2:3], s0 offset:0x0 ; encoding: [0x01,0x85,0x00,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_b512 s[24:39], s[2:3], s0
+// GFX12: s_load_b512 s[24:39], s[2:3], s0 offset:0x0 ; encoding: [0x01,0x86,0x00,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_b512 s[84:99], s[2:3], s0
+// GFX12: s_load_b512 s[84:99], s[2:3], s0 offset:0x0 ; encoding: [0x01,0x95,0x00,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_b512 s[20:35], s[4:5], s0
+// GFX12: s_load_b512 s[20:35], s[4:5], s0 offset:0x0 ; encoding: [0x02,0x85,0x00,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_b512 s[20:35], s[100:101], s0
+// GFX12: s_load_b512 s[20:35], s[100:101], s0 offset:0x0 ; encoding: [0x32,0x85,0x00,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_b512 s[20:35], vcc, s0
+// GFX12: s_load_b512 s[20:35], vcc, s0 offset:0x0 ; encoding: [0x35,0x85,0x00,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_b512 s[20:35], s[2:3], s101
+// GFX12: s_load_b512 s[20:35], s[2:3], s101 offset:0x0 ; encoding: [0x01,0x85,0x00,0xf4,0x00,0x00,0x00,0xca]
+
+s_load_b512 s[20:35], s[2:3], vcc_lo
+// GFX12: s_load_b512 s[20:35], s[2:3], vcc_lo offset:0x0 ; encoding: [0x01,0x85,0x00,0xf4,0x00,0x00,0x00,0xd4]
+
+s_load_b512 s[20:35], s[2:3], vcc_hi
+// GFX12: s_load_b512 s[20:35], s[2:3], vcc_hi offset:0x0 ; encoding: [0x01,0x85,0x00,0xf4,0x00,0x00,0x00,0xd6]
+
+s_load_b512 s[20:35], s[2:3], m0
+// GFX12: s_load_b512 s[20:35], s[2:3], m0 offset:0x0 ; encoding: [0x01,0x85,0x00,0xf4,0x00,0x00,0x00,0xfa]
+
+s_load_b512 s[20:35], s[2:3], 0x0
+// GFX12: s_load_b512 s[20:35], s[2:3], 0x0       ; encoding: [0x01,0x85,0x00,0xf4,0x00,0x00,0x00,0xf8]
+
+s_buffer_load_b32 s5, s[4:7], s0
+// GFX12: s_buffer_load_b32 s5, s[4:7], s0 offset:0x0 ; encoding: [0x42,0x01,0x02,0xf4,0x00,0x00,0x00,0x00]
+
+s_buffer_load_b32 s101, s[4:7], s0
+// GFX12: s_buffer_load_b32 s101, s[4:7], s0 offset:0x0 ; encoding: [0x42,0x19,0x02,0xf4,0x00,0x00,0x00,0x00]
+
+s_buffer_load_b32 vcc_lo, s[4:7], s0
+// GFX12: s_buffer_load_b32 vcc_lo, s[4:7], s0 offset:0x0 ; encoding: [0x82,0x1a,0x02,0xf4,0x00,0x00,0x00,0x00]
+
+s_buffer_load_b32 vcc_hi, s[4:7], s0
+// GFX12: s_buffer_load_b32 vcc_hi, s[4:7], s0 offset:0x0 ; encoding: [0xc2,0x1a,0x02,0xf4,0x00,0x00,0x00,0x00]
+
+s_buffer_load_b32 s5, s[8:11], s0
+// GFX12: s_buffer_load_b32 s5, s[8:11], s0 offset:0x0 ; encoding: [0x44,0x01,0x02,0xf4,0x00,0x00,0x00,0x00]
+
+s_buffer_load_b32 s5, s[96:99], s0
+// GFX12: s_buffer_load_b32 s5, s[96:99], s0 offset:0x0 ; encoding: [0x70,0x01,0x02,0xf4,0x00,0x00,0x00,0x00]
+
+s_buffer_load_b32 s5, s[4:7], s101
+// GFX12: s_buffer_load_b32 s5, s[4:7], s101 offset:0x0 ; encoding: [0x42,0x01,0x02,0xf4,0x00,0x00,0x00,0xca]
+
+s_buffer_load_b32 s5, s[4:7], vcc_lo
+// GFX12: s_buffer_load_b32 s5, s[4:7], vcc_lo offset:0x0 ; encoding: [0x42,0x01,0x02,0xf4,0x00,0x00,0x00,0xd4]
+
+s_buffer_load_b32 s5, s[4:7], vcc_hi
+// GFX12: s_buffer_load_b32 s5, s[4:7], vcc_hi offset:0x0 ; encoding: [0x42,0x01,0x02,0xf4,0x00,0x00,0x00,0xd6]
+
+s_buffer_load_b32 s5, s[4:7], m0
+// GFX12: s_buffer_load_b32 s5, s[4:7], m0 offset:0x0 ; encoding: [0x42,0x01,0x02,0xf4,0x00,0x00,0x00,0xfa]
+
+s_buffer_load_b32 s5, s[4:7], 0x0
+// GFX12: s_buffer_load_b32 s5, s[4:7], 0x0       ; encoding: [0x42,0x01,0x02,0xf4,0x00,0x00,0x00,0xf8]
+
+s_buffer_load_b64 s[10:11], s[4:7], s0
+// GFX12: s_buffer_load_b64 s[10:11], s[4:7], s0 offset:0x0 ; encoding: [0x82,0x22,0x02,0xf4,0x00,0x00,0x00,0x00]
+
+s_buffer_load_b64 s[12:13], s[4:7], s0
+// GFX12: s_buffer_load_b64 s[12:13], s[4:7], s0 offset:0x0 ; encoding: [0x02,0x23,0x02,0xf4,0x00,0x00,0x00,0x00]
+
+s_buffer_load_b64 s[100:101], s[4:7], s0
+// GFX12: s_buffer_load_b64 s[100:101], s[4:7], s0 offset:0x0 ; encoding: [0x02,0x39,0x02,0xf4,0x00,0x00,0x00,0x00]
+
+s_buffer_load_b64 vcc, s[4:7], s0
+// GFX12: s_buffer_load_b64 vcc, s[4:7], s0 offset:0x0 ; encoding: [0x82,0x3a,0x02,0xf4,0x00,0x00,0x00,0x00]
+
+s_buffer_load_b64 s[10:11], s[8:11], s0
+// GFX12: s_buffer_load_b64 s[10:11], s[8:11], s0 offset:0x0 ; encoding: [0x84,0x22,0x02,0xf4,0x00,0x00,0x00,0x00]
+
+s_buffer_load_b64 s[10:11], s[96:99], s0
+// GFX12: s_buffer_load_b64 s[10:11], s[96:99], s0 offset:0x0 ; encoding: [0xb0,0x22,0x02,0xf4,0x00,0x00,0x00,0x00]
+
+s_buffer_load_b64 s[10:11], s[4:7], s101
+// GFX12: s_buffer_load_b64 s[10:11], s[4:7], s101 offset:0x0 ; encoding: [0x82,0x22,0x02,0xf4,0x00,0x00,0x00,0xca]
+
+s_buffer_load_b64 s[10:11], s[4:7], vcc_lo
+// GFX12: s_buffer_load_b64 s[10:11], s[4:7], vcc_lo offset:0x0 ; encoding: [0x82,0x22,0x02,0xf4,0x00,0x00,0x00,0xd4]
+
+s_buffer_load_b64 s[10:11], s[4:7], vcc_hi
+// GFX12: s_buffer_load_b64 s[10:11], s[4:7], vcc_hi offset:0x0 ; encoding: [0x82,0x22,0x02,0xf4,0x00,0x00,0x00,0xd6]
+
+s_buffer_load_b64 s[10:11], s[4:7], m0
+// GFX12: s_buffer_load_b64 s[10:11], s[4:7], m0 offset:0x0 ; encoding: [0x82,0x22,0x02,0xf4,0x00,0x00,0x00,0xfa]
+
+s_buffer_load_b64 s[10:11], s[4:7], 0x0
+// GFX12: s_buffer_load_b64 s[10:11], s[4:7], 0x0 ; encoding: [0x82,0x22,0x02,0xf4,0x00,0x00,0x00,0xf8]
+
+s_buffer_load_b96 s[20:22], s[4:7], s0
+// GFX12: s_buffer_load_b96 s[20:22], s[4:7], s0 offset:0x0 ; encoding: [0x02,0xa5,0x02,0xf4,0x00,0x00,0x00,0x00]
+
+s_buffer_load_b96 s[24:26], s[4:7], s0
+// GFX12: s_buffer_load_b96 s[24:26], s[4:7], s0 offset:0x0 ; encoding: [0x02,0xa6,0x02,0xf4,0x00,0x00,0x00,0x00]
+
+s_buffer_load_b96 s[96:98], s[4:7], s0
+// GFX12: s_buffer_load_b96 s[96:98], s[4:7], s0 offset:0x0 ; encoding: [0x02,0xb8,0x02,0xf4,0x00,0x00,0x00,0x00]
+
+s_buffer_load_b96 s[20:22], s[8:11], s0
+// GFX12: s_buffer_load_b96 s[20:22], s[8:11], s0 offset:0x0 ; encoding: [0x04,0xa5,0x02,0xf4,0x00,0x00,0x00,0x00]
+
+s_buffer_load_b96 s[20:22], s[96:99], s0
+// GFX12: s_buffer_load_b96 s[20:22], s[96:99], s0 offset:0x0 ; encoding: [0x30,0xa5,0x02,0xf4,0x00,0x00,0x00,0x00]
+
+s_buffer_load_b96 s[20:22], s[4:7], s101
+// GFX12: s_buffer_load_b96 s[20:22], s[4:7], s101 offset:0x0 ; encoding: [0x02,0xa5,0x02,0xf4,0x00,0x00,0x00,0xca]
+
+s_buffer_load_b96 s[20:22], s[4:7], vcc_lo
+// GFX12: s_buffer_load_b96 s[20:22], s[4:7], vcc_lo offset:0x0 ; encoding: [0x02,0xa5,0x02,0xf4,0x00,0x00,0x00,0xd4]
+
+s_buffer_load_b96 s[20:22], s[4:7], vcc_hi
+// GFX12: s_buffer_load_b96 s[20:22], s[4:7], vcc_hi offset:0x0 ; encoding: [0x02,0xa5,0x02,0xf4,0x00,0x00,0x00,0xd6]
+
+s_buffer_load_b96 s[20:22], s[4:7], m0
+// GFX12: s_buffer_load_b96 s[20:22], s[4:7], m0 offset:0x0 ; encoding: [0x02,0xa5,0x02,0xf4,0x00,0x00,0x00,0xfa]
+
+s_buffer_load_b96 s[20:22], s[4:7], 0x0
+// GFX12: s_buffer_load_b96 s[20:22], s[4:7], 0x0 ; encoding: [0x02,0xa5,0x02,0xf4,0x00,0x00,0x00,0xf8]
+
+s_buffer_load_b128 s[20:23], s[4:7], s0
+// GFX12: s_buffer_load_b128 s[20:23], s[4:7], s0 offset:0x0 ; encoding: [0x02,0x45,0x02,0xf4,0x00,0x00,0x00,0x00]
+
+s_buffer_load_b128 s[24:27], s[4:7], s0
+// GFX12: s_buffer_load_b128 s[24:27], s[4:7], s0 offset:0x0 ; encoding: [0x02,0x46,0x02,0xf4,0x00,0x00,0x00,0x00]
+
+s_buffer_load_b128 s[96:99], s[4:7], s0
+// GFX12: s_buffer_load_b128 s[96:99], s[4:7], s0 offset:0x0 ; encoding: [0x02,0x58,0x02,0xf4,0x00,0x00,0x00,0x00]
+
+s_buffer_load_b128 s[20:23], s[8:11], s0
+// GFX12: s_buffer_load_b128 s[20:23], s[8:11], s0 offset:0x0 ; encoding: [0x04,0x45,0x02,0xf4,0x00,0x00,0x00,0x00]
+
+s_buffer_load_b128 s[20:23], s[96:99], s0
+// GFX12: s_buffer_load_b128 s[20:23], s[96:99], s0 offset:0x0 ; encoding: [0x30,0x45,0x02,0xf4,0x00,0x00,0x00,0x00]
+
+s_buffer_load_b128 s[20:23], s[4:7], s101
+// GFX12: s_buffer_load_b128 s[20:23], s[4:7], s101 offset:0x0 ; encoding: [0x02,0x45,0x02,0xf4,0x00,0x00,0x00,0xca]
+
+s_buffer_load_b128 s[20:23], s[4:7], vcc_lo
+// GFX12: s_buffer_load_b128 s[20:23], s[4:7], vcc_lo offset:0x0 ; encoding: [0x02,0x45,0x02,0xf4,0x00,0x00,0x00,0xd4]
+
+s_buffer_load_b128 s[20:23], s[4:7], vcc_hi
+// GFX12: s_buffer_load_b128 s[20:23], s[4:7], vcc_hi offset:0x0 ; encoding: [0x02,0x45,0x02,0xf4,0x00,0x00,0x00,0xd6]
+
+s_buffer_load_b128 s[20:23], s[4:7], m0
+// GFX12: s_buffer_load_b128 s[20:23], s[4:7], m0 offset:0x0 ; encoding: [0x02,0x45,0x02,0xf4,0x00,0x00,0x00,0xfa]
+
+s_buffer_load_b128 s[20:23], s[4:7], 0x0
+// GFX12: s_buffer_load_b128 s[20:23], s[4:7], 0x0 ; encoding: [0x02,0x45,0x02,0xf4,0x00,0x00,0x00,0xf8]
+
+s_buffer_load_b256 s[20:27], s[4:7], s0
+// GFX12: s_buffer_load_b256 s[20:27], s[4:7], s0 offset:0x0 ; encoding: [0x02,0x65,0x02,0xf4,0x00,0x00,0x00,0x00]
+
+s_buffer_load_b256 s[24:31], s[4:7], s0
+// GFX12: s_buffer_load_b256 s[24:31], s[4:7], s0 offset:0x0 ; encoding: [0x02,0x66,0x02,0xf4,0x00,0x00,0x00,0x00]
+
+s_buffer_load_b256 s[92:99], s[4:7], s0
+// GFX12: s_buffer_load_b256 s[92:99], s[4:7], s0 offset:0x0 ; encoding: [0x02,0x77,0x02,0xf4,0x00,0x00,0x00,0x00]
+
+s_buffer_load_b256 s[20:27], s[8:11], s0
+// GFX12: s_buffer_load_b256 s[20:27], s[8:11], s0 offset:0x0 ; encoding: [0x04,0x65,0x02,0xf4,0x00,0x00,0x00,0x00]
+
+s_buffer_load_b256 s[20:27], s[96:99], s0
+// GFX12: s_buffer_load_b256 s[20:27], s[96:99], s0 offset:0x0 ; encoding: [0x30,0x65,0x02,0xf4,0x00,0x00,0x00,0x00]
+
+s_buffer_load_b256 s[20:27], s[4:7], s101
+// GFX12: s_buffer_load_b256 s[20:27], s[4:7], s101 offset:0x0 ; encoding: [0x02,0x65,0x02,0xf4,0x00,0x00,0x00,0xca]
+
+s_buffer_load_b256 s[20:27], s[4:7], vcc_lo
+// GFX12: s_buffer_load_b256 s[20:27], s[4:7], vcc_lo offset:0x0 ; encoding: [0x02,0x65,0x02,0xf4,0x00,0x00,0x00,0xd4]
+
+s_buffer_load_b256 s[20:27], s[4:7], vcc_hi
+// GFX12: s_buffer_load_b256 s[20:27], s[4:7], vcc_hi offset:0x0 ; encoding: [0x02,0x65,0x02,0xf4,0x00,0x00,0x00,0xd6]
+
+s_buffer_load_b256 s[20:27], s[4:7], m0
+// GFX12: s_buffer_load_b256 s[20:27], s[4:7], m0 offset:0x0 ; encoding: [0x02,0x65,0x02,0xf4,0x00,0x00,0x00,0xfa]
+
+s_buffer_load_b256 s[20:27], s[4:7], 0x0
+// GFX12: s_buffer_load_b256 s[20:27], s[4:7], 0x0 ; encoding: [0x02,0x65,0x02,0xf4,0x00,0x00,0x00,0xf8]
+
+s_buffer_load_b512 s[20:35], s[4:7], s0
+// GFX12: s_buffer_load_b512 s[20:35], s[4:7], s0 offset:0x0 ; encoding: [0x02,0x85,0x02,0xf4,0x00,0x00,0x00,0x00]
+
+s_buffer_load_b512 s[24:39], s[4:7], s0
+// GFX12: s_buffer_load_b512 s[24:39], s[4:7], s0 offset:0x0 ; encoding: [0x02,0x86,0x02,0xf4,0x00,0x00,0x00,0x00]
+
+s_buffer_load_b512 s[84:99], s[4:7], s0
+// GFX12: s_buffer_load_b512 s[84:99], s[4:7], s0 offset:0x0 ; encoding: [0x02,0x95,0x02,0xf4,0x00,0x00,0x00,0x00]
+
+s_buffer_load_b512 s[20:35], s[8:11], s0
+// GFX12: s_buffer_load_b512 s[20:35], s[8:11], s0 offset:0x0 ; encoding: [0x04,0x85,0x02,0xf4,0x00,0x00,0x00,0x00]
+
+s_buffer_load_b512 s[20:35], s[96:99], s0
+// GFX12: s_buffer_load_b512 s[20:35], s[96:99], s0 offset:0x0 ; encoding: [0x30,0x85,0x02,0xf4,0x00,0x00,0x00,0x00]
+
+s_buffer_load_b512 s[20:35], s[4:7], s101
+// GFX12: s_buffer_load_b512 s[20:35], s[4:7], s101 offset:0x0 ; encoding: [0x02,0x85,0x02,0xf4,0x00,0x00,0x00,0xca]
+
+s_buffer_load_b512 s[20:35], s[4:7], vcc_lo
+// GFX12: s_buffer_load_b512 s[20:35], s[4:7], vcc_lo offset:0x0 ; encoding: [0x02,0x85,0x02,0xf4,0x00,0x00,0x00,0xd4]
+
+s_buffer_load_b512 s[20:35], s[4:7], vcc_hi
+// GFX12: s_buffer_load_b512 s[20:35], s[4:7], vcc_hi offset:0x0 ; encoding: [0x02,0x85,0x02,0xf4,0x00,0x00,0x00,0xd6]
+
+s_buffer_load_b512 s[20:35], s[4:7], m0
+// GFX12: s_buffer_load_b512 s[20:35], s[4:7], m0 offset:0x0 ; encoding: [0x02,0x85,0x02,0xf4,0x00,0x00,0x00,0xfa]
+
+s_buffer_load_b512 s[20:35], s[4:7], 0x0
+// GFX12: s_buffer_load_b512 s[20:35], s[4:7], 0x0 ; encoding: [0x02,0x85,0x02,0xf4,0x00,0x00,0x00,0xf8]
+
+s_dcache_inv
+// GFX12: s_dcache_inv                            ; encoding: [0x00,0x20,0x04,0xf4,0x00,0x00,0x00,0x00]
+
+s_atc_probe 7, s[4:5], s2
+// GFX12: encoding: [0xc2,0x41,0x04,0xf4,0x00,0x00,0x00,0x04]
+
+s_atc_probe 7, s[4:5], 0x64
+// GFX12: encoding: [0xc2,0x41,0x04,0xf4,0x64,0x00,0x00,0xf8]
+
+s_atc_probe 7, s[4:5], s9 offset:0x64
+// GFX12: encoding: [0xc2,0x41,0x04,0xf4,0x64,0x00,0x00,0x12]
+
+s_atc_probe_buffer 7, s[8:11], s2
+// GFX12: encoding: [0xc4,0x61,0x04,0xf4,0x00,0x00,0x00,0x04]
+
+s_atc_probe_buffer 7, s[8:11], 0x64
+// GFX12: encoding: [0xc4,0x61,0x04,0xf4,0x64,0x00,0x00,0xf8]
+
+s_atc_probe_buffer 7, s[8:11], s9 offset:0x64
+// GFX12: encoding: [0xc4,0x61,0x04,0xf4,0x64,0x00,0x00,0x12]
+
+s_load_b32 s5, s[4:5], s0 offset:0x0 th:TH_LOAD_RT
+// GFX12: s_load_b32 s5, s[4:5], s0 offset:0x0 ; encoding: [0x42,0x01,0x00,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_b32 s5, s[4:5], s0 offset:0x0 th:TH_LOAD_NT
+// GFX12: s_load_b32 s5, s[4:5], s0 offset:0x0 th:TH_LOAD_NT ; encoding: [0x42,0x01,0x80,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_b32 s5, s[4:5], s0 offset:0x0 th:TH_LOAD_HT
+// GFX12: s_load_b32 s5, s[4:5], s0 offset:0x0 th:TH_LOAD_HT ; encoding: [0x42,0x01,0x00,0xf5,0x00,0x00,0x00,0x00]
+
+s_load_b32 s5, s[4:5], s0 offset:0x0 th:TH_LOAD_LU
+// GFX12: s_load_b32 s5, s[4:5], s0 offset:0x0 th:TH_LOAD_LU ; encoding: [0x42,0x01,0x80,0xf5,0x00,0x00,0x00,0x00]
+
+s_load_b32 s5, s[4:5], s0 offset:0x0 scope:SCOPE_CU
+// GFX12: s_load_b32 s5, s[4:5], s0 offset:0x0    ; encoding: [0x42,0x01,0x00,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_b32 s5, s[4:5], s0 offset:0x0 scope:SCOPE_SE
+// GFX12: s_load_b32 s5, s[4:5], s0 offset:0x0 scope:SCOPE_SE ; encoding: [0x42,0x01,0x20,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_b32 s5, s[4:5], s0 offset:0x0 scope:SCOPE_DEV
+// GFX12: s_load_b32 s5, s[4:5], s0 offset:0x0 scope:SCOPE_DEV ; encoding: [0x42,0x01,0x40,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_b32 s5, s[4:5], s0 offset:0x0 scope:SCOPE_SYS
+// GFX12: s_load_b32 s5, s[4:5], s0 offset:0x0 scope:SCOPE_SYS ; encoding: [0x42,0x01,0x60,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_b32 s5, s[4:5], s0 offset:0x0 th:TH_LOAD_HT scope:SCOPE_SE
+// GFX12: s_load_b32 s5, s[4:5], s0 offset:0x0 th:TH_LOAD_HT scope:SCOPE_SE ; encoding: [0x42,0x01,0x20,0xf5,0x00,0x00,0x00,0x00]
+
+s_load_b32 s5, s[4:5], s0 offset:0x0 th:TH_LOAD_LU scope:SCOPE_SE
+// GFX12: s_load_b32 s5, s[4:5], s0 offset:0x0 th:TH_LOAD_LU scope:SCOPE_SE ; encoding: [0x42,0x01,0xa0,0xf5,0x00,0x00,0x00,0x00]
+
+s_load_b32 s5, s[4:5], s0 offset:0x0 th:TH_LOAD_HT scope:SCOPE_SYS
+// GFX12: s_load_b32 s5, s[4:5], s0 offset:0x0 th:TH_LOAD_HT scope:SCOPE_SYS ; encoding: [0x42,0x01,0x60,0xf5,0x00,0x00,0x00,0x00]
+
+s_load_b64 s[10:11], s[2:3], m0 offset:0x0 th:TH_LOAD_RT
+// GFX12: s_load_b64 s[10:11], s[2:3], m0 offset:0x0 ; encoding: [0x81,0x22,0x00,0xf4,0x00,0x00,0x00,0xfa]
+
+s_load_b64 s[10:11], s[2:3], m0 offset:0x0 th:TH_LOAD_NT
+// GFX12: s_load_b64 s[10:11], s[2:3], m0 offset:0x0 th:TH_LOAD_NT ; encoding: [0x81,0x22,0x80,0xf4,0x00,0x00,0x00,0xfa]
+
+s_load_b64 s[10:11], s[2:3], m0 offset:0x0 th:TH_LOAD_HT
+// GFX12: s_load_b64 s[10:11], s[2:3], m0 offset:0x0 th:TH_LOAD_HT ; encoding: [0x81,0x22,0x00,0xf5,0x00,0x00,0x00,0xfa]
+
+s_load_b64 s[10:11], s[2:3], m0 offset:0x0 th:TH_LOAD_LU
+// GFX12: s_load_b64 s[10:11], s[2:3], m0 offset:0x0 th:TH_LOAD_LU ; encoding: [0x81,0x22,0x80,0xf5,0x00,0x00,0x00,0xfa]
+
+s_load_b64 s[10:11], s[2:3], m0 offset:0x0 scope:SCOPE_CU
+// GFX12: s_load_b64 s[10:11], s[2:3], m0 offset:0x0 ; encoding: [0x81,0x22,0x00,0xf4,0x00,0x00,0x00,0xfa]
+
+s_load_b64 s[10:11], s[2:3], m0 offset:0x0 scope:SCOPE_SE
+// GFX12: s_load_b64 s[10:11], s[2:3], m0 offset:0x0 scope:SCOPE_SE ; encoding: [0x81,0x22,0x20,0xf4,0x00,0x00,0x00,0xfa]
+
+s_load_b64 s[10:11], s[2:3], m0 offset:0x0 scope:SCOPE_DEV
+// GFX12: s_load_b64 s[10:11], s[2:3], m0 offset:0x0 scope:SCOPE_DEV ; encoding: [0x81,0x22,0x40,0xf4,0x00,0x00,0x00,0xfa]
+
+s_load_b64 s[10:11], s[2:3], m0 offset:0x0 scope:SCOPE_SYS
+// GFX12: s_load_b64 s[10:11], s[2:3], m0 offset:0x0 scope:SCOPE_SYS ; encoding: [0x81,0x22,0x60,0xf4,0x00,0x00,0x00,0xfa]
+
+s_load_b64 s[10:11], s[2:3], m0 offset:0x0 th:TH_LOAD_HT scope:SCOPE_SE
+// GFX12: s_load_b64 s[10:11], s[2:3], m0 offset:0x0 th:TH_LOAD_HT scope:SCOPE_SE ; encoding: [0x81,0x22,0x20,0xf5,0x00,0x00,0x00,0xfa]
+
+s_load_b64 s[10:11], s[2:3], m0 offset:0x0 th:TH_LOAD_LU scope:SCOPE_SE
+// GFX12: s_load_b64 s[10:11], s[2:3], m0 offset:0x0 th:TH_LOAD_LU scope:SCOPE_SE ; encoding: [0x81,0x22,0xa0,0xf5,0x00,0x00,0x00,0xfa]
+
+s_load_b64 s[10:11], s[2:3], m0 offset:0x0 th:TH_LOAD_HT scope:SCOPE_SYS
+// GFX12: s_load_b64 s[10:11], s[2:3], m0 offset:0x0 th:TH_LOAD_HT scope:SCOPE_SYS ; encoding: [0x81,0x22,0x60,0xf5,0x00,0x00,0x00,0xfa]
+
+s_load_b96 s[20:22], s[2:3], vcc_lo offset:0x0 th:TH_LOAD_RT
+// GFX12: s_load_b96 s[20:22], s[2:3], vcc_lo offset:0x0 ; encoding: [0x01,0xa5,0x00,0xf4,0x00,0x00,0x00,0xd4]
+
+s_load_b96 s[20:22], s[2:3], vcc_lo offset:0x0 th:TH_LOAD_NT
+// GFX12: s_load_b96 s[20:22], s[2:3], vcc_lo offset:0x0 th:TH_LOAD_NT ; encoding: [0x01,0xa5,0x80,0xf4,0x00,0x00,0x00,0xd4]
+
+s_load_b96 s[20:22], s[2:3], vcc_lo offset:0x0 th:TH_LOAD_HT
+// GFX12: s_load_b96 s[20:22], s[2:3], vcc_lo offset:0x0 th:TH_LOAD_HT ; encoding: [0x01,0xa5,0x00,0xf5,0x00,0x00,0x00,0xd4]
+
+s_load_b96 s[20:22], s[2:3], vcc_lo offset:0x0 th:TH_LOAD_LU
+// GFX12: s_load_b96 s[20:22], s[2:3], vcc_lo offset:0x0 th:TH_LOAD_LU ; encoding: [0x01,0xa5,0x80,0xf5,0x00,0x00,0x00,0xd4]
+
+s_load_b96 s[20:22], s[2:3], vcc_lo offset:0x0 scope:SCOPE_CU
+// GFX12: s_load_b96 s[20:22], s[2:3], vcc_lo offset:0x0 ; encoding: [0x01,0xa5,0x00,0xf4,0x00,0x00,0x00,0xd4]
+
+s_load_b96 s[20:22], s[2:3], vcc_lo offset:0x0 scope:SCOPE_SE
+// GFX12: s_load_b96 s[20:22], s[2:3], vcc_lo offset:0x0 scope:SCOPE_SE ; encoding: [0x01,0xa5,0x20,0xf4,0x00,0x00,0x00,0xd4]
+
+s_load_b96 s[20:22], s[2:3], vcc_lo offset:0x0 scope:SCOPE_DEV
+// GFX12: s_load_b96 s[20:22], s[2:3], vcc_lo offset:0x0 scope:SCOPE_DEV ; encoding: [0x01,0xa5,0x40,0xf4,0x00,0x00,0x00,0xd4]
+
+s_load_b96 s[20:22], s[2:3], vcc_lo offset:0x0 scope:SCOPE_SYS
+// GFX12: s_load_b96 s[20:22], s[2:3], vcc_lo offset:0x0 scope:SCOPE_SYS ; encoding: [0x01,0xa5,0x60,0xf4,0x00,0x00,0x00,0xd4]
+
+s_load_b96 s[20:22], s[2:3], vcc_lo offset:0x0 th:TH_LOAD_HT scope:SCOPE_SE
+// GFX12: s_load_b96 s[20:22], s[2:3], vcc_lo offset:0x0 th:TH_LOAD_HT scope:SCOPE_SE ; encoding: [0x01,0xa5,0x20,0xf5,0x00,0x00,0x00,0xd4]
+
+s_load_b96 s[20:22], s[2:3], vcc_lo offset:0x0 th:TH_LOAD_LU scope:SCOPE_SE
+// GFX12: s_load_b96 s[20:22], s[2:3], vcc_lo offset:0x0 th:TH_LOAD_LU scope:SCOPE_SE ; encoding: [0x01,0xa5,0xa0,0xf5,0x00,0x00,0x00,0xd4]
+
+s_load_b96 s[20:22], s[2:3], vcc_lo offset:0x0 th:TH_LOAD_HT scope:SCOPE_SYS
+// GFX12: s_load_b96 s[20:22], s[2:3], vcc_lo offset:0x0 th:TH_LOAD_HT scope:SCOPE_SYS ; encoding: [0x01,0xa5,0x60,0xf5,0x00,0x00,0x00,0xd4]
+
+s_load_b128 s[20:23], s[2:3], vcc_lo offset:0x0 th:TH_LOAD_RT
+// GFX12: s_load_b128 s[20:23], s[2:3], vcc_lo offset:0x0 ; encoding: [0x01,0x45,0x00,0xf4,0x00,0x00,0x00,0xd4]
+
+s_load_b128 s[20:23], s[2:3], vcc_lo offset:0x0 th:TH_LOAD_NT
+// GFX12: s_load_b128 s[20:23], s[2:3], vcc_lo offset:0x0 th:TH_LOAD_NT ; encoding: [0x01,0x45,0x80,0xf4,0x00,0x00,0x00,0xd4]
+
+s_load_b128 s[20:23], s[2:3], vcc_lo offset:0x0 th:TH_LOAD_HT
+// GFX12: s_load_b128 s[20:23], s[2:3], vcc_lo offset:0x0 th:TH_LOAD_HT ; encoding: [0x01,0x45,0x00,0xf5,0x00,0x00,0x00,0xd4]
+
+s_load_b128 s[20:23], s[2:3], vcc_lo offset:0x0 th:TH_LOAD_LU
+// GFX12: s_load_b128 s[20:23], s[2:3], vcc_lo offset:0x0 th:TH_LOAD_LU ; encoding: [0x01,0x45,0x80,0xf5,0x00,0x00,0x00,0xd4]
+
+s_load_b128 s[20:23], s[2:3], vcc_lo offset:0x0 scope:SCOPE_CU
+// GFX12: s_load_b128 s[20:23], s[2:3], vcc_lo offset:0x0 ; encoding: [0x01,0x45,0x00,0xf4,0x00,0x00,0x00,0xd4]
+
+s_load_b128 s[20:23], s[2:3], vcc_lo offset:0x0 scope:SCOPE_SE
+// GFX12: s_load_b128 s[20:23], s[2:3], vcc_lo offset:0x0 scope:SCOPE_SE ; encoding: [0x01,0x45,0x20,0xf4,0x00,0x00,0x00,0xd4]
+
+s_load_b128 s[20:23], s[2:3], vcc_lo offset:0x0 scope:SCOPE_DEV
+// GFX12: s_load_b128 s[20:23], s[2:3], vcc_lo offset:0x0 scope:SCOPE_DEV ; encoding: [0x01,0x45,0x40,0xf4,0x00,0x00,0x00,0xd4]
+
+s_load_b128 s[20:23], s[2:3], vcc_lo offset:0x0 scope:SCOPE_SYS
+// GFX12: s_load_b128 s[20:23], s[2:3], vcc_lo offset:0x0 scope:SCOPE_SYS ; encoding: [0x01,0x45,0x60,0xf4,0x00,0x00,0x00,0xd4]
+
+s_load_b128 s[20:23], s[2:3], vcc_lo offset:0x0 th:TH_LOAD_HT scope:SCOPE_SE
+// GFX12: s_load_b128 s[20:23], s[2:3], vcc_lo offset:0x0 th:TH_LOAD_HT scope:SCOPE_SE ; encoding: [0x01,0x45,0x20,0xf5,0x00,0x00,0x00,0xd4]
+
+s_load_b128 s[20:23], s[2:3], vcc_lo offset:0x0 th:TH_LOAD_LU scope:SCOPE_SE
+// GFX12: s_load_b128 s[20:23], s[2:3], vcc_lo offset:0x0 th:TH_LOAD_LU scope:SCOPE_SE ; encoding: [0x01,0x45,0xa0,0xf5,0x00,0x00,0x00,0xd4]
+
+s_load_b128 s[20:23], s[2:3], vcc_lo offset:0x0 th:TH_LOAD_HT scope:SCOPE_SYS
+// GFX12: s_load_b128 s[20:23], s[2:3], vcc_lo offset:0x0 th:TH_LOAD_HT scope:SCOPE_SYS ; encoding: [0x01,0x45,0x60,0xf5,0x00,0x00,0x00,0xd4]
+
+s_load_b256 s[92:99], s[2:3], s0 offset:0x0 th:TH_LOAD_RT
+// GFX12: s_load_b256 s[92:99], s[2:3], s0 offset:0x0 ; encoding: [0x01,0x77,0x00,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_b256 s[92:99], s[2:3], s0 offset:0x0 th:TH_LOAD_NT
+// GFX12: s_load_b256 s[92:99], s[2:3], s0 offset:0x0 th:TH_LOAD_NT ; encoding: [0x01,0x77,0x80,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_b256 s[92:99], s[2:3], s0 offset:0x0 th:TH_LOAD_HT
+// GFX12: s_load_b256 s[92:99], s[2:3], s0 offset:0x0 th:TH_LOAD_HT ; encoding: [0x01,0x77,0x00,0xf5,0x00,0x00,0x00,0x00]
+
+s_load_b256 s[92:99], s[2:3], s0 offset:0x0 th:TH_LOAD_LU
+// GFX12: s_load_b256 s[92:99], s[2:3], s0 offset:0x0 th:TH_LOAD_LU ; encoding: [0x01,0x77,0x80,0xf5,0x00,0x00,0x00,0x00]
+
+s_load_b256 s[92:99], s[2:3], s0 offset:0x0 scope:SCOPE_CU
+// GFX12: s_load_b256 s[92:99], s[2:3], s0 offset:0x0 ; encoding: [0x01,0x77,0x00,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_b256 s[92:99], s[2:3], s0 offset:0x0 scope:SCOPE_SE
+// GFX12: s_load_b256 s[92:99], s[2:3], s0 offset:0x0 scope:SCOPE_SE ; encoding: [0x01,0x77,0x20,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_b256 s[92:99], s[2:3], s0 offset:0x0 scope:SCOPE_DEV
+// GFX12: s_load_b256 s[92:99], s[2:3], s0 offset:0x0 scope:SCOPE_DEV ; encoding: [0x01,0x77,0x40,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_b256 s[92:99], s[2:3], s0 offset:0x0 scope:SCOPE_SYS
+// GFX12: s_load_b256 s[92:99], s[2:3], s0 offset:0x0 scope:SCOPE_SYS ; encoding: [0x01,0x77,0x60,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_b256 s[92:99], s[2:3], s0 offset:0x0 th:TH_LOAD_HT scope:SCOPE_SE
+// GFX12: s_load_b256 s[92:99], s[2:3], s0 offset:0x0 th:TH_LOAD_HT scope:SCOPE_SE ; encoding: [0x01,0x77,0x20,0xf5,0x00,0x00,0x00,0x00]
+
+s_load_b256 s[92:99], s[2:3], s0 offset:0x0 th:TH_LOAD_LU scope:SCOPE_SE
+// GFX12: s_load_b256 s[92:99], s[2:3], s0 offset:0x0 th:TH_LOAD_LU scope:SCOPE_SE ; encoding: [0x01,0x77,0xa0,0xf5,0x00,0x00,0x00,0x00]
+
+s_load_b256 s[92:99], s[2:3], s0 offset:0x0 th:TH_LOAD_HT scope:SCOPE_SYS
+// GFX12: s_load_b256 s[92:99], s[2:3], s0 offset:0x0 th:TH_LOAD_HT scope:SCOPE_SYS ; encoding: [0x01,0x77,0x60,0xf5,0x00,0x00,0x00,0x00]
+
+s_load_b512 s[20:35], s[4:5], s0 offset:0x0 th:TH_LOAD_RT
+// GFX12: s_load_b512 s[20:35], s[4:5], s0 offset:0x0 ; encoding: [0x02,0x85,0x00,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_b512 s[20:35], s[4:5], s0 offset:0x0 th:TH_LOAD_NT
+// GFX12: s_load_b512 s[20:35], s[4:5], s0 offset:0x0 th:TH_LOAD_NT ; encoding: [0x02,0x85,0x80,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_b512 s[20:35], s[4:5], s0 offset:0x0 th:TH_LOAD_HT
+// GFX12: s_load_b512 s[20:35], s[4:5], s0 offset:0x0 th:TH_LOAD_HT ; encoding: [0x02,0x85,0x00,0xf5,0x00,0x00,0x00,0x00]
+
+s_load_b512 s[20:35], s[4:5], s0 offset:0x0 th:TH_LOAD_LU
+// GFX12: s_load_b512 s[20:35], s[4:5], s0 offset:0x0 th:TH_LOAD_LU ; encoding: [0x02,0x85,0x80,0xf5,0x00,0x00,0x00,0x00]
+
+s_load_b512 s[20:35], s[4:5], s0 offset:0x0 scope:SCOPE_CU
+// GFX12: s_load_b512 s[20:35], s[4:5], s0 offset:0x0 ; encoding: [0x02,0x85,0x00,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_b512 s[20:35], s[4:5], s0 offset:0x0 scope:SCOPE_SE
+// GFX12: s_load_b512 s[20:35], s[4:5], s0 offset:0x0 scope:SCOPE_SE ; encoding: [0x02,0x85,0x20,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_b512 s[20:35], s[4:5], s0 offset:0x0 scope:SCOPE_DEV
+// GFX12: s_load_b512 s[20:35], s[4:5], s0 offset:0x0 scope:SCOPE_DEV ; encoding: [0x02,0x85,0x40,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_b512 s[20:35], s[4:5], s0 offset:0x0 scope:SCOPE_SYS
+// GFX12: s_load_b512 s[20:35], s[4:5], s0 offset:0x0 scope:SCOPE_SYS ; encoding: [0x02,0x85,0x60,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_b512 s[20:35], s[4:5], s0 offset:0x0 th:TH_LOAD_HT scope:SCOPE_SE
+// GFX12: s_load_b512 s[20:35], s[4:5], s0 offset:0x0 th:TH_LOAD_HT scope:SCOPE_SE ; encoding: [0x02,0x85,0x20,0xf5,0x00,0x00,0x00,0x00]
+
+s_load_b512 s[20:35], s[4:5], s0 offset:0x0 th:TH_LOAD_LU scope:SCOPE_SE
+// GFX12: s_load_b512 s[20:35], s[4:5], s0 offset:0x0 th:TH_LOAD_LU scope:SCOPE_SE ; encoding: [0x02,0x85,0xa0,0xf5,0x00,0x00,0x00,0x00]
+
+s_load_b512 s[20:35], s[4:5], s0 offset:0x0 th:TH_LOAD_HT scope:SCOPE_SYS
+// GFX12: s_load_b512 s[20:35], s[4:5], s0 offset:0x0 th:TH_LOAD_HT scope:SCOPE_SYS ; encoding: [0x02,0x85,0x60,0xf5,0x00,0x00,0x00,0x00]
+
+s_buffer_load_b32 s5, s[96:99], s0 offset:0x0 th:TH_LOAD_RT
+// GFX12: s_buffer_load_b32 s5, s[96:99], s0 offset:0x0 ; encoding: [0x70,0x01,0x02,0xf4,0x00,0x00,0x00,0x00]
+
+s_buffer_load_b32 s5, s[96:99], s0 offset:0x0 th:TH_LOAD_NT
+// GFX12: s_buffer_load_b32 s5, s[96:99], s0 offset:0x0 th:TH_LOAD_NT ; encoding: [0x70,0x01,0x82,0xf4,0x00,0x00,0x00,0x00]
+
+s_buffer_load_b32 s5, s[96:99], s0 offset:0x0 th:TH_LOAD_HT
+// GFX12: s_buffer_load_b32 s5, s[96:99], s0 offset:0x0 th:TH_LOAD_HT ; encoding: [0x70,0x01,0x02,0xf5,0x00,0x00,0x00,0x00]
+
+s_buffer_load_b32 s5, s[96:99], s0 offset:0x0 th:TH_LOAD_LU
+// GFX12: s_buffer_load_b32 s5, s[96:99], s0 offset:0x0 th:TH_LOAD_LU ; encoding: [0x70,0x01,0x82,0xf5,0x00,0x00,0x00,0x00]
+
+s_buffer_load_b32 s5, s[96:99], s0 offset:0x0 scope:SCOPE_CU
+// GFX12: s_buffer_load_b32 s5, s[96:99], s0 offset:0x0 ; encoding: [0x70,0x01,0x02,0xf4,0x00,0x00,0x00,0x00]
+
+s_buffer_load_b32 s5, s[96:99], s0 offset:0x0 scope:SCOPE_SE
+// GFX12: s_buffer_load_b32 s5, s[96:99], s0 offset:0x0 scope:SCOPE_SE ; encoding: [0x70,0x01,0x22,0xf4,0x00,0x00,0x00,0x00]
+
+s_buffer_load_b32 s5, s[96:99], s0 offset:0x0 scope:SCOPE_DEV
+// GFX12: s_buffer_load_b32 s5, s[96:99], s0 offset:0x0 scope:SCOPE_DEV ; encoding: [0x70,0x01,0x42,0xf4,0x00,0x00,0x00,0x00]
+
+s_buffer_load_b32 s5, s[96:99], s0 offset:0x0 scope:SCOPE_SYS
+// GFX12: s_buffer_load_b32 s5, s[96:99], s0 offset:0x0 scope:SCOPE_SYS ; encoding: [0x70,0x01,0x62,0xf4,0x00,0x00,0x00,0x00]
+
+s_buffer_load_b32 s5, s[96:99], s0 offset:0x0 th:TH_LOAD_HT scope:SCOPE_SE
+// GFX12: s_buffer_load_b32 s5, s[96:99], s0 offset:0x0 th:TH_LOAD_HT scope:SCOPE_SE ; encoding: [0x70,0x01,0x22,0xf5,0x00,0x00,0x00,0x00]
+
+s_buffer_load_b32 s5, s[96:99], s0 offset:0x0 th:TH_LOAD_LU scope:SCOPE_SE
+// GFX12: s_buffer_load_b32 s5, s[96:99], s0 offset:0x0 th:TH_LOAD_LU scope:SCOPE_SE ; encoding: [0x70,0x01,0xa2,0xf5,0x00,0x00,0x00,0x00]
+
+s_buffer_load_b32 s5, s[96:99], s0 offset:0x0 th:TH_LOAD_HT scope:SCOPE_SYS
+// GFX12: s_buffer_load_b32 s5, s[96:99], s0 offset:0x0 th:TH_LOAD_HT scope:SCOPE_SYS ; encoding: [0x70,0x01,0x62,0xf5,0x00,0x00,0x00,0x00]
+
+s_buffer_load_b64 s[10:11], s[4:7], s0 offset:0x0 th:TH_LOAD_RT
+// GFX12: s_buffer_load_b64 s[10:11], s[4:7], s0 offset:0x0 ; encoding: [0x82,0x22,0x02,0xf4,0x00,0x00,0x00,0x00]
+
+s_buffer_load_b64 s[10:11], s[4:7], s0 offset:0x0 th:TH_LOAD_NT
+// GFX12: s_buffer_load_b64 s[10:11], s[4:7], s0 offset:0x0 th:TH_LOAD_NT ; encoding: [0x82,0x22,0x82,0xf4,0x00,0x00,0x00,0x00]
+
+s_buffer_load_b64 s[10:11], s[4:7], s0 offset:0x0 th:TH_LOAD_HT
+// GFX12: s_buffer_load_b64 s[10:11], s[4:7], s0 offset:0x0 th:TH_LOAD_HT ; encoding: [0x82,0x22,0x02,0xf5,0x00,0x00,0x00,0x00]
+
+s_buffer_load_b64 s[10:11], s[4:7], s0 offset:0x0 th:TH_LOAD_LU
+// GFX12: s_buffer_load_b64 s[10:11], s[4:7], s0 offset:0x0 th:TH_LOAD_LU ; encoding: [0x82,0x22,0x82,0xf5,0x00,0x00,0x00,0x00]
+
+s_buffer_load_b64 s[10:11], s[4:7], s0 offset:0x0 scope:SCOPE_CU
+// GFX12: s_buffer_load_b64 s[10:11], s[4:7], s0 offset:0x0 ; encoding: [0x82,0x22,0x02,0xf4,0x00,0x00,0x00,0x00]
+
+s_buffer_load_b64 s[10:11], s[4:7], s0 offset:0x0 scope:SCOPE_SE
+// GFX12: s_buffer_load_b64 s[10:11], s[4:7], s0 offset:0x0 scope:SCOPE_SE ; encoding: [0x82,0x22,0x22,0xf4,0x00,0x00,0x00,0x00]
+
+s_buffer_load_b64 s[10:11], s[4:7], s0 offset:0x0 scope:SCOPE_DEV
+// GFX12: s_buffer_load_b64 s[10:11], s[4:7], s0 offset:0x0 scope:SCOPE_DEV ; encoding: [0x82,0x22,0x42,0xf4,0x00,0x00,0x00,0x00]
+
+s_buffer_load_b64 s[10:11], s[4:7], s0 offset:0x0 scope:SCOPE_SYS
+// GFX12: s_buffer_load_b64 s[10:11], s[4:7], s0 offset:0x0 scope:SCOPE_SYS ; encoding: [0x82,0x22,0x62,0xf4,0x00,0x00,0x00,0x00]
+
+s_buffer_load_b64 s[10:11], s[4:7], s0 offset:0x0 th:TH_LOAD_HT scope:SCOPE_SE
+// GFX12: s_buffer_load_b64 s[10:11], s[4:7], s0 offset:0x0 th:TH_LOAD_HT scope:SCOPE_SE ; encoding: [0x82,0x22,0x22,0xf5,0x00,0x00,0x00,0x00]
+
+s_buffer_load_b64 s[10:11], s[4:7], s0 offset:0x0 th:TH_LOAD_LU scope:SCOPE_SE
+// GFX12: s_buffer_load_b64 s[10:11], s[4:7], s0 offset:0x0 th:TH_LOAD_LU scope:SCOPE_SE ; encoding: [0x82,0x22,0xa2,0xf5,0x00,0x00,0x00,0x00]
+
+s_buffer_load_b64 s[10:11], s[4:7], s0 offset:0x0 th:TH_LOAD_HT scope:SCOPE_SYS
+// GFX12: s_buffer_load_b64 s[10:11], s[4:7], s0 offset:0x0 th:TH_LOAD_HT scope:SCOPE_SYS ; encoding: [0x82,0x22,0x62,0xf5,0x00,0x00,0x00,0x00]
+
+s_buffer_load_b96 s[20:22], s[4:7], vcc_lo offset:0x0 th:TH_LOAD_RT
+// GFX12: s_buffer_load_b96 s[20:22], s[4:7], vcc_lo offset:0x0 ; encoding: [0x02,0xa5,0x02,0xf4,0x00,0x00,0x00,0xd4]
+
+s_buffer_load_b96 s[20:22], s[4:7], vcc_lo offset:0x0 th:TH_LOAD_NT
+// GFX12: s_buffer_load_b96 s[20:22], s[4:7], vcc_lo offset:0x0 th:TH_LOAD_NT ; encoding: [0x02,0xa5,0x82,0xf4,0x00,0x00,0x00,0xd4]
+
+s_buffer_load_b96 s[20:22], s[4:7], vcc_lo offset:0x0 th:TH_LOAD_HT
+// GFX12: s_buffer_load_b96 s[20:22], s[4:7], vcc_lo offset:0x0 th:TH_LOAD_HT ; encoding: [0x02,0xa5,0x02,0xf5,0x00,0x00,0x00,0xd4]
+
+s_buffer_load_b96 s[20:22], s[4:7], vcc_lo offset:0x0 th:TH_LOAD_LU
+// GFX12: s_buffer_load_b96 s[20:22], s[4:7], vcc_lo offset:0x0 th:TH_LOAD_LU ; encoding: [0x02,0xa5,0x82,0xf5,0x00,0x00,0x00,0xd4]
+
+s_buffer_load_b96 s[20:22], s[4:7], vcc_lo offset:0x0 scope:SCOPE_CU
+// GFX12: s_buffer_load_b96 s[20:22], s[4:7], vcc_lo offset:0x0 ; encoding: [0x02,0xa5,0x02,0xf4,0x00,0x00,0x00,0xd4]
+
+s_buffer_load_b96 s[20:22], s[4:7], vcc_lo offset:0x0 scope:SCOPE_SE
+// GFX12: s_buffer_load_b96 s[20:22], s[4:7], vcc_lo offset:0x0 scope:SCOPE_SE ; encoding: [0x02,0xa5,0x22,0xf4,0x00,0x00,0x00,0xd4]
+
+s_buffer_load_b96 s[20:22], s[4:7], vcc_lo offset:0x0 scope:SCOPE_DEV
+// GFX12: s_buffer_load_b96 s[20:22], s[4:7], vcc_lo offset:0x0 scope:SCOPE_DEV ; encoding: [0x02,0xa5,0x42,0xf4,0x00,0x00,0x00,0xd4]
+
+s_buffer_load_b96 s[20:22], s[4:7], vcc_lo offset:0x0 scope:SCOPE_SYS
+// GFX12: s_buffer_load_b96 s[20:22], s[4:7], vcc_lo offset:0x0 scope:SCOPE_SYS ; encoding: [0x02,0xa5,0x62,0xf4,0x00,0x00,0x00,0xd4]
+
+s_buffer_load_b96 s[20:22], s[4:7], vcc_lo offset:0x0 th:TH_LOAD_HT scope:SCOPE_SE
+// GFX12: s_buffer_load_b96 s[20:22], s[4:7], vcc_lo offset:0x0 th:TH_LOAD_HT scope:SCOPE_SE ; encoding: [0x02,0xa5,0x22,0xf5,0x00,0x00,0x00,0xd4]
+
+s_buffer_load_b96 s[20:22], s[4:7], vcc_lo offset:0x0 th:TH_LOAD_LU scope:SCOPE_SE
+// GFX12: s_buffer_load_b96 s[20:22], s[4:7], vcc_lo offset:0x0 th:TH_LOAD_LU scope:SCOPE_SE ; encoding: [0x02,0xa5,0xa2,0xf5,0x00,0x00,0x00,0xd4]
+
+s_buffer_load_b96 s[20:22], s[4:7], vcc_lo offset:0x0 th:TH_LOAD_HT scope:SCOPE_SYS
+// GFX12: s_buffer_load_b96 s[20:22], s[4:7], vcc_lo offset:0x0 th:TH_LOAD_HT scope:SCOPE_SYS ; encoding: [0x02,0xa5,0x62,0xf5,0x00,0x00,0x00,0xd4]
+
+s_buffer_load_b128 s[20:23], s[4:7], vcc_lo offset:0x0 th:TH_LOAD_RT
+// GFX12: s_buffer_load_b128 s[20:23], s[4:7], vcc_lo offset:0x0 ; encoding: [0x02,0x45,0x02,0xf4,0x00,0x00,0x00,0xd4]
+
+s_buffer_load_b128 s[20:23], s[4:7], vcc_lo offset:0x0 th:TH_LOAD_NT
+// GFX12: s_buffer_load_b128 s[20:23], s[4:7], vcc_lo offset:0x0 th:TH_LOAD_NT ; encoding: [0x02,0x45,0x82,0xf4,0x00,0x00,0x00,0xd4]
+
+s_buffer_load_b128 s[20:23], s[4:7], vcc_lo offset:0x0 th:TH_LOAD_HT
+// GFX12: s_buffer_load_b128 s[20:23], s[4:7], vcc_lo offset:0x0 th:TH_LOAD_HT ; encoding: [0x02,0x45,0x02,0xf5,0x00,0x00,0x00,0xd4]
+
+s_buffer_load_b128 s[20:23], s[4:7], vcc_lo offset:0x0 th:TH_LOAD_LU
+// GFX12: s_buffer_load_b128 s[20:23], s[4:7], vcc_lo offset:0x0 th:TH_LOAD_LU ; encoding: [0x02,0x45,0x82,0xf5,0x00,0x00,0x00,0xd4]
+
+s_buffer_load_b128 s[20:23], s[4:7], vcc_lo offset:0x0 scope:SCOPE_CU
+// GFX12: s_buffer_load_b128 s[20:23], s[4:7], vcc_lo offset:0x0 ; encoding: [0x02,0x45,0x02,0xf4,0x00,0x00,0x00,0xd4]
+
+s_buffer_load_b128 s[20:23], s[4:7], vcc_lo offset:0x0 scope:SCOPE_SE
+// GFX12: s_buffer_load_b128 s[20:23], s[4:7], vcc_lo offset:0x0 scope:SCOPE_SE ; encoding: [0x02,0x45,0x22,0xf4,0x00,0x00,0x00,0xd4]
+
+s_buffer_load_b128 s[20:23], s[4:7], vcc_lo offset:0x0 scope:SCOPE_DEV
+// GFX12: s_buffer_load_b128 s[20:23], s[4:7], vcc_lo offset:0x0 scope:SCOPE_DEV ; encoding: [0x02,0x45,0x42,0xf4,0x00,0x00,0x00,0xd4]
+
+s_buffer_load_b128 s[20:23], s[4:7], vcc_lo offset:0x0 scope:SCOPE_SYS
+// GFX12: s_buffer_load_b128 s[20:23], s[4:7], vcc_lo offset:0x0 scope:SCOPE_SYS ; encoding: [0x02,0x45,0x62,0xf4,0x00,0x00,0x00,0xd4]
+
+s_buffer_load_b128 s[20:23], s[4:7], vcc_lo offset:0x0 th:TH_LOAD_HT scope:SCOPE_SE
+// GFX12: s_buffer_load_b128 s[20:23], s[4:7], vcc_lo offset:0x0 th:TH_LOAD_HT scope:SCOPE_SE ; encoding: [0x02,0x45,0x22,0xf5,0x00,0x00,0x00,0xd4]
+
+s_buffer_load_b128 s[20:23], s[4:7], vcc_lo offset:0x0 th:TH_LOAD_LU scope:SCOPE_SE
+// GFX12: s_buffer_load_b128 s[20:23], s[4:7], vcc_lo offset:0x0 th:TH_LOAD_LU scope:SCOPE_SE ; encoding: [0x02,0x45,0xa2,0xf5,0x00,0x00,0x00,0xd4]
+
+s_buffer_load_b128 s[20:23], s[4:7], vcc_lo offset:0x0 th:TH_LOAD_HT scope:SCOPE_SYS
+// GFX12: s_buffer_load_b128 s[20:23], s[4:7], vcc_lo offset:0x0 th:TH_LOAD_HT scope:SCOPE_SYS ; encoding: [0x02,0x45,0x62,0xf5,0x00,0x00,0x00,0xd4]
+
+s_buffer_load_b256 s[20:27], s[8:11], s0 offset:0x0 th:TH_LOAD_RT
+// GFX12: s_buffer_load_b256 s[20:27], s[8:11], s0 offset:0x0 ; encoding: [0x04,0x65,0x02,0xf4,0x00,0x00,0x00,0x00]
+
+s_buffer_load_b256 s[20:27], s[8:11], s0 offset:0x0 th:TH_LOAD_NT
+// GFX12: s_buffer_load_b256 s[20:27], s[8:11], s0 offset:0x0 th:TH_LOAD_NT ; encoding: [0x04,0x65,0x82,0xf4,0x00,0x00,0x00,0x00]
+
+s_buffer_load_b256 s[20:27], s[8:11], s0 offset:0x0 th:TH_LOAD_HT
+// GFX12: s_buffer_load_b256 s[20:27], s[8:11], s0 offset:0x0 th:TH_LOAD_HT ; encoding: [0x04,0x65,0x02,0xf5,0x00,0x00,0x00,0x00]
+
+s_buffer_load_b256 s[20:27], s[8:11], s0 offset:0x0 th:TH_LOAD_LU
+// GFX12: s_buffer_load_b256 s[20:27], s[8:11], s0 offset:0x0 th:TH_LOAD_LU ; encoding: [0x04,0x65,0x82,0xf5,0x00,0x00,0x00,0x00]
+
+s_buffer_load_b256 s[20:27], s[8:11], s0 offset:0x0 scope:SCOPE_CU
+// GFX12: s_buffer_load_b256 s[20:27], s[8:11], s0 offset:0x0 ; encoding: [0x04,0x65,0x02,0xf4,0x00,0x00,0x00,0x00]
+
+s_buffer_load_b256 s[20:27], s[8:11], s0 offset:0x0 scope:SCOPE_SE
+// GFX12: s_buffer_load_b256 s[20:27], s[8:11], s0 offset:0x0 scope:SCOPE_SE ; encoding: [0x04,0x65,0x22,0xf4,0x00,0x00,0x00,0x00]
+
+s_buffer_load_b256 s[20:27], s[8:11], s0 offset:0x0 scope:SCOPE_DEV
+// GFX12: s_buffer_load_b256 s[20:27], s[8:11], s0 offset:0x0 scope:SCOPE_DEV ; encoding: [0x04,0x65,0x42,0xf4,0x00,0x00,0x00,0x00]
+
+s_buffer_load_b256 s[20:27], s[8:11], s0 offset:0x0 scope:SCOPE_SYS
+// GFX12: s_buffer_load_b256 s[20:27], s[8:11], s0 offset:0x0 scope:SCOPE_SYS ; encoding: [0x04,0x65,0x62,0xf4,0x00,0x00,0x00,0x00]
+
+s_buffer_load_b256 s[20:27], s[8:11], s0 offset:0x0 th:TH_LOAD_HT scope:SCOPE_SE
+// GFX12: s_buffer_load_b256 s[20:27], s[8:11], s0 offset:0x0 th:TH_LOAD_HT scope:SCOPE_SE ; encoding: [0x04,0x65,0x22,0xf5,0x00,0x00,0x00,0x00]
+
+s_buffer_load_b256 s[20:27], s[8:11], s0 offset:0x0 th:TH_LOAD_LU scope:SCOPE_SE
+// GFX12: s_buffer_load_b256 s[20:27], s[8:11], s0 offset:0x0 th:TH_LOAD_LU scope:SCOPE_SE ; encoding: [0x04,0x65,0xa2,0xf5,0x00,0x00,0x00,0x00]
+
+s_buffer_load_b256 s[20:27], s[8:11], s0 offset:0x0 th:TH_LOAD_HT scope:SCOPE_SYS
+// GFX12: s_buffer_load_b256 s[20:27], s[8:11], s0 offset:0x0 th:TH_LOAD_HT scope:SCOPE_SYS ; encoding: [0x04,0x65,0x62,0xf5,0x00,0x00,0x00,0x00]
+
+s_buffer_load_b512 s[20:35], s[96:99], s0 offset:0x0 th:TH_LOAD_RT
+// GFX12: s_buffer_load_b512 s[20:35], s[96:99], s0 offset:0x0 ; encoding: [0x30,0x85,0x02,0xf4,0x00,0x00,0x00,0x00]
+
+s_buffer_load_b512 s[20:35], s[96:99], s0 offset:0x0 th:TH_LOAD_NT
+// GFX12: s_buffer_load_b512 s[20:35], s[96:99], s0 offset:0x0 th:TH_LOAD_NT ; encoding: [0x30,0x85,0x82,0xf4,0x00,0x00,0x00,0x00]
+
+s_buffer_load_b512 s[20:35], s[96:99], s0 offset:0x0 th:TH_LOAD_HT
+// GFX12: s_buffer_load_b512 s[20:35], s[96:99], s0 offset:0x0 th:TH_LOAD_HT ; encoding: [0x30,0x85,0x02,0xf5,0x00,0x00,0x00,0x00]
+
+s_buffer_load_b512 s[20:35], s[96:99], s0 offset:0x0 th:TH_LOAD_LU
+// GFX12: s_buffer_load_b512 s[20:35], s[96:99], s0 offset:0x0 th:TH_LOAD_LU ; encoding: [0x30,0x85,0x82,0xf5,0x00,0x00,0x00,0x00]
+
+s_buffer_load_b512 s[20:35], s[96:99], s0 offset:0x0 scope:SCOPE_CU
+// GFX12: s_buffer_load_b512 s[20:35], s[96:99], s0 offset:0x0 ; encoding: [0x30,0x85,0x02,0xf4,0x00,0x00,0x00,0x00]
+
+s_buffer_load_b512 s[20:35], s[96:99], s0 offset:0x0 scope:SCOPE_SE
+// GFX12: s_buffer_load_b512 s[20:35], s[96:99], s0 offset:0x0 scope:SCOPE_SE ; encoding: [0x30,0x85,0x22,0xf4,0x00,0x00,0x00,0x00]
+
+s_buffer_load_b512 s[20:35], s[96:99], s0 offset:0x0 scope:SCOPE_DEV
+// GFX12: s_buffer_load_b512 s[20:35], s[96:99], s0 offset:0x0 scope:SCOPE_DEV ; encoding: [0x30,0x85,0x42,0xf4,0x00,0x00,0x00,0x00]
+
+s_buffer_load_b512 s[20:35], s[96:99], s0 offset:0x0 scope:SCOPE_SYS
+// GFX12: s_buffer_load_b512 s[20:35], s[96:99], s0 offset:0x0 scope:SCOPE_SYS ; encoding: [0x30,0x85,0x62,0xf4,0x00,0x00,0x00,0x00]
+
+s_buffer_load_b512 s[20:35], s[96:99], s0 offset:0x0 th:TH_LOAD_HT scope:SCOPE_SE
+// GFX12: s_buffer_load_b512 s[20:35], s[96:99], s0 offset:0x0 th:TH_LOAD_HT scope:SCOPE_SE ; encoding: [0x30,0x85,0x22,0xf5,0x00,0x00,0x00,0x00]
+
+s_buffer_load_b512 s[20:35], s[96:99], s0 offset:0x0 th:TH_LOAD_LU scope:SCOPE_SE
+// GFX12: s_buffer_load_b512 s[20:35], s[96:99], s0 offset:0x0 th:TH_LOAD_LU scope:SCOPE_SE ; encoding: [0x30,0x85,0xa2,0xf5,0x00,0x00,0x00,0x00]
+
+s_buffer_load_b512 s[20:35], s[96:99], s0 offset:0x0 th:TH_LOAD_HT scope:SCOPE_SYS
+// GFX12: s_buffer_load_b512 s[20:35], s[96:99], s0 offset:0x0 th:TH_LOAD_HT scope:SCOPE_SYS ; encoding: [0x30,0x85,0x62,0xf5,0x00,0x00,0x00,0x00]
diff --git a/llvm/test/MC/AMDGPU/gfx12_err.s b/llvm/test/MC/AMDGPU/gfx12_err.s
index b23e60082ffef3..b103d7cef97692 100644
--- a/llvm/test/MC/AMDGPU/gfx12_err.s
+++ b/llvm/test/MC/AMDGPU/gfx12_err.s
@@ -36,6 +36,15 @@ image_store v0, v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D th:TH_STORE_RT_WB scope:
 image_store v0, v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D th:TH_STORE_BYPASS scope:SCOPE_DEV
 // GFX12-ERR: [[@LINE-1]]:{{[0-9]+}}: error: scope and th combination is not valid
 
+s_load_b32 s5, s[4:5], s0 offset:0x0 th:TH_LOAD_NT_RT
+// GFX12-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid th value for SMEM instruction
+
+s_buffer_load_b64 s[10:11], s[4:7], s0 offset:0x0 th:TH_LOAD_RT_NT
+// GFX12-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid th value for SMEM instruction
+
+s_load_b128 s[20:23], s[2:3], vcc_lo th:TH_LOAD_NT_HT
+// GFX12-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid th value for SMEM instruction
+
 image_load v0, v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D th:TH_LOAD_HT scope:SCOPE_SE th:TH_LOAD_HT
 // GFX12-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand
 
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_smem.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_smem.txt
index 7843905797a62e..f46e74537b9c34 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_smem.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_smem.txt
@@ -1,5 +1,398 @@
 # RUN: llvm-mc -arch=amdgcn -mcpu=gfx1200 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12 %s
 
+# GFX12: s_load_b32 s101, s[2:3], s0 offset:0x0  ; encoding: [0x41,0x19,0x00,0xf4,0x00,0x00,0x00,0x00]
+0x41,0x19,0x00,0xf4,0x00,0x00,0x00,0x00
+
+# GFX12: s_load_b32 s5, s[100:101], s0 offset:0x0 ; encoding: [0x72,0x01,0x00,0xf4,0x00,0x00,0x00,0x00]
+0x72,0x01,0x00,0xf4,0x00,0x00,0x00,0x00
+
+# GFX12: s_load_b32 s5, s[2:3], 0x0              ; encoding: [0x41,0x01,0x00,0xf4,0x00,0x00,0x00,0xf8]
+0x41,0x01,0x00,0xf4,0x00,0x00,0x00,0xf8
+
+# GFX12: s_load_b32 s5, s[2:3], m0 offset:0x0    ; encoding: [0x41,0x01,0x00,0xf4,0x00,0x00,0x00,0xfa]
+0x41,0x01,0x00,0xf4,0x00,0x00,0x00,0xfa
+
+# GFX12: s_load_b32 s5, s[2:3], s0 offset:0x0    ; encoding: [0x41,0x01,0x00,0xf4,0x00,0x00,0x00,0x00]
+0x41,0x01,0x00,0xf4,0x00,0x00,0x00,0x00
+
+# GFX12: s_load_b32 s5, s[2:3], s101 offset:0x0  ; encoding: [0x41,0x01,0x00,0xf4,0x00,0x00,0x00,0xca]
+0x41,0x01,0x00,0xf4,0x00,0x00,0x00,0xca
+
+# GFX12: s_load_b32 s5, s[2:3], vcc_hi offset:0x0 ; encoding: [0x41,0x01,0x00,0xf4,0x00,0x00,0x00,0xd6]
+0x41,0x01,0x00,0xf4,0x00,0x00,0x00,0xd6
+
+# GFX12: s_load_b32 s5, s[2:3], vcc_lo offset:0x0 ; encoding: [0x41,0x01,0x00,0xf4,0x00,0x00,0x00,0xd4]
+0x41,0x01,0x00,0xf4,0x00,0x00,0x00,0xd4
+
+# GFX12: s_load_b32 s5, s[4:5], s0 offset:0x0    ; encoding: [0x42,0x01,0x00,0xf4,0x00,0x00,0x00,0x00]
+0x42,0x01,0x00,0xf4,0x00,0x00,0x00,0x00
+
+# GFX12: s_load_b32 s5, vcc, s0 offset:0x0       ; encoding: [0x75,0x01,0x00,0xf4,0x00,0x00,0x00,0x00]
+0x75,0x01,0x00,0xf4,0x00,0x00,0x00,0x00
+
+# GFX12: s_load_b32 vcc_hi, s[2:3], s0 offset:0x0 ; encoding: [0xc1,0x1a,0x00,0xf4,0x00,0x00,0x00,0x00]
+0xc1,0x1a,0x00,0xf4,0x00,0x00,0x00,0x00
+
+# GFX12: s_load_b32 vcc_lo, s[2:3], s0 offset:0x0 ; encoding: [0x81,0x1a,0x00,0xf4,0x00,0x00,0x00,0x00]
+0x81,0x1a,0x00,0xf4,0x00,0x00,0x00,0x00
+
+# GFX12: s_load_b512 s[20:35], s[100:101], s0 offset:0x0 ; encoding: [0x32,0x85,0x00,0xf4,0x00,0x00,0x00,0x00]
+0x32,0x85,0x00,0xf4,0x00,0x00,0x00,0x00
+
+# GFX12: s_load_b512 s[20:35], s[2:3], 0x0       ; encoding: [0x01,0x85,0x00,0xf4,0x00,0x00,0x00,0xf8]
+0x01,0x85,0x00,0xf4,0x00,0x00,0x00,0xf8
+
+# GFX12: s_load_b512 s[20:35], s[2:3], m0 offset:0x0 ; encoding: [0x01,0x85,0x00,0xf4,0x00,0x00,0x00,0xfa]
+0x01,0x85,0x00,0xf4,0x00,0x00,0x00,0xfa
+
+# GFX12: s_load_b512 s[20:35], s[2:3], s0 offset:0x0 ; encoding: [0x01,0x85,0x00,0xf4,0x00,0x00,0x00,0x00]
+0x01,0x85,0x00,0xf4,0x00,0x00,0x00,0x00
+
+# GFX12: s_load_b512 s[20:35], s[2:3], s101 offset:0x0 ; encoding: [0x01,0x85,0x00,0xf4,0x00,0x00,0x00,0xca]
+0x01,0x85,0x00,0xf4,0x00,0x00,0x00,0xca
+
+# GFX12: s_load_b512 s[20:35], s[2:3], vcc_hi offset:0x0 ; encoding: [0x01,0x85,0x00,0xf4,0x00,0x00,0x00,0xd6]
+0x01,0x85,0x00,0xf4,0x00,0x00,0x00,0xd6
+
+# GFX12: s_load_b512 s[20:35], s[2:3], vcc_lo offset:0x0 ; encoding: [0x01,0x85,0x00,0xf4,0x00,0x00,0x00,0xd4]
+0x01,0x85,0x00,0xf4,0x00,0x00,0x00,0xd4
+
+# GFX12: s_load_b512 s[20:35], s[4:5], s0 offset:0x0 ; encoding: [0x02,0x85,0x00,0xf4,0x00,0x00,0x00,0x00]
+0x02,0x85,0x00,0xf4,0x00,0x00,0x00,0x00
+
+# GFX12: s_load_b512 s[20:35], vcc, s0 offset:0x0 ; encoding: [0x35,0x85,0x00,0xf4,0x00,0x00,0x00,0x00]
+0x35,0x85,0x00,0xf4,0x00,0x00,0x00,0x00
+
+# GFX12: s_load_b512 s[24:39], s[2:3], s0 offset:0x0 ; encoding: [0x01,0x86,0x00,0xf4,0x00,0x00,0x00,0x00]
+0x01,0x86,0x00,0xf4,0x00,0x00,0x00,0x00
+
+# GFX12: s_load_b512 s[84:99], s[2:3], s0 offset:0x0 ; encoding: [0x01,0x95,0x00,0xf4,0x00,0x00,0x00,0x00]
+0x01,0x95,0x00,0xf4,0x00,0x00,0x00,0x00
+
+# GFX12: s_load_b64 s[100:101], s[2:3], s0 offset:0x0 ; encoding: [0x01,0x39,0x00,0xf4,0x00,0x00,0x00,0x00]
+0x01,0x39,0x00,0xf4,0x00,0x00,0x00,0x00
+
+# GFX12: s_load_b64 s[10:11], s[100:101], s0 offset:0x0 ; encoding: [0xb2,0x22,0x00,0xf4,0x00,0x00,0x00,0x00]
+0xb2,0x22,0x00,0xf4,0x00,0x00,0x00,0x00
+
+# GFX12: s_load_b64 s[10:11], s[2:3], 0x0        ; encoding: [0x81,0x22,0x00,0xf4,0x00,0x00,0x00,0xf8]
+0x81,0x22,0x00,0xf4,0x00,0x00,0x00,0xf8
+
+# GFX12: s_load_b64 s[10:11], s[2:3], m0 offset:0x0 ; encoding: [0x81,0x22,0x00,0xf4,0x00,0x00,0x00,0xfa]
+0x81,0x22,0x00,0xf4,0x00,0x00,0x00,0xfa
+
+# GFX12: s_load_b64 s[10:11], s[2:3], s0 offset:0x0 ; encoding: [0x81,0x22,0x00,0xf4,0x00,0x00,0x00,0x00]
+0x81,0x22,0x00,0xf4,0x00,0x00,0x00,0x00
+
+# GFX12: s_load_b64 s[10:11], s[2:3], s101 offset:0x0 ; encoding: [0x81,0x22,0x00,0xf4,0x00,0x00,0x00,0xca]
+0x81,0x22,0x00,0xf4,0x00,0x00,0x00,0xca
+
+# GFX12: s_load_b64 s[10:11], s[2:3], vcc_hi offset:0x0 ; encoding: [0x81,0x22,0x00,0xf4,0x00,0x00,0x00,0xd6]
+0x81,0x22,0x00,0xf4,0x00,0x00,0x00,0xd6
+
+# GFX12: s_load_b64 s[10:11], s[2:3], vcc_lo offset:0x0 ; encoding: [0x81,0x22,0x00,0xf4,0x00,0x00,0x00,0xd4]
+0x81,0x22,0x00,0xf4,0x00,0x00,0x00,0xd4
+
+# GFX12: s_load_b64 s[10:11], s[4:5], s0 offset:0x0 ; encoding: [0x82,0x22,0x00,0xf4,0x00,0x00,0x00,0x00]
+0x82,0x22,0x00,0xf4,0x00,0x00,0x00,0x00
+
+# GFX12: s_load_b64 s[10:11], vcc, s0 offset:0x0 ; encoding: [0xb5,0x22,0x00,0xf4,0x00,0x00,0x00,0x00]
+0xb5,0x22,0x00,0xf4,0x00,0x00,0x00,0x00
+
+# GFX12: s_load_b64 s[12:13], s[2:3], s0 offset:0x0 ; encoding: [0x01,0x23,0x00,0xf4,0x00,0x00,0x00,0x00]
+0x01,0x23,0x00,0xf4,0x00,0x00,0x00,0x00
+
+# GFX12: s_load_b64 vcc, s[2:3], s0 offset:0x0   ; encoding: [0x81,0x3a,0x00,0xf4,0x00,0x00,0x00,0x00]
+0x81,0x3a,0x00,0xf4,0x00,0x00,0x00,0x00
+
+# GFX12: s_load_b96 s[20:22], s[100:101], s0 offset:0x0 ; encoding: [0x32,0xa5,0x00,0xf4,0x00,0x00,0x00,0x00]
+0x32,0xa5,0x00,0xf4,0x00,0x00,0x00,0x00
+
+# GFX12: s_load_b96 s[20:22], s[2:3], 0x0       ; encoding: [0x01,0xa5,0x00,0xf4,0x00,0x00,0x00,0xf8]
+0x01,0xa5,0x00,0xf4,0x00,0x00,0x00,0xf8
+
+# GFX12: s_load_b96 s[20:22], s[2:3], m0 offset:0x0 ; encoding: [0x01,0xa5,0x00,0xf4,0x00,0x00,0x00,0xfa]
+0x01,0xa5,0x00,0xf4,0x00,0x00,0x00,0xfa
+
+# GFX12: s_load_b96 s[20:22], s[2:3], s0 offset:0x0 ; encoding: [0x01,0xa5,0x00,0xf4,0x00,0x00,0x00,0x00]
+0x01,0xa5,0x00,0xf4,0x00,0x00,0x00,0x00
+
+# GFX12: s_load_b96 s[20:22], s[2:3], s101 offset:0x0 ; encoding: [0x01,0xa5,0x00,0xf4,0x00,0x00,0x00,0xca]
+0x01,0xa5,0x00,0xf4,0x00,0x00,0x00,0xca
+
+# GFX12: s_load_b96 s[20:22], s[2:3], vcc_hi offset:0x0 ; encoding: [0x01,0xa5,0x00,0xf4,0x00,0x00,0x00,0xd6]
+0x01,0xa5,0x00,0xf4,0x00,0x00,0x00,0xd6
+
+# GFX12: s_load_b96 s[20:22], s[2:3], vcc_lo offset:0x0 ; encoding: [0x01,0xa5,0x00,0xf4,0x00,0x00,0x00,0xd4]
+0x01,0xa5,0x00,0xf4,0x00,0x00,0x00,0xd4
+
+# GFX12: s_load_b96 s[20:22], s[4:5], s0 offset:0x0 ; encoding: [0x02,0xa5,0x00,0xf4,0x00,0x00,0x00,0x00]
+0x02,0xa5,0x00,0xf4,0x00,0x00,0x00,0x00
+
+# GFX12: s_load_b96 s[20:22], vcc, s0 offset:0x0 ; encoding: [0x35,0xa5,0x00,0xf4,0x00,0x00,0x00,0x00]
+0x35,0xa5,0x00,0xf4,0x00,0x00,0x00,0x00
+
+# GFX12: s_load_b96 s[24:26], s[2:3], s0 offset:0x0 ; encoding: [0x01,0xa6,0x00,0xf4,0x00,0x00,0x00,0x00]
+0x01,0xa6,0x00,0xf4,0x00,0x00,0x00,0x00
+
+# GFX12: s_load_b96 s[96:98], s[2:3], s0 offset:0x0 ; encoding: [0x01,0xb8,0x00,0xf4,0x00,0x00,0x00,0x00]
+0x01,0xb8,0x00,0xf4,0x00,0x00,0x00,0x00
+
+# GFX12: s_load_b128 s[20:23], s[100:101], s0 offset:0x0 ; encoding: [0x32,0x45,0x00,0xf4,0x00,0x00,0x00,0x00]
+0x32,0x45,0x00,0xf4,0x00,0x00,0x00,0x00
+
+# GFX12: s_load_b128 s[20:23], s[2:3], 0x0       ; encoding: [0x01,0x45,0x00,0xf4,0x00,0x00,0x00,0xf8]
+0x01,0x45,0x00,0xf4,0x00,0x00,0x00,0xf8
+
+# GFX12: s_load_b128 s[20:23], s[2:3], m0 offset:0x0 ; encoding: [0x01,0x45,0x00,0xf4,0x00,0x00,0x00,0xfa]
+0x01,0x45,0x00,0xf4,0x00,0x00,0x00,0xfa
+
+# GFX12: s_load_b128 s[20:23], s[2:3], s0 offset:0x0 ; encoding: [0x01,0x45,0x00,0xf4,0x00,0x00,0x00,0x00]
+0x01,0x45,0x00,0xf4,0x00,0x00,0x00,0x00
+
+# GFX12: s_load_b128 s[20:23], s[2:3], s101 offset:0x0 ; encoding: [0x01,0x45,0x00,0xf4,0x00,0x00,0x00,0xca]
+0x01,0x45,0x00,0xf4,0x00,0x00,0x00,0xca
+
+# GFX12: s_load_b128 s[20:23], s[2:3], vcc_hi offset:0x0 ; encoding: [0x01,0x45,0x00,0xf4,0x00,0x00,0x00,0xd6]
+0x01,0x45,0x00,0xf4,0x00,0x00,0x00,0xd6
+
+# GFX12: s_load_b128 s[20:23], s[2:3], vcc_lo offset:0x0 ; encoding: [0x01,0x45,0x00,0xf4,0x00,0x00,0x00,0xd4]
+0x01,0x45,0x00,0xf4,0x00,0x00,0x00,0xd4
+
+# GFX12: s_load_b128 s[20:23], s[4:5], s0 offset:0x0 ; encoding: [0x02,0x45,0x00,0xf4,0x00,0x00,0x00,0x00]
+0x02,0x45,0x00,0xf4,0x00,0x00,0x00,0x00
+
+# GFX12: s_load_b128 s[20:23], vcc, s0 offset:0x0 ; encoding: [0x35,0x45,0x00,0xf4,0x00,0x00,0x00,0x00]
+0x35,0x45,0x00,0xf4,0x00,0x00,0x00,0x00
+
+# GFX12: s_load_b128 s[24:27], s[2:3], s0 offset:0x0 ; encoding: [0x01,0x46,0x00,0xf4,0x00,0x00,0x00,0x00]
+0x01,0x46,0x00,0xf4,0x00,0x00,0x00,0x00
+
+# GFX12: s_load_b128 s[96:99], s[2:3], s0 offset:0x0 ; encoding: [0x01,0x58,0x00,0xf4,0x00,0x00,0x00,0x00]
+0x01,0x58,0x00,0xf4,0x00,0x00,0x00,0x00
+
+# GFX12: s_load_b256 s[20:27], s[100:101], s0 offset:0x0 ; encoding: [0x32,0x65,0x00,0xf4,0x00,0x00,0x00,0x00]
+0x32,0x65,0x00,0xf4,0x00,0x00,0x00,0x00
+
+# GFX12: s_load_b256 s[20:27], s[2:3], 0x0       ; encoding: [0x01,0x65,0x00,0xf4,0x00,0x00,0x00,0xf8]
+0x01,0x65,0x00,0xf4,0x00,0x00,0x00,0xf8
+
+# GFX12: s_load_b256 s[20:27], s[2:3], m0 offset:0x0 ; encoding: [0x01,0x65,0x00,0xf4,0x00,0x00,0x00,0xfa]
+0x01,0x65,0x00,0xf4,0x00,0x00,0x00,0xfa
+
+# GFX12: s_load_b256 s[20:27], s[2:3], s0 offset:0x0 ; encoding: [0x01,0x65,0x00,0xf4,0x00,0x00,0x00,0x00]
+0x01,0x65,0x00,0xf4,0x00,0x00,0x00,0x00
+
+# GFX12: s_load_b256 s[20:27], s[2:3], s101 offset:0x0 ; encoding: [0x01,0x65,0x00,0xf4,0x00,0x00,0x00,0xca]
+0x01,0x65,0x00,0xf4,0x00,0x00,0x00,0xca
+
+# GFX12: s_load_b256 s[20:27], s[2:3], vcc_hi offset:0x0 ; encoding: [0x01,0x65,0x00,0xf4,0x00,0x00,0x00,0xd6]
+0x01,0x65,0x00,0xf4,0x00,0x00,0x00,0xd6
+
+# GFX12: s_load_b256 s[20:27], s[2:3], vcc_lo offset:0x0 ; encoding: [0x01,0x65,0x00,0xf4,0x00,0x00,0x00,0xd4]
+0x01,0x65,0x00,0xf4,0x00,0x00,0x00,0xd4
+
+# GFX12: s_load_b256 s[20:27], s[4:5], s0 offset:0x0 ; encoding: [0x02,0x65,0x00,0xf4,0x00,0x00,0x00,0x00]
+0x02,0x65,0x00,0xf4,0x00,0x00,0x00,0x00
+
+# GFX12: s_load_b256 s[20:27], vcc, s0 offset:0x0 ; encoding: [0x35,0x65,0x00,0xf4,0x00,0x00,0x00,0x00]
+0x35,0x65,0x00,0xf4,0x00,0x00,0x00,0x00
+
+# GFX12: s_load_b256 s[24:31], s[2:3], s0 offset:0x0 ; encoding: [0x01,0x66,0x00,0xf4,0x00,0x00,0x00,0x00]
+0x01,0x66,0x00,0xf4,0x00,0x00,0x00,0x00
+
+# GFX12: s_load_b256 s[92:99], s[2:3], s0 offset:0x0 ; encoding: [0x01,0x77,0x00,0xf4,0x00,0x00,0x00,0x00]
+0x01,0x77,0x00,0xf4,0x00,0x00,0x00,0x00
+
+# GFX12: s_buffer_load_b32 s101, s[4:7], s0 offset:0x0 ; encoding: [0x42,0x19,0x02,0xf4,0x00,0x00,0x00,0x00]
+0x42,0x19,0x02,0xf4,0x00,0x00,0x00,0x00
+
+# GFX12: s_buffer_load_b32 s5, s[4:7], 0x0       ; encoding: [0x42,0x01,0x02,0xf4,0x00,0x00,0x00,0xf8]
+0x42,0x01,0x02,0xf4,0x00,0x00,0x00,0xf8
+
+# GFX12: s_buffer_load_b32 s5, s[4:7], m0 offset:0x0 ; encoding: [0x42,0x01,0x02,0xf4,0x00,0x00,0x00,0xfa]
+0x42,0x01,0x02,0xf4,0x00,0x00,0x00,0xfa
+
+# GFX12: s_buffer_load_b32 s5, s[4:7], s0 offset:0x0 ; encoding: [0x42,0x01,0x02,0xf4,0x00,0x00,0x00,0x00]
+0x42,0x01,0x02,0xf4,0x00,0x00,0x00,0x00
+
+# GFX12: s_buffer_load_b32 s5, s[4:7], s101 offset:0x0 ; encoding: [0x42,0x01,0x02,0xf4,0x00,0x00,0x00,0xca]
+0x42,0x01,0x02,0xf4,0x00,0x00,0x00,0xca
+
+# GFX12: s_buffer_load_b32 s5, s[4:7], vcc_hi offset:0x0 ; encoding: [0x42,0x01,0x02,0xf4,0x00,0x00,0x00,0xd6]
+0x42,0x01,0x02,0xf4,0x00,0x00,0x00,0xd6
+
+# GFX12: s_buffer_load_b32 s5, s[4:7], vcc_lo offset:0x0 ; encoding: [0x42,0x01,0x02,0xf4,0x00,0x00,0x00,0xd4]
+0x42,0x01,0x02,0xf4,0x00,0x00,0x00,0xd4
+
+# GFX12: s_buffer_load_b32 s5, s[8:11], s0 offset:0x0 ; encoding: [0x44,0x01,0x02,0xf4,0x00,0x00,0x00,0x00]
+0x44,0x01,0x02,0xf4,0x00,0x00,0x00,0x00
+
+# GFX12: s_buffer_load_b32 s5, s[96:99], s0 offset:0x0 ; encoding: [0x70,0x01,0x02,0xf4,0x00,0x00,0x00,0x00]
+0x70,0x01,0x02,0xf4,0x00,0x00,0x00,0x00
+
+# GFX12: s_buffer_load_b32 vcc_hi, s[4:7], s0 offset:0x0 ; encoding: [0xc2,0x1a,0x02,0xf4,0x00,0x00,0x00,0x00]
+0xc2,0x1a,0x02,0xf4,0x00,0x00,0x00,0x00
+
+# GFX12: s_buffer_load_b32 vcc_lo, s[4:7], s0 offset:0x0 ; encoding: [0x82,0x1a,0x02,0xf4,0x00,0x00,0x00,0x00]
+0x82,0x1a,0x02,0xf4,0x00,0x00,0x00,0x00
+
+# GFX12: s_buffer_load_b512 s[20:35], s[4:7], 0x0 ; encoding: [0x02,0x85,0x02,0xf4,0x00,0x00,0x00,0xf8]
+0x02,0x85,0x02,0xf4,0x00,0x00,0x00,0xf8
+
+# GFX12: s_buffer_load_b512 s[20:35], s[4:7], m0 offset:0x0 ; encoding: [0x02,0x85,0x02,0xf4,0x00,0x00,0x00,0xfa]
+0x02,0x85,0x02,0xf4,0x00,0x00,0x00,0xfa
+
+# GFX12: s_buffer_load_b512 s[20:35], s[4:7], s0 offset:0x0 ; encoding: [0x02,0x85,0x02,0xf4,0x00,0x00,0x00,0x00]
+0x02,0x85,0x02,0xf4,0x00,0x00,0x00,0x00
+
+# GFX12: s_buffer_load_b512 s[20:35], s[4:7], s101 offset:0x0 ; encoding: [0x02,0x85,0x02,0xf4,0x00,0x00,0x00,0xca]
+0x02,0x85,0x02,0xf4,0x00,0x00,0x00,0xca
+
+# GFX12: s_buffer_load_b512 s[20:35], s[4:7], vcc_hi offset:0x0 ; encoding: [0x02,0x85,0x02,0xf4,0x00,0x00,0x00,0xd6]
+0x02,0x85,0x02,0xf4,0x00,0x00,0x00,0xd6
+
+# GFX12: s_buffer_load_b512 s[20:35], s[4:7], vcc_lo offset:0x0 ; encoding: [0x02,0x85,0x02,0xf4,0x00,0x00,0x00,0xd4]
+0x02,0x85,0x02,0xf4,0x00,0x00,0x00,0xd4
+
+# GFX12: s_buffer_load_b512 s[20:35], s[8:11], s0 offset:0x0 ; encoding: [0x04,0x85,0x02,0xf4,0x00,0x00,0x00,0x00]
+0x04,0x85,0x02,0xf4,0x00,0x00,0x00,0x00
+
+# GFX12: s_buffer_load_b512 s[20:35], s[96:99], s0 offset:0x0 ; encoding: [0x30,0x85,0x02,0xf4,0x00,0x00,0x00,0x00]
+0x30,0x85,0x02,0xf4,0x00,0x00,0x00,0x00
+
+# GFX12: s_buffer_load_b512 s[24:39], s[4:7], s0 offset:0x0 ; encoding: [0x02,0x86,0x02,0xf4,0x00,0x00,0x00,0x00]
+0x02,0x86,0x02,0xf4,0x00,0x00,0x00,0x00
+
+# GFX12: s_buffer_load_b512 s[84:99], s[4:7], s0 offset:0x0 ; encoding: [0x02,0x95,0x02,0xf4,0x00,0x00,0x00,0x00]
+0x02,0x95,0x02,0xf4,0x00,0x00,0x00,0x00
+
+# GFX12: s_buffer_load_b64 s[100:101], s[4:7], s0 offset:0x0 ; encoding: [0x02,0x39,0x02,0xf4,0x00,0x00,0x00,0x00]
+0x02,0x39,0x02,0xf4,0x00,0x00,0x00,0x00
+
+# GFX12: s_buffer_load_b64 s[10:11], s[4:7], 0x0 ; encoding: [0x82,0x22,0x02,0xf4,0x00,0x00,0x00,0xf8]
+0x82,0x22,0x02,0xf4,0x00,0x00,0x00,0xf8
+
+# GFX12: s_buffer_load_b64 s[10:11], s[4:7], m0 offset:0x0 ; encoding: [0x82,0x22,0x02,0xf4,0x00,0x00,0x00,0xfa]
+0x82,0x22,0x02,0xf4,0x00,0x00,0x00,0xfa
+
+# GFX12: s_buffer_load_b64 s[10:11], s[4:7], s0 offset:0x0 ; encoding: [0x82,0x22,0x02,0xf4,0x00,0x00,0x00,0x00]
+0x82,0x22,0x02,0xf4,0x00,0x00,0x00,0x00
+
+# GFX12: s_buffer_load_b64 s[10:11], s[4:7], s101 offset:0x0 ; encoding: [0x82,0x22,0x02,0xf4,0x00,0x00,0x00,0xca]
+0x82,0x22,0x02,0xf4,0x00,0x00,0x00,0xca
+
+# GFX12: s_buffer_load_b64 s[10:11], s[4:7], vcc_hi offset:0x0 ; encoding: [0x82,0x22,0x02,0xf4,0x00,0x00,0x00,0xd6]
+0x82,0x22,0x02,0xf4,0x00,0x00,0x00,0xd6
+
+# GFX12: s_buffer_load_b64 s[10:11], s[4:7], vcc_lo offset:0x0 ; encoding: [0x82,0x22,0x02,0xf4,0x00,0x00,0x00,0xd4]
+0x82,0x22,0x02,0xf4,0x00,0x00,0x00,0xd4
+
+# GFX12: s_buffer_load_b64 s[10:11], s[8:11], s0 offset:0x0 ; encoding: [0x84,0x22,0x02,0xf4,0x00,0x00,0x00,0x00]
+0x84,0x22,0x02,0xf4,0x00,0x00,0x00,0x00
+
+# GFX12: s_buffer_load_b64 s[10:11], s[96:99], s0 offset:0x0 ; encoding: [0xb0,0x22,0x02,0xf4,0x00,0x00,0x00,0x00]
+0xb0,0x22,0x02,0xf4,0x00,0x00,0x00,0x00
+
+# GFX12: s_buffer_load_b64 s[12:13], s[4:7], s0 offset:0x0 ; encoding: [0x02,0x23,0x02,0xf4,0x00,0x00,0x00,0x00]
+0x02,0x23,0x02,0xf4,0x00,0x00,0x00,0x00
+
+# GFX12: s_buffer_load_b64 vcc, s[4:7], s0 offset:0x0 ; encoding: [0x82,0x3a,0x02,0xf4,0x00,0x00,0x00,0x00]
+0x82,0x3a,0x02,0xf4,0x00,0x00,0x00,0x00
+
+# GFX12: s_buffer_load_b96 s[20:22], s[4:7], 0x0 ; encoding: [0x02,0xa5,0x02,0xf4,0x00,0x00,0x00,0xf8]
+0x02,0xa5,0x02,0xf4,0x00,0x00,0x00,0xf8
+
+# GFX12: s_buffer_load_b96 s[20:22], s[4:7], m0 offset:0x0 ; encoding: [0x02,0xa5,0x02,0xf4,0x00,0x00,0x00,0xfa]
+0x02,0xa5,0x02,0xf4,0x00,0x00,0x00,0xfa
+
+# GFX12: s_buffer_load_b96 s[20:22], s[4:7], s0 offset:0x0 ; encoding: [0x02,0xa5,0x02,0xf4,0x00,0x00,0x00,0x00]
+0x02,0xa5,0x02,0xf4,0x00,0x00,0x00,0x00
+
+# GFX12: s_buffer_load_b96 s[20:22], s[4:7], s101 offset:0x0 ; encoding: [0x02,0xa5,0x02,0xf4,0x00,0x00,0x00,0xca]
+0x02,0xa5,0x02,0xf4,0x00,0x00,0x00,0xca
+
+# GFX12: s_buffer_load_b96 s[20:22], s[4:7], vcc_hi offset:0x0 ; encoding: [0x02,0xa5,0x02,0xf4,0x00,0x00,0x00,0xd6]
+0x02,0xa5,0x02,0xf4,0x00,0x00,0x00,0xd6
+
+# GFX12: s_buffer_load_b96 s[20:22], s[4:7], vcc_lo offset:0x0 ; encoding: [0x02,0xa5,0x02,0xf4,0x00,0x00,0x00,0xd4]
+0x02,0xa5,0x02,0xf4,0x00,0x00,0x00,0xd4
+
+# GFX12: s_buffer_load_b96 s[20:22], s[8:11], s0 offset:0x0 ; encoding: [0x04,0xa5,0x02,0xf4,0x00,0x00,0x00,0x00]
+0x04,0xa5,0x02,0xf4,0x00,0x00,0x00,0x00
+
+# GFX12: s_buffer_load_b96 s[20:22], s[96:99], s0 offset:0x0 ; encoding: [0x30,0xa5,0x02,0xf4,0x00,0x00,0x00,0x00]
+0x30,0xa5,0x02,0xf4,0x00,0x00,0x00,0x00
+
+# GFX12: s_buffer_load_b96 s[24:26], s[4:7], s0 offset:0x0 ; encoding: [0x02,0xa6,0x02,0xf4,0x00,0x00,0x00,0x00]
+0x02,0xa6,0x02,0xf4,0x00,0x00,0x00,0x00
+
+# GFX12: s_buffer_load_b96 s[96:98], s[4:7], s0 offset:0x0 ; encoding: [0x02,0xb8,0x02,0xf4,0x00,0x00,0x00,0x00]
+0x02,0xb8,0x02,0xf4,0x00,0x00,0x00,0x00
+
+# GFX12: s_buffer_load_b128 s[20:23], s[4:7], 0x0 ; encoding: [0x02,0x45,0x02,0xf4,0x00,0x00,0x00,0xf8]
+0x02,0x45,0x02,0xf4,0x00,0x00,0x00,0xf8
+
+# GFX12: s_buffer_load_b128 s[20:23], s[4:7], m0 offset:0x0 ; encoding: [0x02,0x45,0x02,0xf4,0x00,0x00,0x00,0xfa]
+0x02,0x45,0x02,0xf4,0x00,0x00,0x00,0xfa
+
+# GFX12: s_buffer_load_b128 s[20:23], s[4:7], s0 offset:0x0 ; encoding: [0x02,0x45,0x02,0xf4,0x00,0x00,0x00,0x00]
+0x02,0x45,0x02,0xf4,0x00,0x00,0x00,0x00
+
+# GFX12: s_buffer_load_b128 s[20:23], s[4:7], s101 offset:0x0 ; encoding: [0x02,0x45,0x02,0xf4,0x00,0x00,0x00,0xca]
+0x02,0x45,0x02,0xf4,0x00,0x00,0x00,0xca
+
+# GFX12: s_buffer_load_b128 s[20:23], s[4:7], vcc_hi offset:0x0 ; encoding: [0x02,0x45,0x02,0xf4,0x00,0x00,0x00,0xd6]
+0x02,0x45,0x02,0xf4,0x00,0x00,0x00,0xd6
+
+# GFX12: s_buffer_load_b128 s[20:23], s[4:7], vcc_lo offset:0x0 ; encoding: [0x02,0x45,0x02,0xf4,0x00,0x00,0x00,0xd4]
+0x02,0x45,0x02,0xf4,0x00,0x00,0x00,0xd4
+
+# GFX12: s_buffer_load_b128 s[20:23], s[8:11], s0 offset:0x0 ; encoding: [0x04,0x45,0x02,0xf4,0x00,0x00,0x00,0x00]
+0x04,0x45,0x02,0xf4,0x00,0x00,0x00,0x00
+
+# GFX12: s_buffer_load_b128 s[20:23], s[96:99], s0 offset:0x0 ; encoding: [0x30,0x45,0x02,0xf4,0x00,0x00,0x00,0x00]
+0x30,0x45,0x02,0xf4,0x00,0x00,0x00,0x00
+
+# GFX12: s_buffer_load_b128 s[24:27], s[4:7], s0 offset:0x0 ; encoding: [0x02,0x46,0x02,0xf4,0x00,0x00,0x00,0x00]
+0x02,0x46,0x02,0xf4,0x00,0x00,0x00,0x00
+
+# GFX12: s_buffer_load_b128 s[96:99], s[4:7], s0 offset:0x0 ; encoding: [0x02,0x58,0x02,0xf4,0x00,0x00,0x00,0x00]
+0x02,0x58,0x02,0xf4,0x00,0x00,0x00,0x00
+
+# GFX12: s_buffer_load_b256 s[20:27], s[4:7], 0x0 ; encoding: [0x02,0x65,0x02,0xf4,0x00,0x00,0x00,0xf8]
+0x02,0x65,0x02,0xf4,0x00,0x00,0x00,0xf8
+
+# GFX12: s_buffer_load_b256 s[20:27], s[4:7], m0 offset:0x0 ; encoding: [0x02,0x65,0x02,0xf4,0x00,0x00,0x00,0xfa]
+0x02,0x65,0x02,0xf4,0x00,0x00,0x00,0xfa
+
+# GFX12: s_buffer_load_b256 s[20:27], s[4:7], s0 offset:0x0 ; encoding: [0x02,0x65,0x02,0xf4,0x00,0x00,0x00,0x00]
+0x02,0x65,0x02,0xf4,0x00,0x00,0x00,0x00
+
+# GFX12: s_buffer_load_b256 s[20:27], s[4:7], s101 offset:0x0 ; encoding: [0x02,0x65,0x02,0xf4,0x00,0x00,0x00,0xca]
+0x02,0x65,0x02,0xf4,0x00,0x00,0x00,0xca
+
+# GFX12: s_buffer_load_b256 s[20:27], s[4:7], vcc_hi offset:0x0 ; encoding: [0x02,0x65,0x02,0xf4,0x00,0x00,0x00,0xd6]
+0x02,0x65,0x02,0xf4,0x00,0x00,0x00,0xd6
+
+# GFX12: s_buffer_load_b256 s[20:27], s[4:7], vcc_lo offset:0x0 ; encoding: [0x02,0x65,0x02,0xf4,0x00,0x00,0x00,0xd4]
+0x02,0x65,0x02,0xf4,0x00,0x00,0x00,0xd4
+
+# GFX12: s_buffer_load_b256 s[20:27], s[8:11], s0 offset:0x0 ; encoding: [0x04,0x65,0x02,0xf4,0x00,0x00,0x00,0x00]
+0x04,0x65,0x02,0xf4,0x00,0x00,0x00,0x00
+
+# GFX12: s_buffer_load_b256 s[20:27], s[96:99], s0 offset:0x0 ; encoding: [0x30,0x65,0x02,0xf4,0x00,0x00,0x00,0x00]
+0x30,0x65,0x02,0xf4,0x00,0x00,0x00,0x00
+
+# GFX12: s_buffer_load_b256 s[24:31], s[4:7], s0 offset:0x0 ; encoding: [0x02,0x66,0x02,0xf4,0x00,0x00,0x00,0x00]
+0x02,0x66,0x02,0xf4,0x00,0x00,0x00,0x00
+
+# GFX12: s_buffer_load_b256 s[92:99], s[4:7], s0 offset:0x0 ; encoding: [0x02,0x77,0x02,0xf4,0x00,0x00,0x00,0x00]
+0x02,0x77,0x02,0xf4,0x00,0x00,0x00,0x00
+
+# GFX12: s_dcache_inv                            ; encoding: [0x00,0x20,0x04,0xf4,0x00,0x00,0x00,0x00]
+0x00,0x20,0x04,0xf4,0x00,0x00,0x00,0x00
+
 # GFX12: s_prefetch_inst s[12:13], 0x10, s4, 2   ; encoding: [0x86,0x80,0x04,0xf4,0x10,0x00,0x00,0x08]
 0x86,0x80,0x04,0xf4,0x10,0x00,0x00,0x08
 
@@ -29,3 +422,345 @@
 
 # GFX12: s_buffer_prefetch_data s[20:23], 0x64, null, 7 ; encoding: [0xca,0xe1,0x04,0xf4,0x64,0x00,0x00,0xf8]
 0xca,0xe1,0x04,0xf4,0x64,0x00,0x00,0xf8
+
+# GFX12: s_atc_probe 7, s[4:5], s2 offset:0x0 ; encoding: [0xc2,0x41,0x04,0xf4,0x00,0x00,0x00,0x04]
+0xc2,0x41,0x04,0xf4,0x00,0x00,0x00,0x04
+
+# GFX12: s_atc_probe 7, s[4:5], 0x64 ; encoding: [0xc2,0x41,0x04,0xf4,0x64,0x00,0x00,0xf8]
+0xc2,0x41,0x04,0xf4,0x64,0x00,0x00,0xf8
+
+# GFX12: s_atc_probe 7, s[4:5], s9 offset:0x64 ; encoding: [0xc2,0x41,0x04,0xf4,0x64,0x00,0x00,0x12]
+0xc2,0x41,0x04,0xf4,0x64,0x00,0x00,0x12
+
+# GFX12: s_atc_probe_buffer 7, s[8:11], s2 offset:0x0 ; encoding: [0xc4,0x61,0x04,0xf4,0x00,0x00,0x00,0x04]
+0xc4,0x61,0x04,0xf4,0x00,0x00,0x00,0x04
+
+# GFX12: s_atc_probe_buffer 7, s[8:11], 0x64 ; encoding: [0xc4,0x61,0x04,0xf4,0x64,0x00,0x00,0xf8]
+0xc4,0x61,0x04,0xf4,0x64,0x00,0x00,0xf8
+
+# GFX12: s_atc_probe_buffer 7, s[8:11], s9 offset:0x64 ; encoding: [0xc4,0x61,0x04,0xf4,0x64,0x00,0x00,0x12]
+0xc4,0x61,0x04,0xf4,0x64,0x00,0x00,0x12
+
+# GFX12: s_load_b32 s5, s[4:5], s0 offset:0x0 th:TH_LOAD_NT ; encoding:  [0x42,0x01,0x80,0xf4,0x00,0x00,0x00,0x00]
+0x42,0x01,0x80,0xf4,0x00,0x00,0x00,0x00
+
+# GFX12: s_load_b32 s5, s[4:5], s0 offset:0x0 th:TH_LOAD_HT ; encoding:  [0x42,0x01,0x00,0xf5,0x00,0x00,0x00,0x00]
+0x42,0x01,0x00,0xf5,0x00,0x00,0x00,0x00
+
+# GFX12: s_load_b32 s5, s[4:5], s0 offset:0x0 th:TH_LOAD_LU ; encoding:  [0x42,0x01,0x80,0xf5,0x00,0x00,0x00,0x00]
+0x42,0x01,0x80,0xf5,0x00,0x00,0x00,0x00
+
+# GFX12: s_load_b32 s5, s[4:5], s0 offset:0x0 scope:SCOPE_SE ; encoding:  [0x42,0x01,0x20,0xf4,0x00,0x00,0x00,0x00]
+0x42,0x01,0x20,0xf4,0x00,0x00,0x00,0x00
+
+# GFX12: s_load_b32 s5, s[4:5], s0 offset:0x0 scope:SCOPE_DEV ; encoding:  [0x42,0x01,0x40,0xf4,0x00,0x00,0x00,0x00]
+0x42,0x01,0x40,0xf4,0x00,0x00,0x00,0x00
+
+# GFX12: s_load_b32 s5, s[4:5], s0 offset:0x0 scope:SCOPE_SYS ; encoding:  [0x42,0x01,0x60,0xf4,0x00,0x00,0x00,0x00]
+0x42,0x01,0x60,0xf4,0x00,0x00,0x00,0x00
+
+# GFX12: s_load_b32 s5, s[4:5], s0 offset:0x0 th:TH_LOAD_HT scope:SCOPE_SE ; encoding:  [0x42,0x01,0x20,0xf5,0x00,0x00,0x00,0x00]
+0x42,0x01,0x20,0xf5,0x00,0x00,0x00,0x00
+
+# GFX12: s_load_b32 s5, s[4:5], s0 offset:0x0 th:TH_LOAD_LU scope:SCOPE_SE ; encoding:  [0x42,0x01,0xa0,0xf5,0x00,0x00,0x00,0x00]
+0x42,0x01,0xa0,0xf5,0x00,0x00,0x00,0x00
+
+# GFX12: s_load_b32 s5, s[4:5], s0 offset:0x0 th:TH_LOAD_HT scope:SCOPE_SYS ; encoding:  [0x42,0x01,0x60,0xf5,0x00,0x00,0x00,0x00]
+0x42,0x01,0x60,0xf5,0x00,0x00,0x00,0x00
+
+# GFX12: s_load_b64 s[10:11], s[2:3], m0 offset:0x0 th:TH_LOAD_NT ; encoding:  [0x81,0x22,0x80,0xf4,0x00,0x00,0x00,0xfa]
+0x81,0x22,0x80,0xf4,0x00,0x00,0x00,0xfa
+
+# GFX12: s_load_b64 s[10:11], s[2:3], m0 offset:0x0 th:TH_LOAD_HT ; encoding:  [0x81,0x22,0x00,0xf5,0x00,0x00,0x00,0xfa]
+0x81,0x22,0x00,0xf5,0x00,0x00,0x00,0xfa
+
+# GFX12: s_load_b64 s[10:11], s[2:3], m0 offset:0x0 th:TH_LOAD_LU ; encoding:  [0x81,0x22,0x80,0xf5,0x00,0x00,0x00,0xfa]
+0x81,0x22,0x80,0xf5,0x00,0x00,0x00,0xfa
+
+# GFX12: s_load_b64 s[10:11], s[2:3], m0 offset:0x0 scope:SCOPE_SE ; encoding:  [0x81,0x22,0x20,0xf4,0x00,0x00,0x00,0xfa]
+0x81,0x22,0x20,0xf4,0x00,0x00,0x00,0xfa
+
+# GFX12: s_load_b64 s[10:11], s[2:3], m0 offset:0x0 scope:SCOPE_DEV ; encoding:  [0x81,0x22,0x40,0xf4,0x00,0x00,0x00,0xfa]
+0x81,0x22,0x40,0xf4,0x00,0x00,0x00,0xfa
+
+# GFX12: s_load_b64 s[10:11], s[2:3], m0 offset:0x0 scope:SCOPE_SYS ; encoding:  [0x81,0x22,0x60,0xf4,0x00,0x00,0x00,0xfa]
+0x81,0x22,0x60,0xf4,0x00,0x00,0x00,0xfa
+
+# GFX12: s_load_b64 s[10:11], s[2:3], m0 offset:0x0 th:TH_LOAD_HT scope:SCOPE_SE ; encoding:  [0x81,0x22,0x20,0xf5,0x00,0x00,0x00,0xfa]
+0x81,0x22,0x20,0xf5,0x00,0x00,0x00,0xfa
+
+# GFX12: s_load_b64 s[10:11], s[2:3], m0 offset:0x0 th:TH_LOAD_LU scope:SCOPE_SE ; encoding:  [0x81,0x22,0xa0,0xf5,0x00,0x00,0x00,0xfa]
+0x81,0x22,0xa0,0xf5,0x00,0x00,0x00,0xfa
+
+# GFX12: s_load_b64 s[10:11], s[2:3], m0 offset:0x0 th:TH_LOAD_HT scope:SCOPE_SYS ; encoding:  [0x81,0x22,0x60,0xf5,0x00,0x00,0x00,0xfa]
+0x81,0x22,0x60,0xf5,0x00,0x00,0x00,0xfa
+
+# GFX12: s_load_b96 s[20:22], s[2:3], vcc_lo offset:0x0 th:TH_LOAD_NT ; encoding:  [0x01,0xa5,0x80,0xf4,0x00,0x00,0x00,0xd4]
+0x01,0xa5,0x80,0xf4,0x00,0x00,0x00,0xd4
+
+# GFX12: s_load_b96 s[20:22], s[2:3], vcc_lo offset:0x0 th:TH_LOAD_HT ; encoding:  [0x01,0xa5,0x00,0xf5,0x00,0x00,0x00,0xd4]
+0x01,0xa5,0x00,0xf5,0x00,0x00,0x00,0xd4
+
+# GFX12: s_load_b96 s[20:22], s[2:3], vcc_lo offset:0x0 th:TH_LOAD_LU ; encoding:  [0x01,0xa5,0x80,0xf5,0x00,0x00,0x00,0xd4]
+0x01,0xa5,0x80,0xf5,0x00,0x00,0x00,0xd4
+
+# GFX12: s_load_b96 s[20:22], s[2:3], vcc_lo offset:0x0 scope:SCOPE_SE ; encoding:  [0x01,0xa5,0x20,0xf4,0x00,0x00,0x00,0xd4]
+0x01,0xa5,0x20,0xf4,0x00,0x00,0x00,0xd4
+
+# GFX12: s_load_b96 s[20:22], s[2:3], vcc_lo offset:0x0 scope:SCOPE_DEV ; encoding:  [0x01,0xa5,0x40,0xf4,0x00,0x00,0x00,0xd4]
+0x01,0xa5,0x40,0xf4,0x00,0x00,0x00,0xd4
+
+# GFX12: s_load_b96 s[20:22], s[2:3], vcc_lo offset:0x0 scope:SCOPE_SYS ; encoding:  [0x01,0xa5,0x60,0xf4,0x00,0x00,0x00,0xd4]
+0x01,0xa5,0x60,0xf4,0x00,0x00,0x00,0xd4
+
+# GFX12: s_load_b96 s[20:22], s[2:3], vcc_lo offset:0x0 th:TH_LOAD_HT scope:SCOPE_SE ; encoding:  [0x01,0xa5,0x20,0xf5,0x00,0x00,0x00,0xd4]
+0x01,0xa5,0x20,0xf5,0x00,0x00,0x00,0xd4
+
+# GFX12: s_load_b96 s[20:22], s[2:3], vcc_lo offset:0x0 th:TH_LOAD_LU scope:SCOPE_SE ; encoding:  [0x01,0xa5,0xa0,0xf5,0x00,0x00,0x00,0xd4]
+0x01,0xa5,0xa0,0xf5,0x00,0x00,0x00,0xd4
+
+# GFX12: s_load_b96 s[20:22], s[2:3], vcc_lo offset:0x0 th:TH_LOAD_HT scope:SCOPE_SYS ; encoding:  [0x01,0xa5,0x60,0xf5,0x00,0x00,0x00,0xd4]
+0x01,0xa5,0x60,0xf5,0x00,0x00,0x00,0xd4
+
+# GFX12: s_load_b128 s[20:23], s[2:3], vcc_lo offset:0x0 th:TH_LOAD_NT ; encoding:  [0x01,0x45,0x80,0xf4,0x00,0x00,0x00,0xd4]
+0x01,0x45,0x80,0xf4,0x00,0x00,0x00,0xd4
+
+# GFX12: s_load_b128 s[20:23], s[2:3], vcc_lo offset:0x0 th:TH_LOAD_HT ; encoding:  [0x01,0x45,0x00,0xf5,0x00,0x00,0x00,0xd4]
+0x01,0x45,0x00,0xf5,0x00,0x00,0x00,0xd4
+
+# GFX12: s_load_b128 s[20:23], s[2:3], vcc_lo offset:0x0 th:TH_LOAD_LU ; encoding:  [0x01,0x45,0x80,0xf5,0x00,0x00,0x00,0xd4]
+0x01,0x45,0x80,0xf5,0x00,0x00,0x00,0xd4
+
+# GFX12: s_load_b128 s[20:23], s[2:3], vcc_lo offset:0x0 scope:SCOPE_SE ; encoding:  [0x01,0x45,0x20,0xf4,0x00,0x00,0x00,0xd4]
+0x01,0x45,0x20,0xf4,0x00,0x00,0x00,0xd4
+
+# GFX12: s_load_b128 s[20:23], s[2:3], vcc_lo offset:0x0 scope:SCOPE_DEV ; encoding:  [0x01,0x45,0x40,0xf4,0x00,0x00,0x00,0xd4]
+0x01,0x45,0x40,0xf4,0x00,0x00,0x00,0xd4
+
+# GFX12: s_load_b128 s[20:23], s[2:3], vcc_lo offset:0x0 scope:SCOPE_SYS ; encoding:  [0x01,0x45,0x60,0xf4,0x00,0x00,0x00,0xd4]
+0x01,0x45,0x60,0xf4,0x00,0x00,0x00,0xd4
+
+# GFX12: s_load_b128 s[20:23], s[2:3], vcc_lo offset:0x0 th:TH_LOAD_HT scope:SCOPE_SE ; encoding:  [0x01,0x45,0x20,0xf5,0x00,0x00,0x00,0xd4]
+0x01,0x45,0x20,0xf5,0x00,0x00,0x00,0xd4
+
+# GFX12: s_load_b128 s[20:23], s[2:3], vcc_lo offset:0x0 th:TH_LOAD_LU scope:SCOPE_SE ; encoding:  [0x01,0x45,0xa0,0xf5,0x00,0x00,0x00,0xd4]
+0x01,0x45,0xa0,0xf5,0x00,0x00,0x00,0xd4
+
+# GFX12: s_load_b128 s[20:23], s[2:3], vcc_lo offset:0x0 th:TH_LOAD_HT scope:SCOPE_SYS ; encoding:  [0x01,0x45,0x60,0xf5,0x00,0x00,0x00,0xd4]
+0x01,0x45,0x60,0xf5,0x00,0x00,0x00,0xd4
+
+# GFX12: s_load_b256 s[92:99], s[2:3], s0 offset:0x0 th:TH_LOAD_NT ; encoding:  [0x01,0x77,0x80,0xf4,0x00,0x00,0x00,0x00]
+0x01,0x77,0x80,0xf4,0x00,0x00,0x00,0x00
+
+# GFX12: s_load_b256 s[92:99], s[2:3], s0 offset:0x0 th:TH_LOAD_HT ; encoding:  [0x01,0x77,0x00,0xf5,0x00,0x00,0x00,0x00]
+0x01,0x77,0x00,0xf5,0x00,0x00,0x00,0x00
+
+# GFX12: s_load_b256 s[92:99], s[2:3], s0 offset:0x0 th:TH_LOAD_LU ; encoding:  [0x01,0x77,0x80,0xf5,0x00,0x00,0x00,0x00]
+0x01,0x77,0x80,0xf5,0x00,0x00,0x00,0x00
+
+# GFX12: s_load_b256 s[92:99], s[2:3], s0 offset:0x0 scope:SCOPE_SE ; encoding:  [0x01,0x77,0x20,0xf4,0x00,0x00,0x00,0x00]
+0x01,0x77,0x20,0xf4,0x00,0x00,0x00,0x00
+
+# GFX12: s_load_b256 s[92:99], s[2:3], s0 offset:0x0 scope:SCOPE_DEV ; encoding:  [0x01,0x77,0x40,0xf4,0x00,0x00,0x00,0x00]
+0x01,0x77,0x40,0xf4,0x00,0x00,0x00,0x00
+
+# GFX12: s_load_b256 s[92:99], s[2:3], s0 offset:0x0 scope:SCOPE_SYS ; encoding:  [0x01,0x77,0x60,0xf4,0x00,0x00,0x00,0x00]
+0x01,0x77,0x60,0xf4,0x00,0x00,0x00,0x00
+
+# GFX12: s_load_b256 s[92:99], s[2:3], s0 offset:0x0 th:TH_LOAD_HT scope:SCOPE_SE ; encoding:  [0x01,0x77,0x20,0xf5,0x00,0x00,0x00,0x00]
+0x01,0x77,0x20,0xf5,0x00,0x00,0x00,0x00
+
+# GFX12: s_load_b256 s[92:99], s[2:3], s0 offset:0x0 th:TH_LOAD_LU scope:SCOPE_SE ; encoding:  [0x01,0x77,0xa0,0xf5,0x00,0x00,0x00,0x00]
+0x01,0x77,0xa0,0xf5,0x00,0x00,0x00,0x00
+
+# GFX12: s_load_b256 s[92:99], s[2:3], s0 offset:0x0 th:TH_LOAD_HT scope:SCOPE_SYS ; encoding:  [0x01,0x77,0x60,0xf5,0x00,0x00,0x00,0x00]
+0x01,0x77,0x60,0xf5,0x00,0x00,0x00,0x00
+
+# GFX12: s_load_b512 s[20:35], s[4:5], s0 offset:0x0 th:TH_LOAD_NT ; encoding:  [0x02,0x85,0x80,0xf4,0x00,0x00,0x00,0x00]
+0x02,0x85,0x80,0xf4,0x00,0x00,0x00,0x00
+
+# GFX12: s_load_b512 s[20:35], s[4:5], s0 offset:0x0 th:TH_LOAD_HT ; encoding:  [0x02,0x85,0x00,0xf5,0x00,0x00,0x00,0x00]
+0x02,0x85,0x00,0xf5,0x00,0x00,0x00,0x00
+
+# GFX12: s_load_b512 s[20:35], s[4:5], s0 offset:0x0 th:TH_LOAD_LU ; encoding:  [0x02,0x85,0x80,0xf5,0x00,0x00,0x00,0x00]
+0x02,0x85,0x80,0xf5,0x00,0x00,0x00,0x00
+
+# GFX12: s_load_b512 s[20:35], s[4:5], s0 offset:0x0 scope:SCOPE_SE ; encoding:  [0x02,0x85,0x20,0xf4,0x00,0x00,0x00,0x00]
+0x02,0x85,0x20,0xf4,0x00,0x00,0x00,0x00
+
+# GFX12: s_load_b512 s[20:35], s[4:5], s0 offset:0x0 scope:SCOPE_DEV ; encoding:  [0x02,0x85,0x40,0xf4,0x00,0x00,0x00,0x00]
+0x02,0x85,0x40,0xf4,0x00,0x00,0x00,0x00
+
+# GFX12: s_load_b512 s[20:35], s[4:5], s0 offset:0x0 scope:SCOPE_SYS ; encoding:  [0x02,0x85,0x60,0xf4,0x00,0x00,0x00,0x00]
+0x02,0x85,0x60,0xf4,0x00,0x00,0x00,0x00
+
+# GFX12: s_load_b512 s[20:35], s[4:5], s0 offset:0x0 th:TH_LOAD_HT scope:SCOPE_SE ; encoding:  [0x02,0x85,0x20,0xf5,0x00,0x00,0x00,0x00]
+0x02,0x85,0x20,0xf5,0x00,0x00,0x00,0x00
+
+# GFX12: s_load_b512 s[20:35], s[4:5], s0 offset:0x0 th:TH_LOAD_LU scope:SCOPE_SE ; encoding:  [0x02,0x85,0xa0,0xf5,0x00,0x00,0x00,0x00]
+0x02,0x85,0xa0,0xf5,0x00,0x00,0x00,0x00
+
+# GFX12: s_load_b512 s[20:35], s[4:5], s0 offset:0x0 th:TH_LOAD_HT scope:SCOPE_SYS ; encoding:  [0x02,0x85,0x60,0xf5,0x00,0x00,0x00,0x00]
+0x02,0x85,0x60,0xf5,0x00,0x00,0x00,0x00
+
+# GFX12: s_buffer_load_b32 s5, s[96:99], s0 offset:0x0 th:TH_LOAD_NT ; encoding:  [0x70,0x01,0x82,0xf4,0x00,0x00,0x00,0x00]
+0x70,0x01,0x82,0xf4,0x00,0x00,0x00,0x00
+
+# GFX12: s_buffer_load_b32 s5, s[96:99], s0 offset:0x0 th:TH_LOAD_HT ; encoding:  [0x70,0x01,0x02,0xf5,0x00,0x00,0x00,0x00]
+0x70,0x01,0x02,0xf5,0x00,0x00,0x00,0x00
+
+# GFX12: s_buffer_load_b32 s5, s[96:99], s0 offset:0x0 th:TH_LOAD_LU ; encoding:  [0x70,0x01,0x82,0xf5,0x00,0x00,0x00,0x00]
+0x70,0x01,0x82,0xf5,0x00,0x00,0x00,0x00
+
+# GFX12: s_buffer_load_b32 s5, s[96:99], s0 offset:0x0 scope:SCOPE_SE ; encoding:  [0x70,0x01,0x22,0xf4,0x00,0x00,0x00,0x00]
+0x70,0x01,0x22,0xf4,0x00,0x00,0x00,0x00
+
+# GFX12: s_buffer_load_b32 s5, s[96:99], s0 offset:0x0 scope:SCOPE_DEV ; encoding:  [0x70,0x01,0x42,0xf4,0x00,0x00,0x00,0x00]
+0x70,0x01,0x42,0xf4,0x00,0x00,0x00,0x00
+
+# GFX12: s_buffer_load_b32 s5, s[96:99], s0 offset:0x0 scope:SCOPE_SYS ; encoding:  [0x70,0x01,0x62,0xf4,0x00,0x00,0x00,0x00]
+0x70,0x01,0x62,0xf4,0x00,0x00,0x00,0x00
+
+# GFX12: s_buffer_load_b32 s5, s[96:99], s0 offset:0x0 th:TH_LOAD_HT scope:SCOPE_SE ; encoding:  [0x70,0x01,0x22,0xf5,0x00,0x00,0x00,0x00]
+0x70,0x01,0x22,0xf5,0x00,0x00,0x00,0x00
+
+# GFX12: s_buffer_load_b32 s5, s[96:99], s0 offset:0x0 th:TH_LOAD_LU scope:SCOPE_SE ; encoding:  [0x70,0x01,0xa2,0xf5,0x00,0x00,0x00,0x00]
+0x70,0x01,0xa2,0xf5,0x00,0x00,0x00,0x00
+
+# GFX12: s_buffer_load_b32 s5, s[96:99], s0 offset:0x0 th:TH_LOAD_HT scope:SCOPE_SYS ; encoding:  [0x70,0x01,0x62,0xf5,0x00,0x00,0x00,0x00]
+0x70,0x01,0x62,0xf5,0x00,0x00,0x00,0x00
+
+# GFX12: s_buffer_load_b64 s[10:11], s[4:7], s0 offset:0x0 th:TH_LOAD_NT ; encoding:  [0x82,0x22,0x82,0xf4,0x00,0x00,0x00,0x00]
+0x82,0x22,0x82,0xf4,0x00,0x00,0x00,0x00
+
+# GFX12: s_buffer_load_b64 s[10:11], s[4:7], s0 offset:0x0 th:TH_LOAD_HT ; encoding:  [0x82,0x22,0x02,0xf5,0x00,0x00,0x00,0x00]
+0x82,0x22,0x02,0xf5,0x00,0x00,0x00,0x00
+
+# GFX12: s_buffer_load_b64 s[10:11], s[4:7], s0 offset:0x0 th:TH_LOAD_LU ; encoding:  [0x82,0x22,0x82,0xf5,0x00,0x00,0x00,0x00]
+0x82,0x22,0x82,0xf5,0x00,0x00,0x00,0x00
+
+# GFX12: s_buffer_load_b64 s[10:11], s[4:7], s0 offset:0x0 scope:SCOPE_SE ; encoding:  [0x82,0x22,0x22,0xf4,0x00,0x00,0x00,0x00]
+0x82,0x22,0x22,0xf4,0x00,0x00,0x00,0x00
+
+# GFX12: s_buffer_load_b64 s[10:11], s[4:7], s0 offset:0x0 scope:SCOPE_DEV ; encoding:  [0x82,0x22,0x42,0xf4,0x00,0x00,0x00,0x00]
+0x82,0x22,0x42,0xf4,0x00,0x00,0x00,0x00
+
+# GFX12: s_buffer_load_b64 s[10:11], s[4:7], s0 offset:0x0 scope:SCOPE_SYS ; encoding:  [0x82,0x22,0x62,0xf4,0x00,0x00,0x00,0x00]
+0x82,0x22,0x62,0xf4,0x00,0x00,0x00,0x00
+
+# GFX12: s_buffer_load_b64 s[10:11], s[4:7], s0 offset:0x0 th:TH_LOAD_HT scope:SCOPE_SE ; encoding:  [0x82,0x22,0x22,0xf5,0x00,0x00,0x00,0x00]
+0x82,0x22,0x22,0xf5,0x00,0x00,0x00,0x00
+
+# GFX12: s_buffer_load_b64 s[10:11], s[4:7], s0 offset:0x0 th:TH_LOAD_LU scope:SCOPE_SE ; encoding:  [0x82,0x22,0xa2,0xf5,0x00,0x00,0x00,0x00]
+0x82,0x22,0xa2,0xf5,0x00,0x00,0x00,0x00
+
+# GFX12: s_buffer_load_b64 s[10:11], s[4:7], s0 offset:0x0 th:TH_LOAD_HT scope:SCOPE_SYS ; encoding:  [0x82,0x22,0x62,0xf5,0x00,0x00,0x00,0x00]
+0x82,0x22,0x62,0xf5,0x00,0x00,0x00,0x00
+
+# GFX12: s_buffer_load_b96 s[20:22], s[4:7], vcc_lo offset:0x0 th:TH_LOAD_NT ; encoding:  [0x02,0xa5,0x82,0xf4,0x00,0x00,0x00,0xd4]
+0x02,0xa5,0x82,0xf4,0x00,0x00,0x00,0xd4
+
+# GFX12: s_buffer_load_b96 s[20:22], s[4:7], vcc_lo offset:0x0 th:TH_LOAD_HT ; encoding:  [0x02,0xa5,0x02,0xf5,0x00,0x00,0x00,0xd4]
+0x02,0xa5,0x02,0xf5,0x00,0x00,0x00,0xd4
+
+# GFX12: s_buffer_load_b96 s[20:22], s[4:7], vcc_lo offset:0x0 th:TH_LOAD_LU ; encoding:  [0x02,0xa5,0x82,0xf5,0x00,0x00,0x00,0xd4]
+0x02,0xa5,0x82,0xf5,0x00,0x00,0x00,0xd4
+
+# GFX12: s_buffer_load_b96 s[20:22], s[4:7], vcc_lo offset:0x0 scope:SCOPE_SE ; encoding:  [0x02,0xa5,0x22,0xf4,0x00,0x00,0x00,0xd4]
+0x02,0xa5,0x22,0xf4,0x00,0x00,0x00,0xd4
+
+# GFX12: s_buffer_load_b96 s[20:22], s[4:7], vcc_lo offset:0x0 scope:SCOPE_DEV ; encoding:  [0x02,0xa5,0x42,0xf4,0x00,0x00,0x00,0xd4]
+0x02,0xa5,0x42,0xf4,0x00,0x00,0x00,0xd4
+
+# GFX12: s_buffer_load_b96 s[20:22], s[4:7], vcc_lo offset:0x0 scope:SCOPE_SYS ; encoding:  [0x02,0xa5,0x62,0xf4,0x00,0x00,0x00,0xd4]
+0x02,0xa5,0x62,0xf4,0x00,0x00,0x00,0xd4
+
+# GFX12: s_buffer_load_b96 s[20:22], s[4:7], vcc_lo offset:0x0 th:TH_LOAD_HT scope:SCOPE_SE ; encoding:  [0x02,0xa5,0x22,0xf5,0x00,0x00,0x00,0xd4]
+0x02,0xa5,0x22,0xf5,0x00,0x00,0x00,0xd4
+
+# GFX12: s_buffer_load_b96 s[20:22], s[4:7], vcc_lo offset:0x0 th:TH_LOAD_LU scope:SCOPE_SE ; encoding:  [0x02,0xa5,0xa2,0xf5,0x00,0x00,0x00,0xd4]
+0x02,0xa5,0xa2,0xf5,0x00,0x00,0x00,0xd4
+
+# GFX12: s_buffer_load_b96 s[20:22], s[4:7], vcc_lo offset:0x0 th:TH_LOAD_HT scope:SCOPE_SYS ; encoding:  [0x02,0xa5,0x62,0xf5,0x00,0x00,0x00,0xd4]
+0x02,0xa5,0x62,0xf5,0x00,0x00,0x00,0xd4
+
+# GFX12: s_buffer_load_b128 s[20:23], s[4:7], vcc_lo offset:0x0 th:TH_LOAD_NT ; encoding:  [0x02,0x45,0x82,0xf4,0x00,0x00,0x00,0xd4]
+0x02,0x45,0x82,0xf4,0x00,0x00,0x00,0xd4
+
+# GFX12: s_buffer_load_b128 s[20:23], s[4:7], vcc_lo offset:0x0 th:TH_LOAD_HT ; encoding:  [0x02,0x45,0x02,0xf5,0x00,0x00,0x00,0xd4]
+0x02,0x45,0x02,0xf5,0x00,0x00,0x00,0xd4
+
+# GFX12: s_buffer_load_b128 s[20:23], s[4:7], vcc_lo offset:0x0 th:TH_LOAD_LU ; encoding:  [0x02,0x45,0x82,0xf5,0x00,0x00,0x00,0xd4]
+0x02,0x45,0x82,0xf5,0x00,0x00,0x00,0xd4
+
+# GFX12: s_buffer_load_b128 s[20:23], s[4:7], vcc_lo offset:0x0 scope:SCOPE_SE ; encoding:  [0x02,0x45,0x22,0xf4,0x00,0x00,0x00,0xd4]
+0x02,0x45,0x22,0xf4,0x00,0x00,0x00,0xd4
+
+# GFX12: s_buffer_load_b128 s[20:23], s[4:7], vcc_lo offset:0x0 scope:SCOPE_DEV ; encoding:  [0x02,0x45,0x42,0xf4,0x00,0x00,0x00,0xd4]
+0x02,0x45,0x42,0xf4,0x00,0x00,0x00,0xd4
+
+# GFX12: s_buffer_load_b128 s[20:23], s[4:7], vcc_lo offset:0x0 scope:SCOPE_SYS ; encoding:  [0x02,0x45,0x62,0xf4,0x00,0x00,0x00,0xd4]
+0x02,0x45,0x62,0xf4,0x00,0x00,0x00,0xd4
+
+# GFX12: s_buffer_load_b128 s[20:23], s[4:7], vcc_lo offset:0x0 th:TH_LOAD_HT scope:SCOPE_SE ; encoding:  [0x02,0x45,0x22,0xf5,0x00,0x00,0x00,0xd4]
+0x02,0x45,0x22,0xf5,0x00,0x00,0x00,0xd4
+
+# GFX12: s_buffer_load_b128 s[20:23], s[4:7], vcc_lo offset:0x0 th:TH_LOAD_LU scope:SCOPE_SE ; encoding:  [0x02,0x45,0xa2,0xf5,0x00,0x00,0x00,0xd4]
+0x02,0x45,0xa2,0xf5,0x00,0x00,0x00,0xd4
+
+# GFX12: s_buffer_load_b128 s[20:23], s[4:7], vcc_lo offset:0x0 th:TH_LOAD_HT scope:SCOPE_SYS ; encoding:  [0x02,0x45,0x62,0xf5,0x00,0x00,0x00,0xd4]
+0x02,0x45,0x62,0xf5,0x00,0x00,0x00,0xd4
+
+# GFX12: s_buffer_load_b256 s[20:27], s[8:11], s0 offset:0x0 th:TH_LOAD_NT ; encoding:  [0x04,0x65,0x82,0xf4,0x00,0x00,0x00,0x00]
+0x04,0x65,0x82,0xf4,0x00,0x00,0x00,0x00
+
+# GFX12: s_buffer_load_b256 s[20:27], s[8:11], s0 offset:0x0 th:TH_LOAD_HT ; encoding:  [0x04,0x65,0x02,0xf5,0x00,0x00,0x00,0x00]
+0x04,0x65,0x02,0xf5,0x00,0x00,0x00,0x00
+
+# GFX12: s_buffer_load_b256 s[20:27], s[8:11], s0 offset:0x0 th:TH_LOAD_LU ; encoding:  [0x04,0x65,0x82,0xf5,0x00,0x00,0x00,0x00]
+0x04,0x65,0x82,0xf5,0x00,0x00,0x00,0x00
+
+# GFX12: s_buffer_load_b256 s[20:27], s[8:11], s0 offset:0x0 scope:SCOPE_SE ; encoding:  [0x04,0x65,0x22,0xf4,0x00,0x00,0x00,0x00]
+0x04,0x65,0x22,0xf4,0x00,0x00,0x00,0x00
+
+# GFX12: s_buffer_load_b256 s[20:27], s[8:11], s0 offset:0x0 scope:SCOPE_DEV ; encoding:  [0x04,0x65,0x42,0xf4,0x00,0x00,0x00,0x00]
+0x04,0x65,0x42,0xf4,0x00,0x00,0x00,0x00
+
+# GFX12: s_buffer_load_b256 s[20:27], s[8:11], s0 offset:0x0 scope:SCOPE_SYS ; encoding:  [0x04,0x65,0x62,0xf4,0x00,0x00,0x00,0x00]
+0x04,0x65,0x62,0xf4,0x00,0x00,0x00,0x00
+
+# GFX12: s_buffer_load_b256 s[20:27], s[8:11], s0 offset:0x0 th:TH_LOAD_HT scope:SCOPE_SE ; encoding:  [0x04,0x65,0x22,0xf5,0x00,0x00,0x00,0x00]
+0x04,0x65,0x22,0xf5,0x00,0x00,0x00,0x00
+
+# GFX12: s_buffer_load_b256 s[20:27], s[8:11], s0 offset:0x0 th:TH_LOAD_LU scope:SCOPE_SE ; encoding:  [0x04,0x65,0xa2,0xf5,0x00,0x00,0x00,0x00]
+0x04,0x65,0xa2,0xf5,0x00,0x00,0x00,0x00
+
+# GFX12: s_buffer_load_b256 s[20:27], s[8:11], s0 offset:0x0 th:TH_LOAD_HT scope:SCOPE_SYS ; encoding:  [0x04,0x65,0x62,0xf5,0x00,0x00,0x00,0x00]
+0x04,0x65,0x62,0xf5,0x00,0x00,0x00,0x00
+
+# GFX12: s_buffer_load_b512 s[20:35], s[96:99], s0 offset:0x0 th:TH_LOAD_NT ; encoding:  [0x30,0x85,0x82,0xf4,0x00,0x00,0x00,0x00]
+0x30,0x85,0x82,0xf4,0x00,0x00,0x00,0x00
+
+# GFX12: s_buffer_load_b512 s[20:35], s[96:99], s0 offset:0x0 th:TH_LOAD_HT ; encoding:  [0x30,0x85,0x02,0xf5,0x00,0x00,0x00,0x00]
+0x30,0x85,0x02,0xf5,0x00,0x00,0x00,0x00
+
+# GFX12: s_buffer_load_b512 s[20:35], s[96:99], s0 offset:0x0 th:TH_LOAD_LU ; encoding:  [0x30,0x85,0x82,0xf5,0x00,0x00,0x00,0x00]
+0x30,0x85,0x82,0xf5,0x00,0x00,0x00,0x00
+
+# GFX12: s_buffer_load_b512 s[20:35], s[96:99], s0 offset:0x0 scope:SCOPE_SE ; encoding:  [0x30,0x85,0x22,0xf4,0x00,0x00,0x00,0x00]
+0x30,0x85,0x22,0xf4,0x00,0x00,0x00,0x00
+
+# GFX12: s_buffer_load_b512 s[20:35], s[96:99], s0 offset:0x0 scope:SCOPE_DEV ; encoding:  [0x30,0x85,0x42,0xf4,0x00,0x00,0x00,0x00]
+0x30,0x85,0x42,0xf4,0x00,0x00,0x00,0x00
+
+# GFX12: s_buffer_load_b512 s[20:35], s[96:99], s0 offset:0x0 scope:SCOPE_SYS ; encoding:  [0x30,0x85,0x62,0xf4,0x00,0x00,0x00,0x00]
+0x30,0x85,0x62,0xf4,0x00,0x00,0x00,0x00
+
+# GFX12: s_buffer_load_b512 s[20:35], s[96:99], s0 offset:0x0 th:TH_LOAD_HT scope:SCOPE_SE ; encoding:  [0x30,0x85,0x22,0xf5,0x00,0x00,0x00,0x00]
+0x30,0x85,0x22,0xf5,0x00,0x00,0x00,0x00
+
+# GFX12: s_buffer_load_b512 s[20:35], s[96:99], s0 offset:0x0 th:TH_LOAD_LU scope:SCOPE_SE ; encoding:  [0x30,0x85,0xa2,0xf5,0x00,0x00,0x00,0x00]
+0x30,0x85,0xa2,0xf5,0x00,0x00,0x00,0x00
+
+# GFX12: s_buffer_load_b512 s[20:35], s[96:99], s0 offset:0x0 th:TH_LOAD_HT scope:SCOPE_SYS ; encoding:  [0x30,0x85,0x62,0xf5,0x00,0x00,0x00,0x00]
+0x30,0x85,0x62,0xf5,0x00,0x00,0x00,0x00



More information about the llvm-commits mailing list