[llvm] 655c31d - [AMDGPU] Change scale_src2 encoding from vgpr0 to literal 0 (#178404)

via llvm-commits llvm-commits at lists.llvm.org
Thu Jan 29 01:21:58 PST 2026


Author: Jay Foad
Date: 2026-01-29T09:21:53Z
New Revision: 655c31dcab304cfced38dd15d2c15a4bea4145e4

URL: https://github.com/llvm/llvm-project/commit/655c31dcab304cfced38dd15d2c15a4bea4145e4
DIFF: https://github.com/llvm/llvm-project/commit/655c31dcab304cfced38dd15d2c15a4bea4145e4.diff

LOG: [AMDGPU] Change scale_src2 encoding from vgpr0 to literal 0 (#178404)

This changes the fix from #167777 to use the encoding for literal 0
instead of the encoding for vgpr0, to match new SP3 behaviour and for
consistency with all other unused VALU source operands since #175753.

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp
    llvm/lib/Target/AMDGPU/VOP3PInstructions.td
    llvm/test/MC/AMDGPU/gfx1250_asm_wmma_w32.s
    llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_wmma_w32.txt

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp
index 1420472b1a9b0..029d2eab0a9df 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp
@@ -102,9 +102,6 @@ class AMDGPUMCCodeEmitter : public MCCodeEmitter {
 
   APInt postEncodeVOPCX(const MCInst &MI, APInt EncodedValue,
                         const MCSubtargetInfo &STI) const;
-
-  APInt postEncodeLdScale(const MCInst &MI, APInt EncodedValue,
-                          const MCSubtargetInfo &STI) const;
 };
 
 } // end anonymous namespace
@@ -767,14 +764,4 @@ APInt AMDGPUMCCodeEmitter::postEncodeVOPCX(const MCInst &MI, APInt EncodedValue,
   return postEncodeVOP3<true, true, false>(MI, EncodedValue, STI);
 }
 
-APInt AMDGPUMCCodeEmitter::postEncodeLdScale(const MCInst &MI,
-                                             APInt EncodedValue,
-                                             const MCSubtargetInfo &STI) const {
-  // Set unused scale_src2 field to VGPR0 to avoid hardware conservatively
-  // assuming the instruction reads SGPRs.
-  constexpr uint64_t Vgpr0 = 0x100;
-  EncodedValue |= Vgpr0 << 50;
-  return EncodedValue;
-}
-
 #include "AMDGPUGenMCCodeEmitter.inc"

diff  --git a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
index ca7dfa734e94d..9fb28ef97ec21 100644
--- a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
@@ -2291,7 +2291,7 @@ multiclass VOP3PX2_Real_ScaledWMMA_F4<string Gen, bits<8> op, bits<8> LdScaleOp,
   if !eq(Gen, "gfx1250") then {
     def _gfx1250 : VOP3P_Real_Gen<PS, GFX1250Gen, PS.Mnemonic>,
                    VOP3PX2e <op, LdScaleOp, WMMAP> {
-      let PostEncoderMethod = "postEncodeLdScale";
+      let PostEncoderMethod = "postEncodeVOP3<true, true, false>";
     }
   }
 }
@@ -2305,7 +2305,7 @@ multiclass VOP3PX2_Real_ScaledWMMA<string Gen, bits<8> op, bits<8> LdScaleOp, VO
                    VOP3PX2e <op, LdScaleOp, WMMAP>,
                    MFMA_F8F6F4_WithSizeTable_Helper<PS, psName # "_f8_f8_w32_" # Gen> {
       let AsmString = asmName # PS.AsmOperands;
-      let PostEncoderMethod = "postEncodeLdScale";
+      let PostEncoderMethod = "postEncodeVOP3<true, true, false>";
     }
   }
 }
@@ -2504,7 +2504,7 @@ defm V_FMA_MIX_F32_BF16 : VOP3P_Realtriple<GFX1250Gen, 0x3d>;
 defm V_FMA_MIXLO_BF16   : VOP3P_Realtriple<GFX1250Gen, 0x3e>;
 defm V_FMA_MIXHI_BF16   : VOP3P_Realtriple<GFX1250Gen, 0x3f>;
 
-let PostEncoderMethod = "postEncodeLdScale" in {
+let PostEncoderMethod = "postEncodeVOP3<true, true, false>" in {
   defm V_WMMA_LD_SCALE_PAIRED_B32   : VOP3P_Real_gfx1250<0x35>;
   defm V_WMMA_LD_SCALE16_PAIRED_B64 : VOP3P_Real_gfx1250<0x3a>;
 }

diff  --git a/llvm/test/MC/AMDGPU/gfx1250_asm_wmma_w32.s b/llvm/test/MC/AMDGPU/gfx1250_asm_wmma_w32.s
index a49106095d864..5cd254ec6bb6b 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_wmma_w32.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_wmma_w32.s
@@ -1000,442 +1000,442 @@ v_wmma_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47] neg_hi:[0,0,1]
 
 v_wmma_ld_scale_paired_b32 v1, v2
 // GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// GFX1250: v_wmma_ld_scale_paired_b32 v1, v2       ; encoding: [0x00,0x00,0x35,0xcc,0x01,0x05,0x02,0x04]
+// GFX1250: v_wmma_ld_scale_paired_b32 v1, v2       ; encoding: [0x00,0x00,0x35,0xcc,0x01,0x05,0x02,0x02]
 // WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32
 
 v_wmma_ld_scale_paired_b32 s1, s2
 // GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// GFX1250: v_wmma_ld_scale_paired_b32 s1, s2       ; encoding: [0x00,0x00,0x35,0xcc,0x01,0x04,0x00,0x04]
+// GFX1250: v_wmma_ld_scale_paired_b32 s1, s2       ; encoding: [0x00,0x00,0x35,0xcc,0x01,0x04,0x00,0x02]
 // WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32
 
 v_wmma_ld_scale_paired_b32 2, -4
 // GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// GFX1250: v_wmma_ld_scale_paired_b32 2, -4        ; encoding: [0x00,0x00,0x35,0xcc,0x82,0x88,0x01,0x04]
+// GFX1250: v_wmma_ld_scale_paired_b32 2, -4        ; encoding: [0x00,0x00,0x35,0xcc,0x82,0x88,0x01,0x02]
 // WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32
 
 v_wmma_ld_scale_paired_b32 v1, v2 matrix_a_scale:MATRIX_SCALE_ROW0 matrix_b_scale:MATRIX_SCALE_ROW0
 // GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// GFX1250: v_wmma_ld_scale_paired_b32 v1, v2       ; encoding: [0x00,0x00,0x35,0xcc,0x01,0x05,0x02,0x04]
+// GFX1250: v_wmma_ld_scale_paired_b32 v1, v2       ; encoding: [0x00,0x00,0x35,0xcc,0x01,0x05,0x02,0x02]
 // WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32
 
 v_wmma_ld_scale_paired_b32 s0, s0 matrix_a_scale:MATRIX_SCALE_ROW1
 // GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// GFX1250: v_wmma_ld_scale_paired_b32 s0, s0 matrix_a_scale:MATRIX_SCALE_ROW1 ; encoding: [0x00,0x08,0x35,0xcc,0x00,0x00,0x00,0x04]
+// GFX1250: v_wmma_ld_scale_paired_b32 s0, s0 matrix_a_scale:MATRIX_SCALE_ROW1 ; encoding: [0x00,0x08,0x35,0xcc,0x00,0x00,0x00,0x02]
 // WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32
 
 v_wmma_ld_scale_paired_b32 s0, s0 matrix_a_reuse
 // GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// GFX1250: v_wmma_ld_scale_paired_b32 s0, s0 matrix_a_reuse ; encoding: [0x00,0x20,0x35,0xcc,0x00,0x00,0x00,0x04]
+// GFX1250: v_wmma_ld_scale_paired_b32 s0, s0 matrix_a_reuse ; encoding: [0x00,0x20,0x35,0xcc,0x00,0x00,0x00,0x02]
 // WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32
 
 v_wmma_ld_scale_paired_b32 s0, s0 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_a_reuse
 // GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// GFX1250: v_wmma_ld_scale_paired_b32 s0, s0 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_a_reuse ; encoding: [0x00,0x28,0x35,0xcc,0x00,0x00,0x00,0x04]
+// GFX1250: v_wmma_ld_scale_paired_b32 s0, s0 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_a_reuse ; encoding: [0x00,0x28,0x35,0xcc,0x00,0x00,0x00,0x02]
 // WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32
 
 v_wmma_ld_scale_paired_b32 s0, s0 matrix_b_scale:MATRIX_SCALE_ROW1
 // GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// GFX1250: v_wmma_ld_scale_paired_b32 s0, s0 matrix_b_scale:MATRIX_SCALE_ROW1 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x0c]
+// GFX1250: v_wmma_ld_scale_paired_b32 s0, s0 matrix_b_scale:MATRIX_SCALE_ROW1 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x0a]
 // WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32
 
 v_wmma_ld_scale_paired_b32 s0, s0 matrix_b_reuse
 // GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// GFX1250: v_wmma_ld_scale_paired_b32 s0, s0 matrix_b_reuse ; encoding: [0x00,0x40,0x35,0xcc,0x00,0x00,0x00,0x04]
+// GFX1250: v_wmma_ld_scale_paired_b32 s0, s0 matrix_b_reuse ; encoding: [0x00,0x40,0x35,0xcc,0x00,0x00,0x00,0x02]
 // WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32
 
 v_wmma_ld_scale_paired_b32 s0, s0 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_b_reuse
 // GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// GFX1250: v_wmma_ld_scale_paired_b32 s0, s0 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_b_reuse ; encoding: [0x00,0x40,0x35,0xcc,0x00,0x00,0x00,0x0c]
+// GFX1250: v_wmma_ld_scale_paired_b32 s0, s0 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_b_reuse ; encoding: [0x00,0x40,0x35,0xcc,0x00,0x00,0x00,0x0a]
 // WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32
 
 v_wmma_ld_scale_paired_b32 v1, v2 matrix_a_scale:MATRIX_SCALE_ROW0 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_scale_fmt:MATRIX_SCALE_FMT_E8 matrix_b_scale_fmt:MATRIX_SCALE_FMT_E8
 // GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// GFX1250: v_wmma_ld_scale_paired_b32 v1, v2 matrix_b_scale:MATRIX_SCALE_ROW1 ; encoding: [0x00,0x00,0x35,0xcc,0x01,0x05,0x02,0x0c]
+// GFX1250: v_wmma_ld_scale_paired_b32 v1, v2 matrix_b_scale:MATRIX_SCALE_ROW1 ; encoding: [0x00,0x00,0x35,0xcc,0x01,0x05,0x02,0x0a]
 // WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32
 
 v_wmma_ld_scale_paired_b32 v1, v2 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_scale_fmt:MATRIX_SCALE_FMT_E5M3
 // GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// GFX1250: v_wmma_ld_scale_paired_b32 v1, v2 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_scale_fmt:MATRIX_SCALE_FMT_E5M3 ; encoding: [0x00,0x00,0x35,0xcc,0x01,0x05,0x02,0x2c]
+// GFX1250: v_wmma_ld_scale_paired_b32 v1, v2 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_scale_fmt:MATRIX_SCALE_FMT_E5M3 ; encoding: [0x00,0x00,0x35,0xcc,0x01,0x05,0x02,0x2a]
 // WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32
 
 v_wmma_ld_scale_paired_b32 v1, v2 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_scale_fmt:MATRIX_SCALE_FMT_E4M3
 // GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// GFX1250: v_wmma_ld_scale_paired_b32 v1, v2 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_scale_fmt:MATRIX_SCALE_FMT_E4M3 ; encoding: [0x00,0x00,0x35,0xcc,0x01,0x05,0x02,0x4c]
+// GFX1250: v_wmma_ld_scale_paired_b32 v1, v2 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_scale_fmt:MATRIX_SCALE_FMT_E4M3 ; encoding: [0x00,0x00,0x35,0xcc,0x01,0x05,0x02,0x4a]
 // WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32
 
 v_wmma_ld_scale_paired_b32 v1, v2 matrix_b_scale_fmt:MATRIX_SCALE_FMT_E8
 // GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// GFX1250: v_wmma_ld_scale_paired_b32 v1, v2       ; encoding: [0x00,0x00,0x35,0xcc,0x01,0x05,0x02,0x04]
+// GFX1250: v_wmma_ld_scale_paired_b32 v1, v2       ; encoding: [0x00,0x00,0x35,0xcc,0x01,0x05,0x02,0x02]
 // WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32
 
 v_wmma_ld_scale_paired_b32 v1, v2 matrix_b_scale_fmt:MATRIX_SCALE_FMT_E5M3
 // GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// GFX1250: v_wmma_ld_scale_paired_b32 v1, v2 matrix_b_scale_fmt:MATRIX_SCALE_FMT_E5M3 ; encoding: [0x00,0x01,0x35,0xcc,0x01,0x05,0x02,0x04]
+// GFX1250: v_wmma_ld_scale_paired_b32 v1, v2 matrix_b_scale_fmt:MATRIX_SCALE_FMT_E5M3 ; encoding: [0x00,0x01,0x35,0xcc,0x01,0x05,0x02,0x02]
 // WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32
 
 v_wmma_ld_scale_paired_b32 v1, v2 matrix_b_scale_fmt:MATRIX_SCALE_FMT_E4M3
 // GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// GFX1250: v_wmma_ld_scale_paired_b32 v1, v2 matrix_b_scale_fmt:MATRIX_SCALE_FMT_E4M3 ; encoding: [0x00,0x02,0x35,0xcc,0x01,0x05,0x02,0x04]
+// GFX1250: v_wmma_ld_scale_paired_b32 v1, v2 matrix_b_scale_fmt:MATRIX_SCALE_FMT_E4M3 ; encoding: [0x00,0x02,0x35,0xcc,0x01,0x05,0x02,0x02]
 // WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32
 
 v_wmma_ld_scale_paired_b32 v1, v2 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_scale_fmt:MATRIX_SCALE_FMT_E5M3 matrix_b_scale_fmt:MATRIX_SCALE_FMT_E4M3 matrix_a_reuse matrix_b_reuse
 // GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// GFX1250: v_wmma_ld_scale_paired_b32 v1, v2 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_scale_fmt:MATRIX_SCALE_FMT_E5M3 matrix_b_scale_fmt:MATRIX_SCALE_FMT_E4M3 matrix_a_reuse matrix_b_reuse ; encoding: [0x00,0x6a,0x35,0xcc,0x01,0x05,0x02,0x2c]
+// GFX1250: v_wmma_ld_scale_paired_b32 v1, v2 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_scale_fmt:MATRIX_SCALE_FMT_E5M3 matrix_b_scale_fmt:MATRIX_SCALE_FMT_E4M3 matrix_a_reuse matrix_b_reuse ; encoding: [0x00,0x6a,0x35,0xcc,0x01,0x05,0x02,0x2a]
 // WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32
 
 v_wmma_ld_scale16_paired_b64 v[2:3], v[4:5]
 // GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// GFX1250: v_wmma_ld_scale16_paired_b64 v[2:3], v[4:5] ; encoding: [0x00,0x00,0x3a,0xcc,0x02,0x09,0x02,0x04]
+// GFX1250: v_wmma_ld_scale16_paired_b64 v[2:3], v[4:5] ; encoding: [0x00,0x00,0x3a,0xcc,0x02,0x09,0x02,0x02]
 // WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32
 
 v_wmma_ld_scale16_paired_b64 s[2:3], s[4:5]
 // GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// GFX1250: v_wmma_ld_scale16_paired_b64 s[2:3], s[4:5] ; encoding: [0x00,0x00,0x3a,0xcc,0x02,0x08,0x00,0x04]
+// GFX1250: v_wmma_ld_scale16_paired_b64 s[2:3], s[4:5] ; encoding: [0x00,0x00,0x3a,0xcc,0x02,0x08,0x00,0x02]
 // WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32
 
 v_wmma_ld_scale16_paired_b64 2, -4
 // GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// GFX1250: v_wmma_ld_scale16_paired_b64 2, -4      ; encoding: [0x00,0x00,0x3a,0xcc,0x82,0x88,0x01,0x04]
+// GFX1250: v_wmma_ld_scale16_paired_b64 2, -4      ; encoding: [0x00,0x00,0x3a,0xcc,0x82,0x88,0x01,0x02]
 // WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32
 
 v_wmma_ld_scale16_paired_b64 v[2:3], v[4:5] matrix_a_scale:MATRIX_SCALE_ROW0 matrix_b_scale:MATRIX_SCALE_ROW0
 // GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// GFX1250: v_wmma_ld_scale16_paired_b64 v[2:3], v[4:5] ; encoding: [0x00,0x00,0x3a,0xcc,0x02,0x09,0x02,0x04]
+// GFX1250: v_wmma_ld_scale16_paired_b64 v[2:3], v[4:5] ; encoding: [0x00,0x00,0x3a,0xcc,0x02,0x09,0x02,0x02]
 // WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32
 
 v_wmma_ld_scale16_paired_b64 s[0:1], s[0:1] matrix_a_scale:MATRIX_SCALE_ROW1
 // GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// GFX1250: v_wmma_ld_scale16_paired_b64 s[0:1], s[0:1] matrix_a_scale:MATRIX_SCALE_ROW1 ; encoding: [0x00,0x08,0x3a,0xcc,0x00,0x00,0x00,0x04]
+// GFX1250: v_wmma_ld_scale16_paired_b64 s[0:1], s[0:1] matrix_a_scale:MATRIX_SCALE_ROW1 ; encoding: [0x00,0x08,0x3a,0xcc,0x00,0x00,0x00,0x02]
 // WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32
 
 v_wmma_ld_scale16_paired_b64 s[0:1], s[0:1] matrix_a_reuse
 // GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// GFX1250: v_wmma_ld_scale16_paired_b64 s[0:1], s[0:1] matrix_a_reuse ; encoding: [0x00,0x20,0x3a,0xcc,0x00,0x00,0x00,0x04]
+// GFX1250: v_wmma_ld_scale16_paired_b64 s[0:1], s[0:1] matrix_a_reuse ; encoding: [0x00,0x20,0x3a,0xcc,0x00,0x00,0x00,0x02]
 // WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32
 
 v_wmma_ld_scale16_paired_b64 s[0:1], s[0:1] matrix_a_scale:MATRIX_SCALE_ROW1 matrix_a_reuse
 // GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// GFX1250: v_wmma_ld_scale16_paired_b64 s[0:1], s[0:1] matrix_a_scale:MATRIX_SCALE_ROW1 matrix_a_reuse ; encoding: [0x00,0x28,0x3a,0xcc,0x00,0x00,0x00,0x04]
+// GFX1250: v_wmma_ld_scale16_paired_b64 s[0:1], s[0:1] matrix_a_scale:MATRIX_SCALE_ROW1 matrix_a_reuse ; encoding: [0x00,0x28,0x3a,0xcc,0x00,0x00,0x00,0x02]
 // WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32
 
 v_wmma_ld_scale16_paired_b64 s[0:1], s[0:1] matrix_b_scale:MATRIX_SCALE_ROW1
 // GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// GFX1250: v_wmma_ld_scale16_paired_b64 s[0:1], s[0:1] matrix_b_scale:MATRIX_SCALE_ROW1 ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x0c]
+// GFX1250: v_wmma_ld_scale16_paired_b64 s[0:1], s[0:1] matrix_b_scale:MATRIX_SCALE_ROW1 ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x0a]
 // WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32
 
 v_wmma_ld_scale16_paired_b64 s[0:1], s[0:1] matrix_b_reuse
 // GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// GFX1250: v_wmma_ld_scale16_paired_b64 s[0:1], s[0:1] matrix_b_reuse ; encoding: [0x00,0x40,0x3a,0xcc,0x00,0x00,0x00,0x04]
+// GFX1250: v_wmma_ld_scale16_paired_b64 s[0:1], s[0:1] matrix_b_reuse ; encoding: [0x00,0x40,0x3a,0xcc,0x00,0x00,0x00,0x02]
 // WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32
 
 v_wmma_ld_scale16_paired_b64 s[0:1], s[0:1] matrix_b_scale:MATRIX_SCALE_ROW1 matrix_b_reuse
 // GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// GFX1250: v_wmma_ld_scale16_paired_b64 s[0:1], s[0:1] matrix_b_scale:MATRIX_SCALE_ROW1 matrix_b_reuse ; encoding: [0x00,0x40,0x3a,0xcc,0x00,0x00,0x00,0x0c]
+// GFX1250: v_wmma_ld_scale16_paired_b64 s[0:1], s[0:1] matrix_b_scale:MATRIX_SCALE_ROW1 matrix_b_reuse ; encoding: [0x00,0x40,0x3a,0xcc,0x00,0x00,0x00,0x0a]
 // WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32
 
 v_wmma_ld_scale16_paired_b64 v[2:3], v[4:5] matrix_a_scale:MATRIX_SCALE_ROW0 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_scale_fmt:MATRIX_SCALE_FMT_E8 matrix_b_scale_fmt:MATRIX_SCALE_FMT_E8
 // GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// GFX1250: v_wmma_ld_scale16_paired_b64 v[2:3], v[4:5] matrix_b_scale:MATRIX_SCALE_ROW1 ; encoding: [0x00,0x00,0x3a,0xcc,0x02,0x09,0x02,0x0c]
+// GFX1250: v_wmma_ld_scale16_paired_b64 v[2:3], v[4:5] matrix_b_scale:MATRIX_SCALE_ROW1 ; encoding: [0x00,0x00,0x3a,0xcc,0x02,0x09,0x02,0x0a]
 // WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32
 
 v_wmma_ld_scale16_paired_b64 v[2:3], v[4:5] matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_scale_fmt:MATRIX_SCALE_FMT_E5M3
 // GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// GFX1250: v_wmma_ld_scale16_paired_b64 v[2:3], v[4:5] matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_scale_fmt:MATRIX_SCALE_FMT_E5M3 ; encoding: [0x00,0x00,0x3a,0xcc,0x02,0x09,0x02,0x2c]
+// GFX1250: v_wmma_ld_scale16_paired_b64 v[2:3], v[4:5] matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_scale_fmt:MATRIX_SCALE_FMT_E5M3 ; encoding: [0x00,0x00,0x3a,0xcc,0x02,0x09,0x02,0x2a]
 // WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32
 
 v_wmma_ld_scale16_paired_b64 v[2:3], v[4:5] matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_scale_fmt:MATRIX_SCALE_FMT_E4M3
 // GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// GFX1250: v_wmma_ld_scale16_paired_b64 v[2:3], v[4:5] matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_scale_fmt:MATRIX_SCALE_FMT_E4M3 ; encoding: [0x00,0x00,0x3a,0xcc,0x02,0x09,0x02,0x4c]
+// GFX1250: v_wmma_ld_scale16_paired_b64 v[2:3], v[4:5] matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_scale_fmt:MATRIX_SCALE_FMT_E4M3 ; encoding: [0x00,0x00,0x3a,0xcc,0x02,0x09,0x02,0x4a]
 // WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32
 
 v_wmma_ld_scale16_paired_b64 v[2:3], v[4:5] matrix_b_scale_fmt:MATRIX_SCALE_FMT_E8
 // GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// GFX1250: v_wmma_ld_scale16_paired_b64 v[2:3], v[4:5] ; encoding: [0x00,0x00,0x3a,0xcc,0x02,0x09,0x02,0x04]
+// GFX1250: v_wmma_ld_scale16_paired_b64 v[2:3], v[4:5] ; encoding: [0x00,0x00,0x3a,0xcc,0x02,0x09,0x02,0x02]
 // WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32
 
 v_wmma_ld_scale16_paired_b64 v[2:3], v[4:5] matrix_b_scale_fmt:MATRIX_SCALE_FMT_E5M3
 // GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// GFX1250: v_wmma_ld_scale16_paired_b64 v[2:3], v[4:5] matrix_b_scale_fmt:MATRIX_SCALE_FMT_E5M3 ; encoding: [0x00,0x01,0x3a,0xcc,0x02,0x09,0x02,0x04]
+// GFX1250: v_wmma_ld_scale16_paired_b64 v[2:3], v[4:5] matrix_b_scale_fmt:MATRIX_SCALE_FMT_E5M3 ; encoding: [0x00,0x01,0x3a,0xcc,0x02,0x09,0x02,0x02]
 // WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32
 
 v_wmma_ld_scale16_paired_b64 v[2:3], v[4:5] matrix_b_scale_fmt:MATRIX_SCALE_FMT_E4M3
 // GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// GFX1250: v_wmma_ld_scale16_paired_b64 v[2:3], v[4:5] matrix_b_scale_fmt:MATRIX_SCALE_FMT_E4M3 ; encoding: [0x00,0x02,0x3a,0xcc,0x02,0x09,0x02,0x04]
+// GFX1250: v_wmma_ld_scale16_paired_b64 v[2:3], v[4:5] matrix_b_scale_fmt:MATRIX_SCALE_FMT_E4M3 ; encoding: [0x00,0x02,0x3a,0xcc,0x02,0x09,0x02,0x02]
 // WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32
 
 v_wmma_ld_scale16_paired_b64 v[2:3], v[4:5] matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_scale_fmt:MATRIX_SCALE_FMT_E5M3 matrix_b_scale_fmt:MATRIX_SCALE_FMT_E4M3 matrix_a_reuse matrix_b_reuse
 // GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// GFX1250: v_wmma_ld_scale16_paired_b64 v[2:3], v[4:5] matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_scale_fmt:MATRIX_SCALE_FMT_E5M3 matrix_b_scale_fmt:MATRIX_SCALE_FMT_E4M3 matrix_a_reuse matrix_b_reuse ; encoding: [0x00,0x6a,0x3a,0xcc,0x02,0x09,0x02,0x2c]
+// GFX1250: v_wmma_ld_scale16_paired_b64 v[2:3], v[4:5] matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_scale_fmt:MATRIX_SCALE_FMT_E5M3 matrix_b_scale_fmt:MATRIX_SCALE_FMT_E4M3 matrix_a_reuse matrix_b_reuse ; encoding: [0x00,0x6a,0x3a,0xcc,0x02,0x09,0x02,0x2a]
 // WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32
 
 v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:35], v[40:47], v1, v2 matrix_a_fmt:MATRIX_FMT_BF8 matrix_b_fmt:MATRIX_FMT_FP6 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 neg_lo:[0,0,1] neg_hi:[0,0,1]
 // GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:35], v[40:47], v1, v2 matrix_a_fmt:MATRIX_FMT_BF8 matrix_b_fmt:MATRIX_FMT_FP6 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 neg_lo:[0,0,1] neg_hi:[0,0,1] ; encoding: [0x00,0x08,0x35,0xcc,0x01,0x05,0x02,0x0c,0x00,0x0c,0x33,0xcc,0x08,0x31,0xa2,0x94]
+// GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:35], v[40:47], v1, v2 matrix_a_fmt:MATRIX_FMT_BF8 matrix_b_fmt:MATRIX_FMT_FP6 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 neg_lo:[0,0,1] neg_hi:[0,0,1] ; encoding: [0x00,0x08,0x35,0xcc,0x01,0x05,0x02,0x0a,0x00,0x0c,0x33,0xcc,0x08,0x31,0xa2,0x94]
 // WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32
 
 v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:35], v[40:47], s1, s2 matrix_a_fmt:MATRIX_FMT_BF8 matrix_b_fmt:MATRIX_FMT_FP6 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_reuse matrix_b_reuse neg_lo:[0,0,1] neg_hi:[0,0,1]
 // GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:35], v[40:47], s1, s2 matrix_a_fmt:MATRIX_FMT_BF8 matrix_b_fmt:MATRIX_FMT_FP6 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_reuse matrix_b_reuse neg_lo:[0,0,1] neg_hi:[0,0,1] ; encoding: [0x00,0x68,0x35,0xcc,0x01,0x04,0x00,0x0c,0x00,0x0c,0x33,0xcc,0x08,0x31,0xa2,0x94]
+// GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:35], v[40:47], s1, s2 matrix_a_fmt:MATRIX_FMT_BF8 matrix_b_fmt:MATRIX_FMT_FP6 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_reuse matrix_b_reuse neg_lo:[0,0,1] neg_hi:[0,0,1] ; encoding: [0x00,0x68,0x35,0xcc,0x01,0x04,0x00,0x0a,0x00,0x0c,0x33,0xcc,0x08,0x31,0xa2,0x94]
 // WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32
 
 v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0
 // GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x04,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04]
+// GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x02,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04]
 // WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32
 
 v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 matrix_a_fmt:MATRIX_FMT_FP8
 // GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x04,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04]
+// GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x02,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04]
 // WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32
 
 v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 matrix_a_fmt:MATRIX_FMT_BF8
 // GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 matrix_a_fmt:MATRIX_FMT_BF8 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x04,0x00,0x08,0x33,0xcc,0x00,0x01,0x02,0x04]
+// GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 matrix_a_fmt:MATRIX_FMT_BF8 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x02,0x00,0x08,0x33,0xcc,0x00,0x01,0x02,0x04]
 // WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32
 
 v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:11], v[0:15], v[0:7], s0, s0 matrix_a_fmt:MATRIX_FMT_FP6
 // GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:11], v[0:15], v[0:7], s0, s0 matrix_a_fmt:MATRIX_FMT_FP6 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x04,0x00,0x10,0x33,0xcc,0x00,0x01,0x02,0x04]
+// GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:11], v[0:15], v[0:7], s0, s0 matrix_a_fmt:MATRIX_FMT_FP6 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x02,0x00,0x10,0x33,0xcc,0x00,0x01,0x02,0x04]
 // WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32
 
 v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:11], v[0:15], v[0:7], s0, s0 matrix_a_fmt:MATRIX_FMT_BF6
 // GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:11], v[0:15], v[0:7], s0, s0 matrix_a_fmt:MATRIX_FMT_BF6 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x04,0x00,0x18,0x33,0xcc,0x00,0x01,0x02,0x04]
+// GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:11], v[0:15], v[0:7], s0, s0 matrix_a_fmt:MATRIX_FMT_BF6 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x02,0x00,0x18,0x33,0xcc,0x00,0x01,0x02,0x04]
 // WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32
 
 v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:7], v[0:15], v[0:7], s0, s0 matrix_a_fmt:MATRIX_FMT_FP4
 // GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:7], v[0:15], v[0:7], s0, s0 matrix_a_fmt:MATRIX_FMT_FP4 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x04,0x00,0x20,0x33,0xcc,0x00,0x01,0x02,0x04]
+// GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:7], v[0:15], v[0:7], s0, s0 matrix_a_fmt:MATRIX_FMT_FP4 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x02,0x00,0x20,0x33,0xcc,0x00,0x01,0x02,0x04]
 // WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32
 
 v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 matrix_b_fmt:MATRIX_FMT_FP8
 // GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x04,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04]
+// GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x02,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04]
 // WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32
 
 v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 matrix_b_fmt:MATRIX_FMT_BF8
 // GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 matrix_b_fmt:MATRIX_FMT_BF8 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x04,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x0c]
+// GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 matrix_b_fmt:MATRIX_FMT_BF8 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x02,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x0c]
 // WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32
 
 v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:11], v[0:7], s0, s0 matrix_b_fmt:MATRIX_FMT_FP6
 // GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:11], v[0:7], s0, s0 matrix_b_fmt:MATRIX_FMT_FP6 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x04,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x14]
+// GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:11], v[0:7], s0, s0 matrix_b_fmt:MATRIX_FMT_FP6 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x02,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x14]
 // WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32
 
 v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:11], v[0:7], s0, s0 matrix_b_fmt:MATRIX_FMT_BF6
 // GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:11], v[0:7], s0, s0 matrix_b_fmt:MATRIX_FMT_BF6 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x04,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x1c]
+// GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:11], v[0:7], s0, s0 matrix_b_fmt:MATRIX_FMT_BF6 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x02,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x1c]
 // WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32
 
 v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:7], v[0:7], s0, s0 matrix_b_fmt:MATRIX_FMT_FP4
 // GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:7], v[0:7], s0, s0 matrix_b_fmt:MATRIX_FMT_FP4 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x04,0x00,0x40,0x33,0xcc,0x00,0x01,0x02,0x04]
+// GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:7], v[0:7], s0, s0 matrix_b_fmt:MATRIX_FMT_FP4 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x02,0x00,0x40,0x33,0xcc,0x00,0x01,0x02,0x04]
 // WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32
 
 v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 matrix_a_scale:MATRIX_SCALE_ROW0
 // GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x04,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04]
+// GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x02,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04]
 // WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32
 
 v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 matrix_a_scale:MATRIX_SCALE_ROW1
 // GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 matrix_a_scale:MATRIX_SCALE_ROW1 ; encoding: [0x00,0x08,0x35,0xcc,0x00,0x00,0x00,0x04,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04]
+// GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 matrix_a_scale:MATRIX_SCALE_ROW1 ; encoding: [0x00,0x08,0x35,0xcc,0x00,0x00,0x00,0x02,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04]
 // WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32
 
 v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 matrix_a_reuse
 // GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 matrix_a_reuse ; encoding: [0x00,0x20,0x35,0xcc,0x00,0x00,0x00,0x04,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04]
+// GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 matrix_a_reuse ; encoding: [0x00,0x20,0x35,0xcc,0x00,0x00,0x00,0x02,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04]
 // WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32
 
 v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_a_reuse
 // GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_a_reuse ; encoding: [0x00,0x28,0x35,0xcc,0x00,0x00,0x00,0x04,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04]
+// GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_a_reuse ; encoding: [0x00,0x28,0x35,0xcc,0x00,0x00,0x00,0x02,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04]
 // WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32
 
 v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 matrix_b_scale:MATRIX_SCALE_ROW0
 // GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x04,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04]
+// GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x02,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04]
 // WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32
 
 v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 matrix_b_scale:MATRIX_SCALE_ROW1
 // GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 matrix_b_scale:MATRIX_SCALE_ROW1 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x0c,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04]
+// GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 matrix_b_scale:MATRIX_SCALE_ROW1 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x0a,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04]
 // WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32
 
 v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 matrix_b_reuse
 // GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 matrix_b_reuse ; encoding: [0x00,0x40,0x35,0xcc,0x00,0x00,0x00,0x04,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04]
+// GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 matrix_b_reuse ; encoding: [0x00,0x40,0x35,0xcc,0x00,0x00,0x00,0x02,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04]
 // WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32
 
 v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_b_reuse
 // GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_b_reuse ; encoding: [0x00,0x40,0x35,0xcc,0x00,0x00,0x00,0x0c,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04]
+// GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_b_reuse ; encoding: [0x00,0x40,0x35,0xcc,0x00,0x00,0x00,0x0a,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04]
 // WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32
 
 v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47], v1, v2 matrix_a_scale_fmt:MATRIX_SCALE_FMT_E8 matrix_b_scale_fmt:MATRIX_SCALE_FMT_E8
 // GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47], v1, v2 ; encoding: [0x00,0x00,0x35,0xcc,0x01,0x05,0x02,0x04,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04]
+// GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47], v1, v2 ; encoding: [0x00,0x00,0x35,0xcc,0x01,0x05,0x02,0x02,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04]
 // WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32
 
 v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47], v1, v2 matrix_a_scale_fmt:MATRIX_SCALE_FMT_E5M3
 // GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47], v1, v2 matrix_a_scale_fmt:MATRIX_SCALE_FMT_E5M3 ; encoding: [0x00,0x00,0x35,0xcc,0x01,0x05,0x02,0x24,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04]
+// GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47], v1, v2 matrix_a_scale_fmt:MATRIX_SCALE_FMT_E5M3 ; encoding: [0x00,0x00,0x35,0xcc,0x01,0x05,0x02,0x22,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04]
 // WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32
 
 v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47], v1, v2 matrix_a_scale_fmt:MATRIX_SCALE_FMT_E4M3
 // GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47], v1, v2 matrix_a_scale_fmt:MATRIX_SCALE_FMT_E4M3 ; encoding: [0x00,0x00,0x35,0xcc,0x01,0x05,0x02,0x44,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04]
+// GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47], v1, v2 matrix_a_scale_fmt:MATRIX_SCALE_FMT_E4M3 ; encoding: [0x00,0x00,0x35,0xcc,0x01,0x05,0x02,0x42,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04]
 // WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32
 
 v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47], v1, v2 matrix_b_scale_fmt:MATRIX_SCALE_FMT_E5M3
 // GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47], v1, v2 matrix_b_scale_fmt:MATRIX_SCALE_FMT_E5M3 ; encoding: [0x00,0x01,0x35,0xcc,0x01,0x05,0x02,0x04,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04]
+// GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47], v1, v2 matrix_b_scale_fmt:MATRIX_SCALE_FMT_E5M3 ; encoding: [0x00,0x01,0x35,0xcc,0x01,0x05,0x02,0x02,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04]
 // WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32
 
 v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47], v1, v2 matrix_b_scale_fmt:MATRIX_SCALE_FMT_E4M3
 // GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47], v1, v2 matrix_b_scale_fmt:MATRIX_SCALE_FMT_E4M3 ; encoding: [0x00,0x02,0x35,0xcc,0x01,0x05,0x02,0x04,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04]
+// GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47], v1, v2 matrix_b_scale_fmt:MATRIX_SCALE_FMT_E4M3 ; encoding: [0x00,0x02,0x35,0xcc,0x01,0x05,0x02,0x02,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04]
 // WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32
 
 v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:35], v[40:47], v1, v2 matrix_a_fmt:MATRIX_FMT_BF8 matrix_b_fmt:MATRIX_FMT_FP6 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_scale_fmt:MATRIX_SCALE_FMT_E8 matrix_b_scale_fmt:MATRIX_SCALE_FMT_E8 matrix_a_reuse matrix_b_reuse neg_lo:[0,0,1] neg_hi:[0,0,1]
 // GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:35], v[40:47], v1, v2 matrix_a_fmt:MATRIX_FMT_BF8 matrix_b_fmt:MATRIX_FMT_FP6 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_reuse matrix_b_reuse neg_lo:[0,0,1] neg_hi:[0,0,1] ; encoding: [0x00,0x68,0x35,0xcc,0x01,0x05,0x02,0x0c,0x00,0x0c,0x33,0xcc,0x08,0x31,0xa2,0x94]
+// GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:35], v[40:47], v1, v2 matrix_a_fmt:MATRIX_FMT_BF8 matrix_b_fmt:MATRIX_FMT_FP6 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_reuse matrix_b_reuse neg_lo:[0,0,1] neg_hi:[0,0,1] ; encoding: [0x00,0x68,0x35,0xcc,0x01,0x05,0x02,0x0a,0x00,0x0c,0x33,0xcc,0x08,0x31,0xa2,0x94]
 // WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32
 
 v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:35], v[40:47], v[2:3], v[4:5] matrix_a_fmt:MATRIX_FMT_BF8 matrix_b_fmt:MATRIX_FMT_FP6 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 neg_lo:[0,0,1] neg_hi:[0,0,1]
 // GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:35], v[40:47], v[2:3], v[4:5] matrix_a_fmt:MATRIX_FMT_BF8 matrix_b_fmt:MATRIX_FMT_FP6 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 neg_lo:[0,0,1] neg_hi:[0,0,1] ; encoding: [0x00,0x08,0x3a,0xcc,0x02,0x09,0x02,0x0c,0x00,0x0c,0x33,0xcc,0x08,0x31,0xa2,0x94]
+// GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:35], v[40:47], v[2:3], v[4:5] matrix_a_fmt:MATRIX_FMT_BF8 matrix_b_fmt:MATRIX_FMT_FP6 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 neg_lo:[0,0,1] neg_hi:[0,0,1] ; encoding: [0x00,0x08,0x3a,0xcc,0x02,0x09,0x02,0x0a,0x00,0x0c,0x33,0xcc,0x08,0x31,0xa2,0x94]
 // WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32
 
 v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:35], v[40:47], s[2:3], s[4:5] matrix_a_fmt:MATRIX_FMT_BF8 matrix_b_fmt:MATRIX_FMT_FP6 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_reuse matrix_b_reuse neg_lo:[0,0,1] neg_hi:[0,0,1]
 // GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:35], v[40:47], s[2:3], s[4:5] matrix_a_fmt:MATRIX_FMT_BF8 matrix_b_fmt:MATRIX_FMT_FP6 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_reuse matrix_b_reuse neg_lo:[0,0,1] neg_hi:[0,0,1] ; encoding: [0x00,0x68,0x3a,0xcc,0x02,0x08,0x00,0x0c,0x00,0x0c,0x33,0xcc,0x08,0x31,0xa2,0x94]
+// GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:35], v[40:47], s[2:3], s[4:5] matrix_a_fmt:MATRIX_FMT_BF8 matrix_b_fmt:MATRIX_FMT_FP6 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_reuse matrix_b_reuse neg_lo:[0,0,1] neg_hi:[0,0,1] ; encoding: [0x00,0x68,0x3a,0xcc,0x02,0x08,0x00,0x0a,0x00,0x0c,0x33,0xcc,0x08,0x31,0xa2,0x94]
 // WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32
 
 v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1]
 // GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x04,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04]
+// GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x02,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04]
 // WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32
 
 v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] matrix_a_fmt:MATRIX_FMT_FP8
 // GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x04,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04]
+// GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x02,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04]
 // WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32
 
 v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] matrix_a_fmt:MATRIX_FMT_BF8
 // GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] matrix_a_fmt:MATRIX_FMT_BF8 ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x04,0x00,0x08,0x33,0xcc,0x00,0x01,0x02,0x04]
+// GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] matrix_a_fmt:MATRIX_FMT_BF8 ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x02,0x00,0x08,0x33,0xcc,0x00,0x01,0x02,0x04]
 // WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32
 
 v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:11], v[0:15], v[0:7], s[0:1], s[0:1] matrix_a_fmt:MATRIX_FMT_FP6
 // GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:11], v[0:15], v[0:7], s[0:1], s[0:1] matrix_a_fmt:MATRIX_FMT_FP6 ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x04,0x00,0x10,0x33,0xcc,0x00,0x01,0x02,0x04]
+// GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:11], v[0:15], v[0:7], s[0:1], s[0:1] matrix_a_fmt:MATRIX_FMT_FP6 ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x02,0x00,0x10,0x33,0xcc,0x00,0x01,0x02,0x04]
 // WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32
 
 v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:11], v[0:15], v[0:7], s[0:1], s[0:1] matrix_a_fmt:MATRIX_FMT_BF6
 // GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:11], v[0:15], v[0:7], s[0:1], s[0:1] matrix_a_fmt:MATRIX_FMT_BF6 ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x04,0x00,0x18,0x33,0xcc,0x00,0x01,0x02,0x04]
+// GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:11], v[0:15], v[0:7], s[0:1], s[0:1] matrix_a_fmt:MATRIX_FMT_BF6 ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x02,0x00,0x18,0x33,0xcc,0x00,0x01,0x02,0x04]
 // WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32
 
 v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:7], v[0:15], v[0:7], s[0:1], s[0:1] matrix_a_fmt:MATRIX_FMT_FP4
 // GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:7], v[0:15], v[0:7], s[0:1], s[0:1] matrix_a_fmt:MATRIX_FMT_FP4 ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x04,0x00,0x20,0x33,0xcc,0x00,0x01,0x02,0x04]
+// GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:7], v[0:15], v[0:7], s[0:1], s[0:1] matrix_a_fmt:MATRIX_FMT_FP4 ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x02,0x00,0x20,0x33,0xcc,0x00,0x01,0x02,0x04]
 // WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32
 
 v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] matrix_b_fmt:MATRIX_FMT_FP8
 // GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x04,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04]
+// GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x02,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04]
 // WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32
 
 v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] matrix_b_fmt:MATRIX_FMT_BF8
 // GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] matrix_b_fmt:MATRIX_FMT_BF8 ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x04,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x0c]
+// GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] matrix_b_fmt:MATRIX_FMT_BF8 ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x02,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x0c]
 // WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32
 
 v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:11], v[0:7], s[0:1], s[0:1] matrix_b_fmt:MATRIX_FMT_FP6
 // GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:11], v[0:7], s[0:1], s[0:1] matrix_b_fmt:MATRIX_FMT_FP6 ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x04,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x14]
+// GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:11], v[0:7], s[0:1], s[0:1] matrix_b_fmt:MATRIX_FMT_FP6 ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x02,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x14]
 // WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32
 
 v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:11], v[0:7], s[0:1], s[0:1] matrix_b_fmt:MATRIX_FMT_BF6
 // GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:11], v[0:7], s[0:1], s[0:1] matrix_b_fmt:MATRIX_FMT_BF6 ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x04,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x1c]
+// GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:11], v[0:7], s[0:1], s[0:1] matrix_b_fmt:MATRIX_FMT_BF6 ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x02,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x1c]
 // WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32
 
 v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:7], v[0:7], s[0:1], s[0:1] matrix_b_fmt:MATRIX_FMT_FP4
 // GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:7], v[0:7], s[0:1], s[0:1] matrix_b_fmt:MATRIX_FMT_FP4 ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x04,0x00,0x40,0x33,0xcc,0x00,0x01,0x02,0x04]
+// GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:7], v[0:7], s[0:1], s[0:1] matrix_b_fmt:MATRIX_FMT_FP4 ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x02,0x00,0x40,0x33,0xcc,0x00,0x01,0x02,0x04]
 // WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32
 
 v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] matrix_a_scale:MATRIX_SCALE_ROW0
 // GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x04,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04]
+// GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x02,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04]
 // WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32
 
 v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] matrix_a_scale:MATRIX_SCALE_ROW1
 // GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] matrix_a_scale:MATRIX_SCALE_ROW1 ; encoding: [0x00,0x08,0x3a,0xcc,0x00,0x00,0x00,0x04,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04]
+// GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] matrix_a_scale:MATRIX_SCALE_ROW1 ; encoding: [0x00,0x08,0x3a,0xcc,0x00,0x00,0x00,0x02,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04]
 // WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32
 
 v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] matrix_a_reuse
 // GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] matrix_a_reuse ; encoding: [0x00,0x20,0x3a,0xcc,0x00,0x00,0x00,0x04,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04]
+// GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] matrix_a_reuse ; encoding: [0x00,0x20,0x3a,0xcc,0x00,0x00,0x00,0x02,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04]
 // WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32
 
 v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] matrix_a_scale:MATRIX_SCALE_ROW1 matrix_a_reuse
 // GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] matrix_a_scale:MATRIX_SCALE_ROW1 matrix_a_reuse ; encoding: [0x00,0x28,0x3a,0xcc,0x00,0x00,0x00,0x04,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04]
+// GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] matrix_a_scale:MATRIX_SCALE_ROW1 matrix_a_reuse ; encoding: [0x00,0x28,0x3a,0xcc,0x00,0x00,0x00,0x02,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04]
 // WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32
 
 v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] matrix_b_scale:MATRIX_SCALE_ROW0
 // GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x04,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04]
+// GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x02,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04]
 // WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32
 
 v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] matrix_b_scale:MATRIX_SCALE_ROW1
 // GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] matrix_b_scale:MATRIX_SCALE_ROW1 ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x0c,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04]
+// GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] matrix_b_scale:MATRIX_SCALE_ROW1 ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x0a,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04]
 // WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32
 
 v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] matrix_b_reuse
 // GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] matrix_b_reuse ; encoding: [0x00,0x40,0x3a,0xcc,0x00,0x00,0x00,0x04,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04]
+// GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] matrix_b_reuse ; encoding: [0x00,0x40,0x3a,0xcc,0x00,0x00,0x00,0x02,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04]
 // WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32
 
 v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] matrix_b_scale:MATRIX_SCALE_ROW1 matrix_b_reuse
 // GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] matrix_b_scale:MATRIX_SCALE_ROW1 matrix_b_reuse ; encoding: [0x00,0x40,0x3a,0xcc,0x00,0x00,0x00,0x0c,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04]
+// GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] matrix_b_scale:MATRIX_SCALE_ROW1 matrix_b_reuse ; encoding: [0x00,0x40,0x3a,0xcc,0x00,0x00,0x00,0x0a,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04]
 // WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32
 
 v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47], v[2:3], v[4:5] matrix_a_scale_fmt:MATRIX_SCALE_FMT_E8 matrix_b_scale_fmt:MATRIX_SCALE_FMT_E8
 // GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47], v[2:3], v[4:5] ; encoding: [0x00,0x00,0x3a,0xcc,0x02,0x09,0x02,0x04,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04]
+// GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47], v[2:3], v[4:5] ; encoding: [0x00,0x00,0x3a,0xcc,0x02,0x09,0x02,0x02,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04]
 // WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32
 
 v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47], v[2:3], v[4:5] matrix_a_scale_fmt:MATRIX_SCALE_FMT_E5M3
 // GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47], v[2:3], v[4:5] matrix_a_scale_fmt:MATRIX_SCALE_FMT_E5M3 ; encoding: [0x00,0x00,0x3a,0xcc,0x02,0x09,0x02,0x24,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04]
+// GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47], v[2:3], v[4:5] matrix_a_scale_fmt:MATRIX_SCALE_FMT_E5M3 ; encoding: [0x00,0x00,0x3a,0xcc,0x02,0x09,0x02,0x22,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04]
 // WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32
 
 v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47], v[2:3], v[4:5] matrix_a_scale_fmt:MATRIX_SCALE_FMT_E4M3
 // GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47], v[2:3], v[4:5] matrix_a_scale_fmt:MATRIX_SCALE_FMT_E4M3 ; encoding: [0x00,0x00,0x3a,0xcc,0x02,0x09,0x02,0x44,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04]
+// GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47], v[2:3], v[4:5] matrix_a_scale_fmt:MATRIX_SCALE_FMT_E4M3 ; encoding: [0x00,0x00,0x3a,0xcc,0x02,0x09,0x02,0x42,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04]
 // WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32
 
 v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47], v[2:3], v[4:5] matrix_b_scale_fmt:MATRIX_SCALE_FMT_E5M3
 // GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47], v[2:3], v[4:5] matrix_b_scale_fmt:MATRIX_SCALE_FMT_E5M3 ; encoding: [0x00,0x01,0x3a,0xcc,0x02,0x09,0x02,0x04,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04]
+// GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47], v[2:3], v[4:5] matrix_b_scale_fmt:MATRIX_SCALE_FMT_E5M3 ; encoding: [0x00,0x01,0x3a,0xcc,0x02,0x09,0x02,0x02,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04]
 // WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32
 
 v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47], v[2:3], v[4:5] matrix_b_scale_fmt:MATRIX_SCALE_FMT_E4M3
 // GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47], v[2:3], v[4:5] matrix_b_scale_fmt:MATRIX_SCALE_FMT_E4M3 ; encoding: [0x00,0x02,0x3a,0xcc,0x02,0x09,0x02,0x04,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04]
+// GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47], v[2:3], v[4:5] matrix_b_scale_fmt:MATRIX_SCALE_FMT_E4M3 ; encoding: [0x00,0x02,0x3a,0xcc,0x02,0x09,0x02,0x02,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04]
 // WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32
 
 v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:35], v[40:47], v[2:3], v[4:5] matrix_a_fmt:MATRIX_FMT_BF8 matrix_b_fmt:MATRIX_FMT_FP6 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_scale_fmt:MATRIX_SCALE_FMT_E8 matrix_b_scale_fmt:MATRIX_SCALE_FMT_E8 matrix_a_reuse matrix_b_reuse neg_lo:[0,0,1] neg_hi:[0,0,1]
 // GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:35], v[40:47], v[2:3], v[4:5] matrix_a_fmt:MATRIX_FMT_BF8 matrix_b_fmt:MATRIX_FMT_FP6 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_reuse matrix_b_reuse neg_lo:[0,0,1] neg_hi:[0,0,1] ; encoding: [0x00,0x68,0x3a,0xcc,0x02,0x09,0x02,0x0c,0x00,0x0c,0x33,0xcc,0x08,0x31,0xa2,0x94]
+// GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:35], v[40:47], v[2:3], v[4:5] matrix_a_fmt:MATRIX_FMT_BF8 matrix_b_fmt:MATRIX_FMT_FP6 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_reuse matrix_b_reuse neg_lo:[0,0,1] neg_hi:[0,0,1] ; encoding: [0x00,0x68,0x3a,0xcc,0x02,0x09,0x02,0x0a,0x00,0x0c,0x33,0xcc,0x08,0x31,0xa2,0x94]
 // WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32
 
 v_wmma_f16_16x16x128_fp8_fp8 v[16:19], v[0:15], v[8:23], v[16:19]
@@ -1750,170 +1750,170 @@ v_wmma_f32_32x16x128_f4 v[4:19], v[0:15], v[2:9], v[4:19] neg_lo:[0,0,1] neg_hi:
 
 v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v1, v2 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 neg_lo:[0,0,1] neg_hi:[0,0,1]
 // GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// GFX1250: v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v1, v2 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 neg_lo:[0,0,1] neg_hi:[0,0,1] ; encoding: [0x00,0x08,0x35,0xcc,0x01,0x05,0x02,0x0c,0x00,0x44,0x88,0xcc,0x08,0x31,0xa2,0x9c]
+// GFX1250: v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v1, v2 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 neg_lo:[0,0,1] neg_hi:[0,0,1] ; encoding: [0x00,0x08,0x35,0xcc,0x01,0x05,0x02,0x0a,0x00,0x44,0x88,0xcc,0x08,0x31,0xa2,0x9c]
 // WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32
 
 v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], s1, s2 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_reuse matrix_b_reuse neg_lo:[0,0,1] neg_hi:[0,0,1]
 // GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// GFX1250: v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], s1, s2 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_reuse matrix_b_reuse neg_lo:[0,0,1] neg_hi:[0,0,1] ; encoding: [0x00,0x68,0x35,0xcc,0x01,0x04,0x00,0x0c,0x00,0x44,0x88,0xcc,0x08,0x31,0xa2,0x9c]
+// GFX1250: v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], s1, s2 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_reuse matrix_b_reuse neg_lo:[0,0,1] neg_hi:[0,0,1] ; encoding: [0x00,0x68,0x35,0xcc,0x01,0x04,0x00,0x0a,0x00,0x44,0x88,0xcc,0x08,0x31,0xa2,0x9c]
 // WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32
 
 v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s0, s0
 // GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// GFX1250: v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s0, s0 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x04,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c]
+// GFX1250: v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s0, s0 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x02,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c]
 // WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32
 
 v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s0, s0 matrix_a_scale:MATRIX_SCALE_ROW0
 // GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// GFX1250: v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s0, s0 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x04,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c]
+// GFX1250: v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s0, s0 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x02,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c]
 // WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32
 
 v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s0, s0 matrix_a_scale:MATRIX_SCALE_ROW1
 // GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// GFX1250: v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s0, s0 matrix_a_scale:MATRIX_SCALE_ROW1 ; encoding: [0x00,0x08,0x35,0xcc,0x00,0x00,0x00,0x04,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c]
+// GFX1250: v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s0, s0 matrix_a_scale:MATRIX_SCALE_ROW1 ; encoding: [0x00,0x08,0x35,0xcc,0x00,0x00,0x00,0x02,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c]
 // WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32
 
 v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s0, s0 matrix_a_reuse
 // GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// GFX1250: v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s0, s0 matrix_a_reuse ; encoding: [0x00,0x20,0x35,0xcc,0x00,0x00,0x00,0x04,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c]
+// GFX1250: v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s0, s0 matrix_a_reuse ; encoding: [0x00,0x20,0x35,0xcc,0x00,0x00,0x00,0x02,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c]
 // WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32
 
 v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s0, s0 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_a_reuse
 // GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// GFX1250: v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s0, s0 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_a_reuse ; encoding: [0x00,0x28,0x35,0xcc,0x00,0x00,0x00,0x04,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c]
+// GFX1250: v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s0, s0 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_a_reuse ; encoding: [0x00,0x28,0x35,0xcc,0x00,0x00,0x00,0x02,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c]
 // WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32
 
 v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s0, s0 matrix_b_scale:MATRIX_SCALE_ROW0
 // GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// GFX1250: v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s0, s0 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x04,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c]
+// GFX1250: v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s0, s0 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x02,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c]
 // WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32
 
 v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s0, s0 matrix_b_scale:MATRIX_SCALE_ROW1
 // GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// GFX1250: v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s0, s0 matrix_b_scale:MATRIX_SCALE_ROW1 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x0c,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c]
+// GFX1250: v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s0, s0 matrix_b_scale:MATRIX_SCALE_ROW1 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x0a,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c]
 // WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32
 
 v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s0, s0 matrix_b_reuse
 // GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// GFX1250: v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s0, s0 matrix_b_reuse ; encoding: [0x00,0x40,0x35,0xcc,0x00,0x00,0x00,0x04,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c]
+// GFX1250: v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s0, s0 matrix_b_reuse ; encoding: [0x00,0x40,0x35,0xcc,0x00,0x00,0x00,0x02,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c]
 // WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32
 
 v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s0, s0 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_b_reuse
 // GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// GFX1250: v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s0, s0 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_b_reuse ; encoding: [0x00,0x40,0x35,0xcc,0x00,0x00,0x00,0x0c,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c]
+// GFX1250: v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s0, s0 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_b_reuse ; encoding: [0x00,0x40,0x35,0xcc,0x00,0x00,0x00,0x0a,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c]
 // WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32
 
 v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v1, v2 matrix_a_scale_fmt:MATRIX_SCALE_FMT_E8 matrix_b_scale_fmt:MATRIX_SCALE_FMT_E8
 // GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// GFX1250: v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v1, v2 ; encoding: [0x00,0x00,0x35,0xcc,0x01,0x05,0x02,0x04,0x00,0x40,0x88,0xcc,0x08,0x31,0xa2,0x1c]
+// GFX1250: v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v1, v2 ; encoding: [0x00,0x00,0x35,0xcc,0x01,0x05,0x02,0x02,0x00,0x40,0x88,0xcc,0x08,0x31,0xa2,0x1c]
 // WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32
 
 v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v1, v2 matrix_a_scale_fmt:MATRIX_SCALE_FMT_E5M3
 // GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// GFX1250: v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v1, v2 matrix_a_scale_fmt:MATRIX_SCALE_FMT_E5M3 ; encoding: [0x00,0x00,0x35,0xcc,0x01,0x05,0x02,0x24,0x00,0x40,0x88,0xcc,0x08,0x31,0xa2,0x1c]
+// GFX1250: v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v1, v2 matrix_a_scale_fmt:MATRIX_SCALE_FMT_E5M3 ; encoding: [0x00,0x00,0x35,0xcc,0x01,0x05,0x02,0x22,0x00,0x40,0x88,0xcc,0x08,0x31,0xa2,0x1c]
 // WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32
 
 v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v1, v2 matrix_a_scale_fmt:MATRIX_SCALE_FMT_E4M3
 // GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// GFX1250: v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v1, v2 matrix_a_scale_fmt:MATRIX_SCALE_FMT_E4M3 ; encoding: [0x00,0x00,0x35,0xcc,0x01,0x05,0x02,0x44,0x00,0x40,0x88,0xcc,0x08,0x31,0xa2,0x1c]
+// GFX1250: v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v1, v2 matrix_a_scale_fmt:MATRIX_SCALE_FMT_E4M3 ; encoding: [0x00,0x00,0x35,0xcc,0x01,0x05,0x02,0x42,0x00,0x40,0x88,0xcc,0x08,0x31,0xa2,0x1c]
 // WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32
 
 v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v1, v2 matrix_b_scale_fmt:MATRIX_SCALE_FMT_E5M3
 // GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// GFX1250: v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v1, v2 matrix_b_scale_fmt:MATRIX_SCALE_FMT_E5M3 ; encoding: [0x00,0x01,0x35,0xcc,0x01,0x05,0x02,0x04,0x00,0x40,0x88,0xcc,0x08,0x31,0xa2,0x1c]
+// GFX1250: v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v1, v2 matrix_b_scale_fmt:MATRIX_SCALE_FMT_E5M3 ; encoding: [0x00,0x01,0x35,0xcc,0x01,0x05,0x02,0x02,0x00,0x40,0x88,0xcc,0x08,0x31,0xa2,0x1c]
 // WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32
 
 v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v1, v2 matrix_b_scale_fmt:MATRIX_SCALE_FMT_E4M3
 // GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// GFX1250: v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v1, v2 matrix_b_scale_fmt:MATRIX_SCALE_FMT_E4M3 ; encoding: [0x00,0x02,0x35,0xcc,0x01,0x05,0x02,0x04,0x00,0x40,0x88,0xcc,0x08,0x31,0xa2,0x1c]
+// GFX1250: v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v1, v2 matrix_b_scale_fmt:MATRIX_SCALE_FMT_E4M3 ; encoding: [0x00,0x02,0x35,0xcc,0x01,0x05,0x02,0x02,0x00,0x40,0x88,0xcc,0x08,0x31,0xa2,0x1c]
 // WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32
 
 v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v1, v2 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_scale_fmt:MATRIX_SCALE_FMT_E8 matrix_b_scale_fmt:MATRIX_SCALE_FMT_E8 matrix_a_reuse matrix_b_reuse neg_lo:[0,0,1] neg_hi:[0,0,1]
 // GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// GFX1250: v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v1, v2 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_reuse matrix_b_reuse neg_lo:[0,0,1] neg_hi:[0,0,1] ; encoding: [0x00,0x68,0x35,0xcc,0x01,0x05,0x02,0x0c,0x00,0x44,0x88,0xcc,0x08,0x31,0xa2,0x9c]
+// GFX1250: v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v1, v2 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_reuse matrix_b_reuse neg_lo:[0,0,1] neg_hi:[0,0,1] ; encoding: [0x00,0x68,0x35,0xcc,0x01,0x05,0x02,0x0a,0x00,0x44,0x88,0xcc,0x08,0x31,0xa2,0x9c]
 // WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32
 
 v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v[2:3], v[4:5] matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 neg_lo:[0,0,1] neg_hi:[0,0,1]
 // GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// GFX1250: v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v[2:3], v[4:5] matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 neg_lo:[0,0,1] neg_hi:[0,0,1] ; encoding: [0x00,0x08,0x3a,0xcc,0x02,0x09,0x02,0x0c,0x00,0x44,0x88,0xcc,0x08,0x31,0xa2,0x9c]
+// GFX1250: v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v[2:3], v[4:5] matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 neg_lo:[0,0,1] neg_hi:[0,0,1] ; encoding: [0x00,0x08,0x3a,0xcc,0x02,0x09,0x02,0x0a,0x00,0x44,0x88,0xcc,0x08,0x31,0xa2,0x9c]
 // WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32
 
 v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], s[2:3], s[4:5] matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_reuse matrix_b_reuse neg_lo:[0,0,1] neg_hi:[0,0,1]
 // GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// GFX1250: v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], s[2:3], s[4:5] matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_reuse matrix_b_reuse neg_lo:[0,0,1] neg_hi:[0,0,1] ; encoding: [0x00,0x68,0x3a,0xcc,0x02,0x08,0x00,0x0c,0x00,0x44,0x88,0xcc,0x08,0x31,0xa2,0x9c]
+// GFX1250: v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], s[2:3], s[4:5] matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_reuse matrix_b_reuse neg_lo:[0,0,1] neg_hi:[0,0,1] ; encoding: [0x00,0x68,0x3a,0xcc,0x02,0x08,0x00,0x0a,0x00,0x44,0x88,0xcc,0x08,0x31,0xa2,0x9c]
 // WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32
 
 v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s[0:1], s[0:1]
 // GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// GFX1250: v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s[0:1], s[0:1] ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x04,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c]
+// GFX1250: v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s[0:1], s[0:1] ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x02,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c]
 // WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32
 
 v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s[0:1], s[0:1] matrix_a_scale:MATRIX_SCALE_ROW0
 // GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// GFX1250: v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s[0:1], s[0:1] ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x04,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c]
+// GFX1250: v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s[0:1], s[0:1] ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x02,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c]
 // WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32
 
 v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s[0:1], s[0:1] matrix_a_scale:MATRIX_SCALE_ROW1
 // GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// GFX1250: v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s[0:1], s[0:1] matrix_a_scale:MATRIX_SCALE_ROW1 ; encoding: [0x00,0x08,0x3a,0xcc,0x00,0x00,0x00,0x04,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c]
+// GFX1250: v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s[0:1], s[0:1] matrix_a_scale:MATRIX_SCALE_ROW1 ; encoding: [0x00,0x08,0x3a,0xcc,0x00,0x00,0x00,0x02,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c]
 // WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32
 
 v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s[0:1], s[0:1] matrix_a_reuse
 // GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// GFX1250: v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s[0:1], s[0:1] matrix_a_reuse ; encoding: [0x00,0x20,0x3a,0xcc,0x00,0x00,0x00,0x04,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c]
+// GFX1250: v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s[0:1], s[0:1] matrix_a_reuse ; encoding: [0x00,0x20,0x3a,0xcc,0x00,0x00,0x00,0x02,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c]
 // WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32
 
 v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s[0:1], s[0:1] matrix_a_scale:MATRIX_SCALE_ROW1 matrix_a_reuse
 // GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// GFX1250: v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s[0:1], s[0:1] matrix_a_scale:MATRIX_SCALE_ROW1 matrix_a_reuse ; encoding: [0x00,0x28,0x3a,0xcc,0x00,0x00,0x00,0x04,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c]
+// GFX1250: v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s[0:1], s[0:1] matrix_a_scale:MATRIX_SCALE_ROW1 matrix_a_reuse ; encoding: [0x00,0x28,0x3a,0xcc,0x00,0x00,0x00,0x02,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c]
 // WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32
 
 v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s[0:1], s[0:1] matrix_b_scale:MATRIX_SCALE_ROW0
 // GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// GFX1250: v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s[0:1], s[0:1] ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x04,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c]
+// GFX1250: v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s[0:1], s[0:1] ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x02,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c]
 // WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32
 
 v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s[0:1], s[0:1] matrix_b_scale:MATRIX_SCALE_ROW1
 // GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// GFX1250: v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s[0:1], s[0:1] matrix_b_scale:MATRIX_SCALE_ROW1 ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x0c,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c]
+// GFX1250: v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s[0:1], s[0:1] matrix_b_scale:MATRIX_SCALE_ROW1 ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x0a,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c]
 // WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32
 
 v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s[0:1], s[0:1] matrix_b_reuse
 // GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// GFX1250: v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s[0:1], s[0:1] matrix_b_reuse ; encoding: [0x00,0x40,0x3a,0xcc,0x00,0x00,0x00,0x04,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c]
+// GFX1250: v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s[0:1], s[0:1] matrix_b_reuse ; encoding: [0x00,0x40,0x3a,0xcc,0x00,0x00,0x00,0x02,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c]
 // WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32
 
 v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s[0:1], s[0:1] matrix_b_scale:MATRIX_SCALE_ROW1 matrix_b_reuse
 // GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// GFX1250: v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s[0:1], s[0:1] matrix_b_scale:MATRIX_SCALE_ROW1 matrix_b_reuse ; encoding: [0x00,0x40,0x3a,0xcc,0x00,0x00,0x00,0x0c,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c]
+// GFX1250: v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s[0:1], s[0:1] matrix_b_scale:MATRIX_SCALE_ROW1 matrix_b_reuse ; encoding: [0x00,0x40,0x3a,0xcc,0x00,0x00,0x00,0x0a,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c]
 // WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32
 
 v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v[2:3], v[4:5] matrix_a_scale_fmt:MATRIX_SCALE_FMT_E8 matrix_b_scale_fmt:MATRIX_SCALE_FMT_E8
 // GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// GFX1250: v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v[2:3], v[4:5] ; encoding: [0x00,0x00,0x3a,0xcc,0x02,0x09,0x02,0x04,0x00,0x40,0x88,0xcc,0x08,0x31,0xa2,0x1c]
+// GFX1250: v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v[2:3], v[4:5] ; encoding: [0x00,0x00,0x3a,0xcc,0x02,0x09,0x02,0x02,0x00,0x40,0x88,0xcc,0x08,0x31,0xa2,0x1c]
 // WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32
 
 v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v[2:3], v[4:5] matrix_a_scale_fmt:MATRIX_SCALE_FMT_E5M3
 // GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// GFX1250: v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v[2:3], v[4:5] matrix_a_scale_fmt:MATRIX_SCALE_FMT_E5M3 ; encoding: [0x00,0x00,0x3a,0xcc,0x02,0x09,0x02,0x24,0x00,0x40,0x88,0xcc,0x08,0x31,0xa2,0x1c]
+// GFX1250: v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v[2:3], v[4:5] matrix_a_scale_fmt:MATRIX_SCALE_FMT_E5M3 ; encoding: [0x00,0x00,0x3a,0xcc,0x02,0x09,0x02,0x22,0x00,0x40,0x88,0xcc,0x08,0x31,0xa2,0x1c]
 // WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32
 
 v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v[2:3], v[4:5] matrix_a_scale_fmt:MATRIX_SCALE_FMT_E4M3
 // GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// GFX1250: v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v[2:3], v[4:5] matrix_a_scale_fmt:MATRIX_SCALE_FMT_E4M3 ; encoding: [0x00,0x00,0x3a,0xcc,0x02,0x09,0x02,0x44,0x00,0x40,0x88,0xcc,0x08,0x31,0xa2,0x1c]
+// GFX1250: v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v[2:3], v[4:5] matrix_a_scale_fmt:MATRIX_SCALE_FMT_E4M3 ; encoding: [0x00,0x00,0x3a,0xcc,0x02,0x09,0x02,0x42,0x00,0x40,0x88,0xcc,0x08,0x31,0xa2,0x1c]
 // WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32
 
 v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v[2:3], v[4:5] matrix_b_scale_fmt:MATRIX_SCALE_FMT_E5M3
 // GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// GFX1250: v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v[2:3], v[4:5] matrix_b_scale_fmt:MATRIX_SCALE_FMT_E5M3 ; encoding: [0x00,0x01,0x3a,0xcc,0x02,0x09,0x02,0x04,0x00,0x40,0x88,0xcc,0x08,0x31,0xa2,0x1c]
+// GFX1250: v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v[2:3], v[4:5] matrix_b_scale_fmt:MATRIX_SCALE_FMT_E5M3 ; encoding: [0x00,0x01,0x3a,0xcc,0x02,0x09,0x02,0x02,0x00,0x40,0x88,0xcc,0x08,0x31,0xa2,0x1c]
 // WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32
 
 v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v[2:3], v[4:5] matrix_b_scale_fmt:MATRIX_SCALE_FMT_E4M3
 // GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// GFX1250: v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v[2:3], v[4:5] matrix_b_scale_fmt:MATRIX_SCALE_FMT_E4M3 ; encoding: [0x00,0x02,0x3a,0xcc,0x02,0x09,0x02,0x04,0x00,0x40,0x88,0xcc,0x08,0x31,0xa2,0x1c]
+// GFX1250: v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v[2:3], v[4:5] matrix_b_scale_fmt:MATRIX_SCALE_FMT_E4M3 ; encoding: [0x00,0x02,0x3a,0xcc,0x02,0x09,0x02,0x02,0x00,0x40,0x88,0xcc,0x08,0x31,0xa2,0x1c]
 // WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32
 
 v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v[2:3], v[4:5] matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_scale_fmt:MATRIX_SCALE_FMT_E8 matrix_b_scale_fmt:MATRIX_SCALE_FMT_E8 matrix_a_reuse matrix_b_reuse neg_lo:[0,0,1] neg_hi:[0,0,1]
 // GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// GFX1250: v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v[2:3], v[4:5] matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_reuse matrix_b_reuse neg_lo:[0,0,1] neg_hi:[0,0,1] ; encoding: [0x00,0x68,0x3a,0xcc,0x02,0x09,0x02,0x0c,0x00,0x44,0x88,0xcc,0x08,0x31,0xa2,0x9c]
+// GFX1250: v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v[2:3], v[4:5] matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_reuse matrix_b_reuse neg_lo:[0,0,1] neg_hi:[0,0,1] ; encoding: [0x00,0x68,0x3a,0xcc,0x02,0x09,0x02,0x0a,0x00,0x44,0x88,0xcc,0x08,0x31,0xa2,0x9c]
 // WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32

diff  --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_wmma_w32.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_wmma_w32.txt
index 1dea7e138009e..248e39e253aae 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_wmma_w32.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_wmma_w32.txt
@@ -593,232 +593,232 @@
 # GFX1250: v_wmma_i32_16x16x64_iu8 v[16:23], v[0:7], v[8:15], v[16:23] clamp ; encoding: [0x10,0x80,0x72,0xcc,0x00,0x11,0x42,0x1c]
 
 0x00,0x00,0x3a,0xcc,0x82,0x88,0x01,0x04
-# GFX1250: v_wmma_ld_scale16_paired_b64 2, -4      ; encoding: [0x00,0x00,0x3a,0xcc,0x82,0x88,0x01,0x04]
+# GFX1250: v_wmma_ld_scale16_paired_b64 2, -4      ; encoding: [0x00,0x00,0x3a,0xcc,0x82,0x88,0x01,0x02]
 
 0x00,0x20,0x3a,0xcc,0x00,0x00,0x00,0x04
-# GFX1250: v_wmma_ld_scale16_paired_b64 s[0:1], s[0:1] matrix_a_reuse ; encoding: [0x00,0x20,0x3a,0xcc,0x00,0x00,0x00,0x04]
+# GFX1250: v_wmma_ld_scale16_paired_b64 s[0:1], s[0:1] matrix_a_reuse ; encoding: [0x00,0x20,0x3a,0xcc,0x00,0x00,0x00,0x02]
 
 0x00,0x08,0x3a,0xcc,0x00,0x00,0x00,0x04
-# GFX1250: v_wmma_ld_scale16_paired_b64 s[0:1], s[0:1] matrix_a_scale:MATRIX_SCALE_ROW1 ; encoding: [0x00,0x08,0x3a,0xcc,0x00,0x00,0x00,0x04]
+# GFX1250: v_wmma_ld_scale16_paired_b64 s[0:1], s[0:1] matrix_a_scale:MATRIX_SCALE_ROW1 ; encoding: [0x00,0x08,0x3a,0xcc,0x00,0x00,0x00,0x02]
 
 0x00,0x28,0x3a,0xcc,0x00,0x00,0x00,0x04
-# GFX1250: v_wmma_ld_scale16_paired_b64 s[0:1], s[0:1] matrix_a_scale:MATRIX_SCALE_ROW1 matrix_a_reuse ; encoding: [0x00,0x28,0x3a,0xcc,0x00,0x00,0x00,0x04]
+# GFX1250: v_wmma_ld_scale16_paired_b64 s[0:1], s[0:1] matrix_a_scale:MATRIX_SCALE_ROW1 matrix_a_reuse ; encoding: [0x00,0x28,0x3a,0xcc,0x00,0x00,0x00,0x02]
 
 0x00,0x40,0x3a,0xcc,0x00,0x00,0x00,0x04
-# GFX1250: v_wmma_ld_scale16_paired_b64 s[0:1], s[0:1] matrix_b_reuse ; encoding: [0x00,0x40,0x3a,0xcc,0x00,0x00,0x00,0x04]
+# GFX1250: v_wmma_ld_scale16_paired_b64 s[0:1], s[0:1] matrix_b_reuse ; encoding: [0x00,0x40,0x3a,0xcc,0x00,0x00,0x00,0x02]
 
 0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x0c
-# GFX1250: v_wmma_ld_scale16_paired_b64 s[0:1], s[0:1] matrix_b_scale:MATRIX_SCALE_ROW1 ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x0c]
+# GFX1250: v_wmma_ld_scale16_paired_b64 s[0:1], s[0:1] matrix_b_scale:MATRIX_SCALE_ROW1 ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x0a]
 
 0x00,0x40,0x3a,0xcc,0x00,0x00,0x00,0x0c
-# GFX1250: v_wmma_ld_scale16_paired_b64 s[0:1], s[0:1] matrix_b_scale:MATRIX_SCALE_ROW1 matrix_b_reuse ; encoding: [0x00,0x40,0x3a,0xcc,0x00,0x00,0x00,0x0c]
+# GFX1250: v_wmma_ld_scale16_paired_b64 s[0:1], s[0:1] matrix_b_scale:MATRIX_SCALE_ROW1 matrix_b_reuse ; encoding: [0x00,0x40,0x3a,0xcc,0x00,0x00,0x00,0x0a]
 
 0x00,0x00,0x3a,0xcc,0x02,0x08,0x00,0x04
-# GFX1250: v_wmma_ld_scale16_paired_b64 s[2:3], s[4:5] ; encoding: [0x00,0x00,0x3a,0xcc,0x02,0x08,0x00,0x04]
+# GFX1250: v_wmma_ld_scale16_paired_b64 s[2:3], s[4:5] ; encoding: [0x00,0x00,0x3a,0xcc,0x02,0x08,0x00,0x02]
 
 0x00,0x00,0x3a,0xcc,0x02,0x09,0x02,0x04
-# GFX1250: v_wmma_ld_scale16_paired_b64 v[2:3], v[4:5] ; encoding: [0x00,0x00,0x3a,0xcc,0x02,0x09,0x02,0x04]
+# GFX1250: v_wmma_ld_scale16_paired_b64 v[2:3], v[4:5] ; encoding: [0x00,0x00,0x3a,0xcc,0x02,0x09,0x02,0x02]
 
 0x00,0x6a,0x3a,0xcc,0x02,0x09,0x02,0x2c
-# GFX1250: v_wmma_ld_scale16_paired_b64 v[2:3], v[4:5] matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_scale_fmt:MATRIX_SCALE_FMT_E5M3 matrix_b_scale_fmt:MATRIX_SCALE_FMT_E4M3 matrix_a_reuse matrix_b_reuse ; encoding: [0x00,0x6a,0x3a,0xcc,0x02,0x09,0x02,0x2c]
+# GFX1250: v_wmma_ld_scale16_paired_b64 v[2:3], v[4:5] matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_scale_fmt:MATRIX_SCALE_FMT_E5M3 matrix_b_scale_fmt:MATRIX_SCALE_FMT_E4M3 matrix_a_reuse matrix_b_reuse ; encoding: [0x00,0x6a,0x3a,0xcc,0x02,0x09,0x02,0x2a]
 
 0x00,0x00,0x3a,0xcc,0x02,0x09,0x02,0x0c
-# GFX1250: v_wmma_ld_scale16_paired_b64 v[2:3], v[4:5] matrix_b_scale:MATRIX_SCALE_ROW1 ; encoding: [0x00,0x00,0x3a,0xcc,0x02,0x09,0x02,0x0c]
+# GFX1250: v_wmma_ld_scale16_paired_b64 v[2:3], v[4:5] matrix_b_scale:MATRIX_SCALE_ROW1 ; encoding: [0x00,0x00,0x3a,0xcc,0x02,0x09,0x02,0x0a]
 
 0x00,0x00,0x3a,0xcc,0x02,0x09,0x02,0x4c
-# GFX1250: v_wmma_ld_scale16_paired_b64 v[2:3], v[4:5] matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_scale_fmt:MATRIX_SCALE_FMT_E4M3 ; encoding: [0x00,0x00,0x3a,0xcc,0x02,0x09,0x02,0x4c]
+# GFX1250: v_wmma_ld_scale16_paired_b64 v[2:3], v[4:5] matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_scale_fmt:MATRIX_SCALE_FMT_E4M3 ; encoding: [0x00,0x00,0x3a,0xcc,0x02,0x09,0x02,0x4a]
 
 0x00,0x00,0x3a,0xcc,0x02,0x09,0x02,0x2c
-# GFX1250: v_wmma_ld_scale16_paired_b64 v[2:3], v[4:5] matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_scale_fmt:MATRIX_SCALE_FMT_E5M3 ; encoding: [0x00,0x00,0x3a,0xcc,0x02,0x09,0x02,0x2c]
+# GFX1250: v_wmma_ld_scale16_paired_b64 v[2:3], v[4:5] matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_scale_fmt:MATRIX_SCALE_FMT_E5M3 ; encoding: [0x00,0x00,0x3a,0xcc,0x02,0x09,0x02,0x2a]
 
 0x00,0x02,0x3a,0xcc,0x02,0x09,0x02,0x04
-# GFX1250: v_wmma_ld_scale16_paired_b64 v[2:3], v[4:5] matrix_b_scale_fmt:MATRIX_SCALE_FMT_E4M3 ; encoding: [0x00,0x02,0x3a,0xcc,0x02,0x09,0x02,0x04]
+# GFX1250: v_wmma_ld_scale16_paired_b64 v[2:3], v[4:5] matrix_b_scale_fmt:MATRIX_SCALE_FMT_E4M3 ; encoding: [0x00,0x02,0x3a,0xcc,0x02,0x09,0x02,0x02]
 
 0x00,0x01,0x3a,0xcc,0x02,0x09,0x02,0x04
-# GFX1250: v_wmma_ld_scale16_paired_b64 v[2:3], v[4:5] matrix_b_scale_fmt:MATRIX_SCALE_FMT_E5M3 ; encoding: [0x00,0x01,0x3a,0xcc,0x02,0x09,0x02,0x04]
+# GFX1250: v_wmma_ld_scale16_paired_b64 v[2:3], v[4:5] matrix_b_scale_fmt:MATRIX_SCALE_FMT_E5M3 ; encoding: [0x00,0x01,0x3a,0xcc,0x02,0x09,0x02,0x02]
 
 0x00,0x00,0x35,0xcc,0x82,0x88,0x01,0x04
-# GFX1250: v_wmma_ld_scale_paired_b32 2, -4        ; encoding: [0x00,0x00,0x35,0xcc,0x82,0x88,0x01,0x04]
+# GFX1250: v_wmma_ld_scale_paired_b32 2, -4        ; encoding: [0x00,0x00,0x35,0xcc,0x82,0x88,0x01,0x02]
 
 0x00,0x20,0x35,0xcc,0x00,0x00,0x00,0x04
-# GFX1250: v_wmma_ld_scale_paired_b32 s0, s0 matrix_a_reuse ; encoding: [0x00,0x20,0x35,0xcc,0x00,0x00,0x00,0x04]
+# GFX1250: v_wmma_ld_scale_paired_b32 s0, s0 matrix_a_reuse ; encoding: [0x00,0x20,0x35,0xcc,0x00,0x00,0x00,0x02]
 
 0x00,0x08,0x35,0xcc,0x00,0x00,0x00,0x04
-# GFX1250: v_wmma_ld_scale_paired_b32 s0, s0 matrix_a_scale:MATRIX_SCALE_ROW1 ; encoding: [0x00,0x08,0x35,0xcc,0x00,0x00,0x00,0x04]
+# GFX1250: v_wmma_ld_scale_paired_b32 s0, s0 matrix_a_scale:MATRIX_SCALE_ROW1 ; encoding: [0x00,0x08,0x35,0xcc,0x00,0x00,0x00,0x02]
 
 0x00,0x28,0x35,0xcc,0x00,0x00,0x00,0x04
-# GFX1250: v_wmma_ld_scale_paired_b32 s0, s0 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_a_reuse ; encoding: [0x00,0x28,0x35,0xcc,0x00,0x00,0x00,0x04]
+# GFX1250: v_wmma_ld_scale_paired_b32 s0, s0 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_a_reuse ; encoding: [0x00,0x28,0x35,0xcc,0x00,0x00,0x00,0x02]
 
 0x00,0x40,0x35,0xcc,0x00,0x00,0x00,0x04
-# GFX1250: v_wmma_ld_scale_paired_b32 s0, s0 matrix_b_reuse ; encoding: [0x00,0x40,0x35,0xcc,0x00,0x00,0x00,0x04]
+# GFX1250: v_wmma_ld_scale_paired_b32 s0, s0 matrix_b_reuse ; encoding: [0x00,0x40,0x35,0xcc,0x00,0x00,0x00,0x02]
 
 0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x0c
-# GFX1250: v_wmma_ld_scale_paired_b32 s0, s0 matrix_b_scale:MATRIX_SCALE_ROW1 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x0c]
+# GFX1250: v_wmma_ld_scale_paired_b32 s0, s0 matrix_b_scale:MATRIX_SCALE_ROW1 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x0a]
 
 0x00,0x40,0x35,0xcc,0x00,0x00,0x00,0x0c
-# GFX1250: v_wmma_ld_scale_paired_b32 s0, s0 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_b_reuse ; encoding: [0x00,0x40,0x35,0xcc,0x00,0x00,0x00,0x0c]
+# GFX1250: v_wmma_ld_scale_paired_b32 s0, s0 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_b_reuse ; encoding: [0x00,0x40,0x35,0xcc,0x00,0x00,0x00,0x0a]
 
 0x00,0x00,0x35,0xcc,0x01,0x04,0x00,0x04
-# GFX1250: v_wmma_ld_scale_paired_b32 s1, s2       ; encoding: [0x00,0x00,0x35,0xcc,0x01,0x04,0x00,0x04]
+# GFX1250: v_wmma_ld_scale_paired_b32 s1, s2       ; encoding: [0x00,0x00,0x35,0xcc,0x01,0x04,0x00,0x02]
 
 0x00,0x00,0x35,0xcc,0x01,0x05,0x02,0x04
-# GFX1250: v_wmma_ld_scale_paired_b32 v1, v2       ; encoding: [0x00,0x00,0x35,0xcc,0x01,0x05,0x02,0x04]
+# GFX1250: v_wmma_ld_scale_paired_b32 v1, v2       ; encoding: [0x00,0x00,0x35,0xcc,0x01,0x05,0x02,0x02]
 
 0x00,0x6a,0x35,0xcc,0x01,0x05,0x02,0x2c
-# GFX1250: v_wmma_ld_scale_paired_b32 v1, v2 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_scale_fmt:MATRIX_SCALE_FMT_E5M3 matrix_b_scale_fmt:MATRIX_SCALE_FMT_E4M3 matrix_a_reuse matrix_b_reuse ; encoding: [0x00,0x6a,0x35,0xcc,0x01,0x05,0x02,0x2c]
+# GFX1250: v_wmma_ld_scale_paired_b32 v1, v2 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_scale_fmt:MATRIX_SCALE_FMT_E5M3 matrix_b_scale_fmt:MATRIX_SCALE_FMT_E4M3 matrix_a_reuse matrix_b_reuse ; encoding: [0x00,0x6a,0x35,0xcc,0x01,0x05,0x02,0x2a]
 
 0x00,0x00,0x35,0xcc,0x01,0x05,0x02,0x0c
-# GFX1250: v_wmma_ld_scale_paired_b32 v1, v2 matrix_b_scale:MATRIX_SCALE_ROW1 ; encoding: [0x00,0x00,0x35,0xcc,0x01,0x05,0x02,0x0c]
+# GFX1250: v_wmma_ld_scale_paired_b32 v1, v2 matrix_b_scale:MATRIX_SCALE_ROW1 ; encoding: [0x00,0x00,0x35,0xcc,0x01,0x05,0x02,0x0a]
 
 0x00,0x00,0x35,0xcc,0x01,0x05,0x02,0x4c
-# GFX1250: v_wmma_ld_scale_paired_b32 v1, v2 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_scale_fmt:MATRIX_SCALE_FMT_E4M3 ; encoding: [0x00,0x00,0x35,0xcc,0x01,0x05,0x02,0x4c]
+# GFX1250: v_wmma_ld_scale_paired_b32 v1, v2 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_scale_fmt:MATRIX_SCALE_FMT_E4M3 ; encoding: [0x00,0x00,0x35,0xcc,0x01,0x05,0x02,0x4a]
 
 0x00,0x00,0x35,0xcc,0x01,0x05,0x02,0x2c
-# GFX1250: v_wmma_ld_scale_paired_b32 v1, v2 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_scale_fmt:MATRIX_SCALE_FMT_E5M3 ; encoding: [0x00,0x00,0x35,0xcc,0x01,0x05,0x02,0x2c]
+# GFX1250: v_wmma_ld_scale_paired_b32 v1, v2 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_scale_fmt:MATRIX_SCALE_FMT_E5M3 ; encoding: [0x00,0x00,0x35,0xcc,0x01,0x05,0x02,0x2a]
 
 0x00,0x02,0x35,0xcc,0x01,0x05,0x02,0x04
-# GFX1250: v_wmma_ld_scale_paired_b32 v1, v2 matrix_b_scale_fmt:MATRIX_SCALE_FMT_E4M3 ; encoding: [0x00,0x02,0x35,0xcc,0x01,0x05,0x02,0x04]
+# GFX1250: v_wmma_ld_scale_paired_b32 v1, v2 matrix_b_scale_fmt:MATRIX_SCALE_FMT_E4M3 ; encoding: [0x00,0x02,0x35,0xcc,0x01,0x05,0x02,0x02]
 
 0x00,0x01,0x35,0xcc,0x01,0x05,0x02,0x04
-# GFX1250: v_wmma_ld_scale_paired_b32 v1, v2 matrix_b_scale_fmt:MATRIX_SCALE_FMT_E5M3 ; encoding: [0x00,0x01,0x35,0xcc,0x01,0x05,0x02,0x04]
+# GFX1250: v_wmma_ld_scale_paired_b32 v1, v2 matrix_b_scale_fmt:MATRIX_SCALE_FMT_E5M3 ; encoding: [0x00,0x01,0x35,0xcc,0x01,0x05,0x02,0x02]
 
 0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x04,0x00,0x18,0x33,0xcc,0x00,0x01,0x02,0x04
-# GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:11], v[0:15], v[0:7], s[0:1], s[0:1] matrix_a_fmt:MATRIX_FMT_BF6 ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x04,0x00,0x18,0x33,0xcc,0x00,0x01,0x02,0x04]
+# GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:11], v[0:15], v[0:7], s[0:1], s[0:1] matrix_a_fmt:MATRIX_FMT_BF6 ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x02,0x00,0x18,0x33,0xcc,0x00,0x01,0x02,0x04]
 
 0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x04,0x00,0x10,0x33,0xcc,0x00,0x01,0x02,0x04
-# GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:11], v[0:15], v[0:7], s[0:1], s[0:1] matrix_a_fmt:MATRIX_FMT_FP6 ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x04,0x00,0x10,0x33,0xcc,0x00,0x01,0x02,0x04]
+# GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:11], v[0:15], v[0:7], s[0:1], s[0:1] matrix_a_fmt:MATRIX_FMT_FP6 ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x02,0x00,0x10,0x33,0xcc,0x00,0x01,0x02,0x04]
 
 0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x04,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x1c
-# GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:11], v[0:7], s[0:1], s[0:1] matrix_b_fmt:MATRIX_FMT_BF6 ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x04,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x1c]
+# GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:11], v[0:7], s[0:1], s[0:1] matrix_b_fmt:MATRIX_FMT_BF6 ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x02,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x1c]
 
 0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x04,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x14
-# GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:11], v[0:7], s[0:1], s[0:1] matrix_b_fmt:MATRIX_FMT_FP6 ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x04,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x14]
+# GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:11], v[0:7], s[0:1], s[0:1] matrix_b_fmt:MATRIX_FMT_FP6 ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x02,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x14]
 
 0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x04,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04
-# GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x04,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04]
+# GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x02,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04]
 
 0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x04,0x00,0x08,0x33,0xcc,0x00,0x01,0x02,0x04
-# GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] matrix_a_fmt:MATRIX_FMT_BF8 ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x04,0x00,0x08,0x33,0xcc,0x00,0x01,0x02,0x04]
+# GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] matrix_a_fmt:MATRIX_FMT_BF8 ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x02,0x00,0x08,0x33,0xcc,0x00,0x01,0x02,0x04]
 
 0x00,0x20,0x3a,0xcc,0x00,0x00,0x00,0x04,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04
-# GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] matrix_a_reuse ; encoding: [0x00,0x20,0x3a,0xcc,0x00,0x00,0x00,0x04,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04]
+# GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] matrix_a_reuse ; encoding: [0x00,0x20,0x3a,0xcc,0x00,0x00,0x00,0x02,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04]
 
 0x00,0x08,0x3a,0xcc,0x00,0x00,0x00,0x04,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04
-# GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] matrix_a_scale:MATRIX_SCALE_ROW1 ; encoding: [0x00,0x08,0x3a,0xcc,0x00,0x00,0x00,0x04,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04]
+# GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] matrix_a_scale:MATRIX_SCALE_ROW1 ; encoding: [0x00,0x08,0x3a,0xcc,0x00,0x00,0x00,0x02,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04]
 
 0x00,0x28,0x3a,0xcc,0x00,0x00,0x00,0x04,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04
-# GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] matrix_a_scale:MATRIX_SCALE_ROW1 matrix_a_reuse ; encoding: [0x00,0x28,0x3a,0xcc,0x00,0x00,0x00,0x04,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04]
+# GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] matrix_a_scale:MATRIX_SCALE_ROW1 matrix_a_reuse ; encoding: [0x00,0x28,0x3a,0xcc,0x00,0x00,0x00,0x02,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04]
 
 0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x04,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x0c
-# GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] matrix_b_fmt:MATRIX_FMT_BF8 ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x04,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x0c]
+# GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] matrix_b_fmt:MATRIX_FMT_BF8 ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x02,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x0c]
 
 0x00,0x40,0x3a,0xcc,0x00,0x00,0x00,0x04,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04
-# GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] matrix_b_reuse ; encoding: [0x00,0x40,0x3a,0xcc,0x00,0x00,0x00,0x04,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04]
+# GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] matrix_b_reuse ; encoding: [0x00,0x40,0x3a,0xcc,0x00,0x00,0x00,0x02,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04]
 
 0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x0c,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04
-# GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] matrix_b_scale:MATRIX_SCALE_ROW1 ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x0c,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04]
+# GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] matrix_b_scale:MATRIX_SCALE_ROW1 ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x0a,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04]
 
 0x00,0x40,0x3a,0xcc,0x00,0x00,0x00,0x0c,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04
-# GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] matrix_b_scale:MATRIX_SCALE_ROW1 matrix_b_reuse ; encoding: [0x00,0x40,0x3a,0xcc,0x00,0x00,0x00,0x0c,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04]
+# GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] matrix_b_scale:MATRIX_SCALE_ROW1 matrix_b_reuse ; encoding: [0x00,0x40,0x3a,0xcc,0x00,0x00,0x00,0x0a,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04]
 
 0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x04,0x00,0x40,0x33,0xcc,0x00,0x01,0x02,0x04
-# GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:7], v[0:7], s[0:1], s[0:1] matrix_b_fmt:MATRIX_FMT_FP4 ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x04,0x00,0x40,0x33,0xcc,0x00,0x01,0x02,0x04]
+# GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:7], v[0:7], s[0:1], s[0:1] matrix_b_fmt:MATRIX_FMT_FP4 ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x02,0x00,0x40,0x33,0xcc,0x00,0x01,0x02,0x04]
 
 0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x04,0x00,0x20,0x33,0xcc,0x00,0x01,0x02,0x04
-# GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:7], v[0:15], v[0:7], s[0:1], s[0:1] matrix_a_fmt:MATRIX_FMT_FP4 ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x04,0x00,0x20,0x33,0xcc,0x00,0x01,0x02,0x04]
+# GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:7], v[0:15], v[0:7], s[0:1], s[0:1] matrix_a_fmt:MATRIX_FMT_FP4 ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x02,0x00,0x20,0x33,0xcc,0x00,0x01,0x02,0x04]
 
 0x00,0x68,0x3a,0xcc,0x02,0x08,0x00,0x0c,0x00,0x0c,0x33,0xcc,0x08,0x31,0xa2,0x94
-# GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:35], v[40:47], s[2:3], s[4:5] matrix_a_fmt:MATRIX_FMT_BF8 matrix_b_fmt:MATRIX_FMT_FP6 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_reuse matrix_b_reuse neg_lo:[0,0,1] neg_hi:[0,0,1] ; encoding: [0x00,0x68,0x3a,0xcc,0x02,0x08,0x00,0x0c,0x00,0x0c,0x33,0xcc,0x08,0x31,0xa2,0x94]
+# GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:35], v[40:47], s[2:3], s[4:5] matrix_a_fmt:MATRIX_FMT_BF8 matrix_b_fmt:MATRIX_FMT_FP6 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_reuse matrix_b_reuse neg_lo:[0,0,1] neg_hi:[0,0,1] ; encoding: [0x00,0x68,0x3a,0xcc,0x02,0x08,0x00,0x0a,0x00,0x0c,0x33,0xcc,0x08,0x31,0xa2,0x94]
 
 0x00,0x68,0x3a,0xcc,0x02,0x09,0x02,0x0c,0x00,0x0c,0x33,0xcc,0x08,0x31,0xa2,0x94
-# GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:35], v[40:47], v[2:3], v[4:5] matrix_a_fmt:MATRIX_FMT_BF8 matrix_b_fmt:MATRIX_FMT_FP6 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_reuse matrix_b_reuse neg_lo:[0,0,1] neg_hi:[0,0,1] ; encoding: [0x00,0x68,0x3a,0xcc,0x02,0x09,0x02,0x0c,0x00,0x0c,0x33,0xcc,0x08,0x31,0xa2,0x94]
+# GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:35], v[40:47], v[2:3], v[4:5] matrix_a_fmt:MATRIX_FMT_BF8 matrix_b_fmt:MATRIX_FMT_FP6 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_reuse matrix_b_reuse neg_lo:[0,0,1] neg_hi:[0,0,1] ; encoding: [0x00,0x68,0x3a,0xcc,0x02,0x09,0x02,0x0a,0x00,0x0c,0x33,0xcc,0x08,0x31,0xa2,0x94]
 
 0x00,0x08,0x3a,0xcc,0x02,0x09,0x02,0x0c,0x00,0x0c,0x33,0xcc,0x08,0x31,0xa2,0x94
-# GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:35], v[40:47], v[2:3], v[4:5] matrix_a_fmt:MATRIX_FMT_BF8 matrix_b_fmt:MATRIX_FMT_FP6 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 neg_lo:[0,0,1] neg_hi:[0,0,1] ; encoding: [0x00,0x08,0x3a,0xcc,0x02,0x09,0x02,0x0c,0x00,0x0c,0x33,0xcc,0x08,0x31,0xa2,0x94]
+# GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:35], v[40:47], v[2:3], v[4:5] matrix_a_fmt:MATRIX_FMT_BF8 matrix_b_fmt:MATRIX_FMT_FP6 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 neg_lo:[0,0,1] neg_hi:[0,0,1] ; encoding: [0x00,0x08,0x3a,0xcc,0x02,0x09,0x02,0x0a,0x00,0x0c,0x33,0xcc,0x08,0x31,0xa2,0x94]
 
 0x00,0x00,0x3a,0xcc,0x02,0x09,0x02,0x04,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04
-# GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47], v[2:3], v[4:5] ; encoding: [0x00,0x00,0x3a,0xcc,0x02,0x09,0x02,0x04,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04]
+# GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47], v[2:3], v[4:5] ; encoding: [0x00,0x00,0x3a,0xcc,0x02,0x09,0x02,0x02,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04]
 
 0x00,0x00,0x3a,0xcc,0x02,0x09,0x02,0x44,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04
-# GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47], v[2:3], v[4:5] matrix_a_scale_fmt:MATRIX_SCALE_FMT_E4M3 ; encoding: [0x00,0x00,0x3a,0xcc,0x02,0x09,0x02,0x44,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04]
+# GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47], v[2:3], v[4:5] matrix_a_scale_fmt:MATRIX_SCALE_FMT_E4M3 ; encoding: [0x00,0x00,0x3a,0xcc,0x02,0x09,0x02,0x42,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04]
 
 0x00,0x00,0x3a,0xcc,0x02,0x09,0x02,0x24,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04
-# GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47], v[2:3], v[4:5] matrix_a_scale_fmt:MATRIX_SCALE_FMT_E5M3 ; encoding: [0x00,0x00,0x3a,0xcc,0x02,0x09,0x02,0x24,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04]
+# GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47], v[2:3], v[4:5] matrix_a_scale_fmt:MATRIX_SCALE_FMT_E5M3 ; encoding: [0x00,0x00,0x3a,0xcc,0x02,0x09,0x02,0x22,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04]
 
 0x00,0x02,0x3a,0xcc,0x02,0x09,0x02,0x04,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04
-# GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47], v[2:3], v[4:5] matrix_b_scale_fmt:MATRIX_SCALE_FMT_E4M3 ; encoding: [0x00,0x02,0x3a,0xcc,0x02,0x09,0x02,0x04,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04]
+# GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47], v[2:3], v[4:5] matrix_b_scale_fmt:MATRIX_SCALE_FMT_E4M3 ; encoding: [0x00,0x02,0x3a,0xcc,0x02,0x09,0x02,0x02,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04]
 
 0x00,0x01,0x3a,0xcc,0x02,0x09,0x02,0x04,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04
-# GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47], v[2:3], v[4:5] matrix_b_scale_fmt:MATRIX_SCALE_FMT_E5M3 ; encoding: [0x00,0x01,0x3a,0xcc,0x02,0x09,0x02,0x04,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04]
+# GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47], v[2:3], v[4:5] matrix_b_scale_fmt:MATRIX_SCALE_FMT_E5M3 ; encoding: [0x00,0x01,0x3a,0xcc,0x02,0x09,0x02,0x02,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04]
 
 0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x04,0x00,0x18,0x33,0xcc,0x00,0x01,0x02,0x04
-# GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:11], v[0:15], v[0:7], s0, s0 matrix_a_fmt:MATRIX_FMT_BF6 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x04,0x00,0x18,0x33,0xcc,0x00,0x01,0x02,0x04]
+# GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:11], v[0:15], v[0:7], s0, s0 matrix_a_fmt:MATRIX_FMT_BF6 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x02,0x00,0x18,0x33,0xcc,0x00,0x01,0x02,0x04]
 
 0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x04,0x00,0x10,0x33,0xcc,0x00,0x01,0x02,0x04
-# GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:11], v[0:15], v[0:7], s0, s0 matrix_a_fmt:MATRIX_FMT_FP6 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x04,0x00,0x10,0x33,0xcc,0x00,0x01,0x02,0x04]
+# GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:11], v[0:15], v[0:7], s0, s0 matrix_a_fmt:MATRIX_FMT_FP6 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x02,0x00,0x10,0x33,0xcc,0x00,0x01,0x02,0x04]
 
 0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x04,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x1c
-# GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:11], v[0:7], s0, s0 matrix_b_fmt:MATRIX_FMT_BF6 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x04,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x1c]
+# GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:11], v[0:7], s0, s0 matrix_b_fmt:MATRIX_FMT_BF6 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x02,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x1c]
 
 0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x04,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x14
-# GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:11], v[0:7], s0, s0 matrix_b_fmt:MATRIX_FMT_FP6 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x04,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x14]
+# GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:11], v[0:7], s0, s0 matrix_b_fmt:MATRIX_FMT_FP6 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x02,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x14]
 
 0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x04,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04
-# GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x04,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04]
+# GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x02,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04]
 
 0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x04,0x00,0x08,0x33,0xcc,0x00,0x01,0x02,0x04
-# GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 matrix_a_fmt:MATRIX_FMT_BF8 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x04,0x00,0x08,0x33,0xcc,0x00,0x01,0x02,0x04]
+# GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 matrix_a_fmt:MATRIX_FMT_BF8 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x02,0x00,0x08,0x33,0xcc,0x00,0x01,0x02,0x04]
 
 0x00,0x20,0x35,0xcc,0x00,0x00,0x00,0x04,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04
-# GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 matrix_a_reuse ; encoding: [0x00,0x20,0x35,0xcc,0x00,0x00,0x00,0x04,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04]
+# GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 matrix_a_reuse ; encoding: [0x00,0x20,0x35,0xcc,0x00,0x00,0x00,0x02,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04]
 
 0x00,0x08,0x35,0xcc,0x00,0x00,0x00,0x04,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04
-# GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 matrix_a_scale:MATRIX_SCALE_ROW1 ; encoding: [0x00,0x08,0x35,0xcc,0x00,0x00,0x00,0x04,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04]
+# GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 matrix_a_scale:MATRIX_SCALE_ROW1 ; encoding: [0x00,0x08,0x35,0xcc,0x00,0x00,0x00,0x02,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04]
 
 0x00,0x28,0x35,0xcc,0x00,0x00,0x00,0x04,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04
-# GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_a_reuse ; encoding: [0x00,0x28,0x35,0xcc,0x00,0x00,0x00,0x04,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04]
+# GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_a_reuse ; encoding: [0x00,0x28,0x35,0xcc,0x00,0x00,0x00,0x02,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04]
 
 0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x04,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x0c
-# GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 matrix_b_fmt:MATRIX_FMT_BF8 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x04,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x0c]
+# GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 matrix_b_fmt:MATRIX_FMT_BF8 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x02,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x0c]
 
 0x00,0x40,0x35,0xcc,0x00,0x00,0x00,0x04,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04
-# GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 matrix_b_reuse ; encoding: [0x00,0x40,0x35,0xcc,0x00,0x00,0x00,0x04,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04]
+# GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 matrix_b_reuse ; encoding: [0x00,0x40,0x35,0xcc,0x00,0x00,0x00,0x02,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04]
 
 0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x0c,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04
-# GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 matrix_b_scale:MATRIX_SCALE_ROW1 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x0c,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04]
+# GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 matrix_b_scale:MATRIX_SCALE_ROW1 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x0a,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04]
 
 0x00,0x40,0x35,0xcc,0x00,0x00,0x00,0x0c,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04
-# GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_b_reuse ; encoding: [0x00,0x40,0x35,0xcc,0x00,0x00,0x00,0x0c,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04]
+# GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_b_reuse ; encoding: [0x00,0x40,0x35,0xcc,0x00,0x00,0x00,0x0a,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04]
 
 0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x04,0x00,0x40,0x33,0xcc,0x00,0x01,0x02,0x04
-# GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:7], v[0:7], s0, s0 matrix_b_fmt:MATRIX_FMT_FP4 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x04,0x00,0x40,0x33,0xcc,0x00,0x01,0x02,0x04]
+# GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:7], v[0:7], s0, s0 matrix_b_fmt:MATRIX_FMT_FP4 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x02,0x00,0x40,0x33,0xcc,0x00,0x01,0x02,0x04]
 
 0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x04,0x00,0x20,0x33,0xcc,0x00,0x01,0x02,0x04
-# GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:7], v[0:15], v[0:7], s0, s0 matrix_a_fmt:MATRIX_FMT_FP4 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x04,0x00,0x20,0x33,0xcc,0x00,0x01,0x02,0x04]
+# GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:7], v[0:15], v[0:7], s0, s0 matrix_a_fmt:MATRIX_FMT_FP4 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x02,0x00,0x20,0x33,0xcc,0x00,0x01,0x02,0x04]
 
 0x00,0x68,0x35,0xcc,0x01,0x04,0x00,0x0c,0x00,0x0c,0x33,0xcc,0x08,0x31,0xa2,0x94
-# GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:35], v[40:47], s1, s2 matrix_a_fmt:MATRIX_FMT_BF8 matrix_b_fmt:MATRIX_FMT_FP6 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_reuse matrix_b_reuse neg_lo:[0,0,1] neg_hi:[0,0,1] ; encoding: [0x00,0x68,0x35,0xcc,0x01,0x04,0x00,0x0c,0x00,0x0c,0x33,0xcc,0x08,0x31,0xa2,0x94]
+# GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:35], v[40:47], s1, s2 matrix_a_fmt:MATRIX_FMT_BF8 matrix_b_fmt:MATRIX_FMT_FP6 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_reuse matrix_b_reuse neg_lo:[0,0,1] neg_hi:[0,0,1] ; encoding: [0x00,0x68,0x35,0xcc,0x01,0x04,0x00,0x0a,0x00,0x0c,0x33,0xcc,0x08,0x31,0xa2,0x94]
 
 0x00,0x68,0x35,0xcc,0x01,0x05,0x02,0x0c,0x00,0x0c,0x33,0xcc,0x08,0x31,0xa2,0x94
-# GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:35], v[40:47], v1, v2 matrix_a_fmt:MATRIX_FMT_BF8 matrix_b_fmt:MATRIX_FMT_FP6 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_reuse matrix_b_reuse neg_lo:[0,0,1] neg_hi:[0,0,1] ; encoding: [0x00,0x68,0x35,0xcc,0x01,0x05,0x02,0x0c,0x00,0x0c,0x33,0xcc,0x08,0x31,0xa2,0x94]
+# GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:35], v[40:47], v1, v2 matrix_a_fmt:MATRIX_FMT_BF8 matrix_b_fmt:MATRIX_FMT_FP6 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_reuse matrix_b_reuse neg_lo:[0,0,1] neg_hi:[0,0,1] ; encoding: [0x00,0x68,0x35,0xcc,0x01,0x05,0x02,0x0a,0x00,0x0c,0x33,0xcc,0x08,0x31,0xa2,0x94]
 
 0x00,0x08,0x35,0xcc,0x01,0x05,0x02,0x0c,0x00,0x0c,0x33,0xcc,0x08,0x31,0xa2,0x94
-# GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:35], v[40:47], v1, v2 matrix_a_fmt:MATRIX_FMT_BF8 matrix_b_fmt:MATRIX_FMT_FP6 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 neg_lo:[0,0,1] neg_hi:[0,0,1] ; encoding: [0x00,0x08,0x35,0xcc,0x01,0x05,0x02,0x0c,0x00,0x0c,0x33,0xcc,0x08,0x31,0xa2,0x94]
+# GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:35], v[40:47], v1, v2 matrix_a_fmt:MATRIX_FMT_BF8 matrix_b_fmt:MATRIX_FMT_FP6 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 neg_lo:[0,0,1] neg_hi:[0,0,1] ; encoding: [0x00,0x08,0x35,0xcc,0x01,0x05,0x02,0x0a,0x00,0x0c,0x33,0xcc,0x08,0x31,0xa2,0x94]
 
 0x00,0x00,0x35,0xcc,0x01,0x05,0x02,0x04,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04
-# GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47], v1, v2 ; encoding: [0x00,0x00,0x35,0xcc,0x01,0x05,0x02,0x04,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04]
+# GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47], v1, v2 ; encoding: [0x00,0x00,0x35,0xcc,0x01,0x05,0x02,0x02,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04]
 
 0x00,0x00,0x35,0xcc,0x01,0x05,0x02,0x44,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04
-# GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47], v1, v2 matrix_a_scale_fmt:MATRIX_SCALE_FMT_E4M3 ; encoding: [0x00,0x00,0x35,0xcc,0x01,0x05,0x02,0x44,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04]
+# GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47], v1, v2 matrix_a_scale_fmt:MATRIX_SCALE_FMT_E4M3 ; encoding: [0x00,0x00,0x35,0xcc,0x01,0x05,0x02,0x42,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04]
 
 0x00,0x00,0x35,0xcc,0x01,0x05,0x02,0x24,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04
-# GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47], v1, v2 matrix_a_scale_fmt:MATRIX_SCALE_FMT_E5M3 ; encoding: [0x00,0x00,0x35,0xcc,0x01,0x05,0x02,0x24,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04]
+# GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47], v1, v2 matrix_a_scale_fmt:MATRIX_SCALE_FMT_E5M3 ; encoding: [0x00,0x00,0x35,0xcc,0x01,0x05,0x02,0x22,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04]
 
 0x00,0x02,0x35,0xcc,0x01,0x05,0x02,0x04,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04
-# GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47], v1, v2 matrix_b_scale_fmt:MATRIX_SCALE_FMT_E4M3 ; encoding: [0x00,0x02,0x35,0xcc,0x01,0x05,0x02,0x04,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04]
+# GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47], v1, v2 matrix_b_scale_fmt:MATRIX_SCALE_FMT_E4M3 ; encoding: [0x00,0x02,0x35,0xcc,0x01,0x05,0x02,0x02,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04]
 
 0x00,0x01,0x35,0xcc,0x01,0x05,0x02,0x04,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04
-# GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47], v1, v2 matrix_b_scale_fmt:MATRIX_SCALE_FMT_E5M3 ; encoding: [0x00,0x01,0x35,0xcc,0x01,0x05,0x02,0x04,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04]
+# GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47], v1, v2 matrix_b_scale_fmt:MATRIX_SCALE_FMT_E5M3 ; encoding: [0x00,0x01,0x35,0xcc,0x01,0x05,0x02,0x02,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04]
 
 0x10,0x00,0x87,0xcc,0x00,0x11,0xca,0x1b
 # GFX1250: v_wmma_f16_16x16x128_bf8_bf8 v[16:19], v[0:15], v[8:23], 1.0 ; encoding: [0x10,0x00,0x87,0xcc,0x00,0x11,0xca,0x1b]
@@ -1007,91 +1007,91 @@
 # GFX1250: v_wmma_f32_32x16x128_f4 v[4:19], v[0:15], v[2:9], v[4:19] neg_lo:[0,0,1] neg_hi:[0,0,1] ; encoding: [0x04,0x44,0x88,0xcc,0x00,0x05,0x12,0x9c]
 
 0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x04,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c
-# GFX1250: v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s0, s0 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x04,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c]
+# GFX1250: v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s0, s0 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x02,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c]
 
 0x00,0x20,0x35,0xcc,0x00,0x00,0x00,0x04,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c
-# GFX1250: v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s0, s0 matrix_a_reuse ; encoding: [0x00,0x20,0x35,0xcc,0x00,0x00,0x00,0x04,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c]
+# GFX1250: v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s0, s0 matrix_a_reuse ; encoding: [0x00,0x20,0x35,0xcc,0x00,0x00,0x00,0x02,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c]
 
 0x00,0x08,0x35,0xcc,0x00,0x00,0x00,0x04,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c
-# GFX1250: v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s0, s0 matrix_a_scale:MATRIX_SCALE_ROW1 ; encoding: [0x00,0x08,0x35,0xcc,0x00,0x00,0x00,0x04,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c]
+# GFX1250: v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s0, s0 matrix_a_scale:MATRIX_SCALE_ROW1 ; encoding: [0x00,0x08,0x35,0xcc,0x00,0x00,0x00,0x02,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c]
 
 0x00,0x28,0x35,0xcc,0x00,0x00,0x00,0x04,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c
-# GFX1250: v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s0, s0 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_a_reuse ; encoding: [0x00,0x28,0x35,0xcc,0x00,0x00,0x00,0x04,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c]
+# GFX1250: v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s0, s0 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_a_reuse ; encoding: [0x00,0x28,0x35,0xcc,0x00,0x00,0x00,0x02,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c]
 
 0x00,0x40,0x35,0xcc,0x00,0x00,0x00,0x04,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c
-# GFX1250: v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s0, s0 matrix_b_reuse ; encoding: [0x00,0x40,0x35,0xcc,0x00,0x00,0x00,0x04,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c]
+# GFX1250: v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s0, s0 matrix_b_reuse ; encoding: [0x00,0x40,0x35,0xcc,0x00,0x00,0x00,0x02,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c]
 
 0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x0c,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c
-# GFX1250: v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s0, s0 matrix_b_scale:MATRIX_SCALE_ROW1 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x0c,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c]
+# GFX1250: v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s0, s0 matrix_b_scale:MATRIX_SCALE_ROW1 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x0a,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c]
 
 0x00,0x40,0x35,0xcc,0x00,0x00,0x00,0x0c,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c
-# GFX1250: v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s0, s0 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_b_reuse ; encoding: [0x00,0x40,0x35,0xcc,0x00,0x00,0x00,0x0c,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c]
+# GFX1250: v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s0, s0 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_b_reuse ; encoding: [0x00,0x40,0x35,0xcc,0x00,0x00,0x00,0x0a,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c]
 
 0x00,0x68,0x35,0xcc,0x01,0x04,0x00,0x0c,0x00,0x44,0x88,0xcc,0x08,0x31,0xa2,0x9c
-# GFX1250: v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], s1, s2 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_reuse matrix_b_reuse neg_lo:[0,0,1] neg_hi:[0,0,1] ; encoding: [0x00,0x68,0x35,0xcc,0x01,0x04,0x00,0x0c,0x00,0x44,0x88,0xcc,0x08,0x31,0xa2,0x9c]
+# GFX1250: v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], s1, s2 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_reuse matrix_b_reuse neg_lo:[0,0,1] neg_hi:[0,0,1] ; encoding: [0x00,0x68,0x35,0xcc,0x01,0x04,0x00,0x0a,0x00,0x44,0x88,0xcc,0x08,0x31,0xa2,0x9c]
 
 0x00,0x00,0x35,0xcc,0x01,0x05,0x02,0x0c,0x00,0x40,0x88,0xcc,0x08,0x31,0xa2,0x1c
-# GFX1250: v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v1, v2 matrix_b_scale:MATRIX_SCALE_ROW1 ; encoding: [0x00,0x00,0x35,0xcc,0x01,0x05,0x02,0x0c,0x00,0x40,0x88,0xcc,0x08,0x31,0xa2,0x1c]
+# GFX1250: v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v1, v2 matrix_b_scale:MATRIX_SCALE_ROW1 ; encoding: [0x00,0x00,0x35,0xcc,0x01,0x05,0x02,0x0a,0x00,0x40,0x88,0xcc,0x08,0x31,0xa2,0x1c]
 
 0x00,0x68,0x35,0xcc,0x01,0x05,0x02,0x0c,0x00,0x44,0x88,0xcc,0x08,0x31,0xa2,0x9c
-# GFX1250: v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v1, v2 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_reuse matrix_b_reuse neg_lo:[0,0,1] neg_hi:[0,0,1] ; encoding: [0x00,0x68,0x35,0xcc,0x01,0x05,0x02,0x0c,0x00,0x44,0x88,0xcc,0x08,0x31,0xa2,0x9c]
+# GFX1250: v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v1, v2 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_reuse matrix_b_reuse neg_lo:[0,0,1] neg_hi:[0,0,1] ; encoding: [0x00,0x68,0x35,0xcc,0x01,0x05,0x02,0x0a,0x00,0x44,0x88,0xcc,0x08,0x31,0xa2,0x9c]
 
 0x00,0x08,0x35,0xcc,0x01,0x05,0x02,0x0c,0x00,0x44,0x88,0xcc,0x08,0x31,0xa2,0x9c
-# GFX1250: v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v1, v2 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 neg_lo:[0,0,1] neg_hi:[0,0,1] ; encoding: [0x00,0x08,0x35,0xcc,0x01,0x05,0x02,0x0c,0x00,0x44,0x88,0xcc,0x08,0x31,0xa2,0x9c]
+# GFX1250: v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v1, v2 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 neg_lo:[0,0,1] neg_hi:[0,0,1] ; encoding: [0x00,0x08,0x35,0xcc,0x01,0x05,0x02,0x0a,0x00,0x44,0x88,0xcc,0x08,0x31,0xa2,0x9c]
 
 0x00,0x00,0x35,0xcc,0x01,0x05,0x02,0x44,0x00,0x40,0x88,0xcc,0x08,0x31,0xa2,0x1c
-# GFX1250: v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v1, v2 matrix_a_scale_fmt:MATRIX_SCALE_FMT_E4M3 ; encoding: [0x00,0x00,0x35,0xcc,0x01,0x05,0x02,0x44,0x00,0x40,0x88,0xcc,0x08,0x31,0xa2,0x1c]
+# GFX1250: v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v1, v2 matrix_a_scale_fmt:MATRIX_SCALE_FMT_E4M3 ; encoding: [0x00,0x00,0x35,0xcc,0x01,0x05,0x02,0x42,0x00,0x40,0x88,0xcc,0x08,0x31,0xa2,0x1c]
 
 0x00,0x00,0x35,0xcc,0x01,0x05,0x02,0x24,0x00,0x40,0x88,0xcc,0x08,0x31,0xa2,0x1c
-# GFX1250: v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v1, v2 matrix_a_scale_fmt:MATRIX_SCALE_FMT_E5M3 ; encoding: [0x00,0x00,0x35,0xcc,0x01,0x05,0x02,0x24,0x00,0x40,0x88,0xcc,0x08,0x31,0xa2,0x1c]
+# GFX1250: v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v1, v2 matrix_a_scale_fmt:MATRIX_SCALE_FMT_E5M3 ; encoding: [0x00,0x00,0x35,0xcc,0x01,0x05,0x02,0x22,0x00,0x40,0x88,0xcc,0x08,0x31,0xa2,0x1c]
 
 0x00,0x02,0x35,0xcc,0x01,0x05,0x02,0x04,0x00,0x40,0x88,0xcc,0x08,0x31,0xa2,0x1c
-# GFX1250: v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v1, v2 matrix_b_scale_fmt:MATRIX_SCALE_FMT_E4M3 ; encoding: [0x00,0x02,0x35,0xcc,0x01,0x05,0x02,0x04,0x00,0x40,0x88,0xcc,0x08,0x31,0xa2,0x1c]
+# GFX1250: v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v1, v2 matrix_b_scale_fmt:MATRIX_SCALE_FMT_E4M3 ; encoding: [0x00,0x02,0x35,0xcc,0x01,0x05,0x02,0x02,0x00,0x40,0x88,0xcc,0x08,0x31,0xa2,0x1c]
 
 0x00,0x01,0x35,0xcc,0x01,0x05,0x02,0x04,0x00,0x40,0x88,0xcc,0x08,0x31,0xa2,0x1c
-# GFX1250: v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v1, v2 matrix_b_scale_fmt:MATRIX_SCALE_FMT_E5M3 ; encoding: [0x00,0x01,0x35,0xcc,0x01,0x05,0x02,0x04,0x00,0x40,0x88,0xcc,0x08,0x31,0xa2,0x1c]
+# GFX1250: v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v1, v2 matrix_b_scale_fmt:MATRIX_SCALE_FMT_E5M3 ; encoding: [0x00,0x01,0x35,0xcc,0x01,0x05,0x02,0x02,0x00,0x40,0x88,0xcc,0x08,0x31,0xa2,0x1c]
 
 0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x04,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c
-# GFX1250: v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s[0:1], s[0:1] ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x04,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c]
+# GFX1250: v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s[0:1], s[0:1] ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x02,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c]
 
 0x00,0x20,0x3a,0xcc,0x00,0x00,0x00,0x04,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c
-# GFX1250: v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s[0:1], s[0:1] matrix_a_reuse ; encoding: [0x00,0x20,0x3a,0xcc,0x00,0x00,0x00,0x04,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c]
+# GFX1250: v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s[0:1], s[0:1] matrix_a_reuse ; encoding: [0x00,0x20,0x3a,0xcc,0x00,0x00,0x00,0x02,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c]
 
 0x00,0x08,0x3a,0xcc,0x00,0x00,0x00,0x04,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c
-# GFX1250: v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s[0:1], s[0:1] matrix_a_scale:MATRIX_SCALE_ROW1 ; encoding: [0x00,0x08,0x3a,0xcc,0x00,0x00,0x00,0x04,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c]
+# GFX1250: v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s[0:1], s[0:1] matrix_a_scale:MATRIX_SCALE_ROW1 ; encoding: [0x00,0x08,0x3a,0xcc,0x00,0x00,0x00,0x02,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c]
 
 0x00,0x28,0x3a,0xcc,0x00,0x00,0x00,0x04,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c
-# GFX1250: v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s[0:1], s[0:1] matrix_a_scale:MATRIX_SCALE_ROW1 matrix_a_reuse ; encoding: [0x00,0x28,0x3a,0xcc,0x00,0x00,0x00,0x04,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c]
+# GFX1250: v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s[0:1], s[0:1] matrix_a_scale:MATRIX_SCALE_ROW1 matrix_a_reuse ; encoding: [0x00,0x28,0x3a,0xcc,0x00,0x00,0x00,0x02,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c]
 
 0x00,0x40,0x3a,0xcc,0x00,0x00,0x00,0x04,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c
-# GFX1250: v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s[0:1], s[0:1] matrix_b_reuse ; encoding: [0x00,0x40,0x3a,0xcc,0x00,0x00,0x00,0x04,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c]
+# GFX1250: v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s[0:1], s[0:1] matrix_b_reuse ; encoding: [0x00,0x40,0x3a,0xcc,0x00,0x00,0x00,0x02,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c]
 
 0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x0c,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c
-# GFX1250: v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s[0:1], s[0:1] matrix_b_scale:MATRIX_SCALE_ROW1 ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x0c,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c]
+# GFX1250: v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s[0:1], s[0:1] matrix_b_scale:MATRIX_SCALE_ROW1 ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x0a,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c]
 
 0x00,0x40,0x3a,0xcc,0x00,0x00,0x00,0x0c,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c
-# GFX1250: v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s[0:1], s[0:1] matrix_b_scale:MATRIX_SCALE_ROW1 matrix_b_reuse ; encoding: [0x00,0x40,0x3a,0xcc,0x00,0x00,0x00,0x0c,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c]
+# GFX1250: v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s[0:1], s[0:1] matrix_b_scale:MATRIX_SCALE_ROW1 matrix_b_reuse ; encoding: [0x00,0x40,0x3a,0xcc,0x00,0x00,0x00,0x0a,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c]
 
 0x00,0x68,0x3a,0xcc,0x02,0x08,0x00,0x0c,0x00,0x44,0x88,0xcc,0x08,0x31,0xa2,0x9c
-# GFX1250: v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], s[2:3], s[4:5] matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_reuse matrix_b_reuse neg_lo:[0,0,1] neg_hi:[0,0,1] ; encoding: [0x00,0x68,0x3a,0xcc,0x02,0x08,0x00,0x0c,0x00,0x44,0x88,0xcc,0x08,0x31,0xa2,0x9c]
+# GFX1250: v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], s[2:3], s[4:5] matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_reuse matrix_b_reuse neg_lo:[0,0,1] neg_hi:[0,0,1] ; encoding: [0x00,0x68,0x3a,0xcc,0x02,0x08,0x00,0x0a,0x00,0x44,0x88,0xcc,0x08,0x31,0xa2,0x9c]
 
 0x00,0x00,0x3a,0xcc,0x02,0x09,0x02,0x04,0x00,0x40,0x88,0xcc,0x08,0x31,0xa2,0x1c
-# GFX1250: v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v[2:3], v[4:5] ; encoding: [0x00,0x00,0x3a,0xcc,0x02,0x09,0x02,0x04,0x00,0x40,0x88,0xcc,0x08,0x31,0xa2,0x1c]
+# GFX1250: v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v[2:3], v[4:5] ; encoding: [0x00,0x00,0x3a,0xcc,0x02,0x09,0x02,0x02,0x00,0x40,0x88,0xcc,0x08,0x31,0xa2,0x1c]
 
 0x00,0x68,0x3a,0xcc,0x02,0x09,0x02,0x0c,0x00,0x44,0x88,0xcc,0x08,0x31,0xa2,0x9c
-# GFX1250: v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v[2:3], v[4:5] matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_reuse matrix_b_reuse neg_lo:[0,0,1] neg_hi:[0,0,1] ; encoding: [0x00,0x68,0x3a,0xcc,0x02,0x09,0x02,0x0c,0x00,0x44,0x88,0xcc,0x08,0x31,0xa2,0x9c]
+# GFX1250: v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v[2:3], v[4:5] matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_reuse matrix_b_reuse neg_lo:[0,0,1] neg_hi:[0,0,1] ; encoding: [0x00,0x68,0x3a,0xcc,0x02,0x09,0x02,0x0a,0x00,0x44,0x88,0xcc,0x08,0x31,0xa2,0x9c]
 
 0x00,0x08,0x3a,0xcc,0x02,0x09,0x02,0x0c,0x00,0x44,0x88,0xcc,0x08,0x31,0xa2,0x9c
-# GFX1250: v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v[2:3], v[4:5] matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 neg_lo:[0,0,1] neg_hi:[0,0,1] ; encoding: [0x00,0x08,0x3a,0xcc,0x02,0x09,0x02,0x0c,0x00,0x44,0x88,0xcc,0x08,0x31,0xa2,0x9c]
+# GFX1250: v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v[2:3], v[4:5] matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 neg_lo:[0,0,1] neg_hi:[0,0,1] ; encoding: [0x00,0x08,0x3a,0xcc,0x02,0x09,0x02,0x0a,0x00,0x44,0x88,0xcc,0x08,0x31,0xa2,0x9c]
 
 0x00,0x00,0x3a,0xcc,0x02,0x09,0x02,0x44,0x00,0x40,0x88,0xcc,0x08,0x31,0xa2,0x1c
-# GFX1250: v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v[2:3], v[4:5] matrix_a_scale_fmt:MATRIX_SCALE_FMT_E4M3 ; encoding: [0x00,0x00,0x3a,0xcc,0x02,0x09,0x02,0x44,0x00,0x40,0x88,0xcc,0x08,0x31,0xa2,0x1c]
+# GFX1250: v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v[2:3], v[4:5] matrix_a_scale_fmt:MATRIX_SCALE_FMT_E4M3 ; encoding: [0x00,0x00,0x3a,0xcc,0x02,0x09,0x02,0x42,0x00,0x40,0x88,0xcc,0x08,0x31,0xa2,0x1c]
 
 0x00,0x00,0x3a,0xcc,0x02,0x09,0x02,0x24,0x00,0x40,0x88,0xcc,0x08,0x31,0xa2,0x1c
-# GFX1250: v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v[2:3], v[4:5] matrix_a_scale_fmt:MATRIX_SCALE_FMT_E5M3 ; encoding: [0x00,0x00,0x3a,0xcc,0x02,0x09,0x02,0x24,0x00,0x40,0x88,0xcc,0x08,0x31,0xa2,0x1c]
+# GFX1250: v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v[2:3], v[4:5] matrix_a_scale_fmt:MATRIX_SCALE_FMT_E5M3 ; encoding: [0x00,0x00,0x3a,0xcc,0x02,0x09,0x02,0x22,0x00,0x40,0x88,0xcc,0x08,0x31,0xa2,0x1c]
 
 0x00,0x02,0x3a,0xcc,0x02,0x09,0x02,0x04,0x00,0x40,0x88,0xcc,0x08,0x31,0xa2,0x1c
-# GFX1250: v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v[2:3], v[4:5] matrix_b_scale_fmt:MATRIX_SCALE_FMT_E4M3 ; encoding: [0x00,0x02,0x3a,0xcc,0x02,0x09,0x02,0x04,0x00,0x40,0x88,0xcc,0x08,0x31,0xa2,0x1c]
+# GFX1250: v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v[2:3], v[4:5] matrix_b_scale_fmt:MATRIX_SCALE_FMT_E4M3 ; encoding: [0x00,0x02,0x3a,0xcc,0x02,0x09,0x02,0x02,0x00,0x40,0x88,0xcc,0x08,0x31,0xa2,0x1c]
 
 0x00,0x01,0x3a,0xcc,0x02,0x09,0x02,0x04,0x00,0x40,0x88,0xcc,0x08,0x31,0xa2,0x1c
-# GFX1250: v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v[2:3], v[4:5] matrix_b_scale_fmt:MATRIX_SCALE_FMT_E5M3 ; encoding: [0x00,0x01,0x3a,0xcc,0x02,0x09,0x02,0x04,0x00,0x40,0x88,0xcc,0x08,0x31,0xa2,0x1c]
+# GFX1250: v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v[2:3], v[4:5] matrix_b_scale_fmt:MATRIX_SCALE_FMT_E5M3 ; encoding: [0x00,0x01,0x3a,0xcc,0x02,0x09,0x02,0x02,0x00,0x40,0x88,0xcc,0x08,0x31,0xa2,0x1c]


        


More information about the llvm-commits mailing list