[llvm] b80b025 - AMDGPU: Implement MC layer support for gfx1250 wmma instructions. (#148570)

via llvm-commits llvm-commits at lists.llvm.org
Tue Jul 15 00:49:01 PDT 2025


Author: Changpeng Fang
Date: 2025-07-15T00:48:57-07:00
New Revision: b80b02536b20d2170820e56ef776f43640ee2495

URL: https://github.com/llvm/llvm-project/commit/b80b02536b20d2170820e56ef776f43640ee2495
DIFF: https://github.com/llvm/llvm-project/commit/b80b02536b20d2170820e56ef776f43640ee2495.diff

LOG: AMDGPU: Implement MC layer support for gfx1250 wmma instructions. (#148570)

Regular wmma/swmmac plus matrix reuse only.

---------

Co-authored-by: Stanislav Mekhanoshin <Stanislav.Mekhanoshin at amd.com>
Co-authored-by: Shilei Tian <Shilei.Tian at amd.com>

Added: 
    llvm/test/MC/AMDGPU/gfx1250_asm_wmma_w32.s
    llvm/test/MC/AMDGPU/gfx1250_asm_wmma_w32_err.s
    llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_wmma_w32.txt

Modified: 
    llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
    llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
    llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
    llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp
    llvm/lib/Target/AMDGPU/SIInstrInfo.td
    llvm/lib/Target/AMDGPU/SIRegisterInfo.td
    llvm/lib/Target/AMDGPU/VOP3PInstructions.td
    llvm/lib/Target/AMDGPU/VOPInstructions.td

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 6439230b8769f..8ac9b707c1349 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -157,6 +157,7 @@ class AMDGPUOperand : public MCParsedAsmOperand {
     ImmTyNegHi,
     ImmTyIndexKey8bit,
     ImmTyIndexKey16bit,
+    ImmTyIndexKey32bit,
     ImmTyDPP8,
     ImmTyDppCtrl,
     ImmTyDppRowMask,
@@ -174,8 +175,10 @@ class AMDGPUOperand : public MCParsedAsmOperand {
     ImmTyWaitEXP,
     ImmTyWaitVAVDst,
     ImmTyWaitVMVSrc,
-    ImmTyByteSel,
     ImmTyBitOp3,
+    ImmTyMatrixAReuse,
+    ImmTyMatrixBReuse,
+    ImmTyByteSel,
   };
 
   // Immediate operand kind.
@@ -419,6 +422,9 @@ class AMDGPUOperand : public MCParsedAsmOperand {
   bool isCPol() const { return isImmTy(ImmTyCPol); }
   bool isIndexKey8bit() const { return isImmTy(ImmTyIndexKey8bit); }
   bool isIndexKey16bit() const { return isImmTy(ImmTyIndexKey16bit); }
+  bool isIndexKey32bit() const { return isImmTy(ImmTyIndexKey32bit); }
+  bool isMatrixAReuse() const { return isImmTy(ImmTyMatrixAReuse); }
+  bool isMatrixBReuse() const { return isImmTy(ImmTyMatrixBReuse); }
   bool isTFE() const { return isImmTy(ImmTyTFE); }
   bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
   bool isDppFI() const { return isImmTy(ImmTyDppFI); }
@@ -747,6 +753,10 @@ class AMDGPUOperand : public MCParsedAsmOperand {
     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
   }
 
+  bool isVISrc_512_f64() const {
+    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f64);
+  }
+
   bool isVISrc_128B16() const {
     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
   }
@@ -1116,6 +1126,7 @@ class AMDGPUOperand : public MCParsedAsmOperand {
     case ImmTyCPol: OS << "CPol"; break;
     case ImmTyIndexKey8bit: OS << "index_key"; break;
     case ImmTyIndexKey16bit: OS << "index_key"; break;
+    case ImmTyIndexKey32bit: OS << "index_key"; break;
     case ImmTyTFE: OS << "TFE"; break;
     case ImmTyD16: OS << "D16"; break;
     case ImmTyFORMAT: OS << "FORMAT"; break;
@@ -1162,8 +1173,10 @@ class AMDGPUOperand : public MCParsedAsmOperand {
     case ImmTyWaitEXP: OS << "WaitEXP"; break;
     case ImmTyWaitVAVDst: OS << "WaitVAVDst"; break;
     case ImmTyWaitVMVSrc: OS << "WaitVMVSrc"; break;
-    case ImmTyByteSel: OS << "ByteSel" ; break;
     case ImmTyBitOp3: OS << "BitOp3"; break;
+    case ImmTyMatrixAReuse: OS << "ImmTyMatrixAReuse"; break;
+    case ImmTyMatrixBReuse: OS << "ImmTyMatrixBReuse"; break;
+    case ImmTyByteSel: OS << "ByteSel" ; break;
     }
     // clang-format on
   }
@@ -1700,6 +1713,7 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
                                AMDGPUOperand::ImmTy ImmTy);
   ParseStatus parseIndexKey8bit(OperandVector &Operands);
   ParseStatus parseIndexKey16bit(OperandVector &Operands);
+  ParseStatus parseIndexKey32bit(OperandVector &Operands);
 
   ParseStatus parseDfmtNfmt(int64_t &Format);
   ParseStatus parseUfmt(int64_t &Format);
@@ -7153,7 +7167,9 @@ ParseStatus AMDGPUAsmParser::tryParseIndexKey(OperandVector &Operands,
   if (!Res.isSuccess())
     return Res;
 
-  if (ImmTy == AMDGPUOperand::ImmTyIndexKey16bit && (ImmVal < 0 || ImmVal > 1))
+  if ((ImmTy == AMDGPUOperand::ImmTyIndexKey16bit ||
+       ImmTy == AMDGPUOperand::ImmTyIndexKey32bit) &&
+      (ImmVal < 0 || ImmVal > 1))
     return Error(Loc, Twine("out of range ", StringRef(Pref)));
 
   if (ImmTy == AMDGPUOperand::ImmTyIndexKey8bit && (ImmVal < 0 || ImmVal > 3))
@@ -7171,6 +7187,10 @@ ParseStatus AMDGPUAsmParser::parseIndexKey16bit(OperandVector &Operands) {
   return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey16bit);
 }
 
+ParseStatus AMDGPUAsmParser::parseIndexKey32bit(OperandVector &Operands) {
+  return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey32bit);
+}
+
 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
 // values to live in a joint format operand in the MCInst encoding.
 ParseStatus AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
@@ -9272,6 +9292,14 @@ void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
                           DefaultVal);
   }
 
+  if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::matrix_a_reuse))
+    addOptionalImmOperand(Inst, Operands, OptIdx,
+                          AMDGPUOperand::ImmTyMatrixAReuse, 0);
+
+  if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::matrix_b_reuse))
+    addOptionalImmOperand(Inst, Operands, OptIdx,
+                          AMDGPUOperand::ImmTyMatrixBReuse, 0);
+
   int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
   if (NegLoIdx != -1)
     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
@@ -9378,6 +9406,10 @@ void AMDGPUAsmParser::cvtSWMMAC(MCInst &Inst, const OperandVector &Operands) {
     addOptionalImmOperand(Inst, Operands, OptIdx,
                           AMDGPUOperand::ImmTyIndexKey16bit);
 
+  if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_32bit))
+    addOptionalImmOperand(Inst, Operands, OptIdx,
+                          AMDGPUOperand::ImmTyIndexKey32bit);
+
   if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyClamp);
 

diff  --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
index cb6319ed627ca..ec9248b972ec4 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
@@ -1332,6 +1332,16 @@ void AMDGPUInstPrinter::printIndexKey16bit(const MCInst *MI, unsigned OpNo,
   O << " index_key:" << Imm;
 }
 
+void AMDGPUInstPrinter::printIndexKey32bit(const MCInst *MI, unsigned OpNo,
+                                           const MCSubtargetInfo &STI,
+                                           raw_ostream &O) {
+  auto Imm = MI->getOperand(OpNo).getImm() & 0x7;
+  if (Imm == 0)
+    return;
+
+  O << " index_key:" << Imm;
+}
+
 void AMDGPUInstPrinter::printInterpSlot(const MCInst *MI, unsigned OpNum,
                                         const MCSubtargetInfo &STI,
                                         raw_ostream &O) {

diff  --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
index fb803b1f81342..e3299a618e882 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
@@ -132,6 +132,8 @@ class AMDGPUInstPrinter : public MCInstPrinter {
                          const MCSubtargetInfo &STI, raw_ostream &O);
   void printIndexKey16bit(const MCInst *MI, unsigned OpNo,
                           const MCSubtargetInfo &STI, raw_ostream &O);
+  void printIndexKey32bit(const MCInst *MI, unsigned OpNo,
+                          const MCSubtargetInfo &STI, raw_ostream &O);
   void printInterpSlot(const MCInst *MI, unsigned OpNo,
                        const MCSubtargetInfo &STI, raw_ostream &O);
   void printInterpAttr(const MCInst *MI, unsigned OpNo,

diff  --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp
index 4bb3942936f04..b87bb94c70dcb 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp
@@ -381,9 +381,11 @@ void AMDGPUMCCodeEmitter::encodeInstruction(const MCInst &MI,
 
   // Set unused op_sel_hi bits to 1 for VOP3P and MAI instructions.
   // Note that accvgpr_read/write are MAI, have src0, but do not use op_sel.
-  if ((Desc.TSFlags & SIInstrFlags::VOP3P) ||
-      Opcode == AMDGPU::V_ACCVGPR_READ_B32_vi ||
-      Opcode == AMDGPU::V_ACCVGPR_WRITE_B32_vi) {
+  if (((Desc.TSFlags & SIInstrFlags::VOP3P) ||
+       Opcode == AMDGPU::V_ACCVGPR_READ_B32_vi ||
+       Opcode == AMDGPU::V_ACCVGPR_WRITE_B32_vi) &&
+      // Matrix B reuse operand reuses op_sel_hi.
+      !AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::matrix_b_reuse)) {
     Encoding |= getImplicitOpSelHiEncoding(Opcode);
   }
 

diff  --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index e5ce0a9d0e971..391cc968e9d8b 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -1247,6 +1247,7 @@ def op_sel_hi0 : ArrayOperand0<"op_sel_hi", "OpSelHi">;
 def neg_lo0 : ArrayOperand0<"neg_lo", "NegLo">;
 def neg_hi0 : ArrayOperand0<"neg_hi", "NegHi">;
 
+def IndexKey32bit : CustomOperand<i32, 1>;
 def IndexKey16bit : CustomOperand<i32, 1>;
 def IndexKey8bit : CustomOperand<i32, 1>;
 
@@ -1302,6 +1303,9 @@ let PrintMethod = "printBitOp3" in
 def BitOp3 : NamedIntOperand<"bitop3">;
 def bitop3_0 : DefaultOperand<BitOp3, 0>;
 
+def MatrixAReuse : NamedBitOperand<"matrix_a_reuse">;
+def MatrixBReuse : NamedBitOperand<"matrix_b_reuse">;
+
 class KImmFPOperand<ValueType vt> : ImmOperand<vt> {
   let OperandNamespace = "AMDGPU";
   let OperandType = "OPERAND_KIMM"#vt.Size;
@@ -1641,6 +1645,7 @@ def WMMAModsF16NegAbs  : ComplexPattern<untyped, 2, "SelectWMMAModsF16NegAbs">;
 def WMMAVISrc  : ComplexPattern<untyped, 1, "SelectWMMAVISrc">;
 def SWMMACIndex8  : ComplexPattern<untyped, 2, "SelectSWMMACIndex8">;
 def SWMMACIndex16  : ComplexPattern<untyped, 2, "SelectSWMMACIndex16">;
+def SWMMACIndex32  : ComplexPattern<untyped, 2, "SelectSWMMACIndex32">;
 
 def VOP3OpSel  : ComplexPattern<untyped, 2, "SelectVOP3OpSel">;
 
@@ -2654,6 +2659,7 @@ class VOPProfile <list<ValueType> _ArgVT, bit _EnableClamp = 0> {
                                isModifierType<Src2VT>.ret,
                                HasOMod);
   field bit HasNeg = HasModifiers;
+  field bit HasMatrixReuse = 0;
 
   field bit HasSrc0Mods = HasModifiers;
   field bit HasSrc1Mods = !if(HasModifiers, !or(HasSrc1FloatMods, HasSrc1IntMods), 0);

diff  --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
index d24c301fc1e51..c194e5c255d4d 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -1294,6 +1294,7 @@ def VISrc_256_f32 : SrcRegOrImm9 <VReg_256, "OPERAND_REG_INLINE_C_FP32">;
 def VISrc_256_f64 : SrcRegOrImm9 <VReg_256, "OPERAND_REG_INLINE_C_FP64">;
 def VISrc_512_b32 : SrcRegOrImm9 <VReg_512, "OPERAND_REG_INLINE_C_INT32">;
 def VISrc_512_f32 : SrcRegOrImm9 <VReg_512, "OPERAND_REG_INLINE_C_FP32">;
+def VISrc_512_f64 : SrcRegOrImm9 <VReg_512, "OPERAND_REG_INLINE_C_FP64">;
 def VISrc_1024_b32 : SrcRegOrImm9 <VReg_1024, "OPERAND_REG_INLINE_C_INT32">;
 def VISrc_1024_f32 : SrcRegOrImm9 <VReg_1024, "OPERAND_REG_INLINE_C_FP32">;
 

diff  --git a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
index 31997f803dfc6..41878a71ad3e6 100644
--- a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
@@ -1223,6 +1223,8 @@ class WMMAOpcodeMapping<Instruction TwoAddr, Instruction ThreeAddr> {
   Instruction Opcode2Addr = TwoAddr;
   Instruction Opcode3Addr = ThreeAddr;
   Predicate WaveSizePredicate;
+  Predicate SubtargetPredicate;
+  field bit is_wmma_xdl;
 }
 
 def WMMAOpcode : GenericEnum {
@@ -1315,28 +1317,39 @@ let WaveSizePredicate = isWave64 in {
 }
 
 class VOP3PWMMA_Profile<list<ValueType> ArgTy, bit _IsSWMMAC, int _IndexType,
-                        bit _IsIU, bit _IsFP8BF8>
+                             bit _IsIU, bit _IsFP8BF8XF32, bit _Has_ImodOp = 0,
+                             bit _HasMatrixReuse = 0, bit _IsF4 = 0>
     : VOP3P_Profile<VOPProfile<ArgTy>> {
   bit IsIU = _IsIU;
-  bit IsFP8BF8 = _IsFP8BF8;
-  bit IsF16BF16 = !not(!or(IsIU, IsFP8BF8));
+  bit NoABMods = !or(_IsFP8BF8XF32, _IsF4); // No IMOD support for A and B
+  bit IsXF32 = !and(_IsFP8BF8XF32, !eq(ArgTy[1], v8f32));
 
   int IndexType = _IndexType;
+  let HasMatrixReuse = _HasMatrixReuse;
 
+  bit HasIModOp = _Has_ImodOp;
+  let HasClamp = !and(IsIU, !not(HasIModOp));
   let IsPacked = 1;
   let IsWMMA = !not(_IsSWMMAC);
   let IsSWMMAC = _IsSWMMAC;
 
-  bit IsAB_F16 = !and(IsF16BF16, ArgTy[1].isFP);
-  bit IsAB_BF16 = !and(IsF16BF16, isIntType<ArgTy[1]>.ret);
+  bit IsAB_F64  = !or(!eq(ArgTy[1], v2f64), !eq(ArgTy[1], v4f64));
+  bit IsAB_F32  = !eq(ArgTy[1], v2f32);
+  bit IsAB_F16 = !or(!eq(ArgTy[1], v16f16), !eq(ArgTy[1], v8f16), !eq(ArgTy[1], v4f16));
+  bit IsAB_BF16 = !or(!eq(ArgTy[1], v16i16), !eq(ArgTy[1], v8i16), !eq(ArgTy[1], v4i16),
+                      !eq(ArgTy[1], v16bf16), !eq(ArgTy[1], v8bf16), !eq(ArgTy[1], v4bf16));
+  bit IsF16BF16 = !or(IsAB_F16, IsAB_BF16);
+
+  bit IsC_F64 = !eq(ArgTy[3], v8f64);
   bit IsC_F32 = !or(!eq(ArgTy[3], v8f32), !eq(ArgTy[3], v4f32));
-  bit IsC_BF16 = !or(!eq(ArgTy[3], v8i16), !eq(ArgTy[3], v4i16));
+  bit IsC_BF16 = !or(!eq(ArgTy[3], v8i16), !eq(ArgTy[3], v4i16),
+                     !eq(ArgTy[3], v8bf16), !eq(ArgTy[3], v4bf16));
   bit IsC_F16 = !or(!eq(ArgTy[3], v8f16), !eq(ArgTy[3], v4f16));
 
-  bit NegLo01 = !or(IsF16BF16, IsIU);
-  bit NegLo2 = !and(!or(IsF16BF16, IsFP8BF8), IsWMMA);
-  bit NegHi01 = IsF16BF16;
-  bit NegHi2 = !and(!or(IsF16BF16, IsFP8BF8), IsWMMA);
+  bit NegLo01 = !not(NoABMods);
+  bit NegLo2 = !and(!not(IsIU), !not(IsXF32), IsWMMA);
+  bit NegHi01 = IsF16BF16; // Only F16BF16 can have neg_hi[0:1]
+  bit NegHi2 = !and(!not(IsIU), !not(IsXF32), IsWMMA);
   bit NegLoAny = !or(NegLo01, NegLo2);
   bit NegHiAny = !or(NegHi01, NegHi2);
 
@@ -1345,19 +1358,29 @@ class VOP3PWMMA_Profile<list<ValueType> ArgTy, bit _IsSWMMAC, int _IndexType,
   let Src1RC64 = !cast<RegisterOperand>("VRegSrc_"#ArgTy[2].Size);
   let Src2RC64 = !if(IsSWMMAC, DstRC,
                                !cast<RegisterOperand>("VISrc_"#ArgTy[3].Size#
-                                                      !cond(IsC_F32: "_f32",
-                                                            IsC_F16: "_f16",
+                                                      !cond(IsC_F64:  "_f64",
+                                                            IsC_F32:  "_f32",
+                                                            IsC_F16:  "_f16",
                                                             IsC_BF16: "_bf16",
                                                             1: "_b32")));
 
   // For f16 and bf16 matrices A and B, each element can be modified by
-  // fneg(neg_lo,neg_hi = 1). For iu4 and iu8 matrices A and B neg_lo is
+  // fneg(neg_lo,neg_hi = 1). For f32 and f64, neg_lo[0:1] is allowed, but
+  // neg_hi[0:1] is ignored. For iu4 and iu8 matrices A and B neg_lo is
   // overloaded to mean unsigned/signed: neg_lo = 0 (u4 and u8) unsigned(zext)
-  // neg_lo = 1 (i4 and i8) signed(sext). For f16, bf16 and f32 matrix C each
-  // element can be modified by fneg(neg_lo = 1) or fabs(neg_hi = 1).
+  // neg_lo = 1 (i4 and i8) signed(sext). For f16, bf16, f32 and f64 matrix C
+  // each element can be modified by fneg(neg_lo = 1) or fabs(neg_hi = 1).
 
   // Opcode             | src0/src1 - matrix A/B | src2 - matrix C or Index
   // ---------------------------------------------------------------------------
+  // wmma f64_f64       | neg_lo for neg A/B     | neg_lo = 1  neg C(f64)
+  //                    | neg_hi ignored         | neg_hi = 1  abs C(f64)
+  // ---------------------------------------------------------------------------
+  // wmma f32_f32       | neg_lo for neg A/B     | neg_lo = 1  neg C(f32)
+  //                    | neg_hi ignored         | neg_hi = 1  abs C(f32)
+  // ---------------------------------------------------------------------------
+  // wmma f32_xf32      | not allowed for xf32   | not allowed
+  // ---------------------------------------------------------------------------
   // wmma f32_f16       | both neg_lo,neg_hi = 1 | neg_lo = 1  neg C(f32)
   // wmma f32_bf16      | neg A/B (f16 or bf16)  | neg_hi = 1  abs C(f32)
   // ---------------------------------------------------------------------------
@@ -1368,7 +1391,10 @@ class VOP3PWMMA_Profile<list<ValueType> ArgTy, bit _IsSWMMAC, int _IndexType,
   //                    | neg_lo = 1 i4/i8(sext) | i32 matrices
   // ---------------------------------------------------------------------------
   // wmma f32_fp8/bf8   | not allowed for        | neg_lo = 1  neg C(f32)
-  // (4 instructions)   | f8 and bf8 matrices    | neg_hi = 1  abs C(f32)
+  //                    | fp8 and bf8 matrices   | neg_hi = 1  abs C(f32)
+  // ---------------------------------------------------------------------------
+  // wmma f16_fp8/bf8   | not allowed for        | neg_lo = 1  neg C(f16)
+  //                    | fp8 and bf8 matrices   | neg_hi = 1  abs C(f16)
   // ---------------------------------------------------------------------------
   // swmmac f32_f16     | both neg_lo,neg_hi = 1 | not allowed for sparse matrix
   // swmmac f32_bf16    | neg A/B (f16 or bf16)  | A Index - matrix C is in dst
@@ -1380,78 +1406,108 @@ class VOP3PWMMA_Profile<list<ValueType> ArgTy, bit _IsSWMMAC, int _IndexType,
   //                    | neg_lo = 1 i4/i8(sext) | A Index - matrix C is in dst
   // ---------------------------------------------------------------------------
   // swmmac f32_fp8/bf8 | not allowed for        | not allowed for sparse matrix
-  // (4 instructions)   | f8 and bf8 matrices    | A Index - matrix C is in dst
+  // swmmac f16_fp8/bf8 | f8 and bf8 matrices    | A Index - matrix C is in dst
+  // ---------------------------------------------------------------------------
 
   // pseudo
 
-  // fp8bf8 wmmas don't use src (0 and 1) modifiers, iu use neg_lo, f16 and bf16
+  // fp8bf8 and xf32 wmmas don't use src (0 and 1) modifiers, iu use neg_lo, f16 and bf16
   // use neg_lo and neg_hi. iu wmmas (C is i32) don't use src 2 modifiers,
   // remaining wmmas(f16, bf16 and f8bf8) use neg_lo and neg_hi for C (C is f32
   // f16 or bf16). swmmac use index_key and don't use src 2 modifiers.
-
-  dag Src0Mods = !if(IsFP8BF8, (ins), (ins PackedF16InputMods:$src0_modifiers));
-  dag Src1Mods = !if(IsFP8BF8, (ins), (ins PackedF16InputMods:$src1_modifiers));
-  dag Src2Mods = !if(IsIU, (ins), (ins PackedF16InputMods:$src2_modifiers));
+  dag Src0Mods = !if(NoABMods, (ins), (ins PackedF16InputMods:$src0_modifiers));
+  dag Src1Mods = !if(NoABMods, (ins), (ins PackedF16InputMods:$src1_modifiers));
+  dag Src2Mods = !if(!or(IsIU, IsXF32, IsSWMMAC), (ins), (ins PackedF16InputMods:$src2_modifiers));
   dag IndexKey = !cond(!eq(IndexType, 0) : (ins),
                        !eq(IndexType, 8) : (ins IndexKey8bit:$index_key_8bit),
-                       !eq(IndexType, 16): (ins IndexKey16bit:$index_key_16bit));
-  dag Clamp = !if(IsIU, (ins Clamp0:$clamp), (ins));
+                       !eq(IndexType, 16): (ins IndexKey16bit:$index_key_16bit),
+                       !eq(IndexType, 32): (ins IndexKey32bit:$index_key_32bit));
+
+  dag MatrixReuse = !if(HasMatrixReuse, (ins MatrixAReuse:$matrix_a_reuse, MatrixBReuse:$matrix_b_reuse), (ins));
+  dag Clamp = !if(HasClamp, (ins Clamp0:$clamp), (ins));
   dag Neg = !cond(!and(NegLoAny, NegHiAny)             : (ins neg_lo0:$neg_lo, neg_hi0:$neg_hi),
                   !and(NegLoAny, !not(NegHiAny))       : (ins neg_lo0:$neg_lo),
                   !and(!not(NegLoAny), !not(NegHiAny)) : (ins));
 
   let InsVOP3P = !con(Src0Mods, (ins Src0RC64:$src0), Src1Mods, (ins Src1RC64:$src1),
                       !cond(IsWMMA   : !con(Src2Mods, (ins Src2RC64:$src2)),
-                            IsSWMMAC : !con((ins DstRC:$srcTiedDef), (ins VRegSrc_32:$src2), IndexKey)),
-                      Clamp, Neg);
+                            IsSWMMAC : !con((ins DstRC:$srcTiedDef),
+                                             !if(!eq(IndexType, 32),
+                                                 (ins VRegSrc_64:$src2),
+                                                 (ins VRegSrc_32:$src2)),
+                                            IndexKey)),
+                      MatrixReuse, Clamp, Neg);
 
   // asm
 
   string IndexKeyAsm = !cond(!eq(IndexType, 0)  : "",
                              !eq(IndexType, 8)  : "$index_key_8bit",
-                             !eq(IndexType, 16) : "$index_key_16bit");
-  string ClampAsm = !if(IsIU, "$clamp", "");
+                             !eq(IndexType, 16) : "$index_key_16bit",
+                             !eq(IndexType, 32) : "$index_key_32bit");
+  string MatrixReuseAsm = !if(HasMatrixReuse, "$matrix_a_reuse$matrix_b_reuse", "");
+  string ClampAsm = !if(HasClamp, "$clamp", "");
   string NegAsm = !cond(!and(NegLoAny, NegHiAny)             : "$neg_lo$neg_hi",
                         !and(NegLoAny, !not(NegHiAny))       : "$neg_lo",
                         !and(!not(NegLoAny), !not(NegHiAny)) : "");
 
-  let AsmVOP3P = "$vdst, $src0, $src1, $src2"#IndexKeyAsm#NegAsm#ClampAsm;
+  let AsmVOP3P = "$vdst, $src0, $src1, $src2"#IndexKeyAsm#MatrixReuseAsm#NegAsm#ClampAsm;
 
   // isel patterns
+  bit IsAB_BF16_IMod0 = !and(IsAB_BF16, !not(HasIModOp));
+  bit IsAB_F16_IMod0 = !and(IsAB_F16, !not(HasIModOp));
+  bit IsAB_F32F64_IMod1  = !and(!or(IsAB_F64, IsAB_F32), HasIModOp);
+  bit IsAB_F16BF16_IMod1 = !and(!or(IsAB_F16, IsAB_BF16), HasIModOp);
+  dag Src0InPat  = !cond(IsAB_F32F64_IMod1  : (ins Src0VT:$src0),
+                         IsAB_F16BF16_IMod1 : (ins Src0VT:$src0),
+                         IsAB_F16_IMod0     : (ins (Src0VT (WMMAModsF16Neg Src0VT:$src0, i32:$src0_modifiers))),
+                         IsAB_BF16_IMod0    : (ins Src0VT:$src0),
+                         IsIU               : (ins (VOP3PModsNeg i32:$src0_modifiers), Src0VT:$src0),
+                         NoABMods           : (ins Src0VT:$src0));
+  dag Src0OutPat = !cond(IsAB_F32F64_IMod1  : (ins Src0VT:$src0),
+                         IsAB_F16BF16_IMod1 : (ins Src0VT:$src0),
+                         IsAB_F16_IMod0     : (ins i32:$src0_modifiers, Src0VT:$src0),
+                         IsAB_BF16_IMod0    : (ins (i32 8), Src0VT:$src0),
+                         IsIU               : (ins i32:$src0_modifiers, Src0VT:$src0),
+                         NoABMods           : (ins Src0VT:$src0));
+  dag Src1InPat  = !cond(IsAB_F32F64_IMod1  : (ins Src1VT:$src1),
+                         IsAB_F16BF16_IMod1 : (ins Src1VT:$src1),
+                         IsAB_F16_IMod0     : (ins (Src1VT (WMMAModsF16Neg Src1VT:$src1, i32:$src1_modifiers))),
+                         IsAB_BF16_IMod0    : (ins Src1VT:$src1),
+                         IsIU               : (ins (VOP3PModsNeg i32:$src1_modifiers), Src1VT:$src1),
+                         NoABMods           : (ins Src1VT:$src1));
+  dag Src1OutPat = !cond(IsAB_F32F64_IMod1  : (ins Src1VT:$src1),
+                         IsAB_F16BF16_IMod1 : (ins Src1VT:$src1),
+                         IsAB_F16_IMod0     : (ins i32:$src1_modifiers, Src1VT:$src1),
+                         IsAB_BF16_IMod0    : (ins (i32 8), Src1VT:$src1),
+                         IsIU               : (ins i32:$src1_modifiers, Src1VT:$src1),
+                         NoABMods           : (ins Src1VT:$src1));
+  bit IsC_IMod1 = !and(HasIModOp, IsWMMA, !not(IsIU), !not(IsXF32));
+  bit IsC_F32_IMod0 = !and(IsC_F32, !not(HasIModOp));
+  bit IsC_F16_IMod0 = !and(IsC_F16, !not(HasIModOp));
+  bit IsC_BF16_IMod0 = !and(IsC_BF16, !not(HasIModOp));
+  bit IsIUXF32 = !or(IsIU, IsXF32);
+  dag Src2InPatWmma  = !cond(IsC_IMod1        : (ins Src2VT:$src2),
+                             IsC_F32_IMod0    : (ins (Src2VT (WMMAModsF32NegAbs Src2VT:$src2, i32:$src2_modifiers))),
+                             IsC_F16_IMod0    : (ins (Src2VT (WMMAModsF16NegAbs Src2VT:$src2, i32:$src2_modifiers))),
+                             IsC_BF16_IMod0   : (ins Src2VT:$src2),
+                             IsIUXF32         : (ins Src2VT:$src2),
+                             IsSWMMAC         : (ins));
+  dag Src2OutPatWmma = !cond(IsC_IMod1        : (ins Src2VT:$src2),
+                             IsC_F32_IMod0    : (ins i32:$src2_modifiers, Src2VT:$src2),
+                             IsC_F16_IMod0    : (ins i32:$src2_modifiers, Src2VT:$src2),
+                             IsC_BF16_IMod0   : (ins (i32 8), Src2VT:$src2),
+                             IsIUXF32         : (ins Src2VT:$src2),
+                             IsSWMMAC         : (ins));
+  dag ClampPat = !if(HasClamp, (ins i1:$clamp), (ins));
 
-  dag Src0InPat  = !cond(IsAB_F16  : (ins (Src0VT (WMMAModsF16Neg Src0VT:$src0, i32:$src0_modifiers))),
-                         IsAB_BF16 : (ins Src0VT:$src0),
-                         IsIU      : (ins (VOP3PModsNeg i32:$src0_modifiers), Src0VT:$src0),
-                         IsFP8BF8  : (ins Src0VT:$src0));
-  dag Src0OutPat = !cond(IsAB_F16  : (ins i32:$src0_modifiers, Src0VT:$src0),
-                         IsAB_BF16 : (ins (i32 8), Src0VT:$src0),
-                         IsIU      : (ins i32:$src0_modifiers, Src0VT:$src0),
-                         IsFP8BF8  : (ins Src0VT:$src0));
-  dag Src1InPat  = !cond(IsAB_F16  : (ins (Src1VT (WMMAModsF16Neg Src1VT:$src1, i32:$src1_modifiers))),
-                         IsAB_BF16 : (ins Src1VT:$src1),
-                         IsIU      : (ins (VOP3PModsNeg i32:$src1_modifiers), Src1VT:$src1),
-                         IsFP8BF8  : (ins Src1VT:$src1));
-  dag Src1OutPat = !cond(IsAB_F16  : (ins i32:$src1_modifiers, Src1VT:$src1),
-                         IsAB_BF16 : (ins (i32 8), Src1VT:$src1),
-                         IsIU      : (ins i32:$src1_modifiers, Src1VT:$src1),
-                         IsFP8BF8  : (ins Src1VT:$src1));
-  dag Src2InPatWmma  = !cond(IsC_F32  : (ins (Src2VT (WMMAModsF32NegAbs Src2VT:$src2, i32:$src2_modifiers))),
-                             IsC_F16  : (ins (Src2VT (WMMAModsF16NegAbs Src2VT:$src2, i32:$src2_modifiers))),
-                             IsC_BF16 : (ins Src2VT:$src2),
-                             IsIU     : (ins Src2VT:$src2),
-                             IsSWMMAC : (ins));
-  dag Src2OutPatWmma = !cond(IsC_F32  : (ins i32:$src2_modifiers, Src2VT:$src2),
-                             IsC_F16  : (ins i32:$src2_modifiers, Src2VT:$src2),
-                             IsC_BF16 : (ins (i32 8), Src2VT:$src2),
-                             IsIU     : (ins Src2VT:$src2),
-                             IsSWMMAC : (ins));
-  dag ClampPat = !if(IsIU, (ins i1:$clamp), (ins));
   dag IndexInPat = !cond(!eq(IndexType, 0) : (ins i32:$src2),
                          !eq(IndexType, 8) : (ins (i32 (SWMMACIndex8 i32:$src2, i32:$index_key_8bit))),
-                         !eq(IndexType, 16): (ins (i32 (SWMMACIndex16 i32:$src2, i32:$index_key_16bit))));
+                         !eq(IndexType, 16): (ins (i32 (SWMMACIndex16 i32:$src2, i32:$index_key_16bit))),
+                         !eq(IndexType, 32): (ins (i64 (SWMMACIndex32 i64:$src2, i32:$index_key_32bit))));
   dag IndexOutPat = !cond(!eq(IndexType, 0) : (ins i32:$src2),
                           !eq(IndexType, 8) : (ins i32:$src2, i32:$index_key_8bit),
-                          !eq(IndexType, 16): (ins i32:$src2, i32:$index_key_16bit));
+                          !eq(IndexType, 16): (ins i32:$src2, i32:$index_key_16bit),
+                          !eq(IndexType, 32): (ins i64:$src2, i32:$index_key_32bit));
   dag Src2InlineInPat = (ins (Src2VT (WMMAVISrc Src2VT:$src2)));
   dag Src2InlineOutPat = !con(!if(IsIU, (ins), (ins (i32 8))), (ins Src2VT:$src2));
 
@@ -1468,15 +1524,33 @@ class VOP3PWMMA_Profile<list<ValueType> ArgTy, bit _IsSWMMAC, int _IndexType,
   dag WmmaInlineOutPat = !con(Src0OutPat, Src1OutPat, Src2InlineOutPat, ClampPat);
 }
 
-multiclass WMMAInstGFX12<string Instr, VOP3PWMMA_Profile WMMAProfile, string PseudoInstrSuffix> {
+def WMMAInstInfoTable : GenericTable {
+  let FilterClass = "WMMAInstInfo";
+  let CppTypeName = "WMMAInstInfo";
+  let Fields = ["Opcode", "is_wmma_xdl"];
+
+  let PrimaryKey = ["Opcode"];
+  let PrimaryKeyName = "getWMMAInstInfoHelper";
+}
+
+class WMMAInstInfo {
+  Instruction Opcode = !cast<Instruction>(NAME);
+  bit is_wmma_xdl = 0;
+}
+
+multiclass WMMAInstGFX12<string Instr, VOP3PWMMA_Profile WMMAProfile, string PseudoInstrSuffix, bit DiffVdstSrc2 = 0> {
+
+  defvar WMMAConstraints2Addr = !if(DiffVdstSrc2, "@earlyclobber $vdst", "@earlyclobber $vdst,$vdst = $src2");
+  defvar WMMAConstraints3Addr = "@earlyclobber $vdst";
+
   let Mnemonic = Instr, mayRaiseFPException = 0, ReadsModeReg = 0 in {
-    let Constraints = "@earlyclobber $vdst,$vdst = $src2", isConvertibleToThreeAddress = 1 in
-      def _twoaddr : VOP3P_Pseudo<Instr, WMMAProfile>{
+    let Constraints = WMMAConstraints2Addr, isConvertibleToThreeAddress = 1 in
+      def _twoaddr : VOP3P_Pseudo<Instr, WMMAProfile>, WMMAInstInfo {
         let PseudoInstr = Instr#PseudoInstrSuffix;
       }
 
-    let Constraints = "@earlyclobber $vdst", SchedRW = [Write32Bit, Write32Bit] in
-      def _threeaddr : VOP3P_Pseudo<Instr, WMMAProfile>{
+    let Constraints = WMMAConstraints3Addr, SchedRW = [Write32Bit, Write32Bit] in
+      def _threeaddr : VOP3P_Pseudo<Instr, WMMAProfile>, WMMAInstInfo {
         let PseudoInstr = Instr#PseudoInstrSuffix;
       }
 
@@ -1486,7 +1560,7 @@ multiclass WMMAInstGFX12<string Instr, VOP3PWMMA_Profile WMMAProfile, string Pse
 }
 
 multiclass SWMMACInstGFX12<string Instr, VOP3PWMMA_Profile WMMAProfile, string PseudoInstrSuffix> {
-  def _twoaddr : VOP3P_Pseudo<Instr, WMMAProfile>{
+  def _twoaddr : VOP3P_Pseudo<Instr, WMMAProfile>, WMMAInstInfo {
     let Mnemonic = Instr;
     let PseudoInstr = Instr#PseudoInstrSuffix;
     let mayRaiseFPException = 0;
@@ -1556,6 +1630,76 @@ def F32_FP8BF8_SWMMAC_w64 : VOP3PWMMA_Profile<[v4f32,   i32, v2i32, v4f32], 1,
 // *** IU4X32_SWMMAC_w64 lanes 0-31 will have 8xi4 remaining lanes are ignored
 //                       for matrix A, index is i16; Matrix B uses all lanes
 
+def F64_F64X4_WMMA_w32           : VOP3PWMMA_Profile<[v8f64, v2f64, v2f64, v8f64], 0, 0, 0, 0, 1>;
+def F32_F32_WMMA_w32             : VOP3PWMMA_Profile<[v8f32, v2f32, v2f32, v8f32], 0, 0, 0, 0, 1, 1>;
+def F32_BF16X32_WMMA_w32         : VOP3PWMMA_Profile<[v8f32, v16bf16, v16bf16, v8f32], 0, 0, 0, 0, 0, 1>;
+def F32_F16X32_WMMA_w32          : VOP3PWMMA_Profile<[v8f32, v16f16, v16f16, v8f32], 0, 0, 0, 0, 1, 1>;
+def F16_F16X32_WMMA_w32          : VOP3PWMMA_Profile<[v8f16, v16f16, v16f16, v8f16], 0, 0, 0, 0, 1, 1>;
+def BF16_BF16X32_WMMA_w32        : VOP3PWMMA_Profile<[v8bf16, v16bf16, v16bf16, v8bf16], 0, 0, 0, 0, 1, 1>;
+def BF16F32_BF16_WMMA_w32        : VOP3PWMMA_Profile<[v8bf16, v16bf16, v16bf16, v8f32], 0, 0, 0, 0, 1, 1>;
+def F32_FP8BF8X64_WMMA_w32       : VOP3PWMMA_Profile<[v8f32, v8i32, v8i32, v8f32], 0, 0, 0, 1, 1, 1>;
+def F32_FP8BF8X128_WMMA_w32      : VOP3PWMMA_Profile<[v8f32, v16i32, v16i32, v8f32], 0, 0, 0, 1, 1, 1>;
+def F16_FP8BF8X64_WMMA_w32       : VOP3PWMMA_Profile<[v8f16, v8i32, v8i32, v8f16], 0, 0, 0, 1, 1, 1>;
+def F16_FP8BF8X128_WMMA_w32      : VOP3PWMMA_Profile<[v8f16, v16i32, v16i32, v8f16], 0, 0, 0, 1, 1, 1>;
+def F32_32X16X128_F4_WMMA_w32    : VOP3PWMMA_Profile<[v16f32, v16i32, v8i32, v16f32], 0, 0, 0, 0, 1, 0, 1>;
+def I32_IU8X64_WMMA_w32          : VOP3PWMMA_Profile<[v8i32, v8i32, v8i32, v8i32], 0, 0, 1, 0, 1, 1>;
+def F32_F16X64_SWMMAC_w32        : VOP3PWMMA_Profile<[v8f32, v16f16, v32f16, v8f32], 1, 16, 0, 0, 1, 1>;
+def F32_BF16X64_SWMMAC_w32       : VOP3PWMMA_Profile<[v8f32, v16bf16, v32bf16, v8f32], 1, 16, 0, 0, 1, 1>;
+def F16_F16X64_SWMMAC_w32        : VOP3PWMMA_Profile<[v8f16, v16f16, v32f16, v8f16], 1, 16, 0, 0, 1, 1>;
+def BF16_BF16X64_SWMMAC_w32      : VOP3PWMMA_Profile<[v8bf16, v16bf16, v32bf16, v8bf16], 1, 16, 0, 0, 1, 1>;
+def F32_FP8BF8X128_SWMMAC_w32    : VOP3PWMMA_Profile<[v8f32, v8i32,  v16i32, v8f32], 1, 32, 0, 1, 1, 1>;
+def F16_FP8BF8X128_SWMMAC_w32    : VOP3PWMMA_Profile<[v8f16, v8i32,  v16i32, v8f16], 1, 32, 0, 1, 1, 1>;
+def I32_IU8X128_SWMMAC_w32       : VOP3PWMMA_Profile<[v8i32, v8i32,  v16i32, v8i32], 1, 32, 1, 0, 1, 1>;
+
+let WaveSizePredicate = isWave32 in {
+let SubtargetPredicate = isGFX125xOnly in {
+defm V_WMMA_F32_16X16X4_F32_w32       : WMMAInstGFX12<"v_wmma_f32_16x16x4_f32",       F32_F32_WMMA_w32, "_w32">;
+
+let is_wmma_xdl = 1 in {
+defm V_WMMA_F32_16X16X32_BF16_w32     : WMMAInstGFX12<"v_wmma_f32_16x16x32_bf16",     F32_BF16X32_WMMA_w32, "_w32">;
+defm V_WMMA_BF16_16X16X32_BF16_w32    : WMMAInstGFX12<"v_wmma_bf16_16x16x32_bf16",    BF16_BF16X32_WMMA_w32, "_w32">;
+defm V_WMMA_BF16F32_16X16X32_BF16_w32 : WMMAInstGFX12<"v_wmma_bf16f32_16x16x32_bf16", BF16F32_BF16_WMMA_w32, "_w32", 1>;
+defm V_WMMA_F32_16X16X64_FP8_FP8_w32  : WMMAInstGFX12<"v_wmma_f32_16x16x64_fp8_fp8",  F32_FP8BF8X64_WMMA_w32, "_w32">;
+defm V_WMMA_F32_16X16X64_FP8_BF8_w32  : WMMAInstGFX12<"v_wmma_f32_16x16x64_fp8_bf8",  F32_FP8BF8X64_WMMA_w32, "_w32">;
+defm V_WMMA_F32_16X16X64_BF8_FP8_w32  : WMMAInstGFX12<"v_wmma_f32_16x16x64_bf8_fp8",  F32_FP8BF8X64_WMMA_w32, "_w32">;
+defm V_WMMA_F32_16X16X64_BF8_BF8_w32  : WMMAInstGFX12<"v_wmma_f32_16x16x64_bf8_bf8",  F32_FP8BF8X64_WMMA_w32, "_w32">;
+defm V_WMMA_F16_16X16X64_FP8_FP8_w32  : WMMAInstGFX12<"v_wmma_f16_16x16x64_fp8_fp8",  F16_FP8BF8X64_WMMA_w32, "_w32">;
+defm V_WMMA_F16_16X16X64_FP8_BF8_w32  : WMMAInstGFX12<"v_wmma_f16_16x16x64_fp8_bf8",  F16_FP8BF8X64_WMMA_w32, "_w32">;
+defm V_WMMA_F16_16X16X64_BF8_FP8_w32  : WMMAInstGFX12<"v_wmma_f16_16x16x64_bf8_fp8",  F16_FP8BF8X64_WMMA_w32, "_w32">;
+defm V_WMMA_F16_16X16X64_BF8_BF8_w32  : WMMAInstGFX12<"v_wmma_f16_16x16x64_bf8_bf8",  F16_FP8BF8X64_WMMA_w32, "_w32">;
+defm V_WMMA_I32_16X16X64_IU8_w32      : WMMAInstGFX12<"v_wmma_i32_16x16x64_iu8",      I32_IU8X64_WMMA_w32, "_w32">;
+defm V_WMMA_F32_16X16X32_F16_w32      : WMMAInstGFX12<"v_wmma_f32_16x16x32_f16",      F32_F16X32_WMMA_w32, "_w32">;
+defm V_WMMA_F16_16X16X32_F16_w32      : WMMAInstGFX12<"v_wmma_f16_16x16x32_f16",      F16_F16X32_WMMA_w32, "_w32">;
+defm V_WMMA_F16_16X16X128_FP8_FP8_w32 : WMMAInstGFX12<"v_wmma_f16_16x16x128_fp8_fp8", F16_FP8BF8X128_WMMA_w32, "_w32">;
+defm V_WMMA_F16_16X16X128_FP8_BF8_w32 : WMMAInstGFX12<"v_wmma_f16_16x16x128_fp8_bf8", F16_FP8BF8X128_WMMA_w32, "_w32">;
+defm V_WMMA_F16_16X16X128_BF8_FP8_w32 : WMMAInstGFX12<"v_wmma_f16_16x16x128_bf8_fp8", F16_FP8BF8X128_WMMA_w32, "_w32">;
+defm V_WMMA_F16_16X16X128_BF8_BF8_w32 : WMMAInstGFX12<"v_wmma_f16_16x16x128_bf8_bf8", F16_FP8BF8X128_WMMA_w32, "_w32">;
+defm V_WMMA_F32_16X16X128_FP8_FP8_w32 : WMMAInstGFX12<"v_wmma_f32_16x16x128_fp8_fp8", F32_FP8BF8X128_WMMA_w32, "_w32">;
+defm V_WMMA_F32_16X16X128_FP8_BF8_w32 : WMMAInstGFX12<"v_wmma_f32_16x16x128_fp8_bf8", F32_FP8BF8X128_WMMA_w32, "_w32">;
+defm V_WMMA_F32_16X16X128_BF8_FP8_w32 : WMMAInstGFX12<"v_wmma_f32_16x16x128_bf8_fp8", F32_FP8BF8X128_WMMA_w32, "_w32">;
+defm V_WMMA_F32_16X16X128_BF8_BF8_w32 : WMMAInstGFX12<"v_wmma_f32_16x16x128_bf8_bf8", F32_FP8BF8X128_WMMA_w32, "_w32">;
+defm V_WMMA_F32_32X16X128_F4_w32      : WMMAInstGFX12<"v_wmma_f32_32x16x128_f4",      F32_32X16X128_F4_WMMA_w32, "_w32">;
+
+defm V_SWMMAC_F32_16X16X64_BF16_w32     : SWMMACInstGFX12<"v_swmmac_f32_16x16x64_bf16",     F32_BF16X64_SWMMAC_w32, "_w32">;
+defm V_SWMMAC_BF16_16X16X64_BF16_w32    : SWMMACInstGFX12<"v_swmmac_bf16_16x16x64_bf16",    BF16_BF16X64_SWMMAC_w32, "_w32">;
+defm V_SWMMAC_BF16F32_16X16X64_BF16_w32 : SWMMACInstGFX12<"v_swmmac_bf16f32_16x16x64_bf16", F32_BF16X64_SWMMAC_w32, "_w32">;
+defm V_SWMMAC_F32_16X16X128_FP8_FP8_w32 : SWMMACInstGFX12<"v_swmmac_f32_16x16x128_fp8_fp8", F32_FP8BF8X128_SWMMAC_w32, "_w32">;
+defm V_SWMMAC_F32_16X16X128_FP8_BF8_w32 : SWMMACInstGFX12<"v_swmmac_f32_16x16x128_fp8_bf8", F32_FP8BF8X128_SWMMAC_w32, "_w32">;
+defm V_SWMMAC_F32_16X16X128_BF8_FP8_w32 : SWMMACInstGFX12<"v_swmmac_f32_16x16x128_bf8_fp8", F32_FP8BF8X128_SWMMAC_w32, "_w32">;
+defm V_SWMMAC_F32_16X16X128_BF8_BF8_w32 : SWMMACInstGFX12<"v_swmmac_f32_16x16x128_bf8_bf8", F32_FP8BF8X128_SWMMAC_w32, "_w32">;
+defm V_SWMMAC_F16_16X16X128_FP8_FP8_w32 : SWMMACInstGFX12<"v_swmmac_f16_16x16x128_fp8_fp8", F16_FP8BF8X128_SWMMAC_w32, "_w32">;
+defm V_SWMMAC_F16_16X16X128_FP8_BF8_w32 : SWMMACInstGFX12<"v_swmmac_f16_16x16x128_fp8_bf8", F16_FP8BF8X128_SWMMAC_w32, "_w32">;
+defm V_SWMMAC_F16_16X16X128_BF8_FP8_w32 : SWMMACInstGFX12<"v_swmmac_f16_16x16x128_bf8_fp8", F16_FP8BF8X128_SWMMAC_w32, "_w32">;
+defm V_SWMMAC_F16_16X16X128_BF8_BF8_w32 : SWMMACInstGFX12<"v_swmmac_f16_16x16x128_bf8_bf8", F16_FP8BF8X128_SWMMAC_w32, "_w32">;
+defm V_SWMMAC_I32_16X16X128_IU8_w32     : SWMMACInstGFX12<"v_swmmac_i32_16x16x128_iu8",     I32_IU8X128_SWMMAC_w32, "_w32">;
+defm V_SWMMAC_F32_16X16X64_F16_w32      : SWMMACInstGFX12<"v_swmmac_f32_16x16x64_f16",      F32_F16X64_SWMMAC_w32, "_w32">;
+defm V_SWMMAC_F16_16X16X64_F16_w32      : SWMMACInstGFX12<"v_swmmac_f16_16x16x64_f16",      F16_F16X64_SWMMAC_w32, "_w32">;
+
+} // End is_wmma_xdl = 1.
+
+} // End SubtargetPredicate = isGFX125xOnly
+} // End WaveSizePredicate = isWave32
+
 let WaveSizePredicate = isWave32 in {
 defm V_WMMA_F32_16X16X16_F16_w32     : WMMAInstGFX12<"v_wmma_f32_16x16x16_f16",     F32_F16_WMMA_w32, "_w32">;
 defm V_WMMA_F32_16X16X16_BF16_w32    : WMMAInstGFX12<"v_wmma_f32_16x16x16_bf16",    F32_BF16_WMMA_w32, "_w32">;
@@ -1726,13 +1870,14 @@ class VOP3PeWmma<bits<8> op, VOPProfile P, VOP3PWMMA_Profile WMMAP>
   // opsel
   let Inst{11} = !cond(!eq(WMMAP.IndexType, 0)  : 0,
                        !eq(WMMAP.IndexType, 8)  : index_key_8bit{0},
-                       !eq(WMMAP.IndexType, 16) : index_key_16bit{0});
+                       !eq(WMMAP.IndexType, 16) : index_key_16bit{0},
+                       !eq(WMMAP.IndexType, 32) : index_key_32bit{0});
   let Inst{12} = !if(!eq(WMMAP.IndexType, 8), index_key_8bit{1}, 0);
-  let Inst{13} = 0;
+  let Inst{13} = !if(WMMAP.HasMatrixReuse, matrix_a_reuse, 0);
   // opsel_hi
   let Inst{59} = 1;
   let Inst{60} = 1;
-  let Inst{14} = 1;
+  let Inst{14} = !if(WMMAP.HasMatrixReuse, matrix_b_reuse, 1);
   // neg_lo
   let Inst{61} = !if(WMMAP.NegLo01, src0_modifiers{0}, 0);
   let Inst{62} = !if(WMMAP.NegLo01, src1_modifiers{0}, 0);
@@ -1742,7 +1887,7 @@ class VOP3PeWmma<bits<8> op, VOPProfile P, VOP3PWMMA_Profile WMMAP>
   let Inst{9}  = !if(WMMAP.NegHi01, src1_modifiers{1}, 0);
   let Inst{10} = !if(WMMAP.NegHi2, src2_modifiers{1}, 0);
   // clamp
-  let Inst{15} = !if(WMMAP.IsIU, clamp{0}, 0);
+  let Inst{15} = !if(WMMAP.HasClamp, clamp{0}, 0);
 }
 
 multiclass VOP3P_WMMA_Real_Base<GFXGen Gen, bits<8> op, VOP3PWMMA_Profile WMMAP,
@@ -1765,6 +1910,12 @@ multiclass VOP3P_Real_WMMA_gfx12w64 <bits<8> op, VOP3PWMMA_Profile WMMAP> {
   }
 }
 
+multiclass VOP3P_Real_WMMA_gfx1250 <bits<8> op, VOP3PWMMA_Profile WMMAP> {
+  let WaveSizePredicate = isWave32, DecoderNamespace = "GFX12" in {
+    defm _twoaddr : VOP3P_WMMA_Real_Base <GFX1250Gen, op, WMMAP>;
+  }
+}
+
 defm V_WMMA_F32_16X16X16_F16_w32     : VOP3P_Real_WMMA_gfx12 <0x040, F32_F16_WMMA_w32>;
 defm V_WMMA_F32_16X16X16_BF16_w32    : VOP3P_Real_WMMA_gfx12 <0x041, F32_BF16_WMMA_w32>;
 defm V_WMMA_F16_16X16X16_F16_w32     : VOP3P_Real_WMMA_gfx12 <0x042, F16_F16_WMMA_w32>;
@@ -1814,6 +1965,46 @@ defm V_SWMMAC_F32_16X16X32_FP8_BF8_w64 : VOP3P_Real_WMMA_gfx12w64 <0x058, F32_FP
 defm V_SWMMAC_F32_16X16X32_BF8_FP8_w64 : VOP3P_Real_WMMA_gfx12w64 <0x059, F32_FP8BF8_SWMMAC_w64>;
 defm V_SWMMAC_F32_16X16X32_BF8_BF8_w64 : VOP3P_Real_WMMA_gfx12w64 <0x05a, F32_FP8BF8_SWMMAC_w64>;
 
+defm V_WMMA_F32_16X16X4_F32_w32       : VOP3P_Real_WMMA_gfx1250 <0x05d, F32_F32_WMMA_w32>;
+defm V_WMMA_F32_16X16X32_BF16_w32     : VOP3P_Real_WMMA_gfx1250 <0x062, F32_BF16X32_WMMA_w32>;
+defm V_WMMA_F32_16X16X32_F16_w32      : VOP3P_Real_WMMA_gfx1250 <0x060, F32_F16X32_WMMA_w32>;
+defm V_WMMA_F16_16X16X32_F16_w32      : VOP3P_Real_WMMA_gfx1250 <0x061, F16_F16X32_WMMA_w32>;
+defm V_WMMA_BF16_16X16X32_BF16_w32    : VOP3P_Real_WMMA_gfx1250 <0x063, BF16_BF16X32_WMMA_w32>;
+defm V_WMMA_BF16F32_16X16X32_BF16_w32 : VOP3P_Real_WMMA_gfx1250 <0x064, BF16F32_BF16_WMMA_w32>;
+defm V_WMMA_F32_16X16X64_FP8_FP8_w32  : VOP3P_Real_WMMA_gfx1250 <0x06a, F32_FP8BF8X64_WMMA_w32>;
+defm V_WMMA_F32_16X16X64_FP8_BF8_w32  : VOP3P_Real_WMMA_gfx1250 <0x06b, F32_FP8BF8X64_WMMA_w32>;
+defm V_WMMA_F32_16X16X64_BF8_FP8_w32  : VOP3P_Real_WMMA_gfx1250 <0x06c, F32_FP8BF8X64_WMMA_w32>;
+defm V_WMMA_F32_16X16X64_BF8_BF8_w32  : VOP3P_Real_WMMA_gfx1250 <0x06d, F32_FP8BF8X64_WMMA_w32>;
+defm V_WMMA_F16_16X16X64_FP8_FP8_w32  : VOP3P_Real_WMMA_gfx1250 <0x06e, F16_FP8BF8X64_WMMA_w32>;
+defm V_WMMA_F16_16X16X64_FP8_BF8_w32  : VOP3P_Real_WMMA_gfx1250 <0x06f, F16_FP8BF8X64_WMMA_w32>;
+defm V_WMMA_F16_16X16X64_BF8_FP8_w32  : VOP3P_Real_WMMA_gfx1250 <0x070, F16_FP8BF8X64_WMMA_w32>;
+defm V_WMMA_F16_16X16X64_BF8_BF8_w32  : VOP3P_Real_WMMA_gfx1250 <0x071, F16_FP8BF8X64_WMMA_w32>;
+defm V_WMMA_I32_16X16X64_IU8_w32      : VOP3P_Real_WMMA_gfx1250 <0x072, I32_IU8X64_WMMA_w32>;
+defm V_WMMA_F32_16X16X128_FP8_FP8_w32 : VOP3P_Real_WMMA_gfx1250 <0x080, F32_FP8BF8X128_WMMA_w32>;
+defm V_WMMA_F32_16X16X128_FP8_BF8_w32 : VOP3P_Real_WMMA_gfx1250 <0x081, F32_FP8BF8X128_WMMA_w32>;
+defm V_WMMA_F32_16X16X128_BF8_FP8_w32 : VOP3P_Real_WMMA_gfx1250 <0x082, F32_FP8BF8X128_WMMA_w32>;
+defm V_WMMA_F32_16X16X128_BF8_BF8_w32 : VOP3P_Real_WMMA_gfx1250 <0x083, F32_FP8BF8X128_WMMA_w32>;
+defm V_WMMA_F16_16X16X128_FP8_FP8_w32 : VOP3P_Real_WMMA_gfx1250 <0x084, F16_FP8BF8X128_WMMA_w32>;
+defm V_WMMA_F16_16X16X128_FP8_BF8_w32 : VOP3P_Real_WMMA_gfx1250 <0x085, F16_FP8BF8X128_WMMA_w32>;
+defm V_WMMA_F16_16X16X128_BF8_FP8_w32 : VOP3P_Real_WMMA_gfx1250 <0x086, F16_FP8BF8X128_WMMA_w32>;
+defm V_WMMA_F16_16X16X128_BF8_BF8_w32 : VOP3P_Real_WMMA_gfx1250 <0x087, F16_FP8BF8X128_WMMA_w32>;
+defm V_WMMA_F32_32X16X128_F4_w32      : VOP3P_Real_WMMA_gfx1250 <0x088, F32_32X16X128_F4_WMMA_w32>;
+
+defm V_SWMMAC_F32_16X16X64_F16_w32      : VOP3P_Real_WMMA_gfx1250 <0x065, F32_F16X64_SWMMAC_w32>;
+defm V_SWMMAC_F32_16X16X64_BF16_w32     : VOP3P_Real_WMMA_gfx1250 <0x066, F32_BF16X64_SWMMAC_w32>;
+defm V_SWMMAC_F16_16X16X64_F16_w32      : VOP3P_Real_WMMA_gfx1250 <0x067, F16_F16X64_SWMMAC_w32>;
+defm V_SWMMAC_BF16_16X16X64_BF16_w32    : VOP3P_Real_WMMA_gfx1250 <0x068, BF16_BF16X64_SWMMAC_w32>;
+defm V_SWMMAC_BF16F32_16X16X64_BF16_w32 : VOP3P_Real_WMMA_gfx1250 <0x069, F32_BF16X64_SWMMAC_w32>;
+defm V_SWMMAC_F32_16X16X128_FP8_FP8_w32 : VOP3P_Real_WMMA_gfx1250 <0x073, F32_FP8BF8X128_SWMMAC_w32>;
+defm V_SWMMAC_F32_16X16X128_FP8_BF8_w32 : VOP3P_Real_WMMA_gfx1250 <0x074, F32_FP8BF8X128_SWMMAC_w32>;
+defm V_SWMMAC_F32_16X16X128_BF8_FP8_w32 : VOP3P_Real_WMMA_gfx1250 <0x075, F32_FP8BF8X128_SWMMAC_w32>;
+defm V_SWMMAC_F32_16X16X128_BF8_BF8_w32 : VOP3P_Real_WMMA_gfx1250 <0x076, F32_FP8BF8X128_SWMMAC_w32>;
+defm V_SWMMAC_F16_16X16X128_FP8_FP8_w32 : VOP3P_Real_WMMA_gfx1250 <0x077, F16_FP8BF8X128_SWMMAC_w32>;
+defm V_SWMMAC_F16_16X16X128_FP8_BF8_w32 : VOP3P_Real_WMMA_gfx1250 <0x078, F16_FP8BF8X128_SWMMAC_w32>;
+defm V_SWMMAC_F16_16X16X128_BF8_FP8_w32 : VOP3P_Real_WMMA_gfx1250 <0x079, F16_FP8BF8X128_SWMMAC_w32>;
+defm V_SWMMAC_F16_16X16X128_BF8_BF8_w32 : VOP3P_Real_WMMA_gfx1250 <0x07a, F16_FP8BF8X128_SWMMAC_w32>;
+defm V_SWMMAC_I32_16X16X128_IU8_w32     : VOP3P_Real_WMMA_gfx1250 <0x07b, I32_IU8X128_SWMMAC_w32>;
+
 multiclass VOP3P_Real_with_name<GFXGen Gen, bits<8> op,
                           string backing_ps_name = NAME,
                           string asmName = !cast<VOP3P_Pseudo>(NAME).Mnemonic> {

diff  --git a/llvm/lib/Target/AMDGPU/VOPInstructions.td b/llvm/lib/Target/AMDGPU/VOPInstructions.td
index df215d23f7f40..2b91ea7386be4 100644
--- a/llvm/lib/Target/AMDGPU/VOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOPInstructions.td
@@ -432,7 +432,7 @@ class VOP3be <VOPProfile P> : Enc64 {
   let Inst{63}    = !if(P.HasSrc2Mods, src2_modifiers{0}, 0);
 }
 
-class VOP3Pe <VOPProfile P> : Enc64 {
+class VOP3Pe_Base {
   bits<8> vdst;
   bits<4> src0_modifiers;
   bits<9> src0;
@@ -443,7 +443,12 @@ class VOP3Pe <VOPProfile P> : Enc64 {
   bits<1> clamp;
   bits<2> index_key_8bit;
   bits<1> index_key_16bit;
+  bits<1> index_key_32bit;
+  bits<1> matrix_a_reuse;
+  bits<1> matrix_b_reuse;
+}
 
+class VOP3Pe <VOPProfile P> : Enc64, VOP3Pe_Base {
   let Inst{7-0} = !if(P.HasDst, vdst, 0);
   let Inst{8} = !if(P.HasSrc0Mods, src0_modifiers{1}, 0); // neg_hi src0
   let Inst{9} = !if(P.HasSrc1Mods, src1_modifiers{1}, 0); // neg_hi src1
@@ -451,9 +456,13 @@ class VOP3Pe <VOPProfile P> : Enc64 {
 
   let Inst{11} = !if(!and(P.HasSrc0, P.HasOpSel), src0_modifiers{2}, 0); // op_sel(0)
   let Inst{12} = !if(!and(P.HasSrc1, P.HasOpSel), src1_modifiers{2}, 0); // op_sel(1)
-  let Inst{13} = !if(!and(P.HasSrc2, P.HasOpSel), src2_modifiers{2}, 0); // op_sel(2)
+  let Inst{13} = !if(!and(P.HasSrc2, P.HasOpSel), src2_modifiers{2},
+                     !if(P.HasMatrixReuse, matrix_a_reuse, 0));    // op_sel(2)
 
-  let Inst{14} = !if(!and(P.HasSrc2, P.HasOpSel), src2_modifiers{3}, !if(P.IsDOT, 1, ?)); // op_sel_hi(2)
+  let Inst{14} = !cond(!and(P.HasSrc2, P.HasOpSel) : src2_modifiers{3},
+                       P.IsDOT : 1,
+                       P.HasMatrixReuse : matrix_b_reuse,
+                       1: ?); // op_sel_hi(2)
 
   let Inst{15} = !if(P.HasClamp, clamp{0}, 0);
 

diff  --git a/llvm/test/MC/AMDGPU/gfx1250_asm_wmma_w32.s b/llvm/test/MC/AMDGPU/gfx1250_asm_wmma_w32.s
new file mode 100644
index 0000000000000..e81b6a1d13391
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_wmma_w32.s
@@ -0,0 +1,1234 @@
+// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5
+// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -show-encoding %s | FileCheck --check-prefix=GFX1250 %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=-wavefrontsize32,+wavefrontsize64 -show-encoding %s 2>&1 | FileCheck --check-prefix=WAVESIZE-ERR --implicit-check-not=error: --strict-whitespace %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX12-ERR --implicit-check-not=error: --strict-whitespace %s
+
+v_wmma_f32_16x16x4_f32 v[4:11], v[0:1], v[2:3], v[4:11]
+// GFX1250: v_wmma_f32_16x16x4_f32 v[4:11], v[0:1], v[2:3], v[4:11] ; encoding: [0x04,0x00,0x5d,0xcc,0x00,0x05,0x12,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x4_f32 v[4:11], v[0:1], v[2:3], 1.0
+// GFX1250: v_wmma_f32_16x16x4_f32 v[4:11], v[0:1], v[2:3], 1.0 ; encoding: [0x04,0x00,0x5d,0xcc,0x00,0x05,0xca,0x1b]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x4_f32 v[4:11], v[0:1], v[2:3], 1.0 neg_lo:[0,0,1]
+// GFX1250: v_wmma_f32_16x16x4_f32 v[4:11], v[0:1], v[2:3], 1.0 neg_lo:[0,0,1] ; encoding: [0x04,0x00,0x5d,0xcc,0x00,0x05,0xca,0x9b]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x4_f32 v[4:11], v[0:1], v[2:3], v[4:11] neg_lo:[1,0,0]
+// GFX1250: v_wmma_f32_16x16x4_f32 v[4:11], v[0:1], v[2:3], v[4:11] neg_lo:[1,0,0] ; encoding: [0x04,0x00,0x5d,0xcc,0x00,0x05,0x12,0x3c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x4_f32 v[4:11], v[0:1], v[2:3], v[4:11] neg_lo:[0,1,0]
+// GFX1250: v_wmma_f32_16x16x4_f32 v[4:11], v[0:1], v[2:3], v[4:11] neg_lo:[0,1,0] ; encoding: [0x04,0x00,0x5d,0xcc,0x00,0x05,0x12,0x5c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x4_f32 v[4:11], v[0:1], v[2:3], v[4:11] neg_lo:[0,0,1]
+// GFX1250: v_wmma_f32_16x16x4_f32 v[4:11], v[0:1], v[2:3], v[4:11] neg_lo:[0,0,1] ; encoding: [0x04,0x00,0x5d,0xcc,0x00,0x05,0x12,0x9c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x4_f32 v[4:11], v[0:1], v[2:3], v[4:11] neg_hi:[0,0,1]
+// GFX1250: v_wmma_f32_16x16x4_f32 v[4:11], v[0:1], v[2:3], v[4:11] neg_hi:[0,0,1] ; encoding: [0x04,0x04,0x5d,0xcc,0x00,0x05,0x12,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x4_f32 v[4:11], v[0:1], v[2:3], v[4:11] matrix_a_reuse
+// GFX1250: v_wmma_f32_16x16x4_f32 v[4:11], v[0:1], v[2:3], v[4:11] matrix_a_reuse ; encoding: [0x04,0x20,0x5d,0xcc,0x00,0x05,0x12,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x4_f32 v[4:11], v[0:1], v[2:3], v[4:11] matrix_b_reuse
+// GFX1250: v_wmma_f32_16x16x4_f32 v[4:11], v[0:1], v[2:3], v[4:11] matrix_b_reuse ; encoding: [0x04,0x40,0x5d,0xcc,0x00,0x05,0x12,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x32_bf16 v[16:23], v[0:7], v[8:15], v[16:23]
+// GFX1250: v_wmma_f32_16x16x32_bf16 v[16:23], v[0:7], v[8:15], v[16:23] ; encoding: [0x10,0x00,0x62,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x32_bf16 v[16:23], v[0:7], v[8:15], 1.0
+// GFX1250: v_wmma_f32_16x16x32_bf16 v[16:23], v[0:7], v[8:15], 1.0 ; encoding: [0x10,0x00,0x62,0xcc,0x00,0x11,0xca,0x1b]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x32_bf16 v[16:23], v[0:7], v[8:15], 1.0 neg_lo:[0,0,1]
+// GFX1250: v_wmma_f32_16x16x32_bf16 v[16:23], v[0:7], v[8:15], 1.0 neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x62,0xcc,0x00,0x11,0xca,0x9b]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x32_bf16 v[16:23], v[0:7], v[8:15], v[16:23] neg_lo:[1,0,0] neg_hi:[1,0,0]
+// GFX1250: v_wmma_f32_16x16x32_bf16 v[16:23], v[0:7], v[8:15], v[16:23] neg_lo:[1,0,0] neg_hi:[1,0,0] ; encoding: [0x10,0x01,0x62,0xcc,0x00,0x11,0x42,0x3c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x32_bf16 v[16:23], v[0:7], v[8:15], v[16:23] neg_lo:[0,1,0] neg_hi:[0,1,0]
+// GFX1250: v_wmma_f32_16x16x32_bf16 v[16:23], v[0:7], v[8:15], v[16:23] neg_lo:[0,1,0] neg_hi:[0,1,0] ; encoding: [0x10,0x02,0x62,0xcc,0x00,0x11,0x42,0x5c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x32_bf16 v[16:23], v[0:7], v[8:15], v[16:23] neg_lo:[0,0,1]
+// GFX1250: v_wmma_f32_16x16x32_bf16 v[16:23], v[0:7], v[8:15], v[16:23] neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x62,0xcc,0x00,0x11,0x42,0x9c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x32_bf16 v[16:23], v[0:7], v[8:15], v[16:23] neg_hi:[0,0,1]
+// GFX1250: v_wmma_f32_16x16x32_bf16 v[16:23], v[0:7], v[8:15], v[16:23] neg_hi:[0,0,1] ; encoding: [0x10,0x04,0x62,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x32_bf16 v[16:23], v[0:7], v[8:15], v[16:23] matrix_a_reuse
+// GFX1250: v_wmma_f32_16x16x32_bf16 v[16:23], v[0:7], v[8:15], v[16:23] matrix_a_reuse ; encoding: [0x10,0x20,0x62,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x32_bf16 v[16:23], v[0:7], v[8:15], v[16:23] matrix_b_reuse
+// GFX1250: v_wmma_f32_16x16x32_bf16 v[16:23], v[0:7], v[8:15], v[16:23] matrix_b_reuse ; encoding: [0x10,0x40,0x62,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_bf16_16x16x32_bf16 v[16:19], v[0:7], v[8:15], v[16:19]
+// GFX1250: v_wmma_bf16_16x16x32_bf16 v[16:19], v[0:7], v[8:15], v[16:19] ; encoding: [0x10,0x00,0x63,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_bf16_16x16x32_bf16 v[16:19], v[0:7], v[8:15], 1.0
+// GFX1250: v_wmma_bf16_16x16x32_bf16 v[16:19], v[0:7], v[8:15], 1.0 ; encoding: [0x10,0x00,0x63,0xcc,0x00,0x11,0xca,0x1b]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_bf16_16x16x32_bf16 v[16:19], v[0:7], v[8:15], 1.0 neg_lo:[0,0,1]
+// GFX1250: v_wmma_bf16_16x16x32_bf16 v[16:19], v[0:7], v[8:15], 1.0 neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x63,0xcc,0x00,0x11,0xca,0x9b]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_bf16_16x16x32_bf16 v[16:19], v[0:7], v[8:15], v[16:19] neg_lo:[1,0,0] neg_hi:[1,0,0]
+// GFX1250: v_wmma_bf16_16x16x32_bf16 v[16:19], v[0:7], v[8:15], v[16:19] neg_lo:[1,0,0] neg_hi:[1,0,0] ; encoding: [0x10,0x01,0x63,0xcc,0x00,0x11,0x42,0x3c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_bf16_16x16x32_bf16 v[16:19], v[0:7], v[8:15], v[16:19] neg_lo:[0,1,0] neg_hi:[0,1,0]
+// GFX1250: v_wmma_bf16_16x16x32_bf16 v[16:19], v[0:7], v[8:15], v[16:19] neg_lo:[0,1,0] neg_hi:[0,1,0] ; encoding: [0x10,0x02,0x63,0xcc,0x00,0x11,0x42,0x5c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_bf16_16x16x32_bf16 v[16:19], v[0:7], v[8:15], v[16:19] neg_lo:[0,0,1]
+// GFX1250: v_wmma_bf16_16x16x32_bf16 v[16:19], v[0:7], v[8:15], v[16:19] neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x63,0xcc,0x00,0x11,0x42,0x9c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_bf16_16x16x32_bf16 v[16:19], v[0:7], v[8:15], v[16:19] neg_hi:[0,0,1]
+// GFX1250: v_wmma_bf16_16x16x32_bf16 v[16:19], v[0:7], v[8:15], v[16:19] neg_hi:[0,0,1] ; encoding: [0x10,0x04,0x63,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_bf16_16x16x32_bf16 v[16:19], v[0:7], v[8:15], v[16:19] matrix_a_reuse
+// GFX1250: v_wmma_bf16_16x16x32_bf16 v[16:19], v[0:7], v[8:15], v[16:19] matrix_a_reuse ; encoding: [0x10,0x20,0x63,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_bf16_16x16x32_bf16 v[16:19], v[0:7], v[8:15], v[16:19] matrix_b_reuse
+// GFX1250: v_wmma_bf16_16x16x32_bf16 v[16:19], v[0:7], v[8:15], v[16:19] matrix_b_reuse ; encoding: [0x10,0x40,0x63,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_bf16f32_16x16x32_bf16 v[26:29], v[0:7], v[8:15], v[16:23]
+// GFX1250: v_wmma_bf16f32_16x16x32_bf16 v[26:29], v[0:7], v[8:15], v[16:23] ; encoding: [0x1a,0x00,0x64,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_bf16f32_16x16x32_bf16 v[26:29], v[0:7], v[8:15], 1.0
+// GFX1250: v_wmma_bf16f32_16x16x32_bf16 v[26:29], v[0:7], v[8:15], 1.0 ; encoding: [0x1a,0x00,0x64,0xcc,0x00,0x11,0xca,0x1b]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_bf16f32_16x16x32_bf16 v[26:29], v[0:7], v[8:15], 1.0 neg_lo:[0,0,1]
+// GFX1250: v_wmma_bf16f32_16x16x32_bf16 v[26:29], v[0:7], v[8:15], 1.0 neg_lo:[0,0,1] ; encoding: [0x1a,0x00,0x64,0xcc,0x00,0x11,0xca,0x9b]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_bf16f32_16x16x32_bf16 v[26:29], v[0:7], v[8:15], v[16:23] neg_lo:[1,0,0] neg_hi:[1,0,0]
+// GFX1250: v_wmma_bf16f32_16x16x32_bf16 v[26:29], v[0:7], v[8:15], v[16:23] neg_lo:[1,0,0] neg_hi:[1,0,0] ; encoding: [0x1a,0x01,0x64,0xcc,0x00,0x11,0x42,0x3c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_bf16f32_16x16x32_bf16 v[26:29], v[0:7], v[8:15], v[16:23] neg_lo:[0,1,0] neg_hi:[0,1,0]
+// GFX1250: v_wmma_bf16f32_16x16x32_bf16 v[26:29], v[0:7], v[8:15], v[16:23] neg_lo:[0,1,0] neg_hi:[0,1,0] ; encoding: [0x1a,0x02,0x64,0xcc,0x00,0x11,0x42,0x5c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_bf16f32_16x16x32_bf16 v[26:29], v[0:7], v[8:15], v[16:23] neg_lo:[0,0,1]
+// GFX1250: v_wmma_bf16f32_16x16x32_bf16 v[26:29], v[0:7], v[8:15], v[16:23] neg_lo:[0,0,1] ; encoding: [0x1a,0x00,0x64,0xcc,0x00,0x11,0x42,0x9c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_bf16f32_16x16x32_bf16 v[26:29], v[0:7], v[8:15], v[16:23] neg_hi:[0,0,1]
+// GFX1250: v_wmma_bf16f32_16x16x32_bf16 v[26:29], v[0:7], v[8:15], v[16:23] neg_hi:[0,0,1] ; encoding: [0x1a,0x04,0x64,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_bf16f32_16x16x32_bf16 v[26:29], v[0:7], v[8:15], v[16:23] matrix_a_reuse
+// GFX1250: v_wmma_bf16f32_16x16x32_bf16 v[26:29], v[0:7], v[8:15], v[16:23] matrix_a_reuse ; encoding: [0x1a,0x20,0x64,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_bf16f32_16x16x32_bf16 v[26:29], v[0:7], v[8:15], v[16:23] matrix_b_reuse
+// GFX1250: v_wmma_bf16f32_16x16x32_bf16 v[26:29], v[0:7], v[8:15], v[16:23] matrix_b_reuse ; encoding: [0x1a,0x40,0x64,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x64_fp8_fp8 v[16:23], v[0:7], v[8:15], v[16:23]
+// GFX1250: v_wmma_f32_16x16x64_fp8_fp8 v[16:23], v[0:7], v[8:15], v[16:23] ; encoding: [0x10,0x00,0x6a,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x64_fp8_fp8 v[16:23], v[0:7], v[8:15], 1.0
+// GFX1250: v_wmma_f32_16x16x64_fp8_fp8 v[16:23], v[0:7], v[8:15], 1.0 ; encoding: [0x10,0x00,0x6a,0xcc,0x00,0x11,0xca,0x1b]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x64_fp8_fp8 v[16:23], v[0:7], v[8:15], 1.0 neg_lo:[0,0,1]
+// GFX1250: v_wmma_f32_16x16x64_fp8_fp8 v[16:23], v[0:7], v[8:15], 1.0 neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x6a,0xcc,0x00,0x11,0xca,0x9b]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x64_fp8_fp8 v[16:23], v[0:7], v[8:15], v[16:23] neg_lo:[0,0,1]
+// GFX1250: v_wmma_f32_16x16x64_fp8_fp8 v[16:23], v[0:7], v[8:15], v[16:23] neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x6a,0xcc,0x00,0x11,0x42,0x9c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x64_fp8_fp8 v[16:23], v[0:7], v[8:15], v[16:23] neg_hi:[0,0,1]
+// GFX1250: v_wmma_f32_16x16x64_fp8_fp8 v[16:23], v[0:7], v[8:15], v[16:23] neg_hi:[0,0,1] ; encoding: [0x10,0x04,0x6a,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x64_fp8_fp8 v[16:23], v[0:7], v[8:15], v[16:23] matrix_a_reuse
+// GFX1250: v_wmma_f32_16x16x64_fp8_fp8 v[16:23], v[0:7], v[8:15], v[16:23] matrix_a_reuse ; encoding: [0x10,0x20,0x6a,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x64_fp8_fp8 v[16:23], v[0:7], v[8:15], v[16:23] matrix_b_reuse
+// GFX1250: v_wmma_f32_16x16x64_fp8_fp8 v[16:23], v[0:7], v[8:15], v[16:23] matrix_b_reuse ; encoding: [0x10,0x40,0x6a,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x64_fp8_bf8 v[16:23], v[0:7], v[8:15], v[16:23]
+// GFX1250: v_wmma_f32_16x16x64_fp8_bf8 v[16:23], v[0:7], v[8:15], v[16:23] ; encoding: [0x10,0x00,0x6b,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x64_fp8_bf8 v[16:23], v[0:7], v[8:15], 1.0
+// GFX1250: v_wmma_f32_16x16x64_fp8_bf8 v[16:23], v[0:7], v[8:15], 1.0 ; encoding: [0x10,0x00,0x6b,0xcc,0x00,0x11,0xca,0x1b]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x64_fp8_bf8 v[16:23], v[0:7], v[8:15], 1.0 neg_lo:[0,0,1]
+// GFX1250: v_wmma_f32_16x16x64_fp8_bf8 v[16:23], v[0:7], v[8:15], 1.0 neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x6b,0xcc,0x00,0x11,0xca,0x9b]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x64_fp8_bf8 v[16:23], v[0:7], v[8:15], v[16:23] neg_lo:[0,0,1]
+// GFX1250: v_wmma_f32_16x16x64_fp8_bf8 v[16:23], v[0:7], v[8:15], v[16:23] neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x6b,0xcc,0x00,0x11,0x42,0x9c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x64_fp8_bf8 v[16:23], v[0:7], v[8:15], v[16:23] neg_hi:[0,0,1]
+// GFX1250: v_wmma_f32_16x16x64_fp8_bf8 v[16:23], v[0:7], v[8:15], v[16:23] neg_hi:[0,0,1] ; encoding: [0x10,0x04,0x6b,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x64_fp8_bf8 v[16:23], v[0:7], v[8:15], v[16:23] matrix_a_reuse
+// GFX1250: v_wmma_f32_16x16x64_fp8_bf8 v[16:23], v[0:7], v[8:15], v[16:23] matrix_a_reuse ; encoding: [0x10,0x20,0x6b,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x64_fp8_bf8 v[16:23], v[0:7], v[8:15], v[16:23] matrix_b_reuse
+// GFX1250: v_wmma_f32_16x16x64_fp8_bf8 v[16:23], v[0:7], v[8:15], v[16:23] matrix_b_reuse ; encoding: [0x10,0x40,0x6b,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x64_bf8_fp8 v[16:23], v[0:7], v[8:15], v[16:23]
+// GFX1250: v_wmma_f32_16x16x64_bf8_fp8 v[16:23], v[0:7], v[8:15], v[16:23] ; encoding: [0x10,0x00,0x6c,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x64_bf8_fp8 v[16:23], v[0:7], v[8:15], 1.0
+// GFX1250: v_wmma_f32_16x16x64_bf8_fp8 v[16:23], v[0:7], v[8:15], 1.0 ; encoding: [0x10,0x00,0x6c,0xcc,0x00,0x11,0xca,0x1b]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x64_bf8_fp8 v[16:23], v[0:7], v[8:15], 1.0 neg_lo:[0,0,1]
+// GFX1250: v_wmma_f32_16x16x64_bf8_fp8 v[16:23], v[0:7], v[8:15], 1.0 neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x6c,0xcc,0x00,0x11,0xca,0x9b]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x64_bf8_fp8 v[16:23], v[0:7], v[8:15], v[16:23] neg_lo:[0,0,1]
+// GFX1250: v_wmma_f32_16x16x64_bf8_fp8 v[16:23], v[0:7], v[8:15], v[16:23] neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x6c,0xcc,0x00,0x11,0x42,0x9c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x64_bf8_fp8 v[16:23], v[0:7], v[8:15], v[16:23] neg_hi:[0,0,1]
+// GFX1250: v_wmma_f32_16x16x64_bf8_fp8 v[16:23], v[0:7], v[8:15], v[16:23] neg_hi:[0,0,1] ; encoding: [0x10,0x04,0x6c,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x64_bf8_fp8 v[16:23], v[0:7], v[8:15], v[16:23] matrix_a_reuse
+// GFX1250: v_wmma_f32_16x16x64_bf8_fp8 v[16:23], v[0:7], v[8:15], v[16:23] matrix_a_reuse ; encoding: [0x10,0x20,0x6c,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x64_bf8_fp8 v[16:23], v[0:7], v[8:15], v[16:23] matrix_b_reuse
+// GFX1250: v_wmma_f32_16x16x64_bf8_fp8 v[16:23], v[0:7], v[8:15], v[16:23] matrix_b_reuse ; encoding: [0x10,0x40,0x6c,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x64_bf8_bf8 v[16:23], v[0:7], v[8:15], v[16:23]
+// GFX1250: v_wmma_f32_16x16x64_bf8_bf8 v[16:23], v[0:7], v[8:15], v[16:23] ; encoding: [0x10,0x00,0x6d,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x64_bf8_bf8 v[16:23], v[0:7], v[8:15], 1.0
+// GFX1250: v_wmma_f32_16x16x64_bf8_bf8 v[16:23], v[0:7], v[8:15], 1.0 ; encoding: [0x10,0x00,0x6d,0xcc,0x00,0x11,0xca,0x1b]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x64_bf8_bf8 v[16:23], v[0:7], v[8:15], 1.0 neg_lo:[0,0,1]
+// GFX1250: v_wmma_f32_16x16x64_bf8_bf8 v[16:23], v[0:7], v[8:15], 1.0 neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x6d,0xcc,0x00,0x11,0xca,0x9b]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x64_bf8_bf8 v[16:23], v[0:7], v[8:15], v[16:23] neg_lo:[0,0,1]
+// GFX1250: v_wmma_f32_16x16x64_bf8_bf8 v[16:23], v[0:7], v[8:15], v[16:23] neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x6d,0xcc,0x00,0x11,0x42,0x9c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x64_bf8_bf8 v[16:23], v[0:7], v[8:15], v[16:23] neg_hi:[0,0,1]
+// GFX1250: v_wmma_f32_16x16x64_bf8_bf8 v[16:23], v[0:7], v[8:15], v[16:23] neg_hi:[0,0,1] ; encoding: [0x10,0x04,0x6d,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x64_bf8_bf8 v[16:23], v[0:7], v[8:15], v[16:23] matrix_a_reuse
+// GFX1250: v_wmma_f32_16x16x64_bf8_bf8 v[16:23], v[0:7], v[8:15], v[16:23] matrix_a_reuse ; encoding: [0x10,0x20,0x6d,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x64_bf8_bf8 v[16:23], v[0:7], v[8:15], v[16:23] matrix_b_reuse
+// GFX1250: v_wmma_f32_16x16x64_bf8_bf8 v[16:23], v[0:7], v[8:15], v[16:23] matrix_b_reuse ; encoding: [0x10,0x40,0x6d,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f16_16x16x64_fp8_fp8 v[16:19], v[0:7], v[8:15], v[16:19]
+// GFX1250: v_wmma_f16_16x16x64_fp8_fp8 v[16:19], v[0:7], v[8:15], v[16:19] ; encoding: [0x10,0x00,0x6e,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f16_16x16x64_fp8_fp8 v[16:19], v[0:7], v[8:15], 1.0
+// GFX1250: v_wmma_f16_16x16x64_fp8_fp8 v[16:19], v[0:7], v[8:15], 1.0 ; encoding: [0x10,0x00,0x6e,0xcc,0x00,0x11,0xca,0x1b]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f16_16x16x64_fp8_fp8 v[16:19], v[0:7], v[8:15], 1.0 neg_lo:[0,0,1]
+// GFX1250: v_wmma_f16_16x16x64_fp8_fp8 v[16:19], v[0:7], v[8:15], 1.0 neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x6e,0xcc,0x00,0x11,0xca,0x9b]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f16_16x16x64_fp8_fp8 v[16:19], v[0:7], v[8:15], v[16:19] neg_lo:[0,0,1]
+// GFX1250: v_wmma_f16_16x16x64_fp8_fp8 v[16:19], v[0:7], v[8:15], v[16:19] neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x6e,0xcc,0x00,0x11,0x42,0x9c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f16_16x16x64_fp8_fp8 v[16:19], v[0:7], v[8:15], v[16:19] neg_hi:[0,0,1]
+// GFX1250: v_wmma_f16_16x16x64_fp8_fp8 v[16:19], v[0:7], v[8:15], v[16:19] neg_hi:[0,0,1] ; encoding: [0x10,0x04,0x6e,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f16_16x16x64_fp8_fp8 v[16:19], v[0:7], v[8:15], v[16:19] matrix_a_reuse
+// GFX1250: v_wmma_f16_16x16x64_fp8_fp8 v[16:19], v[0:7], v[8:15], v[16:19] matrix_a_reuse ; encoding: [0x10,0x20,0x6e,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f16_16x16x64_fp8_fp8 v[16:19], v[0:7], v[8:15], v[16:19] matrix_b_reuse
+// GFX1250: v_wmma_f16_16x16x64_fp8_fp8 v[16:19], v[0:7], v[8:15], v[16:19] matrix_b_reuse ; encoding: [0x10,0x40,0x6e,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f16_16x16x64_fp8_bf8 v[16:19], v[0:7], v[8:15], v[16:19]
+// GFX1250: v_wmma_f16_16x16x64_fp8_bf8 v[16:19], v[0:7], v[8:15], v[16:19] ; encoding: [0x10,0x00,0x6f,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f16_16x16x64_fp8_bf8 v[16:19], v[0:7], v[8:15], 1.0
+// GFX1250: v_wmma_f16_16x16x64_fp8_bf8 v[16:19], v[0:7], v[8:15], 1.0 ; encoding: [0x10,0x00,0x6f,0xcc,0x00,0x11,0xca,0x1b]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f16_16x16x64_fp8_bf8 v[16:19], v[0:7], v[8:15], 1.0 neg_lo:[0,0,1]
+// GFX1250: v_wmma_f16_16x16x64_fp8_bf8 v[16:19], v[0:7], v[8:15], 1.0 neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x6f,0xcc,0x00,0x11,0xca,0x9b]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f16_16x16x64_fp8_bf8 v[16:19], v[0:7], v[8:15], v[16:19] neg_lo:[0,0,1]
+// GFX1250: v_wmma_f16_16x16x64_fp8_bf8 v[16:19], v[0:7], v[8:15], v[16:19] neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x6f,0xcc,0x00,0x11,0x42,0x9c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f16_16x16x64_fp8_bf8 v[16:19], v[0:7], v[8:15], v[16:19] neg_hi:[0,0,1]
+// GFX1250: v_wmma_f16_16x16x64_fp8_bf8 v[16:19], v[0:7], v[8:15], v[16:19] neg_hi:[0,0,1] ; encoding: [0x10,0x04,0x6f,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f16_16x16x64_fp8_bf8 v[16:19], v[0:7], v[8:15], v[16:19] matrix_a_reuse
+// GFX1250: v_wmma_f16_16x16x64_fp8_bf8 v[16:19], v[0:7], v[8:15], v[16:19] matrix_a_reuse ; encoding: [0x10,0x20,0x6f,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f16_16x16x64_fp8_bf8 v[16:19], v[0:7], v[8:15], v[16:19] matrix_b_reuse
+// GFX1250: v_wmma_f16_16x16x64_fp8_bf8 v[16:19], v[0:7], v[8:15], v[16:19] matrix_b_reuse ; encoding: [0x10,0x40,0x6f,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f16_16x16x64_bf8_fp8 v[16:19], v[0:7], v[8:15], v[16:19]
+// GFX1250: v_wmma_f16_16x16x64_bf8_fp8 v[16:19], v[0:7], v[8:15], v[16:19] ; encoding: [0x10,0x00,0x70,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f16_16x16x64_bf8_fp8 v[16:19], v[0:7], v[8:15], 1.0
+// GFX1250: v_wmma_f16_16x16x64_bf8_fp8 v[16:19], v[0:7], v[8:15], 1.0 ; encoding: [0x10,0x00,0x70,0xcc,0x00,0x11,0xca,0x1b]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f16_16x16x64_bf8_fp8 v[16:19], v[0:7], v[8:15], 1.0 neg_lo:[0,0,1]
+// GFX1250: v_wmma_f16_16x16x64_bf8_fp8 v[16:19], v[0:7], v[8:15], 1.0 neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x70,0xcc,0x00,0x11,0xca,0x9b]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f16_16x16x64_bf8_fp8 v[16:19], v[0:7], v[8:15], v[16:19] neg_lo:[0,0,1]
+// GFX1250: v_wmma_f16_16x16x64_bf8_fp8 v[16:19], v[0:7], v[8:15], v[16:19] neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x70,0xcc,0x00,0x11,0x42,0x9c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f16_16x16x64_bf8_fp8 v[16:19], v[0:7], v[8:15], v[16:19] neg_hi:[0,0,1]
+// GFX1250: v_wmma_f16_16x16x64_bf8_fp8 v[16:19], v[0:7], v[8:15], v[16:19] neg_hi:[0,0,1] ; encoding: [0x10,0x04,0x70,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f16_16x16x64_bf8_fp8 v[16:19], v[0:7], v[8:15], v[16:19] matrix_a_reuse
+// GFX1250: v_wmma_f16_16x16x64_bf8_fp8 v[16:19], v[0:7], v[8:15], v[16:19] matrix_a_reuse ; encoding: [0x10,0x20,0x70,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f16_16x16x64_bf8_fp8 v[16:19], v[0:7], v[8:15], v[16:19] matrix_b_reuse
+// GFX1250: v_wmma_f16_16x16x64_bf8_fp8 v[16:19], v[0:7], v[8:15], v[16:19] matrix_b_reuse ; encoding: [0x10,0x40,0x70,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f16_16x16x64_bf8_bf8 v[16:19], v[0:7], v[8:15], v[16:19]
+// GFX1250: v_wmma_f16_16x16x64_bf8_bf8 v[16:19], v[0:7], v[8:15], v[16:19] ; encoding: [0x10,0x00,0x71,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f16_16x16x64_bf8_bf8 v[16:19], v[0:7], v[8:15], 1.0
+// GFX1250: v_wmma_f16_16x16x64_bf8_bf8 v[16:19], v[0:7], v[8:15], 1.0 ; encoding: [0x10,0x00,0x71,0xcc,0x00,0x11,0xca,0x1b]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f16_16x16x64_bf8_bf8 v[16:19], v[0:7], v[8:15], 1.0 neg_lo:[0,0,1]
+// GFX1250: v_wmma_f16_16x16x64_bf8_bf8 v[16:19], v[0:7], v[8:15], 1.0 neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x71,0xcc,0x00,0x11,0xca,0x9b]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f16_16x16x64_bf8_bf8 v[16:19], v[0:7], v[8:15], v[16:19] neg_lo:[0,0,1]
+// GFX1250: v_wmma_f16_16x16x64_bf8_bf8 v[16:19], v[0:7], v[8:15], v[16:19] neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x71,0xcc,0x00,0x11,0x42,0x9c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f16_16x16x64_bf8_bf8 v[16:19], v[0:7], v[8:15], v[16:19] neg_hi:[0,0,1]
+// GFX1250: v_wmma_f16_16x16x64_bf8_bf8 v[16:19], v[0:7], v[8:15], v[16:19] neg_hi:[0,0,1] ; encoding: [0x10,0x04,0x71,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f16_16x16x64_bf8_bf8 v[16:19], v[0:7], v[8:15], v[16:19] matrix_a_reuse
+// GFX1250: v_wmma_f16_16x16x64_bf8_bf8 v[16:19], v[0:7], v[8:15], v[16:19] matrix_a_reuse ; encoding: [0x10,0x20,0x71,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f16_16x16x64_bf8_bf8 v[16:19], v[0:7], v[8:15], v[16:19] matrix_b_reuse
+// GFX1250: v_wmma_f16_16x16x64_bf8_bf8 v[16:19], v[0:7], v[8:15], v[16:19] matrix_b_reuse ; encoding: [0x10,0x40,0x71,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_i32_16x16x64_iu8 v[16:23], v[0:7], v[8:15], v[16:23]
+// GFX1250: v_wmma_i32_16x16x64_iu8 v[16:23], v[0:7], v[8:15], v[16:23] ; encoding: [0x10,0x00,0x72,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_i32_16x16x64_iu8 v[16:23], v[0:7], v[8:15], 1
+// GFX1250: v_wmma_i32_16x16x64_iu8 v[16:23], v[0:7], v[8:15], 1 ; encoding: [0x10,0x00,0x72,0xcc,0x00,0x11,0x06,0x1a]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_i32_16x16x64_iu8 v[16:23], v[0:7], v[8:15], v[16:23] neg_lo:[1,0,0]
+// GFX1250: v_wmma_i32_16x16x64_iu8 v[16:23], v[0:7], v[8:15], v[16:23] neg_lo:[1,0,0] ; encoding: [0x10,0x00,0x72,0xcc,0x00,0x11,0x42,0x3c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_i32_16x16x64_iu8 v[16:23], v[0:7], v[8:15], v[16:23] neg_lo:[0,1,0]
+// GFX1250: v_wmma_i32_16x16x64_iu8 v[16:23], v[0:7], v[8:15], v[16:23] neg_lo:[0,1,0] ; encoding: [0x10,0x00,0x72,0xcc,0x00,0x11,0x42,0x5c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_i32_16x16x64_iu8 v[16:23], v[0:7], v[8:15], v[16:23] matrix_a_reuse
+// GFX1250: v_wmma_i32_16x16x64_iu8 v[16:23], v[0:7], v[8:15], v[16:23] matrix_a_reuse ; encoding: [0x10,0x20,0x72,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_i32_16x16x64_iu8 v[16:23], v[0:7], v[8:15], v[16:23] matrix_b_reuse
+// GFX1250: v_wmma_i32_16x16x64_iu8 v[16:23], v[0:7], v[8:15], v[16:23] matrix_b_reuse ; encoding: [0x10,0x40,0x72,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x32_f16 v[16:23], v[0:7], v[8:15], v[16:23]
+// GFX1250: v_wmma_f32_16x16x32_f16 v[16:23], v[0:7], v[8:15], v[16:23] ; encoding: [0x10,0x00,0x60,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x32_f16 v[16:23], v[0:7], v[8:15], 1.0
+// GFX1250: v_wmma_f32_16x16x32_f16 v[16:23], v[0:7], v[8:15], 1.0 ; encoding: [0x10,0x00,0x60,0xcc,0x00,0x11,0xca,0x1b]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x32_f16 v[16:23], v[0:7], v[8:15], 1.0 neg_lo:[0,0,1]
+// GFX1250: v_wmma_f32_16x16x32_f16 v[16:23], v[0:7], v[8:15], 1.0 neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x60,0xcc,0x00,0x11,0xca,0x9b]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x32_f16 v[16:23], v[0:7], v[8:15], v[16:23] neg_lo:[1,0,0] neg_hi:[1,0,0]
+// GFX1250: v_wmma_f32_16x16x32_f16 v[16:23], v[0:7], v[8:15], v[16:23] neg_lo:[1,0,0] neg_hi:[1,0,0] ; encoding: [0x10,0x01,0x60,0xcc,0x00,0x11,0x42,0x3c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x32_f16 v[16:23], v[0:7], v[8:15], v[16:23] neg_lo:[0,1,0] neg_hi:[0,1,0]
+// GFX1250: v_wmma_f32_16x16x32_f16 v[16:23], v[0:7], v[8:15], v[16:23] neg_lo:[0,1,0] neg_hi:[0,1,0] ; encoding: [0x10,0x02,0x60,0xcc,0x00,0x11,0x42,0x5c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x32_f16 v[16:23], v[0:7], v[8:15], v[16:23] neg_lo:[0,0,1]
+// GFX1250: v_wmma_f32_16x16x32_f16 v[16:23], v[0:7], v[8:15], v[16:23] neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x60,0xcc,0x00,0x11,0x42,0x9c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x32_f16 v[16:23], v[0:7], v[8:15], v[16:23] neg_hi:[0,0,1]
+// GFX1250: v_wmma_f32_16x16x32_f16 v[16:23], v[0:7], v[8:15], v[16:23] neg_hi:[0,0,1] ; encoding: [0x10,0x04,0x60,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x32_f16 v[16:23], v[0:7], v[8:15], v[16:23] matrix_a_reuse
+// GFX1250: v_wmma_f32_16x16x32_f16 v[16:23], v[0:7], v[8:15], v[16:23] matrix_a_reuse ; encoding: [0x10,0x20,0x60,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x32_f16 v[16:23], v[0:7], v[8:15], v[16:23] matrix_b_reuse
+// GFX1250: v_wmma_f32_16x16x32_f16 v[16:23], v[0:7], v[8:15], v[16:23] matrix_b_reuse ; encoding: [0x10,0x40,0x60,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f16_16x16x32_f16 v[16:19], v[0:7], v[8:15], v[16:19]
+// GFX1250: v_wmma_f16_16x16x32_f16 v[16:19], v[0:7], v[8:15], v[16:19] ; encoding: [0x10,0x00,0x61,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f16_16x16x32_f16 v[16:19], v[0:7], v[8:15], 1.0
+// GFX1250: v_wmma_f16_16x16x32_f16 v[16:19], v[0:7], v[8:15], 1.0 ; encoding: [0x10,0x00,0x61,0xcc,0x00,0x11,0xca,0x1b]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f16_16x16x32_f16 v[16:19], v[0:7], v[8:15], 1.0 neg_lo:[0,0,1]
+// GFX1250: v_wmma_f16_16x16x32_f16 v[16:19], v[0:7], v[8:15], 1.0 neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x61,0xcc,0x00,0x11,0xca,0x9b]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f16_16x16x32_f16 v[16:19], v[0:7], v[8:15], v[16:19] neg_lo:[1,0,0] neg_hi:[1,0,0]
+// GFX1250: v_wmma_f16_16x16x32_f16 v[16:19], v[0:7], v[8:15], v[16:19] neg_lo:[1,0,0] neg_hi:[1,0,0] ; encoding: [0x10,0x01,0x61,0xcc,0x00,0x11,0x42,0x3c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f16_16x16x32_f16 v[16:19], v[0:7], v[8:15], v[16:19] neg_lo:[0,1,0] neg_hi:[0,1,0]
+// GFX1250: v_wmma_f16_16x16x32_f16 v[16:19], v[0:7], v[8:15], v[16:19] neg_lo:[0,1,0] neg_hi:[0,1,0] ; encoding: [0x10,0x02,0x61,0xcc,0x00,0x11,0x42,0x5c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f16_16x16x32_f16 v[16:19], v[0:7], v[8:15], v[16:19] neg_lo:[0,0,1]
+// GFX1250: v_wmma_f16_16x16x32_f16 v[16:19], v[0:7], v[8:15], v[16:19] neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x61,0xcc,0x00,0x11,0x42,0x9c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f16_16x16x32_f16 v[16:19], v[0:7], v[8:15], v[16:19] neg_hi:[0,0,1]
+// GFX1250: v_wmma_f16_16x16x32_f16 v[16:19], v[0:7], v[8:15], v[16:19] neg_hi:[0,0,1] ; encoding: [0x10,0x04,0x61,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f16_16x16x32_f16 v[16:19], v[0:7], v[8:15], v[16:19] matrix_a_reuse
+// GFX1250: v_wmma_f16_16x16x32_f16 v[16:19], v[0:7], v[8:15], v[16:19] matrix_a_reuse ; encoding: [0x10,0x20,0x61,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f16_16x16x32_f16 v[16:19], v[0:7], v[8:15], v[16:19] matrix_b_reuse
+// GFX1250: v_wmma_f16_16x16x32_f16 v[16:19], v[0:7], v[8:15], v[16:19] matrix_b_reuse ; encoding: [0x10,0x40,0x61,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_swmmac_f32_16x16x64_bf16 v[24:31], v[0:7], v[8:23], v32
+// GFX1250: v_swmmac_f32_16x16x64_bf16 v[24:31], v[0:7], v[8:23], v32 ; encoding: [0x18,0x00,0x66,0xcc,0x00,0x11,0x82,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_swmmac_f32_16x16x64_bf16 v[24:31], v[0:7], v[8:23], v32 index_key:1
+// GFX1250: v_swmmac_f32_16x16x64_bf16 v[24:31], v[0:7], v[8:23], v32 index_key:1 ; encoding: [0x18,0x08,0x66,0xcc,0x00,0x11,0x82,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_swmmac_f32_16x16x64_bf16 v[24:31], v[0:7], v[8:23], v32 neg_lo:[1,0,0] neg_hi:[1,0,0]
+// GFX1250: v_swmmac_f32_16x16x64_bf16 v[24:31], v[0:7], v[8:23], v32 neg_lo:[1,0,0] neg_hi:[1,0,0] ; encoding: [0x18,0x01,0x66,0xcc,0x00,0x11,0x82,0x3c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_swmmac_f32_16x16x64_bf16 v[24:31], v[0:7], v[8:23], v32 neg_lo:[0,1,0] neg_hi:[0,1,0]
+// GFX1250: v_swmmac_f32_16x16x64_bf16 v[24:31], v[0:7], v[8:23], v32 neg_lo:[0,1,0] neg_hi:[0,1,0] ; encoding: [0x18,0x02,0x66,0xcc,0x00,0x11,0x82,0x5c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_swmmac_f32_16x16x64_bf16 v[24:31], v[0:7], v[8:23], v32 matrix_a_reuse
+// GFX1250: v_swmmac_f32_16x16x64_bf16 v[24:31], v[0:7], v[8:23], v32 matrix_a_reuse ; encoding: [0x18,0x20,0x66,0xcc,0x00,0x11,0x82,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_swmmac_f32_16x16x64_bf16 v[24:31], v[0:7], v[8:23], v32 matrix_b_reuse
+// GFX1250: v_swmmac_f32_16x16x64_bf16 v[24:31], v[0:7], v[8:23], v32 matrix_b_reuse ; encoding: [0x18,0x40,0x66,0xcc,0x00,0x11,0x82,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_swmmac_bf16_16x16x64_bf16 v[24:27], v[0:7], v[8:23], v28
+// GFX1250: v_swmmac_bf16_16x16x64_bf16 v[24:27], v[0:7], v[8:23], v28 ; encoding: [0x18,0x00,0x68,0xcc,0x00,0x11,0x72,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_swmmac_bf16_16x16x64_bf16 v[24:27], v[0:7], v[8:23], v28 index_key:1
+// GFX1250: v_swmmac_bf16_16x16x64_bf16 v[24:27], v[0:7], v[8:23], v28 index_key:1 ; encoding: [0x18,0x08,0x68,0xcc,0x00,0x11,0x72,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_swmmac_bf16_16x16x64_bf16 v[24:27], v[0:7], v[8:23], v28 neg_lo:[1,0,0] neg_hi:[1,0,0]
+// GFX1250: v_swmmac_bf16_16x16x64_bf16 v[24:27], v[0:7], v[8:23], v28 neg_lo:[1,0,0] neg_hi:[1,0,0] ; encoding: [0x18,0x01,0x68,0xcc,0x00,0x11,0x72,0x3c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_swmmac_bf16_16x16x64_bf16 v[24:27], v[0:7], v[8:23], v28 neg_lo:[0,1,0] neg_hi:[0,1,0]
+// GFX1250: v_swmmac_bf16_16x16x64_bf16 v[24:27], v[0:7], v[8:23], v28 neg_lo:[0,1,0] neg_hi:[0,1,0] ; encoding: [0x18,0x02,0x68,0xcc,0x00,0x11,0x72,0x5c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_swmmac_f32_16x16x64_bf16 v[24:31], v[0:7], v[8:23], v32 matrix_a_reuse
+// GFX1250: v_swmmac_f32_16x16x64_bf16 v[24:31], v[0:7], v[8:23], v32 matrix_a_reuse ; encoding: [0x18,0x20,0x66,0xcc,0x00,0x11,0x82,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_swmmac_f32_16x16x64_bf16 v[24:31], v[0:7], v[8:23], v32 matrix_b_reuse
+// GFX1250: v_swmmac_f32_16x16x64_bf16 v[24:31], v[0:7], v[8:23], v32 matrix_b_reuse ; encoding: [0x18,0x40,0x66,0xcc,0x00,0x11,0x82,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_swmmac_bf16f32_16x16x64_bf16 v[24:31], v[0:7], v[8:23], v32
+// GFX1250: v_swmmac_bf16f32_16x16x64_bf16 v[24:31], v[0:7], v[8:23], v32 ; encoding: [0x18,0x00,0x69,0xcc,0x00,0x11,0x82,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_swmmac_bf16f32_16x16x64_bf16 v[24:31], v[0:7], v[8:23], v32 index_key:1
+// GFX1250: v_swmmac_bf16f32_16x16x64_bf16 v[24:31], v[0:7], v[8:23], v32 index_key:1 ; encoding: [0x18,0x08,0x69,0xcc,0x00,0x11,0x82,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_swmmac_bf16f32_16x16x64_bf16 v[24:31], v[0:7], v[8:23], v32 neg_lo:[1,0,0] neg_hi:[1,0,0]
+// GFX1250: v_swmmac_bf16f32_16x16x64_bf16 v[24:31], v[0:7], v[8:23], v32 neg_lo:[1,0,0] neg_hi:[1,0,0] ; encoding: [0x18,0x01,0x69,0xcc,0x00,0x11,0x82,0x3c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_swmmac_bf16f32_16x16x64_bf16 v[24:31], v[0:7], v[8:23], v32 neg_lo:[0,1,0] neg_hi:[0,1,0]
+// GFX1250: v_swmmac_bf16f32_16x16x64_bf16 v[24:31], v[0:7], v[8:23], v32 neg_lo:[0,1,0] neg_hi:[0,1,0] ; encoding: [0x18,0x02,0x69,0xcc,0x00,0x11,0x82,0x5c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_swmmac_bf16f32_16x16x64_bf16 v[24:31], v[0:7], v[8:23], v32 matrix_a_reuse
+// GFX1250: v_swmmac_bf16f32_16x16x64_bf16 v[24:31], v[0:7], v[8:23], v32 matrix_a_reuse ; encoding: [0x18,0x20,0x69,0xcc,0x00,0x11,0x82,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_swmmac_bf16f32_16x16x64_bf16 v[24:31], v[0:7], v[8:23], v32 matrix_b_reuse
+// GFX1250: v_swmmac_bf16f32_16x16x64_bf16 v[24:31], v[0:7], v[8:23], v32 matrix_b_reuse ; encoding: [0x18,0x40,0x69,0xcc,0x00,0x11,0x82,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_swmmac_f32_16x16x128_fp8_fp8 v[24:31], v[0:7], v[8:23], v[32:33]
+// GFX1250: v_swmmac_f32_16x16x128_fp8_fp8 v[24:31], v[0:7], v[8:23], v[32:33] ; encoding: [0x18,0x00,0x73,0xcc,0x00,0x11,0x82,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_swmmac_f32_16x16x128_fp8_fp8 v[24:31], v[0:7], v[8:23], v[32:33] index_key:1
+// GFX1250: v_swmmac_f32_16x16x128_fp8_fp8 v[24:31], v[0:7], v[8:23], v[32:33] index_key:1 ; encoding: [0x18,0x08,0x73,0xcc,0x00,0x11,0x82,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_swmmac_f32_16x16x128_fp8_fp8 v[24:31], v[0:7], v[8:23], v[32:33] matrix_a_reuse
+// GFX1250: v_swmmac_f32_16x16x128_fp8_fp8 v[24:31], v[0:7], v[8:23], v[32:33] matrix_a_reuse ; encoding: [0x18,0x20,0x73,0xcc,0x00,0x11,0x82,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_swmmac_f32_16x16x128_fp8_fp8 v[24:31], v[0:7], v[8:23], v[32:33] matrix_b_reuse
+// GFX1250: v_swmmac_f32_16x16x128_fp8_fp8 v[24:31], v[0:7], v[8:23], v[32:33] matrix_b_reuse ; encoding: [0x18,0x40,0x73,0xcc,0x00,0x11,0x82,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_swmmac_f32_16x16x128_fp8_bf8 v[24:31], v[0:7], v[8:23], v[32:33]
+// GFX1250: v_swmmac_f32_16x16x128_fp8_bf8 v[24:31], v[0:7], v[8:23], v[32:33] ; encoding: [0x18,0x00,0x74,0xcc,0x00,0x11,0x82,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_swmmac_f32_16x16x128_fp8_bf8 v[24:31], v[0:7], v[8:23], v[32:33] index_key:1
+// GFX1250: v_swmmac_f32_16x16x128_fp8_bf8 v[24:31], v[0:7], v[8:23], v[32:33] index_key:1 ; encoding: [0x18,0x08,0x74,0xcc,0x00,0x11,0x82,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_swmmac_f32_16x16x128_fp8_bf8 v[24:31], v[0:7], v[8:23], v[32:33] matrix_a_reuse
+// GFX1250: v_swmmac_f32_16x16x128_fp8_bf8 v[24:31], v[0:7], v[8:23], v[32:33] matrix_a_reuse ; encoding: [0x18,0x20,0x74,0xcc,0x00,0x11,0x82,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_swmmac_f32_16x16x128_fp8_bf8 v[24:31], v[0:7], v[8:23], v[32:33] matrix_b_reuse
+// GFX1250: v_swmmac_f32_16x16x128_fp8_bf8 v[24:31], v[0:7], v[8:23], v[32:33] matrix_b_reuse ; encoding: [0x18,0x40,0x74,0xcc,0x00,0x11,0x82,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_swmmac_f32_16x16x128_bf8_fp8 v[24:31], v[0:7], v[8:23], v[32:33]
+// GFX1250: v_swmmac_f32_16x16x128_bf8_fp8 v[24:31], v[0:7], v[8:23], v[32:33] ; encoding: [0x18,0x00,0x75,0xcc,0x00,0x11,0x82,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_swmmac_f32_16x16x128_bf8_fp8 v[24:31], v[0:7], v[8:23], v[32:33] index_key:1
+// GFX1250: v_swmmac_f32_16x16x128_bf8_fp8 v[24:31], v[0:7], v[8:23], v[32:33] index_key:1 ; encoding: [0x18,0x08,0x75,0xcc,0x00,0x11,0x82,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_swmmac_f32_16x16x128_bf8_fp8 v[24:31], v[0:7], v[8:23], v[32:33] matrix_a_reuse
+// GFX1250: v_swmmac_f32_16x16x128_bf8_fp8 v[24:31], v[0:7], v[8:23], v[32:33] matrix_a_reuse ; encoding: [0x18,0x20,0x75,0xcc,0x00,0x11,0x82,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_swmmac_f32_16x16x128_bf8_fp8 v[24:31], v[0:7], v[8:23], v[32:33] matrix_b_reuse
+// GFX1250: v_swmmac_f32_16x16x128_bf8_fp8 v[24:31], v[0:7], v[8:23], v[32:33] matrix_b_reuse ; encoding: [0x18,0x40,0x75,0xcc,0x00,0x11,0x82,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_swmmac_f32_16x16x128_bf8_bf8 v[24:31], v[0:7], v[8:23], v[32:33]
+// GFX1250: v_swmmac_f32_16x16x128_bf8_bf8 v[24:31], v[0:7], v[8:23], v[32:33] ; encoding: [0x18,0x00,0x76,0xcc,0x00,0x11,0x82,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_swmmac_f32_16x16x128_bf8_bf8 v[24:31], v[0:7], v[8:23], v[32:33] index_key:1
+// GFX1250: v_swmmac_f32_16x16x128_bf8_bf8 v[24:31], v[0:7], v[8:23], v[32:33] index_key:1 ; encoding: [0x18,0x08,0x76,0xcc,0x00,0x11,0x82,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_swmmac_f32_16x16x128_bf8_bf8 v[24:31], v[0:7], v[8:23], v[32:33] matrix_a_reuse
+// GFX1250: v_swmmac_f32_16x16x128_bf8_bf8 v[24:31], v[0:7], v[8:23], v[32:33] matrix_a_reuse ; encoding: [0x18,0x20,0x76,0xcc,0x00,0x11,0x82,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_swmmac_f32_16x16x128_bf8_bf8 v[24:31], v[0:7], v[8:23], v[32:33] matrix_b_reuse
+// GFX1250: v_swmmac_f32_16x16x128_bf8_bf8 v[24:31], v[0:7], v[8:23], v[32:33] matrix_b_reuse ; encoding: [0x18,0x40,0x76,0xcc,0x00,0x11,0x82,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_swmmac_f16_16x16x128_fp8_fp8 v[24:27], v[0:7], v[8:23], v[28:29]
+// GFX1250: v_swmmac_f16_16x16x128_fp8_fp8 v[24:27], v[0:7], v[8:23], v[28:29] ; encoding: [0x18,0x00,0x77,0xcc,0x00,0x11,0x72,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_swmmac_f16_16x16x128_fp8_fp8 v[24:27], v[0:7], v[8:23], v[28:29] index_key:1
+// GFX1250: v_swmmac_f16_16x16x128_fp8_fp8 v[24:27], v[0:7], v[8:23], v[28:29] index_key:1 ; encoding: [0x18,0x08,0x77,0xcc,0x00,0x11,0x72,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_swmmac_f16_16x16x128_fp8_fp8 v[24:27], v[0:7], v[8:23], v[28:29] matrix_a_reuse
+// GFX1250: v_swmmac_f16_16x16x128_fp8_fp8 v[24:27], v[0:7], v[8:23], v[28:29] matrix_a_reuse ; encoding: [0x18,0x20,0x77,0xcc,0x00,0x11,0x72,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_swmmac_f16_16x16x128_fp8_fp8 v[24:27], v[0:7], v[8:23], v[28:29] matrix_b_reuse
+// GFX1250: v_swmmac_f16_16x16x128_fp8_fp8 v[24:27], v[0:7], v[8:23], v[28:29] matrix_b_reuse ; encoding: [0x18,0x40,0x77,0xcc,0x00,0x11,0x72,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_swmmac_f16_16x16x128_fp8_bf8 v[24:27], v[0:7], v[8:23], v[28:29]
+// GFX1250: v_swmmac_f16_16x16x128_fp8_bf8 v[24:27], v[0:7], v[8:23], v[28:29] ; encoding: [0x18,0x00,0x78,0xcc,0x00,0x11,0x72,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_swmmac_f16_16x16x128_fp8_bf8 v[24:27], v[0:7], v[8:23], v[28:29] index_key:1
+// GFX1250: v_swmmac_f16_16x16x128_fp8_bf8 v[24:27], v[0:7], v[8:23], v[28:29] index_key:1 ; encoding: [0x18,0x08,0x78,0xcc,0x00,0x11,0x72,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_swmmac_f16_16x16x128_fp8_bf8 v[24:27], v[0:7], v[8:23], v[28:29] matrix_a_reuse
+// GFX1250: v_swmmac_f16_16x16x128_fp8_bf8 v[24:27], v[0:7], v[8:23], v[28:29] matrix_a_reuse ; encoding: [0x18,0x20,0x78,0xcc,0x00,0x11,0x72,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_swmmac_f16_16x16x128_fp8_bf8 v[24:27], v[0:7], v[8:23], v[28:29] matrix_b_reuse
+// GFX1250: v_swmmac_f16_16x16x128_fp8_bf8 v[24:27], v[0:7], v[8:23], v[28:29] matrix_b_reuse ; encoding: [0x18,0x40,0x78,0xcc,0x00,0x11,0x72,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_swmmac_f16_16x16x128_bf8_fp8 v[24:27], v[0:7], v[8:23], v[28:29]
+// GFX1250: v_swmmac_f16_16x16x128_bf8_fp8 v[24:27], v[0:7], v[8:23], v[28:29] ; encoding: [0x18,0x00,0x79,0xcc,0x00,0x11,0x72,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_swmmac_f16_16x16x128_bf8_fp8 v[24:27], v[0:7], v[8:23], v[28:29] index_key:1
+// GFX1250: v_swmmac_f16_16x16x128_bf8_fp8 v[24:27], v[0:7], v[8:23], v[28:29] index_key:1 ; encoding: [0x18,0x08,0x79,0xcc,0x00,0x11,0x72,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_swmmac_f16_16x16x128_bf8_fp8 v[24:27], v[0:7], v[8:23], v[28:29] matrix_a_reuse
+// GFX1250: v_swmmac_f16_16x16x128_bf8_fp8 v[24:27], v[0:7], v[8:23], v[28:29] matrix_a_reuse ; encoding: [0x18,0x20,0x79,0xcc,0x00,0x11,0x72,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_swmmac_f16_16x16x128_bf8_fp8 v[24:27], v[0:7], v[8:23], v[28:29] matrix_b_reuse
+// GFX1250: v_swmmac_f16_16x16x128_bf8_fp8 v[24:27], v[0:7], v[8:23], v[28:29] matrix_b_reuse ; encoding: [0x18,0x40,0x79,0xcc,0x00,0x11,0x72,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_swmmac_f16_16x16x128_bf8_bf8 v[24:27], v[0:7], v[8:23], v[28:29]
+// GFX1250: v_swmmac_f16_16x16x128_bf8_bf8 v[24:27], v[0:7], v[8:23], v[28:29] ; encoding: [0x18,0x00,0x7a,0xcc,0x00,0x11,0x72,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_swmmac_f16_16x16x128_bf8_bf8 v[24:27], v[0:7], v[8:23], v[28:29] index_key:1
+// GFX1250: v_swmmac_f16_16x16x128_bf8_bf8 v[24:27], v[0:7], v[8:23], v[28:29] index_key:1 ; encoding: [0x18,0x08,0x7a,0xcc,0x00,0x11,0x72,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_swmmac_f16_16x16x128_bf8_bf8 v[24:27], v[0:7], v[8:23], v[28:29] matrix_a_reuse
+// GFX1250: v_swmmac_f16_16x16x128_bf8_bf8 v[24:27], v[0:7], v[8:23], v[28:29] matrix_a_reuse ; encoding: [0x18,0x20,0x7a,0xcc,0x00,0x11,0x72,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_swmmac_f16_16x16x128_bf8_bf8 v[24:27], v[0:7], v[8:23], v[28:29] matrix_b_reuse
+// GFX1250: v_swmmac_f16_16x16x128_bf8_bf8 v[24:27], v[0:7], v[8:23], v[28:29] matrix_b_reuse ; encoding: [0x18,0x40,0x7a,0xcc,0x00,0x11,0x72,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_swmmac_i32_16x16x128_iu8 v[24:31], v[0:7], v[8:23], v[32:33]
+// GFX1250: v_swmmac_i32_16x16x128_iu8 v[24:31], v[0:7], v[8:23], v[32:33] ; encoding: [0x18,0x00,0x7b,0xcc,0x00,0x11,0x82,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_swmmac_i32_16x16x128_iu8 v[24:31], v[0:7], v[8:23], v[32:33] index_key:1
+// GFX1250: v_swmmac_i32_16x16x128_iu8 v[24:31], v[0:7], v[8:23], v[32:33] index_key:1 ; encoding: [0x18,0x08,0x7b,0xcc,0x00,0x11,0x82,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_swmmac_i32_16x16x128_iu8 v[24:31], v[0:7], v[8:23], v[32:33] neg_lo:[1,0,0]
+// GFX1250: v_swmmac_i32_16x16x128_iu8 v[24:31], v[0:7], v[8:23], v[32:33] neg_lo:[1,0,0] ; encoding: [0x18,0x00,0x7b,0xcc,0x00,0x11,0x82,0x3c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_swmmac_i32_16x16x128_iu8 v[24:31], v[0:7], v[8:23], v[32:33] neg_lo:[0,1,0]
+// GFX1250: v_swmmac_i32_16x16x128_iu8 v[24:31], v[0:7], v[8:23], v[32:33] neg_lo:[0,1,0] ; encoding: [0x18,0x00,0x7b,0xcc,0x00,0x11,0x82,0x5c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_swmmac_i32_16x16x128_iu8 v[24:31], v[0:7], v[8:23], v[32:33] matrix_a_reuse
+// GFX1250: v_swmmac_i32_16x16x128_iu8 v[24:31], v[0:7], v[8:23], v[32:33] matrix_a_reuse ; encoding: [0x18,0x20,0x7b,0xcc,0x00,0x11,0x82,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_swmmac_i32_16x16x128_iu8 v[24:31], v[0:7], v[8:23], v[32:33] matrix_b_reuse
+// GFX1250: v_swmmac_i32_16x16x128_iu8 v[24:31], v[0:7], v[8:23], v[32:33] matrix_b_reuse ; encoding: [0x18,0x40,0x7b,0xcc,0x00,0x11,0x82,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_swmmac_f32_16x16x64_f16 v[24:31], v[0:7], v[8:23], v32
+// GFX1250: v_swmmac_f32_16x16x64_f16 v[24:31], v[0:7], v[8:23], v32 ; encoding: [0x18,0x00,0x65,0xcc,0x00,0x11,0x82,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_swmmac_f32_16x16x64_f16 v[24:31], v[0:7], v[8:23], v32 index_key:1
+// GFX1250: v_swmmac_f32_16x16x64_f16 v[24:31], v[0:7], v[8:23], v32 index_key:1 ; encoding: [0x18,0x08,0x65,0xcc,0x00,0x11,0x82,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_swmmac_f32_16x16x64_f16 v[24:31], v[0:7], v[8:23], v32 neg_lo:[1,0,0] neg_hi:[1,0,0]
+// GFX1250: v_swmmac_f32_16x16x64_f16 v[24:31], v[0:7], v[8:23], v32 neg_lo:[1,0,0] neg_hi:[1,0,0] ; encoding: [0x18,0x01,0x65,0xcc,0x00,0x11,0x82,0x3c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_swmmac_f32_16x16x64_f16 v[24:31], v[0:7], v[8:23], v32 neg_lo:[0,1,0] neg_hi:[0,1,0]
+// GFX1250: v_swmmac_f32_16x16x64_f16 v[24:31], v[0:7], v[8:23], v32 neg_lo:[0,1,0] neg_hi:[0,1,0] ; encoding: [0x18,0x02,0x65,0xcc,0x00,0x11,0x82,0x5c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_swmmac_f32_16x16x64_f16 v[24:31], v[0:7], v[8:23], v32 matrix_a_reuse
+// GFX1250: v_swmmac_f32_16x16x64_f16 v[24:31], v[0:7], v[8:23], v32 matrix_a_reuse ; encoding: [0x18,0x20,0x65,0xcc,0x00,0x11,0x82,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_swmmac_f32_16x16x64_f16 v[24:31], v[0:7], v[8:23], v32 matrix_b_reuse
+// GFX1250: v_swmmac_f32_16x16x64_f16 v[24:31], v[0:7], v[8:23], v32 matrix_b_reuse ; encoding: [0x18,0x40,0x65,0xcc,0x00,0x11,0x82,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_swmmac_f16_16x16x64_f16 v[24:27], v[0:7], v[8:23], v28
+// GFX1250: v_swmmac_f16_16x16x64_f16 v[24:27], v[0:7], v[8:23], v28 ; encoding: [0x18,0x00,0x67,0xcc,0x00,0x11,0x72,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_swmmac_f16_16x16x64_f16 v[24:27], v[0:7], v[8:23], v28 index_key:1
+// GFX1250: v_swmmac_f16_16x16x64_f16 v[24:27], v[0:7], v[8:23], v28 index_key:1 ; encoding: [0x18,0x08,0x67,0xcc,0x00,0x11,0x72,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_swmmac_f16_16x16x64_f16 v[24:27], v[0:7], v[8:23], v28 neg_lo:[1,0,0] neg_hi:[1,0,0]
+// GFX1250: v_swmmac_f16_16x16x64_f16 v[24:27], v[0:7], v[8:23], v28 neg_lo:[1,0,0] neg_hi:[1,0,0] ; encoding: [0x18,0x01,0x67,0xcc,0x00,0x11,0x72,0x3c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_swmmac_f16_16x16x64_f16 v[24:27], v[0:7], v[8:23], v28 neg_lo:[0,1,0] neg_hi:[0,1,0]
+// GFX1250: v_swmmac_f16_16x16x64_f16 v[24:27], v[0:7], v[8:23], v28 neg_lo:[0,1,0] neg_hi:[0,1,0] ; encoding: [0x18,0x02,0x67,0xcc,0x00,0x11,0x72,0x5c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_swmmac_f16_16x16x64_f16 v[24:27], v[0:7], v[8:23], v28 matrix_a_reuse
+// GFX1250: v_swmmac_f16_16x16x64_f16 v[24:27], v[0:7], v[8:23], v28 matrix_a_reuse ; encoding: [0x18,0x20,0x67,0xcc,0x00,0x11,0x72,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_swmmac_f16_16x16x64_f16 v[24:27], v[0:7], v[8:23], v28 matrix_b_reuse
+// GFX1250: v_swmmac_f16_16x16x64_f16 v[24:27], v[0:7], v[8:23], v28 matrix_b_reuse ; encoding: [0x18,0x40,0x67,0xcc,0x00,0x11,0x72,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f16_16x16x128_fp8_fp8 v[16:19], v[0:15], v[8:23], v[16:19]
+// GFX1250: v_wmma_f16_16x16x128_fp8_fp8 v[16:19], v[0:15], v[8:23], v[16:19] ; encoding: [0x10,0x00,0x84,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f16_16x16x128_fp8_fp8 v[16:19], v[0:15], v[8:23], 1.0
+// GFX1250: v_wmma_f16_16x16x128_fp8_fp8 v[16:19], v[0:15], v[8:23], 1.0 ; encoding: [0x10,0x00,0x84,0xcc,0x00,0x11,0xca,0x1b]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f16_16x16x128_fp8_fp8 v[16:19], v[0:15], v[8:23], 1.0 neg_lo:[0,0,1]
+// GFX1250: v_wmma_f16_16x16x128_fp8_fp8 v[16:19], v[0:15], v[8:23], 1.0 neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x84,0xcc,0x00,0x11,0xca,0x9b]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f16_16x16x128_fp8_fp8 v[16:19], v[0:15], v[8:23], v[16:19] neg_lo:[0,0,1]
+// GFX1250: v_wmma_f16_16x16x128_fp8_fp8 v[16:19], v[0:15], v[8:23], v[16:19] neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x84,0xcc,0x00,0x11,0x42,0x9c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f16_16x16x128_fp8_fp8 v[16:19], v[0:15], v[8:23], v[16:19] neg_hi:[0,0,1]
+// GFX1250: v_wmma_f16_16x16x128_fp8_fp8 v[16:19], v[0:15], v[8:23], v[16:19] neg_hi:[0,0,1] ; encoding: [0x10,0x04,0x84,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f16_16x16x128_fp8_fp8 v[16:19], v[0:15], v[8:23], v[16:19] matrix_a_reuse
+// GFX1250: v_wmma_f16_16x16x128_fp8_fp8 v[16:19], v[0:15], v[8:23], v[16:19] matrix_a_reuse ; encoding: [0x10,0x20,0x84,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f16_16x16x128_fp8_fp8 v[16:19], v[0:15], v[8:23], v[16:19] matrix_b_reuse
+// GFX1250: v_wmma_f16_16x16x128_fp8_fp8 v[16:19], v[0:15], v[8:23], v[16:19] matrix_b_reuse ; encoding: [0x10,0x40,0x84,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f16_16x16x128_fp8_bf8 v[16:19], v[0:15], v[8:23], v[16:19]
+// GFX1250: v_wmma_f16_16x16x128_fp8_bf8 v[16:19], v[0:15], v[8:23], v[16:19] ; encoding: [0x10,0x00,0x85,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f16_16x16x128_fp8_bf8 v[16:19], v[0:15], v[8:23], 1.0
+// GFX1250: v_wmma_f16_16x16x128_fp8_bf8 v[16:19], v[0:15], v[8:23], 1.0 ; encoding: [0x10,0x00,0x85,0xcc,0x00,0x11,0xca,0x1b]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f16_16x16x128_fp8_bf8 v[16:19], v[0:15], v[8:23], 1.0 neg_lo:[0,0,1]
+// GFX1250: v_wmma_f16_16x16x128_fp8_bf8 v[16:19], v[0:15], v[8:23], 1.0 neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x85,0xcc,0x00,0x11,0xca,0x9b]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f16_16x16x128_fp8_bf8 v[16:19], v[0:15], v[8:23], v[16:19] neg_lo:[0,0,1]
+// GFX1250: v_wmma_f16_16x16x128_fp8_bf8 v[16:19], v[0:15], v[8:23], v[16:19] neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x85,0xcc,0x00,0x11,0x42,0x9c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f16_16x16x128_fp8_bf8 v[16:19], v[0:15], v[8:23], v[16:19] neg_hi:[0,0,1]
+// GFX1250: v_wmma_f16_16x16x128_fp8_bf8 v[16:19], v[0:15], v[8:23], v[16:19] neg_hi:[0,0,1] ; encoding: [0x10,0x04,0x85,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f16_16x16x128_fp8_bf8 v[16:19], v[0:15], v[8:23], v[16:19] matrix_a_reuse
+// GFX1250: v_wmma_f16_16x16x128_fp8_bf8 v[16:19], v[0:15], v[8:23], v[16:19] matrix_a_reuse ; encoding: [0x10,0x20,0x85,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f16_16x16x128_fp8_bf8 v[16:19], v[0:15], v[8:23], v[16:19] matrix_b_reuse
+// GFX1250: v_wmma_f16_16x16x128_fp8_bf8 v[16:19], v[0:15], v[8:23], v[16:19] matrix_b_reuse ; encoding: [0x10,0x40,0x85,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f16_16x16x128_bf8_fp8 v[16:19], v[0:15], v[8:23], v[16:19]
+// GFX1250: v_wmma_f16_16x16x128_bf8_fp8 v[16:19], v[0:15], v[8:23], v[16:19] ; encoding: [0x10,0x00,0x86,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f16_16x16x128_bf8_fp8 v[16:19], v[0:15], v[8:23], 1.0
+// GFX1250: v_wmma_f16_16x16x128_bf8_fp8 v[16:19], v[0:15], v[8:23], 1.0 ; encoding: [0x10,0x00,0x86,0xcc,0x00,0x11,0xca,0x1b]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f16_16x16x128_bf8_fp8 v[16:19], v[0:15], v[8:23], 1.0 neg_lo:[0,0,1]
+// GFX1250: v_wmma_f16_16x16x128_bf8_fp8 v[16:19], v[0:15], v[8:23], 1.0 neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x86,0xcc,0x00,0x11,0xca,0x9b]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f16_16x16x128_bf8_fp8 v[16:19], v[0:15], v[8:23], v[16:19] neg_lo:[0,0,1]
+// GFX1250: v_wmma_f16_16x16x128_bf8_fp8 v[16:19], v[0:15], v[8:23], v[16:19] neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x86,0xcc,0x00,0x11,0x42,0x9c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f16_16x16x128_bf8_fp8 v[16:19], v[0:15], v[8:23], v[16:19] neg_hi:[0,0,1]
+// GFX1250: v_wmma_f16_16x16x128_bf8_fp8 v[16:19], v[0:15], v[8:23], v[16:19] neg_hi:[0,0,1] ; encoding: [0x10,0x04,0x86,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f16_16x16x128_bf8_fp8 v[16:19], v[0:15], v[8:23], v[16:19] matrix_a_reuse
+// GFX1250: v_wmma_f16_16x16x128_bf8_fp8 v[16:19], v[0:15], v[8:23], v[16:19] matrix_a_reuse ; encoding: [0x10,0x20,0x86,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f16_16x16x128_bf8_fp8 v[16:19], v[0:15], v[8:23], v[16:19] matrix_b_reuse
+// GFX1250: v_wmma_f16_16x16x128_bf8_fp8 v[16:19], v[0:15], v[8:23], v[16:19] matrix_b_reuse ; encoding: [0x10,0x40,0x86,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f16_16x16x128_bf8_bf8 v[16:19], v[0:15], v[8:23], v[16:19]
+// GFX1250: v_wmma_f16_16x16x128_bf8_bf8 v[16:19], v[0:15], v[8:23], v[16:19] ; encoding: [0x10,0x00,0x87,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f16_16x16x128_bf8_bf8 v[16:19], v[0:15], v[8:23], 1.0
+// GFX1250: v_wmma_f16_16x16x128_bf8_bf8 v[16:19], v[0:15], v[8:23], 1.0 ; encoding: [0x10,0x00,0x87,0xcc,0x00,0x11,0xca,0x1b]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f16_16x16x128_bf8_bf8 v[16:19], v[0:15], v[8:23], 1.0 neg_lo:[0,0,1]
+// GFX1250: v_wmma_f16_16x16x128_bf8_bf8 v[16:19], v[0:15], v[8:23], 1.0 neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x87,0xcc,0x00,0x11,0xca,0x9b]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f16_16x16x128_bf8_bf8 v[16:19], v[0:15], v[8:23], v[16:19] neg_lo:[0,0,1]
+// GFX1250: v_wmma_f16_16x16x128_bf8_bf8 v[16:19], v[0:15], v[8:23], v[16:19] neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x87,0xcc,0x00,0x11,0x42,0x9c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f16_16x16x128_bf8_bf8 v[16:19], v[0:15], v[8:23], v[16:19] neg_hi:[0,0,1]
+// GFX1250: v_wmma_f16_16x16x128_bf8_bf8 v[16:19], v[0:15], v[8:23], v[16:19] neg_hi:[0,0,1] ; encoding: [0x10,0x04,0x87,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f16_16x16x128_bf8_bf8 v[16:19], v[0:15], v[8:23], v[16:19] matrix_a_reuse
+// GFX1250: v_wmma_f16_16x16x128_bf8_bf8 v[16:19], v[0:15], v[8:23], v[16:19] matrix_a_reuse ; encoding: [0x10,0x20,0x87,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f16_16x16x128_bf8_bf8 v[16:19], v[0:15], v[8:23], v[16:19] matrix_b_reuse
+// GFX1250: v_wmma_f16_16x16x128_bf8_bf8 v[16:19], v[0:15], v[8:23], v[16:19] matrix_b_reuse ; encoding: [0x10,0x40,0x87,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x128_fp8_fp8 v[16:23], v[0:15], v[8:23], v[16:23]
+// GFX1250: v_wmma_f32_16x16x128_fp8_fp8 v[16:23], v[0:15], v[8:23], v[16:23] ; encoding: [0x10,0x00,0x80,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x128_fp8_fp8 v[16:23], v[0:15], v[8:23], 1.0
+// GFX1250: v_wmma_f32_16x16x128_fp8_fp8 v[16:23], v[0:15], v[8:23], 1.0 ; encoding: [0x10,0x00,0x80,0xcc,0x00,0x11,0xca,0x1b]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x128_fp8_fp8 v[16:23], v[0:15], v[8:23], 1.0 neg_lo:[0,0,1]
+// GFX1250: v_wmma_f32_16x16x128_fp8_fp8 v[16:23], v[0:15], v[8:23], 1.0 neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x80,0xcc,0x00,0x11,0xca,0x9b]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x128_fp8_fp8 v[16:23], v[0:15], v[8:23], v[16:23] neg_lo:[0,0,1]
+// GFX1250: v_wmma_f32_16x16x128_fp8_fp8 v[16:23], v[0:15], v[8:23], v[16:23] neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x80,0xcc,0x00,0x11,0x42,0x9c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x128_fp8_fp8 v[16:23], v[0:15], v[8:23], v[16:23] neg_hi:[0,0,1]
+// GFX1250: v_wmma_f32_16x16x128_fp8_fp8 v[16:23], v[0:15], v[8:23], v[16:23] neg_hi:[0,0,1] ; encoding: [0x10,0x04,0x80,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x128_fp8_fp8 v[16:23], v[0:15], v[8:23], v[16:23] matrix_a_reuse
+// GFX1250: v_wmma_f32_16x16x128_fp8_fp8 v[16:23], v[0:15], v[8:23], v[16:23] matrix_a_reuse ; encoding: [0x10,0x20,0x80,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x128_fp8_fp8 v[16:23], v[0:15], v[8:23], v[16:23] matrix_b_reuse
+// GFX1250: v_wmma_f32_16x16x128_fp8_fp8 v[16:23], v[0:15], v[8:23], v[16:23] matrix_b_reuse ; encoding: [0x10,0x40,0x80,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x128_fp8_bf8 v[16:23], v[0:15], v[8:23], v[16:23]
+// GFX1250: v_wmma_f32_16x16x128_fp8_bf8 v[16:23], v[0:15], v[8:23], v[16:23] ; encoding: [0x10,0x00,0x81,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x128_fp8_bf8 v[16:23], v[0:15], v[8:23], 1.0
+// GFX1250: v_wmma_f32_16x16x128_fp8_bf8 v[16:23], v[0:15], v[8:23], 1.0 ; encoding: [0x10,0x00,0x81,0xcc,0x00,0x11,0xca,0x1b]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x128_fp8_bf8 v[16:23], v[0:15], v[8:23], 1.0 neg_lo:[0,0,1]
+// GFX1250: v_wmma_f32_16x16x128_fp8_bf8 v[16:23], v[0:15], v[8:23], 1.0 neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x81,0xcc,0x00,0x11,0xca,0x9b]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x128_fp8_bf8 v[16:23], v[0:15], v[8:23], v[16:23] neg_lo:[0,0,1]
+// GFX1250: v_wmma_f32_16x16x128_fp8_bf8 v[16:23], v[0:15], v[8:23], v[16:23] neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x81,0xcc,0x00,0x11,0x42,0x9c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x128_fp8_bf8 v[16:23], v[0:15], v[8:23], v[16:23] neg_hi:[0,0,1]
+// GFX1250: v_wmma_f32_16x16x128_fp8_bf8 v[16:23], v[0:15], v[8:23], v[16:23] neg_hi:[0,0,1] ; encoding: [0x10,0x04,0x81,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x128_fp8_bf8 v[16:23], v[0:15], v[8:23], v[16:23] matrix_a_reuse
+// GFX1250: v_wmma_f32_16x16x128_fp8_bf8 v[16:23], v[0:15], v[8:23], v[16:23] matrix_a_reuse ; encoding: [0x10,0x20,0x81,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x128_fp8_bf8 v[16:23], v[0:15], v[8:23], v[16:23] matrix_b_reuse
+// GFX1250: v_wmma_f32_16x16x128_fp8_bf8 v[16:23], v[0:15], v[8:23], v[16:23] matrix_b_reuse ; encoding: [0x10,0x40,0x81,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x128_bf8_fp8 v[16:23], v[0:15], v[8:23], v[16:23]
+// GFX1250: v_wmma_f32_16x16x128_bf8_fp8 v[16:23], v[0:15], v[8:23], v[16:23] ; encoding: [0x10,0x00,0x82,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x128_bf8_fp8 v[16:23], v[0:15], v[8:23], 1.0
+// GFX1250: v_wmma_f32_16x16x128_bf8_fp8 v[16:23], v[0:15], v[8:23], 1.0 ; encoding: [0x10,0x00,0x82,0xcc,0x00,0x11,0xca,0x1b]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x128_bf8_fp8 v[16:23], v[0:15], v[8:23], 1.0 neg_lo:[0,0,1]
+// GFX1250: v_wmma_f32_16x16x128_bf8_fp8 v[16:23], v[0:15], v[8:23], 1.0 neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x82,0xcc,0x00,0x11,0xca,0x9b]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x128_bf8_fp8 v[16:23], v[0:15], v[8:23], v[16:23] neg_lo:[0,0,1]
+// GFX1250: v_wmma_f32_16x16x128_bf8_fp8 v[16:23], v[0:15], v[8:23], v[16:23] neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x82,0xcc,0x00,0x11,0x42,0x9c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x128_bf8_fp8 v[16:23], v[0:15], v[8:23], v[16:23] neg_hi:[0,0,1]
+// GFX1250: v_wmma_f32_16x16x128_bf8_fp8 v[16:23], v[0:15], v[8:23], v[16:23] neg_hi:[0,0,1] ; encoding: [0x10,0x04,0x82,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x128_bf8_fp8 v[16:23], v[0:15], v[8:23], v[16:23] matrix_a_reuse
+// GFX1250: v_wmma_f32_16x16x128_bf8_fp8 v[16:23], v[0:15], v[8:23], v[16:23] matrix_a_reuse ; encoding: [0x10,0x20,0x82,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x128_bf8_fp8 v[16:23], v[0:15], v[8:23], v[16:23] matrix_b_reuse
+// GFX1250: v_wmma_f32_16x16x128_bf8_fp8 v[16:23], v[0:15], v[8:23], v[16:23] matrix_b_reuse ; encoding: [0x10,0x40,0x82,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x128_bf8_bf8 v[16:23], v[0:15], v[8:23], v[16:23]
+// GFX1250: v_wmma_f32_16x16x128_bf8_bf8 v[16:23], v[0:15], v[8:23], v[16:23] ; encoding: [0x10,0x00,0x83,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x128_bf8_bf8 v[16:23], v[0:15], v[8:23], 1.0
+// GFX1250: v_wmma_f32_16x16x128_bf8_bf8 v[16:23], v[0:15], v[8:23], 1.0 ; encoding: [0x10,0x00,0x83,0xcc,0x00,0x11,0xca,0x1b]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x128_bf8_bf8 v[16:23], v[0:15], v[8:23], 1.0 neg_lo:[0,0,1]
+// GFX1250: v_wmma_f32_16x16x128_bf8_bf8 v[16:23], v[0:15], v[8:23], 1.0 neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x83,0xcc,0x00,0x11,0xca,0x9b]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x128_bf8_bf8 v[16:23], v[0:15], v[8:23], v[16:23] neg_lo:[0,0,1]
+// GFX1250: v_wmma_f32_16x16x128_bf8_bf8 v[16:23], v[0:15], v[8:23], v[16:23] neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x83,0xcc,0x00,0x11,0x42,0x9c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x128_bf8_bf8 v[16:23], v[0:15], v[8:23], v[16:23] neg_hi:[0,0,1]
+// GFX1250: v_wmma_f32_16x16x128_bf8_bf8 v[16:23], v[0:15], v[8:23], v[16:23] neg_hi:[0,0,1] ; encoding: [0x10,0x04,0x83,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x128_bf8_bf8 v[16:23], v[0:15], v[8:23], v[16:23] matrix_a_reuse
+// GFX1250: v_wmma_f32_16x16x128_bf8_bf8 v[16:23], v[0:15], v[8:23], v[16:23] matrix_a_reuse ; encoding: [0x10,0x20,0x83,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x128_bf8_bf8 v[16:23], v[0:15], v[8:23], v[16:23] matrix_b_reuse
+// GFX1250: v_wmma_f32_16x16x128_bf8_bf8 v[16:23], v[0:15], v[8:23], v[16:23] matrix_b_reuse ; encoding: [0x10,0x40,0x83,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_32x16x128_f4 v[4:19], v[0:15], v[2:9], v[4:19]
+// GFX1250: v_wmma_f32_32x16x128_f4 v[4:19], v[0:15], v[2:9], v[4:19] ; encoding: [0x04,0x40,0x88,0xcc,0x00,0x05,0x12,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_32x16x128_f4 v[4:19], v[0:15], v[2:9], 1.0
+// GFX1250: v_wmma_f32_32x16x128_f4 v[4:19], v[0:15], v[2:9], 1.0 ; encoding: [0x04,0x40,0x88,0xcc,0x00,0x05,0xca,0x1b]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_32x16x128_f4 v[4:19], v[0:15], v[2:9], 1.0 neg_lo:[0,0,1]
+// GFX1250: v_wmma_f32_32x16x128_f4 v[4:19], v[0:15], v[2:9], 1.0 neg_lo:[0,0,1] ; encoding: [0x04,0x40,0x88,0xcc,0x00,0x05,0xca,0x9b]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_32x16x128_f4 v[4:19], v[0:15], v[2:9], v[4:19] neg_lo:[0,0,1]
+// GFX1250: v_wmma_f32_32x16x128_f4 v[4:19], v[0:15], v[2:9], v[4:19] neg_lo:[0,0,1] ; encoding: [0x04,0x40,0x88,0xcc,0x00,0x05,0x12,0x9c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_32x16x128_f4 v[4:19], v[0:15], v[2:9], v[4:19] neg_hi:[0,0,1]
+// GFX1250: v_wmma_f32_32x16x128_f4 v[4:19], v[0:15], v[2:9], v[4:19] neg_hi:[0,0,1] ; encoding: [0x04,0x44,0x88,0xcc,0x00,0x05,0x12,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_32x16x128_f4 v[4:19], v[0:15], v[2:9], v[4:19] neg_lo:[0,0,1] neg_hi:[0,0,1]
+// GFX1250: v_wmma_f32_32x16x128_f4 v[4:19], v[0:15], v[2:9], v[4:19] neg_lo:[0,0,1] neg_hi:[0,0,1] ; encoding: [0x04,0x44,0x88,0xcc,0x00,0x05,0x12,0x9c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU

diff  --git a/llvm/test/MC/AMDGPU/gfx1250_asm_wmma_w32_err.s b/llvm/test/MC/AMDGPU/gfx1250_asm_wmma_w32_err.s
new file mode 100644
index 0000000000000..47445d368d3a0
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_wmma_w32_err.s
@@ -0,0 +1,384 @@
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1250 %s 2>&1 | FileCheck --check-prefix=GFX1250-ERR --implicit-check-not=error: %s
+
+v_wmma_f32_16x16x4_f32 v[4:11], v[0:1], v[2:3], s[4:11]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_f32_16x16x4_f32 v[4:11], v[0:1], v[2:3], 3.0
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_f32_16x16x32_bf16 v[16:23], v[0:7], v[8:15], s[16:23]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_f32_16x16x32_bf16 v[16:23], v[0:7], v[8:15], 3.0
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_bf16_16x16x32_bf16 v[16:19], v[0:7], v[8:15], s[16:19]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_bf16_16x16x32_bf16 v[16:19], v[0:7], v[8:15], 3.0
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_bf16f32_16x16x32_bf16 v[26:29], v[0:7], v[8:15], s[16:23]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_bf16f32_16x16x32_bf16 v[26:29], v[0:7], v[8:15], 3.0
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_f32_16x16x64_fp8_fp8 v[16:23], v[0:7], v[8:15], s[16:23]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_f32_16x16x64_fp8_fp8 v[16:23], v[0:7], v[8:15], 3.0
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_f32_16x16x64_fp8_fp8 v[16:23], v[0:7], v[8:15], v[16:23] neg_lo:[1,0,0]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid neg_lo operand
+
+v_wmma_f32_16x16x64_fp8_fp8 v[16:23], v[0:7], v[8:15], v[16:23] neg_lo:[0,1,0]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid neg_lo operand
+
+v_wmma_f32_16x16x64_fp8_bf8 v[16:23], v[0:7], v[8:15], s[16:23]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_f32_16x16x64_fp8_bf8 v[16:23], v[0:7], v[8:15], 3.0
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_f32_16x16x64_fp8_bf8 v[16:23], v[0:7], v[8:15], v[16:23] neg_lo:[1,0,0]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid neg_lo operand
+
+v_wmma_f32_16x16x64_fp8_bf8 v[16:23], v[0:7], v[8:15], v[16:23] neg_lo:[0,1,0]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid neg_lo operand
+
+v_wmma_f32_16x16x64_bf8_fp8 v[16:23], v[0:7], v[8:15], s[16:23]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_f32_16x16x64_bf8_fp8 v[16:23], v[0:7], v[8:15], 3.0
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_f32_16x16x64_bf8_fp8 v[16:23], v[0:7], v[8:15], v[16:23] neg_lo:[1,0,0]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid neg_lo operand
+
+v_wmma_f32_16x16x64_bf8_fp8 v[16:23], v[0:7], v[8:15], v[16:23] neg_lo:[0,1,0]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid neg_lo operand
+
+v_wmma_f32_16x16x64_bf8_bf8 v[16:23], v[0:7], v[8:15], s[16:23]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_f32_16x16x64_bf8_bf8 v[16:23], v[0:7], v[8:15], 3.0
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_f32_16x16x64_bf8_bf8 v[16:23], v[0:7], v[8:15], v[16:23] neg_lo:[1,0,0]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid neg_lo operand
+
+v_wmma_f32_16x16x64_bf8_bf8 v[16:23], v[0:7], v[8:15], v[16:23] neg_lo:[0,1,0]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid neg_lo operand
+
+v_wmma_f16_16x16x64_fp8_fp8 v[16:19], v[0:7], v[8:15], s[16:19]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_f16_16x16x64_fp8_fp8 v[16:19], v[0:7], v[8:15], 3.0
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_f16_16x16x64_fp8_fp8 v[16:19], v[0:7], v[8:15], v[16:19] neg_lo:[1,0,0]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid neg_lo operand
+
+v_wmma_f16_16x16x64_fp8_fp8 v[16:19], v[0:7], v[8:15], v[16:19] neg_lo:[0,1,0]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid neg_lo operand
+
+v_wmma_f16_16x16x64_fp8_bf8 v[16:19], v[0:7], v[8:15], s[16:19]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_f16_16x16x64_fp8_bf8 v[16:19], v[0:7], v[8:15], 3.0
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_f16_16x16x64_fp8_bf8 v[16:19], v[0:7], v[8:15], v[16:19] neg_lo:[1,0,0]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid neg_lo operand
+
+v_wmma_f16_16x16x64_fp8_bf8 v[16:19], v[0:7], v[8:15], v[16:19] neg_lo:[0,1,0]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid neg_lo operand
+
+v_wmma_f16_16x16x64_bf8_fp8 v[16:19], v[0:7], v[8:15], s[16:19]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_f16_16x16x64_bf8_fp8 v[16:19], v[0:7], v[8:15], 3.0
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_f16_16x16x64_bf8_fp8 v[16:19], v[0:7], v[8:15], v[16:19] neg_lo:[1,0,0]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid neg_lo operand
+
+v_wmma_f16_16x16x64_bf8_fp8 v[16:19], v[0:7], v[8:15], v[16:19] neg_lo:[0,1,0]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid neg_lo operand
+
+v_wmma_f16_16x16x64_bf8_bf8 v[16:19], v[0:7], v[8:15], s[16:19]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_f16_16x16x64_bf8_bf8 v[16:19], v[0:7], v[8:15], 3.0
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_f16_16x16x64_bf8_bf8 v[16:19], v[0:7], v[8:15], v[16:19] neg_lo:[1,0,0]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid neg_lo operand
+
+v_wmma_f16_16x16x64_bf8_bf8 v[16:19], v[0:7], v[8:15], v[16:19] neg_lo:[0,1,0]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid neg_lo operand
+
+v_wmma_i32_16x16x64_iu8 v[16:23], v[0:7], v[8:15], s[16:23]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_i32_16x16x64_iu8 v[16:23], v[0:7], v[8:15], 128
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_i32_16x16x64_iu8 v[16:23], v[0:7], v[8:15], v[16:23] clamp
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_i32_16x16x64_iu8 v[16:23], v[0:7], v[8:15], v[16:23] neg_lo:[0,0,1]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid neg_lo operand
+
+v_wmma_f32_16x16x32_f16 v[16:23], v[0:7], v[8:15], s[16:23]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_f32_16x16x32_f16 v[16:23], v[0:7], v[8:15], 3.0
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_f16_16x16x32_f16 v[16:19], v[0:7], v[8:15], s[16:19]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_f16_16x16x32_f16 v[16:19], v[0:7], v[8:15], 3.0
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f32_16x16x64_bf16 v[24:31], v[0:7], v[8:23], s32
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f32_16x16x64_bf16 v[24:31], v[0:7], 1.0, v32
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f32_16x16x64_bf16 v[24:31], v[0:7], v[8:23], v32 index_key:2
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: out of range index_key
+
+v_swmmac_f32_16x16x64_bf16 v[24:31], v[0:7], v[8:23], v32 neg_lo:[0,0,1]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid neg_lo operand
+
+v_swmmac_bf16_16x16x64_bf16 v[24:27], v[0:7], v[8:23], s28
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_bf16_16x16x64_bf16 v[24:27], v[0:7], 1.0, v28
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_bf16_16x16x64_bf16 v[24:27], v[0:7], v[8:23], v28 index_key:2
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: out of range index_key
+
+v_swmmac_bf16_16x16x64_bf16 v[24:27], v[0:7], v[8:23], v28 neg_lo:[0,0,1]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid neg_lo operand
+
+v_swmmac_bf16f32_16x16x64_bf16 v[24:31], v[0:7], v[8:23], s32
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_bf16f32_16x16x64_bf16 v[24:31], v[0:7], 1.0, v32
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_bf16f32_16x16x64_bf16 v[24:31], v[0:7], v[8:23], v32 index_key:2
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: out of range index_key
+
+v_swmmac_bf16f32_16x16x64_bf16 v[24:31], v[0:7], v[8:23], v32 neg_lo:[0,0,1]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid neg_lo operand
+
+v_swmmac_f32_16x16x128_fp8_fp8 v[24:31], v[0:7], v[8:23], s[32:33]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f32_16x16x128_fp8_fp8 v[24:31], v[0:7], 1.0, v[32:33]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f32_16x16x128_fp8_fp8 v[24:31], v[0:7], v[8:23], v[32:33] index_key:2
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: out of range index_key
+
+v_swmmac_f32_16x16x128_fp8_fp8 v[24:31], v[0:7], v[8:23], v[32:33] neg_lo:[1,0,0]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x128_fp8_fp8 v[24:31], v[0:7], v[8:23], v[32:33] neg_lo:[0,1,0]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x128_fp8_fp8 v[24:31], v[0:7], v[8:23], v[32:33] neg_lo:[0,0,1]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x128_fp8_bf8 v[24:31], v[0:7], v[8:23], s[32:33]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f32_16x16x128_fp8_bf8 v[24:31], v[0:7], 1.0, v[32:33]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f32_16x16x128_fp8_bf8 v[24:31], v[0:7], v[8:23], v[32:33] index_key:2
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: out of range index_key
+
+v_swmmac_f32_16x16x128_fp8_bf8 v[24:31], v[0:7], v[8:23], v[32:33] neg_lo:[1,0,0]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x128_fp8_bf8 v[24:31], v[0:7], v[8:23], v[32:33] neg_lo:[0,1,0]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x128_fp8_bf8 v[24:31], v[0:7], v[8:23], v[32:33] neg_lo:[0,0,1]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x128_bf8_fp8 v[24:31], v[0:7], v[8:23], s[32:33]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f32_16x16x128_bf8_fp8 v[24:31], v[0:7], 1.0, v[32:33]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f32_16x16x128_bf8_fp8 v[24:31], v[0:7], v[8:23], v[32:33] index_key:2
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: out of range index_key
+
+v_swmmac_f32_16x16x128_bf8_fp8 v[24:31], v[0:7], v[8:23], v[32:33] neg_lo:[1,0,0]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x128_bf8_fp8 v[24:31], v[0:7], v[8:23], v[32:33] neg_lo:[0,1,0]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x128_bf8_fp8 v[24:31], v[0:7], v[8:23], v[32:33] neg_lo:[0,0,1]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x128_bf8_bf8 v[24:31], v[0:7], v[8:23], s[32:33]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f32_16x16x128_bf8_bf8 v[24:31], v[0:7], 1.0, v[32:33]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f32_16x16x128_bf8_bf8 v[24:31], v[0:7], v[8:23], v[32:33] index_key:2
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: out of range index_key
+
+v_swmmac_f32_16x16x128_bf8_bf8 v[24:31], v[0:7], v[8:23], v[32:33] neg_lo:[1,0,0]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x128_bf8_bf8 v[24:31], v[0:7], v[8:23], v[32:33] neg_lo:[0,1,0]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x128_bf8_bf8 v[24:31], v[0:7], v[8:23], v[32:33] neg_lo:[0,0,1]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f16_16x16x128_fp8_fp8 v[24:27], v[0:7], v[8:23], s[28:29]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f16_16x16x128_fp8_fp8 v[24:27], v[0:7], 1.0, v[28:29]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f16_16x16x128_fp8_fp8 v[24:27], v[0:7], v[8:23], v[28:29] index_key:2
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: out of range index_key
+
+v_swmmac_f16_16x16x128_fp8_fp8 v[24:27], v[0:7], v[8:23], v[28:29] neg_lo:[1,0,0]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f16_16x16x128_fp8_fp8 v[24:27], v[0:7], v[8:23], v[28:29] neg_lo:[0,1,0]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f16_16x16x128_fp8_fp8 v[24:27], v[0:7], v[8:23], v[28:29] neg_lo:[0,0,1]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f16_16x16x128_fp8_bf8 v[24:27], v[0:7], v[8:23], s[28:29]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f16_16x16x128_fp8_bf8 v[24:27], v[0:7], 1.0, v[28:29]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f16_16x16x128_fp8_bf8 v[24:27], v[0:7], v[8:23], v[28:29] index_key:2
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: out of range index_key
+
+v_swmmac_f16_16x16x128_fp8_bf8 v[24:27], v[0:7], v[8:23], v[28:29] neg_lo:[1,0,0]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f16_16x16x128_fp8_bf8 v[24:27], v[0:7], v[8:23], v[28:29] neg_lo:[0,1,0]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f16_16x16x128_fp8_bf8 v[24:27], v[0:7], v[8:23], v[28:29] neg_lo:[0,0,1]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f16_16x16x128_bf8_fp8 v[24:27], v[0:7], v[8:23], s[28:29]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f16_16x16x128_bf8_fp8 v[24:27], v[0:7], 1.0, v[28:29]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f16_16x16x128_bf8_fp8 v[24:27], v[0:7], v[8:23], v[28:29] index_key:2
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: out of range index_key
+
+v_swmmac_f16_16x16x128_bf8_fp8 v[24:27], v[0:7], v[8:23], v[28:29] neg_lo:[1,0,0]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f16_16x16x128_bf8_fp8 v[24:27], v[0:7], v[8:23], v[28:29] neg_lo:[0,1,0]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f16_16x16x128_bf8_fp8 v[24:27], v[0:7], v[8:23], v[28:29] neg_lo:[0,0,1]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f16_16x16x128_bf8_bf8 v[24:27], v[0:7], v[8:23], s[28:29]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f16_16x16x128_bf8_bf8 v[24:27], v[0:7], 1.0, v[28:29]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f16_16x16x128_bf8_bf8 v[24:27], v[0:7], v[8:23], v[28:29] index_key:2
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: out of range index_key
+
+v_swmmac_f16_16x16x128_bf8_bf8 v[24:27], v[0:7], v[8:23], v[28:29] neg_lo:[1,0,0]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f16_16x16x128_bf8_bf8 v[24:27], v[0:7], v[8:23], v[28:29] neg_lo:[0,1,0]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f16_16x16x128_bf8_bf8 v[24:27], v[0:7], v[8:23], v[28:29] neg_lo:[0,0,1]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_i32_16x16x128_iu8 v[24:31], v[0:7], v[8:23], s[32:33]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_i32_16x16x128_iu8 v[24:31], v[0:7], 1, v[32:33]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_i32_16x16x128_iu8 v[24:31], v[0:7], v[8:23], v[32:33] clamp
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_i32_16x16x128_iu8 v[24:31], v[0:7], v[8:23], v[32:33] index_key:2
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: out of range index_key
+
+v_swmmac_i32_16x16x128_iu8 v[24:31], v[0:7], v[8:23], v[32:33] neg_lo:[0,0,1]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid neg_lo operand
+
+v_swmmac_f32_16x16x64_f16 v[24:31], v[0:7], v[8:23], s32
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f32_16x16x64_f16 v[24:31], v[0:7], 1.0, v32
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f32_16x16x64_f16 v[24:31], v[0:7], v[8:23], v32 index_key:2
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: out of range index_key
+
+v_swmmac_f32_16x16x64_f16 v[24:31], v[0:7], v[8:23], v32 neg_lo:[0,0,1]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid neg_lo operand
+
+v_swmmac_f16_16x16x64_f16 v[24:27], v[0:7], v[8:23], s28
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f16_16x16x64_f16 v[24:27], v[0:7], 1.0, v28
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f16_16x16x64_f16 v[24:27], v[0:7], v[8:23], v28 index_key:2
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: out of range index_key
+
+v_swmmac_f16_16x16x64_f16 v[24:27], v[0:7], v[8:23], v28 neg_lo:[0,0,1]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid neg_lo operand
+
+v_wmma_f32_32x16x128_f4 v[4:19], v[0:15], v[2:9], v[4:19] neg_lo:[1,0,0]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid neg_lo operand
+// GFX1250-ERR-NEXT: {{^}}v_wmma_f32_32x16x128_f4 v[4:19], v[0:15], v[2:9], v[4:19] neg_lo:[1,0,0]
+// GFX1250-ERR-NEXT: {{^}}                                                          ^
+
+v_wmma_f32_32x16x128_f4 v[4:19], v[0:15], v[2:9], v[4:19] neg_lo:[0,1,0]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid neg_lo operand
+// GFX1250-ERR-NEXT: {{^}}v_wmma_f32_32x16x128_f4 v[4:19], v[0:15], v[2:9], v[4:19] neg_lo:[0,1,0]
+// GFX1250-ERR-NEXT: {{^}}                                                          ^
+
+v_wmma_f32_32x16x128_f4 v[4:19], v[0:15], v[2:9], v[4:19] neg_hi:[1,0,0]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid neg_hi operand
+// GFX1250-ERR-NEXT: {{^}}v_wmma_f32_32x16x128_f4 v[4:19], v[0:15], v[2:9], v[4:19] neg_hi:[1,0,0]
+// GFX1250-ERR-NEXT: {{^}}                                                          ^
+
+v_wmma_f32_32x16x128_f4 v[4:19], v[0:15], v[2:9], v[4:19] neg_hi:[0,1,0]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid neg_hi operand
+// GFX1250-ERR-NEXT: {{^}}v_wmma_f32_32x16x128_f4 v[4:19], v[0:15], v[2:9], v[4:19] neg_hi:[0,1,0]
+// GFX1250-ERR-NEXT: {{^}}                                                          ^

diff  --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_wmma_w32.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_wmma_w32.txt
new file mode 100644
index 0000000000000..d76ec4c7e6185
--- /dev/null
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_wmma_w32.txt
@@ -0,0 +1,734 @@
+# NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5
+# RUN: llvm-mc -disassemble -triple=amdgcn -mcpu=gfx1250 -show-encoding %s | FileCheck --check-prefix=GFX1250 %s
+
+0x18,0x00,0x68,0xcc,0x00,0x11,0x72,0x1c
+# GFX1250: v_swmmac_bf16_16x16x64_bf16 v[24:27], v[0:7], v[8:23], v28 ; encoding: [0x18,0x00,0x68,0xcc,0x00,0x11,0x72,0x1c]
+
+0x18,0x08,0x68,0xcc,0x00,0x11,0x72,0x1c
+# GFX1250: v_swmmac_bf16_16x16x64_bf16 v[24:27], v[0:7], v[8:23], v28 index_key:1 ; encoding: [0x18,0x08,0x68,0xcc,0x00,0x11,0x72,0x1c]
+
+0x18,0x02,0x68,0xcc,0x00,0x11,0x72,0x5c
+# GFX1250: v_swmmac_bf16_16x16x64_bf16 v[24:27], v[0:7], v[8:23], v28 neg_lo:[0,1,0] neg_hi:[0,1,0] ; encoding: [0x18,0x02,0x68,0xcc,0x00,0x11,0x72,0x5c]
+
+0x18,0x01,0x68,0xcc,0x00,0x11,0x72,0x3c
+# GFX1250: v_swmmac_bf16_16x16x64_bf16 v[24:27], v[0:7], v[8:23], v28 neg_lo:[1,0,0] neg_hi:[1,0,0] ; encoding: [0x18,0x01,0x68,0xcc,0x00,0x11,0x72,0x3c]
+
+0x18,0x00,0x69,0xcc,0x00,0x11,0x82,0x1c
+# GFX1250: v_swmmac_bf16f32_16x16x64_bf16 v[24:31], v[0:7], v[8:23], v32 ; encoding: [0x18,0x00,0x69,0xcc,0x00,0x11,0x82,0x1c]
+
+0x18,0x08,0x69,0xcc,0x00,0x11,0x82,0x1c
+# GFX1250: v_swmmac_bf16f32_16x16x64_bf16 v[24:31], v[0:7], v[8:23], v32 index_key:1 ; encoding: [0x18,0x08,0x69,0xcc,0x00,0x11,0x82,0x1c]
+
+0x18,0x20,0x69,0xcc,0x00,0x11,0x82,0x1c
+# GFX1250: v_swmmac_bf16f32_16x16x64_bf16 v[24:31], v[0:7], v[8:23], v32 matrix_a_reuse ; encoding: [0x18,0x20,0x69,0xcc,0x00,0x11,0x82,0x1c]
+
+0x18,0x40,0x69,0xcc,0x00,0x11,0x82,0x1c
+# GFX1250: v_swmmac_bf16f32_16x16x64_bf16 v[24:31], v[0:7], v[8:23], v32 matrix_b_reuse ; encoding: [0x18,0x40,0x69,0xcc,0x00,0x11,0x82,0x1c]
+
+0x18,0x02,0x69,0xcc,0x00,0x11,0x82,0x5c
+# GFX1250: v_swmmac_bf16f32_16x16x64_bf16 v[24:31], v[0:7], v[8:23], v32 neg_lo:[0,1,0] neg_hi:[0,1,0] ; encoding: [0x18,0x02,0x69,0xcc,0x00,0x11,0x82,0x5c]
+
+0x18,0x01,0x69,0xcc,0x00,0x11,0x82,0x3c
+# GFX1250: v_swmmac_bf16f32_16x16x64_bf16 v[24:31], v[0:7], v[8:23], v32 neg_lo:[1,0,0] neg_hi:[1,0,0] ; encoding: [0x18,0x01,0x69,0xcc,0x00,0x11,0x82,0x3c]
+
+0x18,0x00,0x7a,0xcc,0x00,0x11,0x72,0x1c
+# GFX1250: v_swmmac_f16_16x16x128_bf8_bf8 v[24:27], v[0:7], v[8:23], v[28:29] ; encoding: [0x18,0x00,0x7a,0xcc,0x00,0x11,0x72,0x1c]
+
+0x18,0x08,0x7a,0xcc,0x00,0x11,0x72,0x1c
+# GFX1250: v_swmmac_f16_16x16x128_bf8_bf8 v[24:27], v[0:7], v[8:23], v[28:29] index_key:1 ; encoding: [0x18,0x08,0x7a,0xcc,0x00,0x11,0x72,0x1c]
+
+0x18,0x20,0x7a,0xcc,0x00,0x11,0x72,0x1c
+# GFX1250: v_swmmac_f16_16x16x128_bf8_bf8 v[24:27], v[0:7], v[8:23], v[28:29] matrix_a_reuse ; encoding: [0x18,0x20,0x7a,0xcc,0x00,0x11,0x72,0x1c]
+
+0x18,0x40,0x7a,0xcc,0x00,0x11,0x72,0x1c
+# GFX1250: v_swmmac_f16_16x16x128_bf8_bf8 v[24:27], v[0:7], v[8:23], v[28:29] matrix_b_reuse ; encoding: [0x18,0x40,0x7a,0xcc,0x00,0x11,0x72,0x1c]
+
+0x18,0x00,0x79,0xcc,0x00,0x11,0x72,0x1c
+# GFX1250: v_swmmac_f16_16x16x128_bf8_fp8 v[24:27], v[0:7], v[8:23], v[28:29] ; encoding: [0x18,0x00,0x79,0xcc,0x00,0x11,0x72,0x1c]
+
+0x18,0x08,0x79,0xcc,0x00,0x11,0x72,0x1c
+# GFX1250: v_swmmac_f16_16x16x128_bf8_fp8 v[24:27], v[0:7], v[8:23], v[28:29] index_key:1 ; encoding: [0x18,0x08,0x79,0xcc,0x00,0x11,0x72,0x1c]
+
+0x18,0x20,0x79,0xcc,0x00,0x11,0x72,0x1c
+# GFX1250: v_swmmac_f16_16x16x128_bf8_fp8 v[24:27], v[0:7], v[8:23], v[28:29] matrix_a_reuse ; encoding: [0x18,0x20,0x79,0xcc,0x00,0x11,0x72,0x1c]
+
+0x18,0x40,0x79,0xcc,0x00,0x11,0x72,0x1c
+# GFX1250: v_swmmac_f16_16x16x128_bf8_fp8 v[24:27], v[0:7], v[8:23], v[28:29] matrix_b_reuse ; encoding: [0x18,0x40,0x79,0xcc,0x00,0x11,0x72,0x1c]
+
+0x18,0x00,0x78,0xcc,0x00,0x11,0x72,0x1c
+# GFX1250: v_swmmac_f16_16x16x128_fp8_bf8 v[24:27], v[0:7], v[8:23], v[28:29] ; encoding: [0x18,0x00,0x78,0xcc,0x00,0x11,0x72,0x1c]
+
+0x18,0x08,0x78,0xcc,0x00,0x11,0x72,0x1c
+# GFX1250: v_swmmac_f16_16x16x128_fp8_bf8 v[24:27], v[0:7], v[8:23], v[28:29] index_key:1 ; encoding: [0x18,0x08,0x78,0xcc,0x00,0x11,0x72,0x1c]
+
+0x18,0x20,0x78,0xcc,0x00,0x11,0x72,0x1c
+# GFX1250: v_swmmac_f16_16x16x128_fp8_bf8 v[24:27], v[0:7], v[8:23], v[28:29] matrix_a_reuse ; encoding: [0x18,0x20,0x78,0xcc,0x00,0x11,0x72,0x1c]
+
+0x18,0x40,0x78,0xcc,0x00,0x11,0x72,0x1c
+# GFX1250: v_swmmac_f16_16x16x128_fp8_bf8 v[24:27], v[0:7], v[8:23], v[28:29] matrix_b_reuse ; encoding: [0x18,0x40,0x78,0xcc,0x00,0x11,0x72,0x1c]
+
+0x18,0x00,0x77,0xcc,0x00,0x11,0x72,0x1c
+# GFX1250: v_swmmac_f16_16x16x128_fp8_fp8 v[24:27], v[0:7], v[8:23], v[28:29] ; encoding: [0x18,0x00,0x77,0xcc,0x00,0x11,0x72,0x1c]
+
+0x18,0x08,0x77,0xcc,0x00,0x11,0x72,0x1c
+# GFX1250: v_swmmac_f16_16x16x128_fp8_fp8 v[24:27], v[0:7], v[8:23], v[28:29] index_key:1 ; encoding: [0x18,0x08,0x77,0xcc,0x00,0x11,0x72,0x1c]
+
+0x18,0x20,0x77,0xcc,0x00,0x11,0x72,0x1c
+# GFX1250: v_swmmac_f16_16x16x128_fp8_fp8 v[24:27], v[0:7], v[8:23], v[28:29] matrix_a_reuse ; encoding: [0x18,0x20,0x77,0xcc,0x00,0x11,0x72,0x1c]
+
+0x18,0x40,0x77,0xcc,0x00,0x11,0x72,0x1c
+# GFX1250: v_swmmac_f16_16x16x128_fp8_fp8 v[24:27], v[0:7], v[8:23], v[28:29] matrix_b_reuse ; encoding: [0x18,0x40,0x77,0xcc,0x00,0x11,0x72,0x1c]
+
+0x18,0x00,0x67,0xcc,0x00,0x11,0x72,0x1c
+# GFX1250: v_swmmac_f16_16x16x64_f16 v[24:27], v[0:7], v[8:23], v28 ; encoding: [0x18,0x00,0x67,0xcc,0x00,0x11,0x72,0x1c]
+
+0x18,0x08,0x67,0xcc,0x00,0x11,0x72,0x1c
+# GFX1250: v_swmmac_f16_16x16x64_f16 v[24:27], v[0:7], v[8:23], v28 index_key:1 ; encoding: [0x18,0x08,0x67,0xcc,0x00,0x11,0x72,0x1c]
+
+0x18,0x20,0x67,0xcc,0x00,0x11,0x72,0x1c
+# GFX1250: v_swmmac_f16_16x16x64_f16 v[24:27], v[0:7], v[8:23], v28 matrix_a_reuse ; encoding: [0x18,0x20,0x67,0xcc,0x00,0x11,0x72,0x1c]
+
+0x18,0x40,0x67,0xcc,0x00,0x11,0x72,0x1c
+# GFX1250: v_swmmac_f16_16x16x64_f16 v[24:27], v[0:7], v[8:23], v28 matrix_b_reuse ; encoding: [0x18,0x40,0x67,0xcc,0x00,0x11,0x72,0x1c]
+
+0x18,0x02,0x67,0xcc,0x00,0x11,0x72,0x5c
+# GFX1250: v_swmmac_f16_16x16x64_f16 v[24:27], v[0:7], v[8:23], v28 neg_lo:[0,1,0] neg_hi:[0,1,0] ; encoding: [0x18,0x02,0x67,0xcc,0x00,0x11,0x72,0x5c]
+
+0x18,0x01,0x67,0xcc,0x00,0x11,0x72,0x3c
+# GFX1250: v_swmmac_f16_16x16x64_f16 v[24:27], v[0:7], v[8:23], v28 neg_lo:[1,0,0] neg_hi:[1,0,0] ; encoding: [0x18,0x01,0x67,0xcc,0x00,0x11,0x72,0x3c]
+
+0x18,0x00,0x76,0xcc,0x00,0x11,0x82,0x1c
+# GFX1250: v_swmmac_f32_16x16x128_bf8_bf8 v[24:31], v[0:7], v[8:23], v[32:33] ; encoding: [0x18,0x00,0x76,0xcc,0x00,0x11,0x82,0x1c]
+
+0x18,0x08,0x76,0xcc,0x00,0x11,0x82,0x1c
+# GFX1250: v_swmmac_f32_16x16x128_bf8_bf8 v[24:31], v[0:7], v[8:23], v[32:33] index_key:1 ; encoding: [0x18,0x08,0x76,0xcc,0x00,0x11,0x82,0x1c]
+
+0x18,0x20,0x76,0xcc,0x00,0x11,0x82,0x1c
+# GFX1250: v_swmmac_f32_16x16x128_bf8_bf8 v[24:31], v[0:7], v[8:23], v[32:33] matrix_a_reuse ; encoding: [0x18,0x20,0x76,0xcc,0x00,0x11,0x82,0x1c]
+
+0x18,0x40,0x76,0xcc,0x00,0x11,0x82,0x1c
+# GFX1250: v_swmmac_f32_16x16x128_bf8_bf8 v[24:31], v[0:7], v[8:23], v[32:33] matrix_b_reuse ; encoding: [0x18,0x40,0x76,0xcc,0x00,0x11,0x82,0x1c]
+
+0x18,0x00,0x75,0xcc,0x00,0x11,0x82,0x1c
+# GFX1250: v_swmmac_f32_16x16x128_bf8_fp8 v[24:31], v[0:7], v[8:23], v[32:33] ; encoding: [0x18,0x00,0x75,0xcc,0x00,0x11,0x82,0x1c]
+
+0x18,0x08,0x75,0xcc,0x00,0x11,0x82,0x1c
+# GFX1250: v_swmmac_f32_16x16x128_bf8_fp8 v[24:31], v[0:7], v[8:23], v[32:33] index_key:1 ; encoding: [0x18,0x08,0x75,0xcc,0x00,0x11,0x82,0x1c]
+
+0x18,0x20,0x75,0xcc,0x00,0x11,0x82,0x1c
+# GFX1250: v_swmmac_f32_16x16x128_bf8_fp8 v[24:31], v[0:7], v[8:23], v[32:33] matrix_a_reuse ; encoding: [0x18,0x20,0x75,0xcc,0x00,0x11,0x82,0x1c]
+
+0x18,0x40,0x75,0xcc,0x00,0x11,0x82,0x1c
+# GFX1250: v_swmmac_f32_16x16x128_bf8_fp8 v[24:31], v[0:7], v[8:23], v[32:33] matrix_b_reuse ; encoding: [0x18,0x40,0x75,0xcc,0x00,0x11,0x82,0x1c]
+
+0x18,0x00,0x74,0xcc,0x00,0x11,0x82,0x1c
+# GFX1250: v_swmmac_f32_16x16x128_fp8_bf8 v[24:31], v[0:7], v[8:23], v[32:33] ; encoding: [0x18,0x00,0x74,0xcc,0x00,0x11,0x82,0x1c]
+
+0x18,0x08,0x74,0xcc,0x00,0x11,0x82,0x1c
+# GFX1250: v_swmmac_f32_16x16x128_fp8_bf8 v[24:31], v[0:7], v[8:23], v[32:33] index_key:1 ; encoding: [0x18,0x08,0x74,0xcc,0x00,0x11,0x82,0x1c]
+
+0x18,0x20,0x74,0xcc,0x00,0x11,0x82,0x1c
+# GFX1250: v_swmmac_f32_16x16x128_fp8_bf8 v[24:31], v[0:7], v[8:23], v[32:33] matrix_a_reuse ; encoding: [0x18,0x20,0x74,0xcc,0x00,0x11,0x82,0x1c]
+
+0x18,0x40,0x74,0xcc,0x00,0x11,0x82,0x1c
+# GFX1250: v_swmmac_f32_16x16x128_fp8_bf8 v[24:31], v[0:7], v[8:23], v[32:33] matrix_b_reuse ; encoding: [0x18,0x40,0x74,0xcc,0x00,0x11,0x82,0x1c]
+
+0x18,0x00,0x73,0xcc,0x00,0x11,0x82,0x1c
+# GFX1250: v_swmmac_f32_16x16x128_fp8_fp8 v[24:31], v[0:7], v[8:23], v[32:33] ; encoding: [0x18,0x00,0x73,0xcc,0x00,0x11,0x82,0x1c]
+
+0x18,0x08,0x73,0xcc,0x00,0x11,0x82,0x1c
+# GFX1250: v_swmmac_f32_16x16x128_fp8_fp8 v[24:31], v[0:7], v[8:23], v[32:33] index_key:1 ; encoding: [0x18,0x08,0x73,0xcc,0x00,0x11,0x82,0x1c]
+
+0x18,0x20,0x73,0xcc,0x00,0x11,0x82,0x1c
+# GFX1250: v_swmmac_f32_16x16x128_fp8_fp8 v[24:31], v[0:7], v[8:23], v[32:33] matrix_a_reuse ; encoding: [0x18,0x20,0x73,0xcc,0x00,0x11,0x82,0x1c]
+
+0x18,0x40,0x73,0xcc,0x00,0x11,0x82,0x1c
+# GFX1250: v_swmmac_f32_16x16x128_fp8_fp8 v[24:31], v[0:7], v[8:23], v[32:33] matrix_b_reuse ; encoding: [0x18,0x40,0x73,0xcc,0x00,0x11,0x82,0x1c]
+
+0x18,0x00,0x66,0xcc,0x00,0x11,0x82,0x1c
+# GFX1250: v_swmmac_f32_16x16x64_bf16 v[24:31], v[0:7], v[8:23], v32 ; encoding: [0x18,0x00,0x66,0xcc,0x00,0x11,0x82,0x1c]
+
+0x18,0x08,0x66,0xcc,0x00,0x11,0x82,0x1c
+# GFX1250: v_swmmac_f32_16x16x64_bf16 v[24:31], v[0:7], v[8:23], v32 index_key:1 ; encoding: [0x18,0x08,0x66,0xcc,0x00,0x11,0x82,0x1c]
+
+0x18,0x20,0x66,0xcc,0x00,0x11,0x82,0x1c
+# GFX1250: v_swmmac_f32_16x16x64_bf16 v[24:31], v[0:7], v[8:23], v32 matrix_a_reuse ; encoding: [0x18,0x20,0x66,0xcc,0x00,0x11,0x82,0x1c]
+
+0x18,0x40,0x66,0xcc,0x00,0x11,0x82,0x1c
+# GFX1250: v_swmmac_f32_16x16x64_bf16 v[24:31], v[0:7], v[8:23], v32 matrix_b_reuse ; encoding: [0x18,0x40,0x66,0xcc,0x00,0x11,0x82,0x1c]
+
+0x18,0x02,0x66,0xcc,0x00,0x11,0x82,0x5c
+# GFX1250: v_swmmac_f32_16x16x64_bf16 v[24:31], v[0:7], v[8:23], v32 neg_lo:[0,1,0] neg_hi:[0,1,0] ; encoding: [0x18,0x02,0x66,0xcc,0x00,0x11,0x82,0x5c]
+
+0x18,0x01,0x66,0xcc,0x00,0x11,0x82,0x3c
+# GFX1250: v_swmmac_f32_16x16x64_bf16 v[24:31], v[0:7], v[8:23], v32 neg_lo:[1,0,0] neg_hi:[1,0,0] ; encoding: [0x18,0x01,0x66,0xcc,0x00,0x11,0x82,0x3c]
+
+0x18,0x00,0x65,0xcc,0x00,0x11,0x82,0x1c
+# GFX1250: v_swmmac_f32_16x16x64_f16 v[24:31], v[0:7], v[8:23], v32 ; encoding: [0x18,0x00,0x65,0xcc,0x00,0x11,0x82,0x1c]
+
+0x18,0x08,0x65,0xcc,0x00,0x11,0x82,0x1c
+# GFX1250: v_swmmac_f32_16x16x64_f16 v[24:31], v[0:7], v[8:23], v32 index_key:1 ; encoding: [0x18,0x08,0x65,0xcc,0x00,0x11,0x82,0x1c]
+
+0x18,0x20,0x65,0xcc,0x00,0x11,0x82,0x1c
+# GFX1250: v_swmmac_f32_16x16x64_f16 v[24:31], v[0:7], v[8:23], v32 matrix_a_reuse ; encoding: [0x18,0x20,0x65,0xcc,0x00,0x11,0x82,0x1c]
+
+0x18,0x40,0x65,0xcc,0x00,0x11,0x82,0x1c
+# GFX1250: v_swmmac_f32_16x16x64_f16 v[24:31], v[0:7], v[8:23], v32 matrix_b_reuse ; encoding: [0x18,0x40,0x65,0xcc,0x00,0x11,0x82,0x1c]
+
+0x18,0x02,0x65,0xcc,0x00,0x11,0x82,0x5c
+# GFX1250: v_swmmac_f32_16x16x64_f16 v[24:31], v[0:7], v[8:23], v32 neg_lo:[0,1,0] neg_hi:[0,1,0] ; encoding: [0x18,0x02,0x65,0xcc,0x00,0x11,0x82,0x5c]
+
+0x18,0x01,0x65,0xcc,0x00,0x11,0x82,0x3c
+# GFX1250: v_swmmac_f32_16x16x64_f16 v[24:31], v[0:7], v[8:23], v32 neg_lo:[1,0,0] neg_hi:[1,0,0] ; encoding: [0x18,0x01,0x65,0xcc,0x00,0x11,0x82,0x3c]
+
+0x18,0x00,0x7b,0xcc,0x00,0x11,0x82,0x1c
+# GFX1250: v_swmmac_i32_16x16x128_iu8 v[24:31], v[0:7], v[8:23], v[32:33] ; encoding: [0x18,0x00,0x7b,0xcc,0x00,0x11,0x82,0x1c]
+
+0x18,0x08,0x7b,0xcc,0x00,0x11,0x82,0x1c
+# GFX1250: v_swmmac_i32_16x16x128_iu8 v[24:31], v[0:7], v[8:23], v[32:33] index_key:1 ; encoding: [0x18,0x08,0x7b,0xcc,0x00,0x11,0x82,0x1c]
+
+0x18,0x20,0x7b,0xcc,0x00,0x11,0x82,0x1c
+# GFX1250: v_swmmac_i32_16x16x128_iu8 v[24:31], v[0:7], v[8:23], v[32:33] matrix_a_reuse ; encoding: [0x18,0x20,0x7b,0xcc,0x00,0x11,0x82,0x1c]
+
+0x18,0x40,0x7b,0xcc,0x00,0x11,0x82,0x1c
+# GFX1250: v_swmmac_i32_16x16x128_iu8 v[24:31], v[0:7], v[8:23], v[32:33] matrix_b_reuse ; encoding: [0x18,0x40,0x7b,0xcc,0x00,0x11,0x82,0x1c]
+
+0x18,0x00,0x7b,0xcc,0x00,0x11,0x82,0x5c
+# GFX1250: v_swmmac_i32_16x16x128_iu8 v[24:31], v[0:7], v[8:23], v[32:33] neg_lo:[0,1,0] ; encoding: [0x18,0x00,0x7b,0xcc,0x00,0x11,0x82,0x5c]
+
+0x18,0x00,0x7b,0xcc,0x00,0x11,0x82,0x3c
+# GFX1250: v_swmmac_i32_16x16x128_iu8 v[24:31], v[0:7], v[8:23], v[32:33] neg_lo:[1,0,0] ; encoding: [0x18,0x00,0x7b,0xcc,0x00,0x11,0x82,0x3c]
+
+0x10,0x00,0x63,0xcc,0x00,0x11,0xca,0x1b
+# GFX1250: v_wmma_bf16_16x16x32_bf16 v[16:19], v[0:7], v[8:15], 1.0 ; encoding: [0x10,0x00,0x63,0xcc,0x00,0x11,0xca,0x1b]
+
+0x10,0x00,0x63,0xcc,0x00,0x11,0xca,0x9b
+# GFX1250: v_wmma_bf16_16x16x32_bf16 v[16:19], v[0:7], v[8:15], 1.0 neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x63,0xcc,0x00,0x11,0xca,0x9b]
+
+0x10,0x00,0x63,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_bf16_16x16x32_bf16 v[16:19], v[0:7], v[8:15], v[16:19] ; encoding: [0x10,0x00,0x63,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x20,0x63,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_bf16_16x16x32_bf16 v[16:19], v[0:7], v[8:15], v[16:19] matrix_a_reuse ; encoding: [0x10,0x20,0x63,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x40,0x63,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_bf16_16x16x32_bf16 v[16:19], v[0:7], v[8:15], v[16:19] matrix_b_reuse ; encoding: [0x10,0x40,0x63,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x04,0x63,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_bf16_16x16x32_bf16 v[16:19], v[0:7], v[8:15], v[16:19] neg_hi:[0,0,1] ; encoding: [0x10,0x04,0x63,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x00,0x63,0xcc,0x00,0x11,0x42,0x9c
+# GFX1250: v_wmma_bf16_16x16x32_bf16 v[16:19], v[0:7], v[8:15], v[16:19] neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x63,0xcc,0x00,0x11,0x42,0x9c]
+
+0x10,0x02,0x63,0xcc,0x00,0x11,0x42,0x5c
+# GFX1250: v_wmma_bf16_16x16x32_bf16 v[16:19], v[0:7], v[8:15], v[16:19] neg_lo:[0,1,0] neg_hi:[0,1,0] ; encoding: [0x10,0x02,0x63,0xcc,0x00,0x11,0x42,0x5c]
+
+0x10,0x01,0x63,0xcc,0x00,0x11,0x42,0x3c
+# GFX1250: v_wmma_bf16_16x16x32_bf16 v[16:19], v[0:7], v[8:15], v[16:19] neg_lo:[1,0,0] neg_hi:[1,0,0] ; encoding: [0x10,0x01,0x63,0xcc,0x00,0x11,0x42,0x3c]
+
+0x1a,0x00,0x64,0xcc,0x00,0x11,0xca,0x1b
+# GFX1250: v_wmma_bf16f32_16x16x32_bf16 v[26:29], v[0:7], v[8:15], 1.0 ; encoding: [0x1a,0x00,0x64,0xcc,0x00,0x11,0xca,0x1b]
+
+0x1a,0x00,0x64,0xcc,0x00,0x11,0xca,0x9b
+# GFX1250: v_wmma_bf16f32_16x16x32_bf16 v[26:29], v[0:7], v[8:15], 1.0 neg_lo:[0,0,1] ; encoding: [0x1a,0x00,0x64,0xcc,0x00,0x11,0xca,0x9b]
+
+0x1a,0x00,0x64,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_bf16f32_16x16x32_bf16 v[26:29], v[0:7], v[8:15], v[16:23] ; encoding: [0x1a,0x00,0x64,0xcc,0x00,0x11,0x42,0x1c]
+
+0x1a,0x20,0x64,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_bf16f32_16x16x32_bf16 v[26:29], v[0:7], v[8:15], v[16:23] matrix_a_reuse ; encoding: [0x1a,0x20,0x64,0xcc,0x00,0x11,0x42,0x1c]
+
+0x1a,0x40,0x64,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_bf16f32_16x16x32_bf16 v[26:29], v[0:7], v[8:15], v[16:23] matrix_b_reuse ; encoding: [0x1a,0x40,0x64,0xcc,0x00,0x11,0x42,0x1c]
+
+0x1a,0x04,0x64,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_bf16f32_16x16x32_bf16 v[26:29], v[0:7], v[8:15], v[16:23] neg_hi:[0,0,1] ; encoding: [0x1a,0x04,0x64,0xcc,0x00,0x11,0x42,0x1c]
+
+0x1a,0x00,0x64,0xcc,0x00,0x11,0x42,0x9c
+# GFX1250: v_wmma_bf16f32_16x16x32_bf16 v[26:29], v[0:7], v[8:15], v[16:23] neg_lo:[0,0,1] ; encoding: [0x1a,0x00,0x64,0xcc,0x00,0x11,0x42,0x9c]
+
+0x1a,0x02,0x64,0xcc,0x00,0x11,0x42,0x5c
+# GFX1250: v_wmma_bf16f32_16x16x32_bf16 v[26:29], v[0:7], v[8:15], v[16:23] neg_lo:[0,1,0] neg_hi:[0,1,0] ; encoding: [0x1a,0x02,0x64,0xcc,0x00,0x11,0x42,0x5c]
+
+0x1a,0x01,0x64,0xcc,0x00,0x11,0x42,0x3c
+# GFX1250: v_wmma_bf16f32_16x16x32_bf16 v[26:29], v[0:7], v[8:15], v[16:23] neg_lo:[1,0,0] neg_hi:[1,0,0] ; encoding: [0x1a,0x01,0x64,0xcc,0x00,0x11,0x42,0x3c]
+
+0x10,0x00,0x61,0xcc,0x00,0x11,0xca,0x1b
+# GFX1250: v_wmma_f16_16x16x32_f16 v[16:19], v[0:7], v[8:15], 1.0 ; encoding: [0x10,0x00,0x61,0xcc,0x00,0x11,0xca,0x1b]
+
+0x10,0x00,0x61,0xcc,0x00,0x11,0xca,0x9b
+# GFX1250: v_wmma_f16_16x16x32_f16 v[16:19], v[0:7], v[8:15], 1.0 neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x61,0xcc,0x00,0x11,0xca,0x9b]
+
+0x10,0x00,0x61,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f16_16x16x32_f16 v[16:19], v[0:7], v[8:15], v[16:19] ; encoding: [0x10,0x00,0x61,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x20,0x61,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f16_16x16x32_f16 v[16:19], v[0:7], v[8:15], v[16:19] matrix_a_reuse ; encoding: [0x10,0x20,0x61,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x40,0x61,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f16_16x16x32_f16 v[16:19], v[0:7], v[8:15], v[16:19] matrix_b_reuse ; encoding: [0x10,0x40,0x61,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x04,0x61,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f16_16x16x32_f16 v[16:19], v[0:7], v[8:15], v[16:19] neg_hi:[0,0,1] ; encoding: [0x10,0x04,0x61,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x00,0x61,0xcc,0x00,0x11,0x42,0x9c
+# GFX1250: v_wmma_f16_16x16x32_f16 v[16:19], v[0:7], v[8:15], v[16:19] neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x61,0xcc,0x00,0x11,0x42,0x9c]
+
+0x10,0x02,0x61,0xcc,0x00,0x11,0x42,0x5c
+# GFX1250: v_wmma_f16_16x16x32_f16 v[16:19], v[0:7], v[8:15], v[16:19] neg_lo:[0,1,0] neg_hi:[0,1,0] ; encoding: [0x10,0x02,0x61,0xcc,0x00,0x11,0x42,0x5c]
+
+0x10,0x01,0x61,0xcc,0x00,0x11,0x42,0x3c
+# GFX1250: v_wmma_f16_16x16x32_f16 v[16:19], v[0:7], v[8:15], v[16:19] neg_lo:[1,0,0] neg_hi:[1,0,0] ; encoding: [0x10,0x01,0x61,0xcc,0x00,0x11,0x42,0x3c]
+
+0x10,0x00,0x71,0xcc,0x00,0x11,0xca,0x1b
+# GFX1250: v_wmma_f16_16x16x64_bf8_bf8 v[16:19], v[0:7], v[8:15], 1.0 ; encoding: [0x10,0x00,0x71,0xcc,0x00,0x11,0xca,0x1b]
+
+0x10,0x00,0x71,0xcc,0x00,0x11,0xca,0x9b
+# GFX1250: v_wmma_f16_16x16x64_bf8_bf8 v[16:19], v[0:7], v[8:15], 1.0 neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x71,0xcc,0x00,0x11,0xca,0x9b]
+
+0x10,0x00,0x71,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f16_16x16x64_bf8_bf8 v[16:19], v[0:7], v[8:15], v[16:19] ; encoding: [0x10,0x00,0x71,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x20,0x71,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f16_16x16x64_bf8_bf8 v[16:19], v[0:7], v[8:15], v[16:19] matrix_a_reuse ; encoding: [0x10,0x20,0x71,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x40,0x71,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f16_16x16x64_bf8_bf8 v[16:19], v[0:7], v[8:15], v[16:19] matrix_b_reuse ; encoding: [0x10,0x40,0x71,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x04,0x71,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f16_16x16x64_bf8_bf8 v[16:19], v[0:7], v[8:15], v[16:19] neg_hi:[0,0,1] ; encoding: [0x10,0x04,0x71,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x00,0x71,0xcc,0x00,0x11,0x42,0x9c
+# GFX1250: v_wmma_f16_16x16x64_bf8_bf8 v[16:19], v[0:7], v[8:15], v[16:19] neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x71,0xcc,0x00,0x11,0x42,0x9c]
+
+0x10,0x00,0x70,0xcc,0x00,0x11,0xca,0x1b
+# GFX1250: v_wmma_f16_16x16x64_bf8_fp8 v[16:19], v[0:7], v[8:15], 1.0 ; encoding: [0x10,0x00,0x70,0xcc,0x00,0x11,0xca,0x1b]
+
+0x10,0x00,0x70,0xcc,0x00,0x11,0xca,0x9b
+# GFX1250: v_wmma_f16_16x16x64_bf8_fp8 v[16:19], v[0:7], v[8:15], 1.0 neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x70,0xcc,0x00,0x11,0xca,0x9b]
+
+0x10,0x00,0x70,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f16_16x16x64_bf8_fp8 v[16:19], v[0:7], v[8:15], v[16:19] ; encoding: [0x10,0x00,0x70,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x20,0x70,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f16_16x16x64_bf8_fp8 v[16:19], v[0:7], v[8:15], v[16:19] matrix_a_reuse ; encoding: [0x10,0x20,0x70,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x40,0x70,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f16_16x16x64_bf8_fp8 v[16:19], v[0:7], v[8:15], v[16:19] matrix_b_reuse ; encoding: [0x10,0x40,0x70,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x04,0x70,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f16_16x16x64_bf8_fp8 v[16:19], v[0:7], v[8:15], v[16:19] neg_hi:[0,0,1] ; encoding: [0x10,0x04,0x70,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x00,0x70,0xcc,0x00,0x11,0x42,0x9c
+# GFX1250: v_wmma_f16_16x16x64_bf8_fp8 v[16:19], v[0:7], v[8:15], v[16:19] neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x70,0xcc,0x00,0x11,0x42,0x9c]
+
+0x10,0x00,0x6f,0xcc,0x00,0x11,0xca,0x1b
+# GFX1250: v_wmma_f16_16x16x64_fp8_bf8 v[16:19], v[0:7], v[8:15], 1.0 ; encoding: [0x10,0x00,0x6f,0xcc,0x00,0x11,0xca,0x1b]
+
+0x10,0x00,0x6f,0xcc,0x00,0x11,0xca,0x9b
+# GFX1250: v_wmma_f16_16x16x64_fp8_bf8 v[16:19], v[0:7], v[8:15], 1.0 neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x6f,0xcc,0x00,0x11,0xca,0x9b]
+
+0x10,0x00,0x6f,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f16_16x16x64_fp8_bf8 v[16:19], v[0:7], v[8:15], v[16:19] ; encoding: [0x10,0x00,0x6f,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x20,0x6f,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f16_16x16x64_fp8_bf8 v[16:19], v[0:7], v[8:15], v[16:19] matrix_a_reuse ; encoding: [0x10,0x20,0x6f,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x40,0x6f,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f16_16x16x64_fp8_bf8 v[16:19], v[0:7], v[8:15], v[16:19] matrix_b_reuse ; encoding: [0x10,0x40,0x6f,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x04,0x6f,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f16_16x16x64_fp8_bf8 v[16:19], v[0:7], v[8:15], v[16:19] neg_hi:[0,0,1] ; encoding: [0x10,0x04,0x6f,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x00,0x6f,0xcc,0x00,0x11,0x42,0x9c
+# GFX1250: v_wmma_f16_16x16x64_fp8_bf8 v[16:19], v[0:7], v[8:15], v[16:19] neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x6f,0xcc,0x00,0x11,0x42,0x9c]
+
+0x10,0x00,0x6e,0xcc,0x00,0x11,0xca,0x1b
+# GFX1250: v_wmma_f16_16x16x64_fp8_fp8 v[16:19], v[0:7], v[8:15], 1.0 ; encoding: [0x10,0x00,0x6e,0xcc,0x00,0x11,0xca,0x1b]
+
+0x10,0x00,0x6e,0xcc,0x00,0x11,0xca,0x9b
+# GFX1250: v_wmma_f16_16x16x64_fp8_fp8 v[16:19], v[0:7], v[8:15], 1.0 neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x6e,0xcc,0x00,0x11,0xca,0x9b]
+
+0x10,0x00,0x6e,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f16_16x16x64_fp8_fp8 v[16:19], v[0:7], v[8:15], v[16:19] ; encoding: [0x10,0x00,0x6e,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x20,0x6e,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f16_16x16x64_fp8_fp8 v[16:19], v[0:7], v[8:15], v[16:19] matrix_a_reuse ; encoding: [0x10,0x20,0x6e,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x40,0x6e,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f16_16x16x64_fp8_fp8 v[16:19], v[0:7], v[8:15], v[16:19] matrix_b_reuse ; encoding: [0x10,0x40,0x6e,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x04,0x6e,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f16_16x16x64_fp8_fp8 v[16:19], v[0:7], v[8:15], v[16:19] neg_hi:[0,0,1] ; encoding: [0x10,0x04,0x6e,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x00,0x6e,0xcc,0x00,0x11,0x42,0x9c
+# GFX1250: v_wmma_f16_16x16x64_fp8_fp8 v[16:19], v[0:7], v[8:15], v[16:19] neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x6e,0xcc,0x00,0x11,0x42,0x9c]
+
+0x10,0x00,0x62,0xcc,0x00,0x11,0xca,0x1b
+# GFX1250: v_wmma_f32_16x16x32_bf16 v[16:23], v[0:7], v[8:15], 1.0 ; encoding: [0x10,0x00,0x62,0xcc,0x00,0x11,0xca,0x1b]
+
+0x10,0x00,0x62,0xcc,0x00,0x11,0xca,0x9b
+# GFX1250: v_wmma_f32_16x16x32_bf16 v[16:23], v[0:7], v[8:15], 1.0 neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x62,0xcc,0x00,0x11,0xca,0x9b]
+
+0x10,0x00,0x62,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f32_16x16x32_bf16 v[16:23], v[0:7], v[8:15], v[16:23] ; encoding: [0x10,0x00,0x62,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x20,0x62,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f32_16x16x32_bf16 v[16:23], v[0:7], v[8:15], v[16:23] matrix_a_reuse ; encoding: [0x10,0x20,0x62,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x40,0x62,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f32_16x16x32_bf16 v[16:23], v[0:7], v[8:15], v[16:23] matrix_b_reuse ; encoding: [0x10,0x40,0x62,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x04,0x62,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f32_16x16x32_bf16 v[16:23], v[0:7], v[8:15], v[16:23] neg_hi:[0,0,1] ; encoding: [0x10,0x04,0x62,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x00,0x62,0xcc,0x00,0x11,0x42,0x9c
+# GFX1250: v_wmma_f32_16x16x32_bf16 v[16:23], v[0:7], v[8:15], v[16:23] neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x62,0xcc,0x00,0x11,0x42,0x9c]
+
+0x10,0x02,0x62,0xcc,0x00,0x11,0x42,0x5c
+# GFX1250: v_wmma_f32_16x16x32_bf16 v[16:23], v[0:7], v[8:15], v[16:23] neg_lo:[0,1,0] neg_hi:[0,1,0] ; encoding: [0x10,0x02,0x62,0xcc,0x00,0x11,0x42,0x5c]
+
+0x10,0x01,0x62,0xcc,0x00,0x11,0x42,0x3c
+# GFX1250: v_wmma_f32_16x16x32_bf16 v[16:23], v[0:7], v[8:15], v[16:23] neg_lo:[1,0,0] neg_hi:[1,0,0] ; encoding: [0x10,0x01,0x62,0xcc,0x00,0x11,0x42,0x3c]
+
+0x10,0x00,0x60,0xcc,0x00,0x11,0xca,0x1b
+# GFX1250: v_wmma_f32_16x16x32_f16 v[16:23], v[0:7], v[8:15], 1.0 ; encoding: [0x10,0x00,0x60,0xcc,0x00,0x11,0xca,0x1b]
+
+0x10,0x00,0x60,0xcc,0x00,0x11,0xca,0x9b
+# GFX1250: v_wmma_f32_16x16x32_f16 v[16:23], v[0:7], v[8:15], 1.0 neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x60,0xcc,0x00,0x11,0xca,0x9b]
+
+0x10,0x00,0x60,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f32_16x16x32_f16 v[16:23], v[0:7], v[8:15], v[16:23] ; encoding: [0x10,0x00,0x60,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x20,0x60,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f32_16x16x32_f16 v[16:23], v[0:7], v[8:15], v[16:23] matrix_a_reuse ; encoding: [0x10,0x20,0x60,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x40,0x60,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f32_16x16x32_f16 v[16:23], v[0:7], v[8:15], v[16:23] matrix_b_reuse ; encoding: [0x10,0x40,0x60,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x04,0x60,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f32_16x16x32_f16 v[16:23], v[0:7], v[8:15], v[16:23] neg_hi:[0,0,1] ; encoding: [0x10,0x04,0x60,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x00,0x60,0xcc,0x00,0x11,0x42,0x9c
+# GFX1250: v_wmma_f32_16x16x32_f16 v[16:23], v[0:7], v[8:15], v[16:23] neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x60,0xcc,0x00,0x11,0x42,0x9c]
+
+0x10,0x02,0x60,0xcc,0x00,0x11,0x42,0x5c
+# GFX1250: v_wmma_f32_16x16x32_f16 v[16:23], v[0:7], v[8:15], v[16:23] neg_lo:[0,1,0] neg_hi:[0,1,0] ; encoding: [0x10,0x02,0x60,0xcc,0x00,0x11,0x42,0x5c]
+
+0x10,0x01,0x60,0xcc,0x00,0x11,0x42,0x3c
+# GFX1250: v_wmma_f32_16x16x32_f16 v[16:23], v[0:7], v[8:15], v[16:23] neg_lo:[1,0,0] neg_hi:[1,0,0] ; encoding: [0x10,0x01,0x60,0xcc,0x00,0x11,0x42,0x3c]
+
+0x04,0x00,0x5d,0xcc,0x00,0x05,0xca,0x1b
+# GFX1250: v_wmma_f32_16x16x4_f32 v[4:11], v[0:1], v[2:3], 1.0 ; encoding: [0x04,0x00,0x5d,0xcc,0x00,0x05,0xca,0x1b]
+
+0x04,0x00,0x5d,0xcc,0x00,0x05,0xca,0x9b
+# GFX1250: v_wmma_f32_16x16x4_f32 v[4:11], v[0:1], v[2:3], 1.0 neg_lo:[0,0,1] ; encoding: [0x04,0x00,0x5d,0xcc,0x00,0x05,0xca,0x9b]
+
+0x04,0x00,0x5d,0xcc,0x00,0x05,0x12,0x1c
+# GFX1250: v_wmma_f32_16x16x4_f32 v[4:11], v[0:1], v[2:3], v[4:11] ; encoding: [0x04,0x00,0x5d,0xcc,0x00,0x05,0x12,0x1c]
+
+0x04,0x20,0x5d,0xcc,0x00,0x05,0x12,0x1c
+# GFX1250: v_wmma_f32_16x16x4_f32 v[4:11], v[0:1], v[2:3], v[4:11] matrix_a_reuse ; encoding: [0x04,0x20,0x5d,0xcc,0x00,0x05,0x12,0x1c]
+
+0x04,0x40,0x5d,0xcc,0x00,0x05,0x12,0x1c
+# GFX1250: v_wmma_f32_16x16x4_f32 v[4:11], v[0:1], v[2:3], v[4:11] matrix_b_reuse ; encoding: [0x04,0x40,0x5d,0xcc,0x00,0x05,0x12,0x1c]
+
+0x04,0x04,0x5d,0xcc,0x00,0x05,0x12,0x1c
+# GFX1250: v_wmma_f32_16x16x4_f32 v[4:11], v[0:1], v[2:3], v[4:11] neg_hi:[0,0,1] ; encoding: [0x04,0x04,0x5d,0xcc,0x00,0x05,0x12,0x1c]
+
+0x04,0x00,0x5d,0xcc,0x00,0x05,0x12,0x9c
+# GFX1250: v_wmma_f32_16x16x4_f32 v[4:11], v[0:1], v[2:3], v[4:11] neg_lo:[0,0,1] ; encoding: [0x04,0x00,0x5d,0xcc,0x00,0x05,0x12,0x9c]
+
+0x04,0x00,0x5d,0xcc,0x00,0x05,0x12,0x5c
+# GFX1250: v_wmma_f32_16x16x4_f32 v[4:11], v[0:1], v[2:3], v[4:11] neg_lo:[0,1,0] ; encoding: [0x04,0x00,0x5d,0xcc,0x00,0x05,0x12,0x5c]
+
+0x04,0x00,0x5d,0xcc,0x00,0x05,0x12,0x3c
+# GFX1250: v_wmma_f32_16x16x4_f32 v[4:11], v[0:1], v[2:3], v[4:11] neg_lo:[1,0,0] ; encoding: [0x04,0x00,0x5d,0xcc,0x00,0x05,0x12,0x3c]
+
+0x10,0x00,0x6d,0xcc,0x00,0x11,0xca,0x1b
+# GFX1250: v_wmma_f32_16x16x64_bf8_bf8 v[16:23], v[0:7], v[8:15], 1.0 ; encoding: [0x10,0x00,0x6d,0xcc,0x00,0x11,0xca,0x1b]
+
+0x10,0x00,0x6d,0xcc,0x00,0x11,0xca,0x9b
+# GFX1250: v_wmma_f32_16x16x64_bf8_bf8 v[16:23], v[0:7], v[8:15], 1.0 neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x6d,0xcc,0x00,0x11,0xca,0x9b]
+
+0x10,0x00,0x6d,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f32_16x16x64_bf8_bf8 v[16:23], v[0:7], v[8:15], v[16:23] ; encoding: [0x10,0x00,0x6d,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x20,0x6d,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f32_16x16x64_bf8_bf8 v[16:23], v[0:7], v[8:15], v[16:23] matrix_a_reuse ; encoding: [0x10,0x20,0x6d,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x40,0x6d,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f32_16x16x64_bf8_bf8 v[16:23], v[0:7], v[8:15], v[16:23] matrix_b_reuse ; encoding: [0x10,0x40,0x6d,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x04,0x6d,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f32_16x16x64_bf8_bf8 v[16:23], v[0:7], v[8:15], v[16:23] neg_hi:[0,0,1] ; encoding: [0x10,0x04,0x6d,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x00,0x6d,0xcc,0x00,0x11,0x42,0x9c
+# GFX1250: v_wmma_f32_16x16x64_bf8_bf8 v[16:23], v[0:7], v[8:15], v[16:23] neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x6d,0xcc,0x00,0x11,0x42,0x9c]
+
+0x10,0x00,0x6c,0xcc,0x00,0x11,0xca,0x1b
+# GFX1250: v_wmma_f32_16x16x64_bf8_fp8 v[16:23], v[0:7], v[8:15], 1.0 ; encoding: [0x10,0x00,0x6c,0xcc,0x00,0x11,0xca,0x1b]
+
+0x10,0x00,0x6c,0xcc,0x00,0x11,0xca,0x9b
+# GFX1250: v_wmma_f32_16x16x64_bf8_fp8 v[16:23], v[0:7], v[8:15], 1.0 neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x6c,0xcc,0x00,0x11,0xca,0x9b]
+
+0x10,0x00,0x6c,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f32_16x16x64_bf8_fp8 v[16:23], v[0:7], v[8:15], v[16:23] ; encoding: [0x10,0x00,0x6c,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x20,0x6c,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f32_16x16x64_bf8_fp8 v[16:23], v[0:7], v[8:15], v[16:23] matrix_a_reuse ; encoding: [0x10,0x20,0x6c,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x40,0x6c,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f32_16x16x64_bf8_fp8 v[16:23], v[0:7], v[8:15], v[16:23] matrix_b_reuse ; encoding: [0x10,0x40,0x6c,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x04,0x6c,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f32_16x16x64_bf8_fp8 v[16:23], v[0:7], v[8:15], v[16:23] neg_hi:[0,0,1] ; encoding: [0x10,0x04,0x6c,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x00,0x6c,0xcc,0x00,0x11,0x42,0x9c
+# GFX1250: v_wmma_f32_16x16x64_bf8_fp8 v[16:23], v[0:7], v[8:15], v[16:23] neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x6c,0xcc,0x00,0x11,0x42,0x9c]
+
+0x10,0x00,0x6b,0xcc,0x00,0x11,0xca,0x1b
+# GFX1250: v_wmma_f32_16x16x64_fp8_bf8 v[16:23], v[0:7], v[8:15], 1.0 ; encoding: [0x10,0x00,0x6b,0xcc,0x00,0x11,0xca,0x1b]
+
+0x10,0x00,0x6b,0xcc,0x00,0x11,0xca,0x9b
+# GFX1250: v_wmma_f32_16x16x64_fp8_bf8 v[16:23], v[0:7], v[8:15], 1.0 neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x6b,0xcc,0x00,0x11,0xca,0x9b]
+
+0x10,0x00,0x6b,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f32_16x16x64_fp8_bf8 v[16:23], v[0:7], v[8:15], v[16:23] ; encoding: [0x10,0x00,0x6b,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x20,0x6b,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f32_16x16x64_fp8_bf8 v[16:23], v[0:7], v[8:15], v[16:23] matrix_a_reuse ; encoding: [0x10,0x20,0x6b,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x40,0x6b,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f32_16x16x64_fp8_bf8 v[16:23], v[0:7], v[8:15], v[16:23] matrix_b_reuse ; encoding: [0x10,0x40,0x6b,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x04,0x6b,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f32_16x16x64_fp8_bf8 v[16:23], v[0:7], v[8:15], v[16:23] neg_hi:[0,0,1] ; encoding: [0x10,0x04,0x6b,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x00,0x6b,0xcc,0x00,0x11,0x42,0x9c
+# GFX1250: v_wmma_f32_16x16x64_fp8_bf8 v[16:23], v[0:7], v[8:15], v[16:23] neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x6b,0xcc,0x00,0x11,0x42,0x9c]
+
+0x10,0x00,0x6a,0xcc,0x00,0x11,0xca,0x1b
+# GFX1250: v_wmma_f32_16x16x64_fp8_fp8 v[16:23], v[0:7], v[8:15], 1.0 ; encoding: [0x10,0x00,0x6a,0xcc,0x00,0x11,0xca,0x1b]
+
+0x10,0x00,0x6a,0xcc,0x00,0x11,0xca,0x9b
+# GFX1250: v_wmma_f32_16x16x64_fp8_fp8 v[16:23], v[0:7], v[8:15], 1.0 neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x6a,0xcc,0x00,0x11,0xca,0x9b]
+
+0x10,0x00,0x6a,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f32_16x16x64_fp8_fp8 v[16:23], v[0:7], v[8:15], v[16:23] ; encoding: [0x10,0x00,0x6a,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x20,0x6a,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f32_16x16x64_fp8_fp8 v[16:23], v[0:7], v[8:15], v[16:23] matrix_a_reuse ; encoding: [0x10,0x20,0x6a,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x40,0x6a,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f32_16x16x64_fp8_fp8 v[16:23], v[0:7], v[8:15], v[16:23] matrix_b_reuse ; encoding: [0x10,0x40,0x6a,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x04,0x6a,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f32_16x16x64_fp8_fp8 v[16:23], v[0:7], v[8:15], v[16:23] neg_hi:[0,0,1] ; encoding: [0x10,0x04,0x6a,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x00,0x6a,0xcc,0x00,0x11,0x42,0x9c
+# GFX1250: v_wmma_f32_16x16x64_fp8_fp8 v[16:23], v[0:7], v[8:15], v[16:23] neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x6a,0xcc,0x00,0x11,0x42,0x9c]
+
+0x10,0x00,0x72,0xcc,0x00,0x11,0x06,0x1a
+# GFX1250: v_wmma_i32_16x16x64_iu8 v[16:23], v[0:7], v[8:15], 1 ; encoding: [0x10,0x00,0x72,0xcc,0x00,0x11,0x06,0x1a]
+
+0x10,0x00,0x72,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_i32_16x16x64_iu8 v[16:23], v[0:7], v[8:15], v[16:23] ; encoding: [0x10,0x00,0x72,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x20,0x72,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_i32_16x16x64_iu8 v[16:23], v[0:7], v[8:15], v[16:23] matrix_a_reuse ; encoding: [0x10,0x20,0x72,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x40,0x72,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_i32_16x16x64_iu8 v[16:23], v[0:7], v[8:15], v[16:23] matrix_b_reuse ; encoding: [0x10,0x40,0x72,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x00,0x72,0xcc,0x00,0x11,0x42,0x5c
+# GFX1250: v_wmma_i32_16x16x64_iu8 v[16:23], v[0:7], v[8:15], v[16:23] neg_lo:[0,1,0] ; encoding: [0x10,0x00,0x72,0xcc,0x00,0x11,0x42,0x5c]
+
+0x10,0x00,0x72,0xcc,0x00,0x11,0x42,0x3c
+# GFX1250: v_wmma_i32_16x16x64_iu8 v[16:23], v[0:7], v[8:15], v[16:23] neg_lo:[1,0,0] ; encoding: [0x10,0x00,0x72,0xcc,0x00,0x11,0x42,0x3c]
+
+0x10,0x00,0x87,0xcc,0x00,0x11,0xca,0x1b
+# GFX1250: v_wmma_f16_16x16x128_bf8_bf8 v[16:19], v[0:15], v[8:23], 1.0 ; encoding: [0x10,0x00,0x87,0xcc,0x00,0x11,0xca,0x1b]
+
+0x10,0x00,0x87,0xcc,0x00,0x11,0xca,0x9b
+# GFX1250: v_wmma_f16_16x16x128_bf8_bf8 v[16:19], v[0:15], v[8:23], 1.0 neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x87,0xcc,0x00,0x11,0xca,0x9b]
+
+0x10,0x00,0x87,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f16_16x16x128_bf8_bf8 v[16:19], v[0:15], v[8:23], v[16:19] ; encoding: [0x10,0x00,0x87,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x20,0x87,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f16_16x16x128_bf8_bf8 v[16:19], v[0:15], v[8:23], v[16:19] matrix_a_reuse ; encoding: [0x10,0x20,0x87,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x40,0x87,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f16_16x16x128_bf8_bf8 v[16:19], v[0:15], v[8:23], v[16:19] matrix_b_reuse ; encoding: [0x10,0x40,0x87,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x04,0x87,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f16_16x16x128_bf8_bf8 v[16:19], v[0:15], v[8:23], v[16:19] neg_hi:[0,0,1] ; encoding: [0x10,0x04,0x87,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x00,0x87,0xcc,0x00,0x11,0x42,0x9c
+# GFX1250: v_wmma_f16_16x16x128_bf8_bf8 v[16:19], v[0:15], v[8:23], v[16:19] neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x87,0xcc,0x00,0x11,0x42,0x9c]
+
+0x10,0x00,0x86,0xcc,0x00,0x11,0xca,0x1b
+# GFX1250: v_wmma_f16_16x16x128_bf8_fp8 v[16:19], v[0:15], v[8:23], 1.0 ; encoding: [0x10,0x00,0x86,0xcc,0x00,0x11,0xca,0x1b]
+
+0x10,0x00,0x86,0xcc,0x00,0x11,0xca,0x9b
+# GFX1250: v_wmma_f16_16x16x128_bf8_fp8 v[16:19], v[0:15], v[8:23], 1.0 neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x86,0xcc,0x00,0x11,0xca,0x9b]
+
+0x10,0x00,0x86,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f16_16x16x128_bf8_fp8 v[16:19], v[0:15], v[8:23], v[16:19] ; encoding: [0x10,0x00,0x86,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x20,0x86,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f16_16x16x128_bf8_fp8 v[16:19], v[0:15], v[8:23], v[16:19] matrix_a_reuse ; encoding: [0x10,0x20,0x86,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x40,0x86,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f16_16x16x128_bf8_fp8 v[16:19], v[0:15], v[8:23], v[16:19] matrix_b_reuse ; encoding: [0x10,0x40,0x86,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x04,0x86,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f16_16x16x128_bf8_fp8 v[16:19], v[0:15], v[8:23], v[16:19] neg_hi:[0,0,1] ; encoding: [0x10,0x04,0x86,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x00,0x86,0xcc,0x00,0x11,0x42,0x9c
+# GFX1250: v_wmma_f16_16x16x128_bf8_fp8 v[16:19], v[0:15], v[8:23], v[16:19] neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x86,0xcc,0x00,0x11,0x42,0x9c]
+
+0x10,0x00,0x85,0xcc,0x00,0x11,0xca,0x1b
+# GFX1250: v_wmma_f16_16x16x128_fp8_bf8 v[16:19], v[0:15], v[8:23], 1.0 ; encoding: [0x10,0x00,0x85,0xcc,0x00,0x11,0xca,0x1b]
+
+0x10,0x00,0x85,0xcc,0x00,0x11,0xca,0x9b
+# GFX1250: v_wmma_f16_16x16x128_fp8_bf8 v[16:19], v[0:15], v[8:23], 1.0 neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x85,0xcc,0x00,0x11,0xca,0x9b]
+
+0x10,0x00,0x85,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f16_16x16x128_fp8_bf8 v[16:19], v[0:15], v[8:23], v[16:19] ; encoding: [0x10,0x00,0x85,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x20,0x85,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f16_16x16x128_fp8_bf8 v[16:19], v[0:15], v[8:23], v[16:19] matrix_a_reuse ; encoding: [0x10,0x20,0x85,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x40,0x85,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f16_16x16x128_fp8_bf8 v[16:19], v[0:15], v[8:23], v[16:19] matrix_b_reuse ; encoding: [0x10,0x40,0x85,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x04,0x85,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f16_16x16x128_fp8_bf8 v[16:19], v[0:15], v[8:23], v[16:19] neg_hi:[0,0,1] ; encoding: [0x10,0x04,0x85,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x00,0x85,0xcc,0x00,0x11,0x42,0x9c
+# GFX1250: v_wmma_f16_16x16x128_fp8_bf8 v[16:19], v[0:15], v[8:23], v[16:19] neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x85,0xcc,0x00,0x11,0x42,0x9c]
+
+0x10,0x00,0x84,0xcc,0x00,0x11,0xca,0x1b
+# GFX1250: v_wmma_f16_16x16x128_fp8_fp8 v[16:19], v[0:15], v[8:23], 1.0 ; encoding: [0x10,0x00,0x84,0xcc,0x00,0x11,0xca,0x1b]
+
+0x10,0x00,0x84,0xcc,0x00,0x11,0xca,0x9b
+# GFX1250: v_wmma_f16_16x16x128_fp8_fp8 v[16:19], v[0:15], v[8:23], 1.0 neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x84,0xcc,0x00,0x11,0xca,0x9b]
+
+0x10,0x00,0x84,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f16_16x16x128_fp8_fp8 v[16:19], v[0:15], v[8:23], v[16:19] ; encoding: [0x10,0x00,0x84,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x20,0x84,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f16_16x16x128_fp8_fp8 v[16:19], v[0:15], v[8:23], v[16:19] matrix_a_reuse ; encoding: [0x10,0x20,0x84,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x40,0x84,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f16_16x16x128_fp8_fp8 v[16:19], v[0:15], v[8:23], v[16:19] matrix_b_reuse ; encoding: [0x10,0x40,0x84,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x04,0x84,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f16_16x16x128_fp8_fp8 v[16:19], v[0:15], v[8:23], v[16:19] neg_hi:[0,0,1] ; encoding: [0x10,0x04,0x84,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x00,0x84,0xcc,0x00,0x11,0x42,0x9c
+# GFX1250: v_wmma_f16_16x16x128_fp8_fp8 v[16:19], v[0:15], v[8:23], v[16:19] neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x84,0xcc,0x00,0x11,0x42,0x9c]
+
+0x10,0x00,0x83,0xcc,0x00,0x11,0xca,0x1b
+# GFX1250: v_wmma_f32_16x16x128_bf8_bf8 v[16:23], v[0:15], v[8:23], 1.0 ; encoding: [0x10,0x00,0x83,0xcc,0x00,0x11,0xca,0x1b]
+
+0x10,0x00,0x83,0xcc,0x00,0x11,0xca,0x9b
+# GFX1250: v_wmma_f32_16x16x128_bf8_bf8 v[16:23], v[0:15], v[8:23], 1.0 neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x83,0xcc,0x00,0x11,0xca,0x9b]
+
+0x10,0x00,0x83,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f32_16x16x128_bf8_bf8 v[16:23], v[0:15], v[8:23], v[16:23] ; encoding: [0x10,0x00,0x83,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x20,0x83,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f32_16x16x128_bf8_bf8 v[16:23], v[0:15], v[8:23], v[16:23] matrix_a_reuse ; encoding: [0x10,0x20,0x83,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x40,0x83,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f32_16x16x128_bf8_bf8 v[16:23], v[0:15], v[8:23], v[16:23] matrix_b_reuse ; encoding: [0x10,0x40,0x83,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x04,0x83,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f32_16x16x128_bf8_bf8 v[16:23], v[0:15], v[8:23], v[16:23] neg_hi:[0,0,1] ; encoding: [0x10,0x04,0x83,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x00,0x83,0xcc,0x00,0x11,0x42,0x9c
+# GFX1250: v_wmma_f32_16x16x128_bf8_bf8 v[16:23], v[0:15], v[8:23], v[16:23] neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x83,0xcc,0x00,0x11,0x42,0x9c]
+
+0x10,0x00,0x82,0xcc,0x00,0x11,0xca,0x1b
+# GFX1250: v_wmma_f32_16x16x128_bf8_fp8 v[16:23], v[0:15], v[8:23], 1.0 ; encoding: [0x10,0x00,0x82,0xcc,0x00,0x11,0xca,0x1b]
+
+0x10,0x00,0x82,0xcc,0x00,0x11,0xca,0x9b
+# GFX1250: v_wmma_f32_16x16x128_bf8_fp8 v[16:23], v[0:15], v[8:23], 1.0 neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x82,0xcc,0x00,0x11,0xca,0x9b]
+
+0x10,0x00,0x82,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f32_16x16x128_bf8_fp8 v[16:23], v[0:15], v[8:23], v[16:23] ; encoding: [0x10,0x00,0x82,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x20,0x82,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f32_16x16x128_bf8_fp8 v[16:23], v[0:15], v[8:23], v[16:23] matrix_a_reuse ; encoding: [0x10,0x20,0x82,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x40,0x82,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f32_16x16x128_bf8_fp8 v[16:23], v[0:15], v[8:23], v[16:23] matrix_b_reuse ; encoding: [0x10,0x40,0x82,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x04,0x82,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f32_16x16x128_bf8_fp8 v[16:23], v[0:15], v[8:23], v[16:23] neg_hi:[0,0,1] ; encoding: [0x10,0x04,0x82,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x00,0x82,0xcc,0x00,0x11,0x42,0x9c
+# GFX1250: v_wmma_f32_16x16x128_bf8_fp8 v[16:23], v[0:15], v[8:23], v[16:23] neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x82,0xcc,0x00,0x11,0x42,0x9c]
+
+0x10,0x00,0x81,0xcc,0x00,0x11,0xca,0x1b
+# GFX1250: v_wmma_f32_16x16x128_fp8_bf8 v[16:23], v[0:15], v[8:23], 1.0 ; encoding: [0x10,0x00,0x81,0xcc,0x00,0x11,0xca,0x1b]
+
+0x10,0x00,0x81,0xcc,0x00,0x11,0xca,0x9b
+# GFX1250: v_wmma_f32_16x16x128_fp8_bf8 v[16:23], v[0:15], v[8:23], 1.0 neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x81,0xcc,0x00,0x11,0xca,0x9b]
+
+0x10,0x00,0x81,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f32_16x16x128_fp8_bf8 v[16:23], v[0:15], v[8:23], v[16:23] ; encoding: [0x10,0x00,0x81,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x20,0x81,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f32_16x16x128_fp8_bf8 v[16:23], v[0:15], v[8:23], v[16:23] matrix_a_reuse ; encoding: [0x10,0x20,0x81,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x40,0x81,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f32_16x16x128_fp8_bf8 v[16:23], v[0:15], v[8:23], v[16:23] matrix_b_reuse ; encoding: [0x10,0x40,0x81,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x04,0x81,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f32_16x16x128_fp8_bf8 v[16:23], v[0:15], v[8:23], v[16:23] neg_hi:[0,0,1] ; encoding: [0x10,0x04,0x81,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x00,0x81,0xcc,0x00,0x11,0x42,0x9c
+# GFX1250: v_wmma_f32_16x16x128_fp8_bf8 v[16:23], v[0:15], v[8:23], v[16:23] neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x81,0xcc,0x00,0x11,0x42,0x9c]
+
+0x10,0x00,0x80,0xcc,0x00,0x11,0xca,0x1b
+# GFX1250: v_wmma_f32_16x16x128_fp8_fp8 v[16:23], v[0:15], v[8:23], 1.0 ; encoding: [0x10,0x00,0x80,0xcc,0x00,0x11,0xca,0x1b]
+
+0x10,0x00,0x80,0xcc,0x00,0x11,0xca,0x9b
+# GFX1250: v_wmma_f32_16x16x128_fp8_fp8 v[16:23], v[0:15], v[8:23], 1.0 neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x80,0xcc,0x00,0x11,0xca,0x9b]
+
+0x10,0x00,0x80,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f32_16x16x128_fp8_fp8 v[16:23], v[0:15], v[8:23], v[16:23] ; encoding: [0x10,0x00,0x80,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x20,0x80,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f32_16x16x128_fp8_fp8 v[16:23], v[0:15], v[8:23], v[16:23] matrix_a_reuse ; encoding: [0x10,0x20,0x80,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x40,0x80,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f32_16x16x128_fp8_fp8 v[16:23], v[0:15], v[8:23], v[16:23] matrix_b_reuse ; encoding: [0x10,0x40,0x80,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x04,0x80,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f32_16x16x128_fp8_fp8 v[16:23], v[0:15], v[8:23], v[16:23] neg_hi:[0,0,1] ; encoding: [0x10,0x04,0x80,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x00,0x80,0xcc,0x00,0x11,0x42,0x9c
+# GFX1250: v_wmma_f32_16x16x128_fp8_fp8 v[16:23], v[0:15], v[8:23], v[16:23] neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x80,0xcc,0x00,0x11,0x42,0x9c]
+
+0x04,0x40,0x88,0xcc,0x00,0x05,0xca,0x1b
+# GFX1250: v_wmma_f32_32x16x128_f4 v[4:19], v[0:15], v[2:9], 1.0 ; encoding: [0x04,0x40,0x88,0xcc,0x00,0x05,0xca,0x1b]
+
+0x04,0x40,0x88,0xcc,0x00,0x05,0xca,0x9b
+# GFX1250: v_wmma_f32_32x16x128_f4 v[4:19], v[0:15], v[2:9], 1.0 neg_lo:[0,0,1] ; encoding: [0x04,0x40,0x88,0xcc,0x00,0x05,0xca,0x9b]
+
+0x04,0x40,0x88,0xcc,0x00,0x05,0x12,0x1c
+# GFX1250: v_wmma_f32_32x16x128_f4 v[4:19], v[0:15], v[2:9], v[4:19] ; encoding: [0x04,0x40,0x88,0xcc,0x00,0x05,0x12,0x1c]
+
+0x04,0x44,0x88,0xcc,0x00,0x05,0x12,0x1c
+# GFX1250: v_wmma_f32_32x16x128_f4 v[4:19], v[0:15], v[2:9], v[4:19] neg_hi:[0,0,1] ; encoding: [0x04,0x44,0x88,0xcc,0x00,0x05,0x12,0x1c]
+
+0x04,0x40,0x88,0xcc,0x00,0x05,0x12,0x9c
+# GFX1250: v_wmma_f32_32x16x128_f4 v[4:19], v[0:15], v[2:9], v[4:19] neg_lo:[0,0,1] ; encoding: [0x04,0x40,0x88,0xcc,0x00,0x05,0x12,0x9c]
+
+0x04,0x44,0x88,0xcc,0x00,0x05,0x12,0x9c
+# GFX1250: v_wmma_f32_32x16x128_f4 v[4:19], v[0:15], v[2:9], v[4:19] neg_lo:[0,0,1] neg_hi:[0,0,1] ; encoding: [0x04,0x44,0x88,0xcc,0x00,0x05,0x12,0x9c]


        


More information about the llvm-commits mailing list