[llvm] b80b025 - AMDGPU: Implement MC layer support for gfx1250 wmma instructions. (#148570)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Jul 15 00:49:01 PDT 2025
Author: Changpeng Fang
Date: 2025-07-15T00:48:57-07:00
New Revision: b80b02536b20d2170820e56ef776f43640ee2495
URL: https://github.com/llvm/llvm-project/commit/b80b02536b20d2170820e56ef776f43640ee2495
DIFF: https://github.com/llvm/llvm-project/commit/b80b02536b20d2170820e56ef776f43640ee2495.diff
LOG: AMDGPU: Implement MC layer support for gfx1250 wmma instructions. (#148570)
Regular wmma/swmmac plus matrix reuse only.
---------
Co-authored-by: Stanislav Mekhanoshin <Stanislav.Mekhanoshin at amd.com>
Co-authored-by: Shilei Tian <Shilei.Tian at amd.com>
Added:
llvm/test/MC/AMDGPU/gfx1250_asm_wmma_w32.s
llvm/test/MC/AMDGPU/gfx1250_asm_wmma_w32_err.s
llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_wmma_w32.txt
Modified:
llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp
llvm/lib/Target/AMDGPU/SIInstrInfo.td
llvm/lib/Target/AMDGPU/SIRegisterInfo.td
llvm/lib/Target/AMDGPU/VOP3PInstructions.td
llvm/lib/Target/AMDGPU/VOPInstructions.td
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 6439230b8769f..8ac9b707c1349 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -157,6 +157,7 @@ class AMDGPUOperand : public MCParsedAsmOperand {
ImmTyNegHi,
ImmTyIndexKey8bit,
ImmTyIndexKey16bit,
+ ImmTyIndexKey32bit,
ImmTyDPP8,
ImmTyDppCtrl,
ImmTyDppRowMask,
@@ -174,8 +175,10 @@ class AMDGPUOperand : public MCParsedAsmOperand {
ImmTyWaitEXP,
ImmTyWaitVAVDst,
ImmTyWaitVMVSrc,
- ImmTyByteSel,
ImmTyBitOp3,
+ ImmTyMatrixAReuse,
+ ImmTyMatrixBReuse,
+ ImmTyByteSel,
};
// Immediate operand kind.
@@ -419,6 +422,9 @@ class AMDGPUOperand : public MCParsedAsmOperand {
bool isCPol() const { return isImmTy(ImmTyCPol); }
bool isIndexKey8bit() const { return isImmTy(ImmTyIndexKey8bit); }
bool isIndexKey16bit() const { return isImmTy(ImmTyIndexKey16bit); }
+ bool isIndexKey32bit() const { return isImmTy(ImmTyIndexKey32bit); }
+ bool isMatrixAReuse() const { return isImmTy(ImmTyMatrixAReuse); }
+ bool isMatrixBReuse() const { return isImmTy(ImmTyMatrixBReuse); }
bool isTFE() const { return isImmTy(ImmTyTFE); }
bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
bool isDppFI() const { return isImmTy(ImmTyDppFI); }
@@ -747,6 +753,10 @@ class AMDGPUOperand : public MCParsedAsmOperand {
return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
}
+ bool isVISrc_512_f64() const {
+ return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f64);
+ }
+
bool isVISrc_128B16() const {
return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
}
@@ -1116,6 +1126,7 @@ class AMDGPUOperand : public MCParsedAsmOperand {
case ImmTyCPol: OS << "CPol"; break;
case ImmTyIndexKey8bit: OS << "index_key"; break;
case ImmTyIndexKey16bit: OS << "index_key"; break;
+ case ImmTyIndexKey32bit: OS << "index_key"; break;
case ImmTyTFE: OS << "TFE"; break;
case ImmTyD16: OS << "D16"; break;
case ImmTyFORMAT: OS << "FORMAT"; break;
@@ -1162,8 +1173,10 @@ class AMDGPUOperand : public MCParsedAsmOperand {
case ImmTyWaitEXP: OS << "WaitEXP"; break;
case ImmTyWaitVAVDst: OS << "WaitVAVDst"; break;
case ImmTyWaitVMVSrc: OS << "WaitVMVSrc"; break;
- case ImmTyByteSel: OS << "ByteSel" ; break;
case ImmTyBitOp3: OS << "BitOp3"; break;
+ case ImmTyMatrixAReuse: OS << "ImmTyMatrixAReuse"; break;
+ case ImmTyMatrixBReuse: OS << "ImmTyMatrixBReuse"; break;
+ case ImmTyByteSel: OS << "ByteSel" ; break;
}
// clang-format on
}
@@ -1700,6 +1713,7 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
AMDGPUOperand::ImmTy ImmTy);
ParseStatus parseIndexKey8bit(OperandVector &Operands);
ParseStatus parseIndexKey16bit(OperandVector &Operands);
+ ParseStatus parseIndexKey32bit(OperandVector &Operands);
ParseStatus parseDfmtNfmt(int64_t &Format);
ParseStatus parseUfmt(int64_t &Format);
@@ -7153,7 +7167,9 @@ ParseStatus AMDGPUAsmParser::tryParseIndexKey(OperandVector &Operands,
if (!Res.isSuccess())
return Res;
- if (ImmTy == AMDGPUOperand::ImmTyIndexKey16bit && (ImmVal < 0 || ImmVal > 1))
+ if ((ImmTy == AMDGPUOperand::ImmTyIndexKey16bit ||
+ ImmTy == AMDGPUOperand::ImmTyIndexKey32bit) &&
+ (ImmVal < 0 || ImmVal > 1))
return Error(Loc, Twine("out of range ", StringRef(Pref)));
if (ImmTy == AMDGPUOperand::ImmTyIndexKey8bit && (ImmVal < 0 || ImmVal > 3))
@@ -7171,6 +7187,10 @@ ParseStatus AMDGPUAsmParser::parseIndexKey16bit(OperandVector &Operands) {
return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey16bit);
}
+ParseStatus AMDGPUAsmParser::parseIndexKey32bit(OperandVector &Operands) {
+ return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey32bit);
+}
+
// dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
// values to live in a joint format operand in the MCInst encoding.
ParseStatus AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
@@ -9272,6 +9292,14 @@ void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
DefaultVal);
}
+ if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::matrix_a_reuse))
+ addOptionalImmOperand(Inst, Operands, OptIdx,
+ AMDGPUOperand::ImmTyMatrixAReuse, 0);
+
+ if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::matrix_b_reuse))
+ addOptionalImmOperand(Inst, Operands, OptIdx,
+ AMDGPUOperand::ImmTyMatrixBReuse, 0);
+
int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
if (NegLoIdx != -1)
addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
@@ -9378,6 +9406,10 @@ void AMDGPUAsmParser::cvtSWMMAC(MCInst &Inst, const OperandVector &Operands) {
addOptionalImmOperand(Inst, Operands, OptIdx,
AMDGPUOperand::ImmTyIndexKey16bit);
+ if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_32bit))
+ addOptionalImmOperand(Inst, Operands, OptIdx,
+ AMDGPUOperand::ImmTyIndexKey32bit);
+
if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyClamp);
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
index cb6319ed627ca..ec9248b972ec4 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
@@ -1332,6 +1332,16 @@ void AMDGPUInstPrinter::printIndexKey16bit(const MCInst *MI, unsigned OpNo,
O << " index_key:" << Imm;
}
+void AMDGPUInstPrinter::printIndexKey32bit(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ auto Imm = MI->getOperand(OpNo).getImm() & 0x7;
+ if (Imm == 0)
+ return;
+
+ O << " index_key:" << Imm;
+}
+
void AMDGPUInstPrinter::printInterpSlot(const MCInst *MI, unsigned OpNum,
const MCSubtargetInfo &STI,
raw_ostream &O) {
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
index fb803b1f81342..e3299a618e882 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
@@ -132,6 +132,8 @@ class AMDGPUInstPrinter : public MCInstPrinter {
const MCSubtargetInfo &STI, raw_ostream &O);
void printIndexKey16bit(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O);
+ void printIndexKey32bit(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
void printInterpSlot(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O);
void printInterpAttr(const MCInst *MI, unsigned OpNo,
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp
index 4bb3942936f04..b87bb94c70dcb 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp
@@ -381,9 +381,11 @@ void AMDGPUMCCodeEmitter::encodeInstruction(const MCInst &MI,
// Set unused op_sel_hi bits to 1 for VOP3P and MAI instructions.
// Note that accvgpr_read/write are MAI, have src0, but do not use op_sel.
- if ((Desc.TSFlags & SIInstrFlags::VOP3P) ||
- Opcode == AMDGPU::V_ACCVGPR_READ_B32_vi ||
- Opcode == AMDGPU::V_ACCVGPR_WRITE_B32_vi) {
+ if (((Desc.TSFlags & SIInstrFlags::VOP3P) ||
+ Opcode == AMDGPU::V_ACCVGPR_READ_B32_vi ||
+ Opcode == AMDGPU::V_ACCVGPR_WRITE_B32_vi) &&
+ // Matrix B reuse operand reuses op_sel_hi.
+ !AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::matrix_b_reuse)) {
Encoding |= getImplicitOpSelHiEncoding(Opcode);
}
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index e5ce0a9d0e971..391cc968e9d8b 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -1247,6 +1247,7 @@ def op_sel_hi0 : ArrayOperand0<"op_sel_hi", "OpSelHi">;
def neg_lo0 : ArrayOperand0<"neg_lo", "NegLo">;
def neg_hi0 : ArrayOperand0<"neg_hi", "NegHi">;
+def IndexKey32bit : CustomOperand<i32, 1>;
def IndexKey16bit : CustomOperand<i32, 1>;
def IndexKey8bit : CustomOperand<i32, 1>;
@@ -1302,6 +1303,9 @@ let PrintMethod = "printBitOp3" in
def BitOp3 : NamedIntOperand<"bitop3">;
def bitop3_0 : DefaultOperand<BitOp3, 0>;
+def MatrixAReuse : NamedBitOperand<"matrix_a_reuse">;
+def MatrixBReuse : NamedBitOperand<"matrix_b_reuse">;
+
class KImmFPOperand<ValueType vt> : ImmOperand<vt> {
let OperandNamespace = "AMDGPU";
let OperandType = "OPERAND_KIMM"#vt.Size;
@@ -1641,6 +1645,7 @@ def WMMAModsF16NegAbs : ComplexPattern<untyped, 2, "SelectWMMAModsF16NegAbs">;
def WMMAVISrc : ComplexPattern<untyped, 1, "SelectWMMAVISrc">;
def SWMMACIndex8 : ComplexPattern<untyped, 2, "SelectSWMMACIndex8">;
def SWMMACIndex16 : ComplexPattern<untyped, 2, "SelectSWMMACIndex16">;
+def SWMMACIndex32 : ComplexPattern<untyped, 2, "SelectSWMMACIndex32">;
def VOP3OpSel : ComplexPattern<untyped, 2, "SelectVOP3OpSel">;
@@ -2654,6 +2659,7 @@ class VOPProfile <list<ValueType> _ArgVT, bit _EnableClamp = 0> {
isModifierType<Src2VT>.ret,
HasOMod);
field bit HasNeg = HasModifiers;
+ field bit HasMatrixReuse = 0;
field bit HasSrc0Mods = HasModifiers;
field bit HasSrc1Mods = !if(HasModifiers, !or(HasSrc1FloatMods, HasSrc1IntMods), 0);
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
index d24c301fc1e51..c194e5c255d4d 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -1294,6 +1294,7 @@ def VISrc_256_f32 : SrcRegOrImm9 <VReg_256, "OPERAND_REG_INLINE_C_FP32">;
def VISrc_256_f64 : SrcRegOrImm9 <VReg_256, "OPERAND_REG_INLINE_C_FP64">;
def VISrc_512_b32 : SrcRegOrImm9 <VReg_512, "OPERAND_REG_INLINE_C_INT32">;
def VISrc_512_f32 : SrcRegOrImm9 <VReg_512, "OPERAND_REG_INLINE_C_FP32">;
+def VISrc_512_f64 : SrcRegOrImm9 <VReg_512, "OPERAND_REG_INLINE_C_FP64">;
def VISrc_1024_b32 : SrcRegOrImm9 <VReg_1024, "OPERAND_REG_INLINE_C_INT32">;
def VISrc_1024_f32 : SrcRegOrImm9 <VReg_1024, "OPERAND_REG_INLINE_C_FP32">;
diff --git a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
index 31997f803dfc6..41878a71ad3e6 100644
--- a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
@@ -1223,6 +1223,8 @@ class WMMAOpcodeMapping<Instruction TwoAddr, Instruction ThreeAddr> {
Instruction Opcode2Addr = TwoAddr;
Instruction Opcode3Addr = ThreeAddr;
Predicate WaveSizePredicate;
+ Predicate SubtargetPredicate;
+ field bit is_wmma_xdl;
}
def WMMAOpcode : GenericEnum {
@@ -1315,28 +1317,39 @@ let WaveSizePredicate = isWave64 in {
}
class VOP3PWMMA_Profile<list<ValueType> ArgTy, bit _IsSWMMAC, int _IndexType,
- bit _IsIU, bit _IsFP8BF8>
+ bit _IsIU, bit _IsFP8BF8XF32, bit _Has_ImodOp = 0,
+ bit _HasMatrixReuse = 0, bit _IsF4 = 0>
: VOP3P_Profile<VOPProfile<ArgTy>> {
bit IsIU = _IsIU;
- bit IsFP8BF8 = _IsFP8BF8;
- bit IsF16BF16 = !not(!or(IsIU, IsFP8BF8));
+ bit NoABMods = !or(_IsFP8BF8XF32, _IsF4); // No IMOD support for A and B
+ bit IsXF32 = !and(_IsFP8BF8XF32, !eq(ArgTy[1], v8f32));
int IndexType = _IndexType;
+ let HasMatrixReuse = _HasMatrixReuse;
+ bit HasIModOp = _Has_ImodOp;
+ let HasClamp = !and(IsIU, !not(HasIModOp));
let IsPacked = 1;
let IsWMMA = !not(_IsSWMMAC);
let IsSWMMAC = _IsSWMMAC;
- bit IsAB_F16 = !and(IsF16BF16, ArgTy[1].isFP);
- bit IsAB_BF16 = !and(IsF16BF16, isIntType<ArgTy[1]>.ret);
+ bit IsAB_F64 = !or(!eq(ArgTy[1], v2f64), !eq(ArgTy[1], v4f64));
+ bit IsAB_F32 = !eq(ArgTy[1], v2f32);
+ bit IsAB_F16 = !or(!eq(ArgTy[1], v16f16), !eq(ArgTy[1], v8f16), !eq(ArgTy[1], v4f16));
+ bit IsAB_BF16 = !or(!eq(ArgTy[1], v16i16), !eq(ArgTy[1], v8i16), !eq(ArgTy[1], v4i16),
+ !eq(ArgTy[1], v16bf16), !eq(ArgTy[1], v8bf16), !eq(ArgTy[1], v4bf16));
+ bit IsF16BF16 = !or(IsAB_F16, IsAB_BF16);
+
+ bit IsC_F64 = !eq(ArgTy[3], v8f64);
bit IsC_F32 = !or(!eq(ArgTy[3], v8f32), !eq(ArgTy[3], v4f32));
- bit IsC_BF16 = !or(!eq(ArgTy[3], v8i16), !eq(ArgTy[3], v4i16));
+ bit IsC_BF16 = !or(!eq(ArgTy[3], v8i16), !eq(ArgTy[3], v4i16),
+ !eq(ArgTy[3], v8bf16), !eq(ArgTy[3], v4bf16));
bit IsC_F16 = !or(!eq(ArgTy[3], v8f16), !eq(ArgTy[3], v4f16));
- bit NegLo01 = !or(IsF16BF16, IsIU);
- bit NegLo2 = !and(!or(IsF16BF16, IsFP8BF8), IsWMMA);
- bit NegHi01 = IsF16BF16;
- bit NegHi2 = !and(!or(IsF16BF16, IsFP8BF8), IsWMMA);
+ bit NegLo01 = !not(NoABMods);
+ bit NegLo2 = !and(!not(IsIU), !not(IsXF32), IsWMMA);
+ bit NegHi01 = IsF16BF16; // Only F16BF16 can have neg_hi[0:1]
+ bit NegHi2 = !and(!not(IsIU), !not(IsXF32), IsWMMA);
bit NegLoAny = !or(NegLo01, NegLo2);
bit NegHiAny = !or(NegHi01, NegHi2);
@@ -1345,19 +1358,29 @@ class VOP3PWMMA_Profile<list<ValueType> ArgTy, bit _IsSWMMAC, int _IndexType,
let Src1RC64 = !cast<RegisterOperand>("VRegSrc_"#ArgTy[2].Size);
let Src2RC64 = !if(IsSWMMAC, DstRC,
!cast<RegisterOperand>("VISrc_"#ArgTy[3].Size#
- !cond(IsC_F32: "_f32",
- IsC_F16: "_f16",
+ !cond(IsC_F64: "_f64",
+ IsC_F32: "_f32",
+ IsC_F16: "_f16",
IsC_BF16: "_bf16",
1: "_b32")));
// For f16 and bf16 matrices A and B, each element can be modified by
- // fneg(neg_lo,neg_hi = 1). For iu4 and iu8 matrices A and B neg_lo is
+ // fneg(neg_lo,neg_hi = 1). For f32 and f64, neg_lo[0:1] is allowed, but
+ // neg_hi[0:1] is ignored. For iu4 and iu8 matrices A and B neg_lo is
// overloaded to mean unsigned/signed: neg_lo = 0 (u4 and u8) unsigned(zext)
- // neg_lo = 1 (i4 and i8) signed(sext). For f16, bf16 and f32 matrix C each
- // element can be modified by fneg(neg_lo = 1) or fabs(neg_hi = 1).
+ // neg_lo = 1 (i4 and i8) signed(sext). For f16, bf16, f32 and f64 matrix C
+ // each element can be modified by fneg(neg_lo = 1) or fabs(neg_hi = 1).
// Opcode | src0/src1 - matrix A/B | src2 - matrix C or Index
// ---------------------------------------------------------------------------
+ // wmma f64_f64 | neg_lo for neg A/B | neg_lo = 1 neg C(f64)
+ // | neg_hi ignored | neg_hi = 1 abs C(f64)
+ // ---------------------------------------------------------------------------
+ // wmma f32_f32 | neg_lo for neg A/B | neg_lo = 1 neg C(f32)
+ // | neg_hi ignored | neg_hi = 1 abs C(f32)
+ // ---------------------------------------------------------------------------
+ // wmma f32_xf32 | not allowed for xf32 | not allowed
+ // ---------------------------------------------------------------------------
// wmma f32_f16 | both neg_lo,neg_hi = 1 | neg_lo = 1 neg C(f32)
// wmma f32_bf16 | neg A/B (f16 or bf16) | neg_hi = 1 abs C(f32)
// ---------------------------------------------------------------------------
@@ -1368,7 +1391,10 @@ class VOP3PWMMA_Profile<list<ValueType> ArgTy, bit _IsSWMMAC, int _IndexType,
// | neg_lo = 1 i4/i8(sext) | i32 matrices
// ---------------------------------------------------------------------------
// wmma f32_fp8/bf8 | not allowed for | neg_lo = 1 neg C(f32)
- // (4 instructions) | f8 and bf8 matrices | neg_hi = 1 abs C(f32)
+ // | fp8 and bf8 matrices | neg_hi = 1 abs C(f32)
+ // ---------------------------------------------------------------------------
+ // wmma f16_fp8/bf8 | not allowed for | neg_lo = 1 neg C(f16)
+ // | fp8 and bf8 matrices | neg_hi = 1 abs C(f16)
// ---------------------------------------------------------------------------
// swmmac f32_f16 | both neg_lo,neg_hi = 1 | not allowed for sparse matrix
// swmmac f32_bf16 | neg A/B (f16 or bf16) | A Index - matrix C is in dst
@@ -1380,78 +1406,108 @@ class VOP3PWMMA_Profile<list<ValueType> ArgTy, bit _IsSWMMAC, int _IndexType,
// | neg_lo = 1 i4/i8(sext) | A Index - matrix C is in dst
// ---------------------------------------------------------------------------
// swmmac f32_fp8/bf8 | not allowed for | not allowed for sparse matrix
- // (4 instructions) | f8 and bf8 matrices | A Index - matrix C is in dst
+ // swmmac f16_fp8/bf8 | f8 and bf8 matrices | A Index - matrix C is in dst
+ // ---------------------------------------------------------------------------
// pseudo
- // fp8bf8 wmmas don't use src (0 and 1) modifiers, iu use neg_lo, f16 and bf16
+ // fp8bf8 and xf32 wmmas don't use src (0 and 1) modifiers, iu use neg_lo, f16 and bf16
// use neg_lo and neg_hi. iu wmmas (C is i32) don't use src 2 modifiers,
// remaining wmmas(f16, bf16 and f8bf8) use neg_lo and neg_hi for C (C is f32
// f16 or bf16). swmmac use index_key and don't use src 2 modifiers.
-
- dag Src0Mods = !if(IsFP8BF8, (ins), (ins PackedF16InputMods:$src0_modifiers));
- dag Src1Mods = !if(IsFP8BF8, (ins), (ins PackedF16InputMods:$src1_modifiers));
- dag Src2Mods = !if(IsIU, (ins), (ins PackedF16InputMods:$src2_modifiers));
+ dag Src0Mods = !if(NoABMods, (ins), (ins PackedF16InputMods:$src0_modifiers));
+ dag Src1Mods = !if(NoABMods, (ins), (ins PackedF16InputMods:$src1_modifiers));
+ dag Src2Mods = !if(!or(IsIU, IsXF32, IsSWMMAC), (ins), (ins PackedF16InputMods:$src2_modifiers));
dag IndexKey = !cond(!eq(IndexType, 0) : (ins),
!eq(IndexType, 8) : (ins IndexKey8bit:$index_key_8bit),
- !eq(IndexType, 16): (ins IndexKey16bit:$index_key_16bit));
- dag Clamp = !if(IsIU, (ins Clamp0:$clamp), (ins));
+ !eq(IndexType, 16): (ins IndexKey16bit:$index_key_16bit),
+ !eq(IndexType, 32): (ins IndexKey32bit:$index_key_32bit));
+
+ dag MatrixReuse = !if(HasMatrixReuse, (ins MatrixAReuse:$matrix_a_reuse, MatrixBReuse:$matrix_b_reuse), (ins));
+ dag Clamp = !if(HasClamp, (ins Clamp0:$clamp), (ins));
dag Neg = !cond(!and(NegLoAny, NegHiAny) : (ins neg_lo0:$neg_lo, neg_hi0:$neg_hi),
!and(NegLoAny, !not(NegHiAny)) : (ins neg_lo0:$neg_lo),
!and(!not(NegLoAny), !not(NegHiAny)) : (ins));
let InsVOP3P = !con(Src0Mods, (ins Src0RC64:$src0), Src1Mods, (ins Src1RC64:$src1),
!cond(IsWMMA : !con(Src2Mods, (ins Src2RC64:$src2)),
- IsSWMMAC : !con((ins DstRC:$srcTiedDef), (ins VRegSrc_32:$src2), IndexKey)),
- Clamp, Neg);
+ IsSWMMAC : !con((ins DstRC:$srcTiedDef),
+ !if(!eq(IndexType, 32),
+ (ins VRegSrc_64:$src2),
+ (ins VRegSrc_32:$src2)),
+ IndexKey)),
+ MatrixReuse, Clamp, Neg);
// asm
string IndexKeyAsm = !cond(!eq(IndexType, 0) : "",
!eq(IndexType, 8) : "$index_key_8bit",
- !eq(IndexType, 16) : "$index_key_16bit");
- string ClampAsm = !if(IsIU, "$clamp", "");
+ !eq(IndexType, 16) : "$index_key_16bit",
+ !eq(IndexType, 32) : "$index_key_32bit");
+ string MatrixReuseAsm = !if(HasMatrixReuse, "$matrix_a_reuse$matrix_b_reuse", "");
+ string ClampAsm = !if(HasClamp, "$clamp", "");
string NegAsm = !cond(!and(NegLoAny, NegHiAny) : "$neg_lo$neg_hi",
!and(NegLoAny, !not(NegHiAny)) : "$neg_lo",
!and(!not(NegLoAny), !not(NegHiAny)) : "");
- let AsmVOP3P = "$vdst, $src0, $src1, $src2"#IndexKeyAsm#NegAsm#ClampAsm;
+ let AsmVOP3P = "$vdst, $src0, $src1, $src2"#IndexKeyAsm#MatrixReuseAsm#NegAsm#ClampAsm;
// isel patterns
+ bit IsAB_BF16_IMod0 = !and(IsAB_BF16, !not(HasIModOp));
+ bit IsAB_F16_IMod0 = !and(IsAB_F16, !not(HasIModOp));
+ bit IsAB_F32F64_IMod1 = !and(!or(IsAB_F64, IsAB_F32), HasIModOp);
+ bit IsAB_F16BF16_IMod1 = !and(!or(IsAB_F16, IsAB_BF16), HasIModOp);
+ dag Src0InPat = !cond(IsAB_F32F64_IMod1 : (ins Src0VT:$src0),
+ IsAB_F16BF16_IMod1 : (ins Src0VT:$src0),
+ IsAB_F16_IMod0 : (ins (Src0VT (WMMAModsF16Neg Src0VT:$src0, i32:$src0_modifiers))),
+ IsAB_BF16_IMod0 : (ins Src0VT:$src0),
+ IsIU : (ins (VOP3PModsNeg i32:$src0_modifiers), Src0VT:$src0),
+ NoABMods : (ins Src0VT:$src0));
+ dag Src0OutPat = !cond(IsAB_F32F64_IMod1 : (ins Src0VT:$src0),
+ IsAB_F16BF16_IMod1 : (ins Src0VT:$src0),
+ IsAB_F16_IMod0 : (ins i32:$src0_modifiers, Src0VT:$src0),
+ IsAB_BF16_IMod0 : (ins (i32 8), Src0VT:$src0),
+ IsIU : (ins i32:$src0_modifiers, Src0VT:$src0),
+ NoABMods : (ins Src0VT:$src0));
+ dag Src1InPat = !cond(IsAB_F32F64_IMod1 : (ins Src1VT:$src1),
+ IsAB_F16BF16_IMod1 : (ins Src1VT:$src1),
+ IsAB_F16_IMod0 : (ins (Src1VT (WMMAModsF16Neg Src1VT:$src1, i32:$src1_modifiers))),
+ IsAB_BF16_IMod0 : (ins Src1VT:$src1),
+ IsIU : (ins (VOP3PModsNeg i32:$src1_modifiers), Src1VT:$src1),
+ NoABMods : (ins Src1VT:$src1));
+ dag Src1OutPat = !cond(IsAB_F32F64_IMod1 : (ins Src1VT:$src1),
+ IsAB_F16BF16_IMod1 : (ins Src1VT:$src1),
+ IsAB_F16_IMod0 : (ins i32:$src1_modifiers, Src1VT:$src1),
+ IsAB_BF16_IMod0 : (ins (i32 8), Src1VT:$src1),
+ IsIU : (ins i32:$src1_modifiers, Src1VT:$src1),
+ NoABMods : (ins Src1VT:$src1));
+ bit IsC_IMod1 = !and(HasIModOp, IsWMMA, !not(IsIU), !not(IsXF32));
+ bit IsC_F32_IMod0 = !and(IsC_F32, !not(HasIModOp));
+ bit IsC_F16_IMod0 = !and(IsC_F16, !not(HasIModOp));
+ bit IsC_BF16_IMod0 = !and(IsC_BF16, !not(HasIModOp));
+ bit IsIUXF32 = !or(IsIU, IsXF32);
+ dag Src2InPatWmma = !cond(IsC_IMod1 : (ins Src2VT:$src2),
+ IsC_F32_IMod0 : (ins (Src2VT (WMMAModsF32NegAbs Src2VT:$src2, i32:$src2_modifiers))),
+ IsC_F16_IMod0 : (ins (Src2VT (WMMAModsF16NegAbs Src2VT:$src2, i32:$src2_modifiers))),
+ IsC_BF16_IMod0 : (ins Src2VT:$src2),
+ IsIUXF32 : (ins Src2VT:$src2),
+ IsSWMMAC : (ins));
+ dag Src2OutPatWmma = !cond(IsC_IMod1 : (ins Src2VT:$src2),
+ IsC_F32_IMod0 : (ins i32:$src2_modifiers, Src2VT:$src2),
+ IsC_F16_IMod0 : (ins i32:$src2_modifiers, Src2VT:$src2),
+ IsC_BF16_IMod0 : (ins (i32 8), Src2VT:$src2),
+ IsIUXF32 : (ins Src2VT:$src2),
+ IsSWMMAC : (ins));
+ dag ClampPat = !if(HasClamp, (ins i1:$clamp), (ins));
- dag Src0InPat = !cond(IsAB_F16 : (ins (Src0VT (WMMAModsF16Neg Src0VT:$src0, i32:$src0_modifiers))),
- IsAB_BF16 : (ins Src0VT:$src0),
- IsIU : (ins (VOP3PModsNeg i32:$src0_modifiers), Src0VT:$src0),
- IsFP8BF8 : (ins Src0VT:$src0));
- dag Src0OutPat = !cond(IsAB_F16 : (ins i32:$src0_modifiers, Src0VT:$src0),
- IsAB_BF16 : (ins (i32 8), Src0VT:$src0),
- IsIU : (ins i32:$src0_modifiers, Src0VT:$src0),
- IsFP8BF8 : (ins Src0VT:$src0));
- dag Src1InPat = !cond(IsAB_F16 : (ins (Src1VT (WMMAModsF16Neg Src1VT:$src1, i32:$src1_modifiers))),
- IsAB_BF16 : (ins Src1VT:$src1),
- IsIU : (ins (VOP3PModsNeg i32:$src1_modifiers), Src1VT:$src1),
- IsFP8BF8 : (ins Src1VT:$src1));
- dag Src1OutPat = !cond(IsAB_F16 : (ins i32:$src1_modifiers, Src1VT:$src1),
- IsAB_BF16 : (ins (i32 8), Src1VT:$src1),
- IsIU : (ins i32:$src1_modifiers, Src1VT:$src1),
- IsFP8BF8 : (ins Src1VT:$src1));
- dag Src2InPatWmma = !cond(IsC_F32 : (ins (Src2VT (WMMAModsF32NegAbs Src2VT:$src2, i32:$src2_modifiers))),
- IsC_F16 : (ins (Src2VT (WMMAModsF16NegAbs Src2VT:$src2, i32:$src2_modifiers))),
- IsC_BF16 : (ins Src2VT:$src2),
- IsIU : (ins Src2VT:$src2),
- IsSWMMAC : (ins));
- dag Src2OutPatWmma = !cond(IsC_F32 : (ins i32:$src2_modifiers, Src2VT:$src2),
- IsC_F16 : (ins i32:$src2_modifiers, Src2VT:$src2),
- IsC_BF16 : (ins (i32 8), Src2VT:$src2),
- IsIU : (ins Src2VT:$src2),
- IsSWMMAC : (ins));
- dag ClampPat = !if(IsIU, (ins i1:$clamp), (ins));
dag IndexInPat = !cond(!eq(IndexType, 0) : (ins i32:$src2),
!eq(IndexType, 8) : (ins (i32 (SWMMACIndex8 i32:$src2, i32:$index_key_8bit))),
- !eq(IndexType, 16): (ins (i32 (SWMMACIndex16 i32:$src2, i32:$index_key_16bit))));
+ !eq(IndexType, 16): (ins (i32 (SWMMACIndex16 i32:$src2, i32:$index_key_16bit))),
+ !eq(IndexType, 32): (ins (i64 (SWMMACIndex32 i64:$src2, i32:$index_key_32bit))));
dag IndexOutPat = !cond(!eq(IndexType, 0) : (ins i32:$src2),
!eq(IndexType, 8) : (ins i32:$src2, i32:$index_key_8bit),
- !eq(IndexType, 16): (ins i32:$src2, i32:$index_key_16bit));
+ !eq(IndexType, 16): (ins i32:$src2, i32:$index_key_16bit),
+ !eq(IndexType, 32): (ins i64:$src2, i32:$index_key_32bit));
dag Src2InlineInPat = (ins (Src2VT (WMMAVISrc Src2VT:$src2)));
dag Src2InlineOutPat = !con(!if(IsIU, (ins), (ins (i32 8))), (ins Src2VT:$src2));
@@ -1468,15 +1524,33 @@ class VOP3PWMMA_Profile<list<ValueType> ArgTy, bit _IsSWMMAC, int _IndexType,
dag WmmaInlineOutPat = !con(Src0OutPat, Src1OutPat, Src2InlineOutPat, ClampPat);
}
-multiclass WMMAInstGFX12<string Instr, VOP3PWMMA_Profile WMMAProfile, string PseudoInstrSuffix> {
+def WMMAInstInfoTable : GenericTable {
+ let FilterClass = "WMMAInstInfo";
+ let CppTypeName = "WMMAInstInfo";
+ let Fields = ["Opcode", "is_wmma_xdl"];
+
+ let PrimaryKey = ["Opcode"];
+ let PrimaryKeyName = "getWMMAInstInfoHelper";
+}
+
+class WMMAInstInfo {
+ Instruction Opcode = !cast<Instruction>(NAME);
+ bit is_wmma_xdl = 0;
+}
+
+multiclass WMMAInstGFX12<string Instr, VOP3PWMMA_Profile WMMAProfile, string PseudoInstrSuffix, bit DiffVdstSrc2 = 0> {
+
+ defvar WMMAConstraints2Addr = !if(DiffVdstSrc2, "@earlyclobber $vdst", "@earlyclobber $vdst,$vdst = $src2");
+ defvar WMMAConstraints3Addr = "@earlyclobber $vdst";
+
let Mnemonic = Instr, mayRaiseFPException = 0, ReadsModeReg = 0 in {
- let Constraints = "@earlyclobber $vdst,$vdst = $src2", isConvertibleToThreeAddress = 1 in
- def _twoaddr : VOP3P_Pseudo<Instr, WMMAProfile>{
+ let Constraints = WMMAConstraints2Addr, isConvertibleToThreeAddress = 1 in
+ def _twoaddr : VOP3P_Pseudo<Instr, WMMAProfile>, WMMAInstInfo {
let PseudoInstr = Instr#PseudoInstrSuffix;
}
- let Constraints = "@earlyclobber $vdst", SchedRW = [Write32Bit, Write32Bit] in
- def _threeaddr : VOP3P_Pseudo<Instr, WMMAProfile>{
+ let Constraints = WMMAConstraints3Addr, SchedRW = [Write32Bit, Write32Bit] in
+ def _threeaddr : VOP3P_Pseudo<Instr, WMMAProfile>, WMMAInstInfo {
let PseudoInstr = Instr#PseudoInstrSuffix;
}
@@ -1486,7 +1560,7 @@ multiclass WMMAInstGFX12<string Instr, VOP3PWMMA_Profile WMMAProfile, string Pse
}
multiclass SWMMACInstGFX12<string Instr, VOP3PWMMA_Profile WMMAProfile, string PseudoInstrSuffix> {
- def _twoaddr : VOP3P_Pseudo<Instr, WMMAProfile>{
+ def _twoaddr : VOP3P_Pseudo<Instr, WMMAProfile>, WMMAInstInfo {
let Mnemonic = Instr;
let PseudoInstr = Instr#PseudoInstrSuffix;
let mayRaiseFPException = 0;
@@ -1556,6 +1630,76 @@ def F32_FP8BF8_SWMMAC_w64 : VOP3PWMMA_Profile<[v4f32, i32, v2i32, v4f32], 1,
// *** IU4X32_SWMMAC_w64 lanes 0-31 will have 8xi4 remaining lanes are ignored
// for matrix A, index is i16; Matrix B uses all lanes
+def F64_F64X4_WMMA_w32 : VOP3PWMMA_Profile<[v8f64, v2f64, v2f64, v8f64], 0, 0, 0, 0, 1>;
+def F32_F32_WMMA_w32 : VOP3PWMMA_Profile<[v8f32, v2f32, v2f32, v8f32], 0, 0, 0, 0, 1, 1>;
+def F32_BF16X32_WMMA_w32 : VOP3PWMMA_Profile<[v8f32, v16bf16, v16bf16, v8f32], 0, 0, 0, 0, 0, 1>;
+def F32_F16X32_WMMA_w32 : VOP3PWMMA_Profile<[v8f32, v16f16, v16f16, v8f32], 0, 0, 0, 0, 1, 1>;
+def F16_F16X32_WMMA_w32 : VOP3PWMMA_Profile<[v8f16, v16f16, v16f16, v8f16], 0, 0, 0, 0, 1, 1>;
+def BF16_BF16X32_WMMA_w32 : VOP3PWMMA_Profile<[v8bf16, v16bf16, v16bf16, v8bf16], 0, 0, 0, 0, 1, 1>;
+def BF16F32_BF16_WMMA_w32 : VOP3PWMMA_Profile<[v8bf16, v16bf16, v16bf16, v8f32], 0, 0, 0, 0, 1, 1>;
+def F32_FP8BF8X64_WMMA_w32 : VOP3PWMMA_Profile<[v8f32, v8i32, v8i32, v8f32], 0, 0, 0, 1, 1, 1>;
+def F32_FP8BF8X128_WMMA_w32 : VOP3PWMMA_Profile<[v8f32, v16i32, v16i32, v8f32], 0, 0, 0, 1, 1, 1>;
+def F16_FP8BF8X64_WMMA_w32 : VOP3PWMMA_Profile<[v8f16, v8i32, v8i32, v8f16], 0, 0, 0, 1, 1, 1>;
+def F16_FP8BF8X128_WMMA_w32 : VOP3PWMMA_Profile<[v8f16, v16i32, v16i32, v8f16], 0, 0, 0, 1, 1, 1>;
+def F32_32X16X128_F4_WMMA_w32 : VOP3PWMMA_Profile<[v16f32, v16i32, v8i32, v16f32], 0, 0, 0, 0, 1, 0, 1>;
+def I32_IU8X64_WMMA_w32 : VOP3PWMMA_Profile<[v8i32, v8i32, v8i32, v8i32], 0, 0, 1, 0, 1, 1>;
+def F32_F16X64_SWMMAC_w32 : VOP3PWMMA_Profile<[v8f32, v16f16, v32f16, v8f32], 1, 16, 0, 0, 1, 1>;
+def F32_BF16X64_SWMMAC_w32 : VOP3PWMMA_Profile<[v8f32, v16bf16, v32bf16, v8f32], 1, 16, 0, 0, 1, 1>;
+def F16_F16X64_SWMMAC_w32 : VOP3PWMMA_Profile<[v8f16, v16f16, v32f16, v8f16], 1, 16, 0, 0, 1, 1>;
+def BF16_BF16X64_SWMMAC_w32 : VOP3PWMMA_Profile<[v8bf16, v16bf16, v32bf16, v8bf16], 1, 16, 0, 0, 1, 1>;
+def F32_FP8BF8X128_SWMMAC_w32 : VOP3PWMMA_Profile<[v8f32, v8i32, v16i32, v8f32], 1, 32, 0, 1, 1, 1>;
+def F16_FP8BF8X128_SWMMAC_w32 : VOP3PWMMA_Profile<[v8f16, v8i32, v16i32, v8f16], 1, 32, 0, 1, 1, 1>;
+def I32_IU8X128_SWMMAC_w32 : VOP3PWMMA_Profile<[v8i32, v8i32, v16i32, v8i32], 1, 32, 1, 0, 1, 1>;
+
+let WaveSizePredicate = isWave32 in {
+let SubtargetPredicate = isGFX125xOnly in {
+defm V_WMMA_F32_16X16X4_F32_w32 : WMMAInstGFX12<"v_wmma_f32_16x16x4_f32", F32_F32_WMMA_w32, "_w32">;
+
+let is_wmma_xdl = 1 in {
+defm V_WMMA_F32_16X16X32_BF16_w32 : WMMAInstGFX12<"v_wmma_f32_16x16x32_bf16", F32_BF16X32_WMMA_w32, "_w32">;
+defm V_WMMA_BF16_16X16X32_BF16_w32 : WMMAInstGFX12<"v_wmma_bf16_16x16x32_bf16", BF16_BF16X32_WMMA_w32, "_w32">;
+defm V_WMMA_BF16F32_16X16X32_BF16_w32 : WMMAInstGFX12<"v_wmma_bf16f32_16x16x32_bf16", BF16F32_BF16_WMMA_w32, "_w32", 1>;
+defm V_WMMA_F32_16X16X64_FP8_FP8_w32 : WMMAInstGFX12<"v_wmma_f32_16x16x64_fp8_fp8", F32_FP8BF8X64_WMMA_w32, "_w32">;
+defm V_WMMA_F32_16X16X64_FP8_BF8_w32 : WMMAInstGFX12<"v_wmma_f32_16x16x64_fp8_bf8", F32_FP8BF8X64_WMMA_w32, "_w32">;
+defm V_WMMA_F32_16X16X64_BF8_FP8_w32 : WMMAInstGFX12<"v_wmma_f32_16x16x64_bf8_fp8", F32_FP8BF8X64_WMMA_w32, "_w32">;
+defm V_WMMA_F32_16X16X64_BF8_BF8_w32 : WMMAInstGFX12<"v_wmma_f32_16x16x64_bf8_bf8", F32_FP8BF8X64_WMMA_w32, "_w32">;
+defm V_WMMA_F16_16X16X64_FP8_FP8_w32 : WMMAInstGFX12<"v_wmma_f16_16x16x64_fp8_fp8", F16_FP8BF8X64_WMMA_w32, "_w32">;
+defm V_WMMA_F16_16X16X64_FP8_BF8_w32 : WMMAInstGFX12<"v_wmma_f16_16x16x64_fp8_bf8", F16_FP8BF8X64_WMMA_w32, "_w32">;
+defm V_WMMA_F16_16X16X64_BF8_FP8_w32 : WMMAInstGFX12<"v_wmma_f16_16x16x64_bf8_fp8", F16_FP8BF8X64_WMMA_w32, "_w32">;
+defm V_WMMA_F16_16X16X64_BF8_BF8_w32 : WMMAInstGFX12<"v_wmma_f16_16x16x64_bf8_bf8", F16_FP8BF8X64_WMMA_w32, "_w32">;
+defm V_WMMA_I32_16X16X64_IU8_w32 : WMMAInstGFX12<"v_wmma_i32_16x16x64_iu8", I32_IU8X64_WMMA_w32, "_w32">;
+defm V_WMMA_F32_16X16X32_F16_w32 : WMMAInstGFX12<"v_wmma_f32_16x16x32_f16", F32_F16X32_WMMA_w32, "_w32">;
+defm V_WMMA_F16_16X16X32_F16_w32 : WMMAInstGFX12<"v_wmma_f16_16x16x32_f16", F16_F16X32_WMMA_w32, "_w32">;
+defm V_WMMA_F16_16X16X128_FP8_FP8_w32 : WMMAInstGFX12<"v_wmma_f16_16x16x128_fp8_fp8", F16_FP8BF8X128_WMMA_w32, "_w32">;
+defm V_WMMA_F16_16X16X128_FP8_BF8_w32 : WMMAInstGFX12<"v_wmma_f16_16x16x128_fp8_bf8", F16_FP8BF8X128_WMMA_w32, "_w32">;
+defm V_WMMA_F16_16X16X128_BF8_FP8_w32 : WMMAInstGFX12<"v_wmma_f16_16x16x128_bf8_fp8", F16_FP8BF8X128_WMMA_w32, "_w32">;
+defm V_WMMA_F16_16X16X128_BF8_BF8_w32 : WMMAInstGFX12<"v_wmma_f16_16x16x128_bf8_bf8", F16_FP8BF8X128_WMMA_w32, "_w32">;
+defm V_WMMA_F32_16X16X128_FP8_FP8_w32 : WMMAInstGFX12<"v_wmma_f32_16x16x128_fp8_fp8", F32_FP8BF8X128_WMMA_w32, "_w32">;
+defm V_WMMA_F32_16X16X128_FP8_BF8_w32 : WMMAInstGFX12<"v_wmma_f32_16x16x128_fp8_bf8", F32_FP8BF8X128_WMMA_w32, "_w32">;
+defm V_WMMA_F32_16X16X128_BF8_FP8_w32 : WMMAInstGFX12<"v_wmma_f32_16x16x128_bf8_fp8", F32_FP8BF8X128_WMMA_w32, "_w32">;
+defm V_WMMA_F32_16X16X128_BF8_BF8_w32 : WMMAInstGFX12<"v_wmma_f32_16x16x128_bf8_bf8", F32_FP8BF8X128_WMMA_w32, "_w32">;
+defm V_WMMA_F32_32X16X128_F4_w32 : WMMAInstGFX12<"v_wmma_f32_32x16x128_f4", F32_32X16X128_F4_WMMA_w32, "_w32">;
+
+defm V_SWMMAC_F32_16X16X64_BF16_w32 : SWMMACInstGFX12<"v_swmmac_f32_16x16x64_bf16", F32_BF16X64_SWMMAC_w32, "_w32">;
+defm V_SWMMAC_BF16_16X16X64_BF16_w32 : SWMMACInstGFX12<"v_swmmac_bf16_16x16x64_bf16", BF16_BF16X64_SWMMAC_w32, "_w32">;
+defm V_SWMMAC_BF16F32_16X16X64_BF16_w32 : SWMMACInstGFX12<"v_swmmac_bf16f32_16x16x64_bf16", F32_BF16X64_SWMMAC_w32, "_w32">;
+defm V_SWMMAC_F32_16X16X128_FP8_FP8_w32 : SWMMACInstGFX12<"v_swmmac_f32_16x16x128_fp8_fp8", F32_FP8BF8X128_SWMMAC_w32, "_w32">;
+defm V_SWMMAC_F32_16X16X128_FP8_BF8_w32 : SWMMACInstGFX12<"v_swmmac_f32_16x16x128_fp8_bf8", F32_FP8BF8X128_SWMMAC_w32, "_w32">;
+defm V_SWMMAC_F32_16X16X128_BF8_FP8_w32 : SWMMACInstGFX12<"v_swmmac_f32_16x16x128_bf8_fp8", F32_FP8BF8X128_SWMMAC_w32, "_w32">;
+defm V_SWMMAC_F32_16X16X128_BF8_BF8_w32 : SWMMACInstGFX12<"v_swmmac_f32_16x16x128_bf8_bf8", F32_FP8BF8X128_SWMMAC_w32, "_w32">;
+defm V_SWMMAC_F16_16X16X128_FP8_FP8_w32 : SWMMACInstGFX12<"v_swmmac_f16_16x16x128_fp8_fp8", F16_FP8BF8X128_SWMMAC_w32, "_w32">;
+defm V_SWMMAC_F16_16X16X128_FP8_BF8_w32 : SWMMACInstGFX12<"v_swmmac_f16_16x16x128_fp8_bf8", F16_FP8BF8X128_SWMMAC_w32, "_w32">;
+defm V_SWMMAC_F16_16X16X128_BF8_FP8_w32 : SWMMACInstGFX12<"v_swmmac_f16_16x16x128_bf8_fp8", F16_FP8BF8X128_SWMMAC_w32, "_w32">;
+defm V_SWMMAC_F16_16X16X128_BF8_BF8_w32 : SWMMACInstGFX12<"v_swmmac_f16_16x16x128_bf8_bf8", F16_FP8BF8X128_SWMMAC_w32, "_w32">;
+defm V_SWMMAC_I32_16X16X128_IU8_w32 : SWMMACInstGFX12<"v_swmmac_i32_16x16x128_iu8", I32_IU8X128_SWMMAC_w32, "_w32">;
+defm V_SWMMAC_F32_16X16X64_F16_w32 : SWMMACInstGFX12<"v_swmmac_f32_16x16x64_f16", F32_F16X64_SWMMAC_w32, "_w32">;
+defm V_SWMMAC_F16_16X16X64_F16_w32 : SWMMACInstGFX12<"v_swmmac_f16_16x16x64_f16", F16_F16X64_SWMMAC_w32, "_w32">;
+
+} // End is_wmma_xdl = 1.
+
+} // End SubtargetPredicate = isGFX125xOnly
+} // End WaveSizePredicate = isWave32
+
let WaveSizePredicate = isWave32 in {
defm V_WMMA_F32_16X16X16_F16_w32 : WMMAInstGFX12<"v_wmma_f32_16x16x16_f16", F32_F16_WMMA_w32, "_w32">;
defm V_WMMA_F32_16X16X16_BF16_w32 : WMMAInstGFX12<"v_wmma_f32_16x16x16_bf16", F32_BF16_WMMA_w32, "_w32">;
@@ -1726,13 +1870,14 @@ class VOP3PeWmma<bits<8> op, VOPProfile P, VOP3PWMMA_Profile WMMAP>
// opsel
let Inst{11} = !cond(!eq(WMMAP.IndexType, 0) : 0,
!eq(WMMAP.IndexType, 8) : index_key_8bit{0},
- !eq(WMMAP.IndexType, 16) : index_key_16bit{0});
+ !eq(WMMAP.IndexType, 16) : index_key_16bit{0},
+ !eq(WMMAP.IndexType, 32) : index_key_32bit{0});
let Inst{12} = !if(!eq(WMMAP.IndexType, 8), index_key_8bit{1}, 0);
- let Inst{13} = 0;
+ let Inst{13} = !if(WMMAP.HasMatrixReuse, matrix_a_reuse, 0);
// opsel_hi
let Inst{59} = 1;
let Inst{60} = 1;
- let Inst{14} = 1;
+ let Inst{14} = !if(WMMAP.HasMatrixReuse, matrix_b_reuse, 1);
// neg_lo
let Inst{61} = !if(WMMAP.NegLo01, src0_modifiers{0}, 0);
let Inst{62} = !if(WMMAP.NegLo01, src1_modifiers{0}, 0);
@@ -1742,7 +1887,7 @@ class VOP3PeWmma<bits<8> op, VOPProfile P, VOP3PWMMA_Profile WMMAP>
let Inst{9} = !if(WMMAP.NegHi01, src1_modifiers{1}, 0);
let Inst{10} = !if(WMMAP.NegHi2, src2_modifiers{1}, 0);
// clamp
- let Inst{15} = !if(WMMAP.IsIU, clamp{0}, 0);
+ let Inst{15} = !if(WMMAP.HasClamp, clamp{0}, 0);
}
multiclass VOP3P_WMMA_Real_Base<GFXGen Gen, bits<8> op, VOP3PWMMA_Profile WMMAP,
@@ -1765,6 +1910,12 @@ multiclass VOP3P_Real_WMMA_gfx12w64 <bits<8> op, VOP3PWMMA_Profile WMMAP> {
}
}
+multiclass VOP3P_Real_WMMA_gfx1250 <bits<8> op, VOP3PWMMA_Profile WMMAP> {
+ let WaveSizePredicate = isWave32, DecoderNamespace = "GFX12" in {
+ defm _twoaddr : VOP3P_WMMA_Real_Base <GFX1250Gen, op, WMMAP>;
+ }
+}
+
defm V_WMMA_F32_16X16X16_F16_w32 : VOP3P_Real_WMMA_gfx12 <0x040, F32_F16_WMMA_w32>;
defm V_WMMA_F32_16X16X16_BF16_w32 : VOP3P_Real_WMMA_gfx12 <0x041, F32_BF16_WMMA_w32>;
defm V_WMMA_F16_16X16X16_F16_w32 : VOP3P_Real_WMMA_gfx12 <0x042, F16_F16_WMMA_w32>;
@@ -1814,6 +1965,46 @@ defm V_SWMMAC_F32_16X16X32_FP8_BF8_w64 : VOP3P_Real_WMMA_gfx12w64 <0x058, F32_FP
defm V_SWMMAC_F32_16X16X32_BF8_FP8_w64 : VOP3P_Real_WMMA_gfx12w64 <0x059, F32_FP8BF8_SWMMAC_w64>;
defm V_SWMMAC_F32_16X16X32_BF8_BF8_w64 : VOP3P_Real_WMMA_gfx12w64 <0x05a, F32_FP8BF8_SWMMAC_w64>;
+defm V_WMMA_F32_16X16X4_F32_w32 : VOP3P_Real_WMMA_gfx1250 <0x05d, F32_F32_WMMA_w32>;
+defm V_WMMA_F32_16X16X32_BF16_w32 : VOP3P_Real_WMMA_gfx1250 <0x062, F32_BF16X32_WMMA_w32>;
+defm V_WMMA_F32_16X16X32_F16_w32 : VOP3P_Real_WMMA_gfx1250 <0x060, F32_F16X32_WMMA_w32>;
+defm V_WMMA_F16_16X16X32_F16_w32 : VOP3P_Real_WMMA_gfx1250 <0x061, F16_F16X32_WMMA_w32>;
+defm V_WMMA_BF16_16X16X32_BF16_w32 : VOP3P_Real_WMMA_gfx1250 <0x063, BF16_BF16X32_WMMA_w32>;
+defm V_WMMA_BF16F32_16X16X32_BF16_w32 : VOP3P_Real_WMMA_gfx1250 <0x064, BF16F32_BF16_WMMA_w32>;
+defm V_WMMA_F32_16X16X64_FP8_FP8_w32 : VOP3P_Real_WMMA_gfx1250 <0x06a, F32_FP8BF8X64_WMMA_w32>;
+defm V_WMMA_F32_16X16X64_FP8_BF8_w32 : VOP3P_Real_WMMA_gfx1250 <0x06b, F32_FP8BF8X64_WMMA_w32>;
+defm V_WMMA_F32_16X16X64_BF8_FP8_w32 : VOP3P_Real_WMMA_gfx1250 <0x06c, F32_FP8BF8X64_WMMA_w32>;
+defm V_WMMA_F32_16X16X64_BF8_BF8_w32 : VOP3P_Real_WMMA_gfx1250 <0x06d, F32_FP8BF8X64_WMMA_w32>;
+defm V_WMMA_F16_16X16X64_FP8_FP8_w32 : VOP3P_Real_WMMA_gfx1250 <0x06e, F16_FP8BF8X64_WMMA_w32>;
+defm V_WMMA_F16_16X16X64_FP8_BF8_w32 : VOP3P_Real_WMMA_gfx1250 <0x06f, F16_FP8BF8X64_WMMA_w32>;
+defm V_WMMA_F16_16X16X64_BF8_FP8_w32 : VOP3P_Real_WMMA_gfx1250 <0x070, F16_FP8BF8X64_WMMA_w32>;
+defm V_WMMA_F16_16X16X64_BF8_BF8_w32 : VOP3P_Real_WMMA_gfx1250 <0x071, F16_FP8BF8X64_WMMA_w32>;
+defm V_WMMA_I32_16X16X64_IU8_w32 : VOP3P_Real_WMMA_gfx1250 <0x072, I32_IU8X64_WMMA_w32>;
+defm V_WMMA_F32_16X16X128_FP8_FP8_w32 : VOP3P_Real_WMMA_gfx1250 <0x080, F32_FP8BF8X128_WMMA_w32>;
+defm V_WMMA_F32_16X16X128_FP8_BF8_w32 : VOP3P_Real_WMMA_gfx1250 <0x081, F32_FP8BF8X128_WMMA_w32>;
+defm V_WMMA_F32_16X16X128_BF8_FP8_w32 : VOP3P_Real_WMMA_gfx1250 <0x082, F32_FP8BF8X128_WMMA_w32>;
+defm V_WMMA_F32_16X16X128_BF8_BF8_w32 : VOP3P_Real_WMMA_gfx1250 <0x083, F32_FP8BF8X128_WMMA_w32>;
+defm V_WMMA_F16_16X16X128_FP8_FP8_w32 : VOP3P_Real_WMMA_gfx1250 <0x084, F16_FP8BF8X128_WMMA_w32>;
+defm V_WMMA_F16_16X16X128_FP8_BF8_w32 : VOP3P_Real_WMMA_gfx1250 <0x085, F16_FP8BF8X128_WMMA_w32>;
+defm V_WMMA_F16_16X16X128_BF8_FP8_w32 : VOP3P_Real_WMMA_gfx1250 <0x086, F16_FP8BF8X128_WMMA_w32>;
+defm V_WMMA_F16_16X16X128_BF8_BF8_w32 : VOP3P_Real_WMMA_gfx1250 <0x087, F16_FP8BF8X128_WMMA_w32>;
+defm V_WMMA_F32_32X16X128_F4_w32 : VOP3P_Real_WMMA_gfx1250 <0x088, F32_32X16X128_F4_WMMA_w32>;
+
+defm V_SWMMAC_F32_16X16X64_F16_w32 : VOP3P_Real_WMMA_gfx1250 <0x065, F32_F16X64_SWMMAC_w32>;
+defm V_SWMMAC_F32_16X16X64_BF16_w32 : VOP3P_Real_WMMA_gfx1250 <0x066, F32_BF16X64_SWMMAC_w32>;
+defm V_SWMMAC_F16_16X16X64_F16_w32 : VOP3P_Real_WMMA_gfx1250 <0x067, F16_F16X64_SWMMAC_w32>;
+defm V_SWMMAC_BF16_16X16X64_BF16_w32 : VOP3P_Real_WMMA_gfx1250 <0x068, BF16_BF16X64_SWMMAC_w32>;
+defm V_SWMMAC_BF16F32_16X16X64_BF16_w32 : VOP3P_Real_WMMA_gfx1250 <0x069, F32_BF16X64_SWMMAC_w32>;
+defm V_SWMMAC_F32_16X16X128_FP8_FP8_w32 : VOP3P_Real_WMMA_gfx1250 <0x073, F32_FP8BF8X128_SWMMAC_w32>;
+defm V_SWMMAC_F32_16X16X128_FP8_BF8_w32 : VOP3P_Real_WMMA_gfx1250 <0x074, F32_FP8BF8X128_SWMMAC_w32>;
+defm V_SWMMAC_F32_16X16X128_BF8_FP8_w32 : VOP3P_Real_WMMA_gfx1250 <0x075, F32_FP8BF8X128_SWMMAC_w32>;
+defm V_SWMMAC_F32_16X16X128_BF8_BF8_w32 : VOP3P_Real_WMMA_gfx1250 <0x076, F32_FP8BF8X128_SWMMAC_w32>;
+defm V_SWMMAC_F16_16X16X128_FP8_FP8_w32 : VOP3P_Real_WMMA_gfx1250 <0x077, F16_FP8BF8X128_SWMMAC_w32>;
+defm V_SWMMAC_F16_16X16X128_FP8_BF8_w32 : VOP3P_Real_WMMA_gfx1250 <0x078, F16_FP8BF8X128_SWMMAC_w32>;
+defm V_SWMMAC_F16_16X16X128_BF8_FP8_w32 : VOP3P_Real_WMMA_gfx1250 <0x079, F16_FP8BF8X128_SWMMAC_w32>;
+defm V_SWMMAC_F16_16X16X128_BF8_BF8_w32 : VOP3P_Real_WMMA_gfx1250 <0x07a, F16_FP8BF8X128_SWMMAC_w32>;
+defm V_SWMMAC_I32_16X16X128_IU8_w32 : VOP3P_Real_WMMA_gfx1250 <0x07b, I32_IU8X128_SWMMAC_w32>;
+
multiclass VOP3P_Real_with_name<GFXGen Gen, bits<8> op,
string backing_ps_name = NAME,
string asmName = !cast<VOP3P_Pseudo>(NAME).Mnemonic> {
diff --git a/llvm/lib/Target/AMDGPU/VOPInstructions.td b/llvm/lib/Target/AMDGPU/VOPInstructions.td
index df215d23f7f40..2b91ea7386be4 100644
--- a/llvm/lib/Target/AMDGPU/VOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOPInstructions.td
@@ -432,7 +432,7 @@ class VOP3be <VOPProfile P> : Enc64 {
let Inst{63} = !if(P.HasSrc2Mods, src2_modifiers{0}, 0);
}
-class VOP3Pe <VOPProfile P> : Enc64 {
+class VOP3Pe_Base {
bits<8> vdst;
bits<4> src0_modifiers;
bits<9> src0;
@@ -443,7 +443,12 @@ class VOP3Pe <VOPProfile P> : Enc64 {
bits<1> clamp;
bits<2> index_key_8bit;
bits<1> index_key_16bit;
+ bits<1> index_key_32bit;
+ bits<1> matrix_a_reuse;
+ bits<1> matrix_b_reuse;
+}
+class VOP3Pe <VOPProfile P> : Enc64, VOP3Pe_Base {
let Inst{7-0} = !if(P.HasDst, vdst, 0);
let Inst{8} = !if(P.HasSrc0Mods, src0_modifiers{1}, 0); // neg_hi src0
let Inst{9} = !if(P.HasSrc1Mods, src1_modifiers{1}, 0); // neg_hi src1
@@ -451,9 +456,13 @@ class VOP3Pe <VOPProfile P> : Enc64 {
let Inst{11} = !if(!and(P.HasSrc0, P.HasOpSel), src0_modifiers{2}, 0); // op_sel(0)
let Inst{12} = !if(!and(P.HasSrc1, P.HasOpSel), src1_modifiers{2}, 0); // op_sel(1)
- let Inst{13} = !if(!and(P.HasSrc2, P.HasOpSel), src2_modifiers{2}, 0); // op_sel(2)
+ let Inst{13} = !if(!and(P.HasSrc2, P.HasOpSel), src2_modifiers{2},
+ !if(P.HasMatrixReuse, matrix_a_reuse, 0)); // op_sel(2)
- let Inst{14} = !if(!and(P.HasSrc2, P.HasOpSel), src2_modifiers{3}, !if(P.IsDOT, 1, ?)); // op_sel_hi(2)
+ let Inst{14} = !cond(!and(P.HasSrc2, P.HasOpSel) : src2_modifiers{3},
+ P.IsDOT : 1,
+ P.HasMatrixReuse : matrix_b_reuse,
+ 1: ?); // op_sel_hi(2)
let Inst{15} = !if(P.HasClamp, clamp{0}, 0);
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_wmma_w32.s b/llvm/test/MC/AMDGPU/gfx1250_asm_wmma_w32.s
new file mode 100644
index 0000000000000..e81b6a1d13391
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_wmma_w32.s
@@ -0,0 +1,1234 @@
+// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5
+// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -show-encoding %s | FileCheck --check-prefix=GFX1250 %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=-wavefrontsize32,+wavefrontsize64 -show-encoding %s 2>&1 | FileCheck --check-prefix=WAVESIZE-ERR --implicit-check-not=error: --strict-whitespace %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX12-ERR --implicit-check-not=error: --strict-whitespace %s
+
+v_wmma_f32_16x16x4_f32 v[4:11], v[0:1], v[2:3], v[4:11]
+// GFX1250: v_wmma_f32_16x16x4_f32 v[4:11], v[0:1], v[2:3], v[4:11] ; encoding: [0x04,0x00,0x5d,0xcc,0x00,0x05,0x12,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x4_f32 v[4:11], v[0:1], v[2:3], 1.0
+// GFX1250: v_wmma_f32_16x16x4_f32 v[4:11], v[0:1], v[2:3], 1.0 ; encoding: [0x04,0x00,0x5d,0xcc,0x00,0x05,0xca,0x1b]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x4_f32 v[4:11], v[0:1], v[2:3], 1.0 neg_lo:[0,0,1]
+// GFX1250: v_wmma_f32_16x16x4_f32 v[4:11], v[0:1], v[2:3], 1.0 neg_lo:[0,0,1] ; encoding: [0x04,0x00,0x5d,0xcc,0x00,0x05,0xca,0x9b]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x4_f32 v[4:11], v[0:1], v[2:3], v[4:11] neg_lo:[1,0,0]
+// GFX1250: v_wmma_f32_16x16x4_f32 v[4:11], v[0:1], v[2:3], v[4:11] neg_lo:[1,0,0] ; encoding: [0x04,0x00,0x5d,0xcc,0x00,0x05,0x12,0x3c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x4_f32 v[4:11], v[0:1], v[2:3], v[4:11] neg_lo:[0,1,0]
+// GFX1250: v_wmma_f32_16x16x4_f32 v[4:11], v[0:1], v[2:3], v[4:11] neg_lo:[0,1,0] ; encoding: [0x04,0x00,0x5d,0xcc,0x00,0x05,0x12,0x5c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x4_f32 v[4:11], v[0:1], v[2:3], v[4:11] neg_lo:[0,0,1]
+// GFX1250: v_wmma_f32_16x16x4_f32 v[4:11], v[0:1], v[2:3], v[4:11] neg_lo:[0,0,1] ; encoding: [0x04,0x00,0x5d,0xcc,0x00,0x05,0x12,0x9c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x4_f32 v[4:11], v[0:1], v[2:3], v[4:11] neg_hi:[0,0,1]
+// GFX1250: v_wmma_f32_16x16x4_f32 v[4:11], v[0:1], v[2:3], v[4:11] neg_hi:[0,0,1] ; encoding: [0x04,0x04,0x5d,0xcc,0x00,0x05,0x12,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x4_f32 v[4:11], v[0:1], v[2:3], v[4:11] matrix_a_reuse
+// GFX1250: v_wmma_f32_16x16x4_f32 v[4:11], v[0:1], v[2:3], v[4:11] matrix_a_reuse ; encoding: [0x04,0x20,0x5d,0xcc,0x00,0x05,0x12,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x4_f32 v[4:11], v[0:1], v[2:3], v[4:11] matrix_b_reuse
+// GFX1250: v_wmma_f32_16x16x4_f32 v[4:11], v[0:1], v[2:3], v[4:11] matrix_b_reuse ; encoding: [0x04,0x40,0x5d,0xcc,0x00,0x05,0x12,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x32_bf16 v[16:23], v[0:7], v[8:15], v[16:23]
+// GFX1250: v_wmma_f32_16x16x32_bf16 v[16:23], v[0:7], v[8:15], v[16:23] ; encoding: [0x10,0x00,0x62,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x32_bf16 v[16:23], v[0:7], v[8:15], 1.0
+// GFX1250: v_wmma_f32_16x16x32_bf16 v[16:23], v[0:7], v[8:15], 1.0 ; encoding: [0x10,0x00,0x62,0xcc,0x00,0x11,0xca,0x1b]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x32_bf16 v[16:23], v[0:7], v[8:15], 1.0 neg_lo:[0,0,1]
+// GFX1250: v_wmma_f32_16x16x32_bf16 v[16:23], v[0:7], v[8:15], 1.0 neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x62,0xcc,0x00,0x11,0xca,0x9b]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x32_bf16 v[16:23], v[0:7], v[8:15], v[16:23] neg_lo:[1,0,0] neg_hi:[1,0,0]
+// GFX1250: v_wmma_f32_16x16x32_bf16 v[16:23], v[0:7], v[8:15], v[16:23] neg_lo:[1,0,0] neg_hi:[1,0,0] ; encoding: [0x10,0x01,0x62,0xcc,0x00,0x11,0x42,0x3c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x32_bf16 v[16:23], v[0:7], v[8:15], v[16:23] neg_lo:[0,1,0] neg_hi:[0,1,0]
+// GFX1250: v_wmma_f32_16x16x32_bf16 v[16:23], v[0:7], v[8:15], v[16:23] neg_lo:[0,1,0] neg_hi:[0,1,0] ; encoding: [0x10,0x02,0x62,0xcc,0x00,0x11,0x42,0x5c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x32_bf16 v[16:23], v[0:7], v[8:15], v[16:23] neg_lo:[0,0,1]
+// GFX1250: v_wmma_f32_16x16x32_bf16 v[16:23], v[0:7], v[8:15], v[16:23] neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x62,0xcc,0x00,0x11,0x42,0x9c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x32_bf16 v[16:23], v[0:7], v[8:15], v[16:23] neg_hi:[0,0,1]
+// GFX1250: v_wmma_f32_16x16x32_bf16 v[16:23], v[0:7], v[8:15], v[16:23] neg_hi:[0,0,1] ; encoding: [0x10,0x04,0x62,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x32_bf16 v[16:23], v[0:7], v[8:15], v[16:23] matrix_a_reuse
+// GFX1250: v_wmma_f32_16x16x32_bf16 v[16:23], v[0:7], v[8:15], v[16:23] matrix_a_reuse ; encoding: [0x10,0x20,0x62,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x32_bf16 v[16:23], v[0:7], v[8:15], v[16:23] matrix_b_reuse
+// GFX1250: v_wmma_f32_16x16x32_bf16 v[16:23], v[0:7], v[8:15], v[16:23] matrix_b_reuse ; encoding: [0x10,0x40,0x62,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_bf16_16x16x32_bf16 v[16:19], v[0:7], v[8:15], v[16:19]
+// GFX1250: v_wmma_bf16_16x16x32_bf16 v[16:19], v[0:7], v[8:15], v[16:19] ; encoding: [0x10,0x00,0x63,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_bf16_16x16x32_bf16 v[16:19], v[0:7], v[8:15], 1.0
+// GFX1250: v_wmma_bf16_16x16x32_bf16 v[16:19], v[0:7], v[8:15], 1.0 ; encoding: [0x10,0x00,0x63,0xcc,0x00,0x11,0xca,0x1b]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_bf16_16x16x32_bf16 v[16:19], v[0:7], v[8:15], 1.0 neg_lo:[0,0,1]
+// GFX1250: v_wmma_bf16_16x16x32_bf16 v[16:19], v[0:7], v[8:15], 1.0 neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x63,0xcc,0x00,0x11,0xca,0x9b]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_bf16_16x16x32_bf16 v[16:19], v[0:7], v[8:15], v[16:19] neg_lo:[1,0,0] neg_hi:[1,0,0]
+// GFX1250: v_wmma_bf16_16x16x32_bf16 v[16:19], v[0:7], v[8:15], v[16:19] neg_lo:[1,0,0] neg_hi:[1,0,0] ; encoding: [0x10,0x01,0x63,0xcc,0x00,0x11,0x42,0x3c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_bf16_16x16x32_bf16 v[16:19], v[0:7], v[8:15], v[16:19] neg_lo:[0,1,0] neg_hi:[0,1,0]
+// GFX1250: v_wmma_bf16_16x16x32_bf16 v[16:19], v[0:7], v[8:15], v[16:19] neg_lo:[0,1,0] neg_hi:[0,1,0] ; encoding: [0x10,0x02,0x63,0xcc,0x00,0x11,0x42,0x5c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_bf16_16x16x32_bf16 v[16:19], v[0:7], v[8:15], v[16:19] neg_lo:[0,0,1]
+// GFX1250: v_wmma_bf16_16x16x32_bf16 v[16:19], v[0:7], v[8:15], v[16:19] neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x63,0xcc,0x00,0x11,0x42,0x9c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_bf16_16x16x32_bf16 v[16:19], v[0:7], v[8:15], v[16:19] neg_hi:[0,0,1]
+// GFX1250: v_wmma_bf16_16x16x32_bf16 v[16:19], v[0:7], v[8:15], v[16:19] neg_hi:[0,0,1] ; encoding: [0x10,0x04,0x63,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_bf16_16x16x32_bf16 v[16:19], v[0:7], v[8:15], v[16:19] matrix_a_reuse
+// GFX1250: v_wmma_bf16_16x16x32_bf16 v[16:19], v[0:7], v[8:15], v[16:19] matrix_a_reuse ; encoding: [0x10,0x20,0x63,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_bf16_16x16x32_bf16 v[16:19], v[0:7], v[8:15], v[16:19] matrix_b_reuse
+// GFX1250: v_wmma_bf16_16x16x32_bf16 v[16:19], v[0:7], v[8:15], v[16:19] matrix_b_reuse ; encoding: [0x10,0x40,0x63,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_bf16f32_16x16x32_bf16 v[26:29], v[0:7], v[8:15], v[16:23]
+// GFX1250: v_wmma_bf16f32_16x16x32_bf16 v[26:29], v[0:7], v[8:15], v[16:23] ; encoding: [0x1a,0x00,0x64,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_bf16f32_16x16x32_bf16 v[26:29], v[0:7], v[8:15], 1.0
+// GFX1250: v_wmma_bf16f32_16x16x32_bf16 v[26:29], v[0:7], v[8:15], 1.0 ; encoding: [0x1a,0x00,0x64,0xcc,0x00,0x11,0xca,0x1b]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_bf16f32_16x16x32_bf16 v[26:29], v[0:7], v[8:15], 1.0 neg_lo:[0,0,1]
+// GFX1250: v_wmma_bf16f32_16x16x32_bf16 v[26:29], v[0:7], v[8:15], 1.0 neg_lo:[0,0,1] ; encoding: [0x1a,0x00,0x64,0xcc,0x00,0x11,0xca,0x9b]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_bf16f32_16x16x32_bf16 v[26:29], v[0:7], v[8:15], v[16:23] neg_lo:[1,0,0] neg_hi:[1,0,0]
+// GFX1250: v_wmma_bf16f32_16x16x32_bf16 v[26:29], v[0:7], v[8:15], v[16:23] neg_lo:[1,0,0] neg_hi:[1,0,0] ; encoding: [0x1a,0x01,0x64,0xcc,0x00,0x11,0x42,0x3c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_bf16f32_16x16x32_bf16 v[26:29], v[0:7], v[8:15], v[16:23] neg_lo:[0,1,0] neg_hi:[0,1,0]
+// GFX1250: v_wmma_bf16f32_16x16x32_bf16 v[26:29], v[0:7], v[8:15], v[16:23] neg_lo:[0,1,0] neg_hi:[0,1,0] ; encoding: [0x1a,0x02,0x64,0xcc,0x00,0x11,0x42,0x5c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_bf16f32_16x16x32_bf16 v[26:29], v[0:7], v[8:15], v[16:23] neg_lo:[0,0,1]
+// GFX1250: v_wmma_bf16f32_16x16x32_bf16 v[26:29], v[0:7], v[8:15], v[16:23] neg_lo:[0,0,1] ; encoding: [0x1a,0x00,0x64,0xcc,0x00,0x11,0x42,0x9c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_bf16f32_16x16x32_bf16 v[26:29], v[0:7], v[8:15], v[16:23] neg_hi:[0,0,1]
+// GFX1250: v_wmma_bf16f32_16x16x32_bf16 v[26:29], v[0:7], v[8:15], v[16:23] neg_hi:[0,0,1] ; encoding: [0x1a,0x04,0x64,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_bf16f32_16x16x32_bf16 v[26:29], v[0:7], v[8:15], v[16:23] matrix_a_reuse
+// GFX1250: v_wmma_bf16f32_16x16x32_bf16 v[26:29], v[0:7], v[8:15], v[16:23] matrix_a_reuse ; encoding: [0x1a,0x20,0x64,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_bf16f32_16x16x32_bf16 v[26:29], v[0:7], v[8:15], v[16:23] matrix_b_reuse
+// GFX1250: v_wmma_bf16f32_16x16x32_bf16 v[26:29], v[0:7], v[8:15], v[16:23] matrix_b_reuse ; encoding: [0x1a,0x40,0x64,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x64_fp8_fp8 v[16:23], v[0:7], v[8:15], v[16:23]
+// GFX1250: v_wmma_f32_16x16x64_fp8_fp8 v[16:23], v[0:7], v[8:15], v[16:23] ; encoding: [0x10,0x00,0x6a,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x64_fp8_fp8 v[16:23], v[0:7], v[8:15], 1.0
+// GFX1250: v_wmma_f32_16x16x64_fp8_fp8 v[16:23], v[0:7], v[8:15], 1.0 ; encoding: [0x10,0x00,0x6a,0xcc,0x00,0x11,0xca,0x1b]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x64_fp8_fp8 v[16:23], v[0:7], v[8:15], 1.0 neg_lo:[0,0,1]
+// GFX1250: v_wmma_f32_16x16x64_fp8_fp8 v[16:23], v[0:7], v[8:15], 1.0 neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x6a,0xcc,0x00,0x11,0xca,0x9b]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x64_fp8_fp8 v[16:23], v[0:7], v[8:15], v[16:23] neg_lo:[0,0,1]
+// GFX1250: v_wmma_f32_16x16x64_fp8_fp8 v[16:23], v[0:7], v[8:15], v[16:23] neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x6a,0xcc,0x00,0x11,0x42,0x9c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x64_fp8_fp8 v[16:23], v[0:7], v[8:15], v[16:23] neg_hi:[0,0,1]
+// GFX1250: v_wmma_f32_16x16x64_fp8_fp8 v[16:23], v[0:7], v[8:15], v[16:23] neg_hi:[0,0,1] ; encoding: [0x10,0x04,0x6a,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x64_fp8_fp8 v[16:23], v[0:7], v[8:15], v[16:23] matrix_a_reuse
+// GFX1250: v_wmma_f32_16x16x64_fp8_fp8 v[16:23], v[0:7], v[8:15], v[16:23] matrix_a_reuse ; encoding: [0x10,0x20,0x6a,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x64_fp8_fp8 v[16:23], v[0:7], v[8:15], v[16:23] matrix_b_reuse
+// GFX1250: v_wmma_f32_16x16x64_fp8_fp8 v[16:23], v[0:7], v[8:15], v[16:23] matrix_b_reuse ; encoding: [0x10,0x40,0x6a,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x64_fp8_bf8 v[16:23], v[0:7], v[8:15], v[16:23]
+// GFX1250: v_wmma_f32_16x16x64_fp8_bf8 v[16:23], v[0:7], v[8:15], v[16:23] ; encoding: [0x10,0x00,0x6b,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x64_fp8_bf8 v[16:23], v[0:7], v[8:15], 1.0
+// GFX1250: v_wmma_f32_16x16x64_fp8_bf8 v[16:23], v[0:7], v[8:15], 1.0 ; encoding: [0x10,0x00,0x6b,0xcc,0x00,0x11,0xca,0x1b]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x64_fp8_bf8 v[16:23], v[0:7], v[8:15], 1.0 neg_lo:[0,0,1]
+// GFX1250: v_wmma_f32_16x16x64_fp8_bf8 v[16:23], v[0:7], v[8:15], 1.0 neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x6b,0xcc,0x00,0x11,0xca,0x9b]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x64_fp8_bf8 v[16:23], v[0:7], v[8:15], v[16:23] neg_lo:[0,0,1]
+// GFX1250: v_wmma_f32_16x16x64_fp8_bf8 v[16:23], v[0:7], v[8:15], v[16:23] neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x6b,0xcc,0x00,0x11,0x42,0x9c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x64_fp8_bf8 v[16:23], v[0:7], v[8:15], v[16:23] neg_hi:[0,0,1]
+// GFX1250: v_wmma_f32_16x16x64_fp8_bf8 v[16:23], v[0:7], v[8:15], v[16:23] neg_hi:[0,0,1] ; encoding: [0x10,0x04,0x6b,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x64_fp8_bf8 v[16:23], v[0:7], v[8:15], v[16:23] matrix_a_reuse
+// GFX1250: v_wmma_f32_16x16x64_fp8_bf8 v[16:23], v[0:7], v[8:15], v[16:23] matrix_a_reuse ; encoding: [0x10,0x20,0x6b,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x64_fp8_bf8 v[16:23], v[0:7], v[8:15], v[16:23] matrix_b_reuse
+// GFX1250: v_wmma_f32_16x16x64_fp8_bf8 v[16:23], v[0:7], v[8:15], v[16:23] matrix_b_reuse ; encoding: [0x10,0x40,0x6b,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x64_bf8_fp8 v[16:23], v[0:7], v[8:15], v[16:23]
+// GFX1250: v_wmma_f32_16x16x64_bf8_fp8 v[16:23], v[0:7], v[8:15], v[16:23] ; encoding: [0x10,0x00,0x6c,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x64_bf8_fp8 v[16:23], v[0:7], v[8:15], 1.0
+// GFX1250: v_wmma_f32_16x16x64_bf8_fp8 v[16:23], v[0:7], v[8:15], 1.0 ; encoding: [0x10,0x00,0x6c,0xcc,0x00,0x11,0xca,0x1b]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x64_bf8_fp8 v[16:23], v[0:7], v[8:15], 1.0 neg_lo:[0,0,1]
+// GFX1250: v_wmma_f32_16x16x64_bf8_fp8 v[16:23], v[0:7], v[8:15], 1.0 neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x6c,0xcc,0x00,0x11,0xca,0x9b]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x64_bf8_fp8 v[16:23], v[0:7], v[8:15], v[16:23] neg_lo:[0,0,1]
+// GFX1250: v_wmma_f32_16x16x64_bf8_fp8 v[16:23], v[0:7], v[8:15], v[16:23] neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x6c,0xcc,0x00,0x11,0x42,0x9c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x64_bf8_fp8 v[16:23], v[0:7], v[8:15], v[16:23] neg_hi:[0,0,1]
+// GFX1250: v_wmma_f32_16x16x64_bf8_fp8 v[16:23], v[0:7], v[8:15], v[16:23] neg_hi:[0,0,1] ; encoding: [0x10,0x04,0x6c,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x64_bf8_fp8 v[16:23], v[0:7], v[8:15], v[16:23] matrix_a_reuse
+// GFX1250: v_wmma_f32_16x16x64_bf8_fp8 v[16:23], v[0:7], v[8:15], v[16:23] matrix_a_reuse ; encoding: [0x10,0x20,0x6c,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x64_bf8_fp8 v[16:23], v[0:7], v[8:15], v[16:23] matrix_b_reuse
+// GFX1250: v_wmma_f32_16x16x64_bf8_fp8 v[16:23], v[0:7], v[8:15], v[16:23] matrix_b_reuse ; encoding: [0x10,0x40,0x6c,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x64_bf8_bf8 v[16:23], v[0:7], v[8:15], v[16:23]
+// GFX1250: v_wmma_f32_16x16x64_bf8_bf8 v[16:23], v[0:7], v[8:15], v[16:23] ; encoding: [0x10,0x00,0x6d,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x64_bf8_bf8 v[16:23], v[0:7], v[8:15], 1.0
+// GFX1250: v_wmma_f32_16x16x64_bf8_bf8 v[16:23], v[0:7], v[8:15], 1.0 ; encoding: [0x10,0x00,0x6d,0xcc,0x00,0x11,0xca,0x1b]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x64_bf8_bf8 v[16:23], v[0:7], v[8:15], 1.0 neg_lo:[0,0,1]
+// GFX1250: v_wmma_f32_16x16x64_bf8_bf8 v[16:23], v[0:7], v[8:15], 1.0 neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x6d,0xcc,0x00,0x11,0xca,0x9b]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x64_bf8_bf8 v[16:23], v[0:7], v[8:15], v[16:23] neg_lo:[0,0,1]
+// GFX1250: v_wmma_f32_16x16x64_bf8_bf8 v[16:23], v[0:7], v[8:15], v[16:23] neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x6d,0xcc,0x00,0x11,0x42,0x9c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x64_bf8_bf8 v[16:23], v[0:7], v[8:15], v[16:23] neg_hi:[0,0,1]
+// GFX1250: v_wmma_f32_16x16x64_bf8_bf8 v[16:23], v[0:7], v[8:15], v[16:23] neg_hi:[0,0,1] ; encoding: [0x10,0x04,0x6d,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x64_bf8_bf8 v[16:23], v[0:7], v[8:15], v[16:23] matrix_a_reuse
+// GFX1250: v_wmma_f32_16x16x64_bf8_bf8 v[16:23], v[0:7], v[8:15], v[16:23] matrix_a_reuse ; encoding: [0x10,0x20,0x6d,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x64_bf8_bf8 v[16:23], v[0:7], v[8:15], v[16:23] matrix_b_reuse
+// GFX1250: v_wmma_f32_16x16x64_bf8_bf8 v[16:23], v[0:7], v[8:15], v[16:23] matrix_b_reuse ; encoding: [0x10,0x40,0x6d,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f16_16x16x64_fp8_fp8 v[16:19], v[0:7], v[8:15], v[16:19]
+// GFX1250: v_wmma_f16_16x16x64_fp8_fp8 v[16:19], v[0:7], v[8:15], v[16:19] ; encoding: [0x10,0x00,0x6e,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f16_16x16x64_fp8_fp8 v[16:19], v[0:7], v[8:15], 1.0
+// GFX1250: v_wmma_f16_16x16x64_fp8_fp8 v[16:19], v[0:7], v[8:15], 1.0 ; encoding: [0x10,0x00,0x6e,0xcc,0x00,0x11,0xca,0x1b]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f16_16x16x64_fp8_fp8 v[16:19], v[0:7], v[8:15], 1.0 neg_lo:[0,0,1]
+// GFX1250: v_wmma_f16_16x16x64_fp8_fp8 v[16:19], v[0:7], v[8:15], 1.0 neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x6e,0xcc,0x00,0x11,0xca,0x9b]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f16_16x16x64_fp8_fp8 v[16:19], v[0:7], v[8:15], v[16:19] neg_lo:[0,0,1]
+// GFX1250: v_wmma_f16_16x16x64_fp8_fp8 v[16:19], v[0:7], v[8:15], v[16:19] neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x6e,0xcc,0x00,0x11,0x42,0x9c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f16_16x16x64_fp8_fp8 v[16:19], v[0:7], v[8:15], v[16:19] neg_hi:[0,0,1]
+// GFX1250: v_wmma_f16_16x16x64_fp8_fp8 v[16:19], v[0:7], v[8:15], v[16:19] neg_hi:[0,0,1] ; encoding: [0x10,0x04,0x6e,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f16_16x16x64_fp8_fp8 v[16:19], v[0:7], v[8:15], v[16:19] matrix_a_reuse
+// GFX1250: v_wmma_f16_16x16x64_fp8_fp8 v[16:19], v[0:7], v[8:15], v[16:19] matrix_a_reuse ; encoding: [0x10,0x20,0x6e,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f16_16x16x64_fp8_fp8 v[16:19], v[0:7], v[8:15], v[16:19] matrix_b_reuse
+// GFX1250: v_wmma_f16_16x16x64_fp8_fp8 v[16:19], v[0:7], v[8:15], v[16:19] matrix_b_reuse ; encoding: [0x10,0x40,0x6e,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f16_16x16x64_fp8_bf8 v[16:19], v[0:7], v[8:15], v[16:19]
+// GFX1250: v_wmma_f16_16x16x64_fp8_bf8 v[16:19], v[0:7], v[8:15], v[16:19] ; encoding: [0x10,0x00,0x6f,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f16_16x16x64_fp8_bf8 v[16:19], v[0:7], v[8:15], 1.0
+// GFX1250: v_wmma_f16_16x16x64_fp8_bf8 v[16:19], v[0:7], v[8:15], 1.0 ; encoding: [0x10,0x00,0x6f,0xcc,0x00,0x11,0xca,0x1b]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f16_16x16x64_fp8_bf8 v[16:19], v[0:7], v[8:15], 1.0 neg_lo:[0,0,1]
+// GFX1250: v_wmma_f16_16x16x64_fp8_bf8 v[16:19], v[0:7], v[8:15], 1.0 neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x6f,0xcc,0x00,0x11,0xca,0x9b]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f16_16x16x64_fp8_bf8 v[16:19], v[0:7], v[8:15], v[16:19] neg_lo:[0,0,1]
+// GFX1250: v_wmma_f16_16x16x64_fp8_bf8 v[16:19], v[0:7], v[8:15], v[16:19] neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x6f,0xcc,0x00,0x11,0x42,0x9c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f16_16x16x64_fp8_bf8 v[16:19], v[0:7], v[8:15], v[16:19] neg_hi:[0,0,1]
+// GFX1250: v_wmma_f16_16x16x64_fp8_bf8 v[16:19], v[0:7], v[8:15], v[16:19] neg_hi:[0,0,1] ; encoding: [0x10,0x04,0x6f,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f16_16x16x64_fp8_bf8 v[16:19], v[0:7], v[8:15], v[16:19] matrix_a_reuse
+// GFX1250: v_wmma_f16_16x16x64_fp8_bf8 v[16:19], v[0:7], v[8:15], v[16:19] matrix_a_reuse ; encoding: [0x10,0x20,0x6f,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f16_16x16x64_fp8_bf8 v[16:19], v[0:7], v[8:15], v[16:19] matrix_b_reuse
+// GFX1250: v_wmma_f16_16x16x64_fp8_bf8 v[16:19], v[0:7], v[8:15], v[16:19] matrix_b_reuse ; encoding: [0x10,0x40,0x6f,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f16_16x16x64_bf8_fp8 v[16:19], v[0:7], v[8:15], v[16:19]
+// GFX1250: v_wmma_f16_16x16x64_bf8_fp8 v[16:19], v[0:7], v[8:15], v[16:19] ; encoding: [0x10,0x00,0x70,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f16_16x16x64_bf8_fp8 v[16:19], v[0:7], v[8:15], 1.0
+// GFX1250: v_wmma_f16_16x16x64_bf8_fp8 v[16:19], v[0:7], v[8:15], 1.0 ; encoding: [0x10,0x00,0x70,0xcc,0x00,0x11,0xca,0x1b]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f16_16x16x64_bf8_fp8 v[16:19], v[0:7], v[8:15], 1.0 neg_lo:[0,0,1]
+// GFX1250: v_wmma_f16_16x16x64_bf8_fp8 v[16:19], v[0:7], v[8:15], 1.0 neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x70,0xcc,0x00,0x11,0xca,0x9b]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f16_16x16x64_bf8_fp8 v[16:19], v[0:7], v[8:15], v[16:19] neg_lo:[0,0,1]
+// GFX1250: v_wmma_f16_16x16x64_bf8_fp8 v[16:19], v[0:7], v[8:15], v[16:19] neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x70,0xcc,0x00,0x11,0x42,0x9c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f16_16x16x64_bf8_fp8 v[16:19], v[0:7], v[8:15], v[16:19] neg_hi:[0,0,1]
+// GFX1250: v_wmma_f16_16x16x64_bf8_fp8 v[16:19], v[0:7], v[8:15], v[16:19] neg_hi:[0,0,1] ; encoding: [0x10,0x04,0x70,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f16_16x16x64_bf8_fp8 v[16:19], v[0:7], v[8:15], v[16:19] matrix_a_reuse
+// GFX1250: v_wmma_f16_16x16x64_bf8_fp8 v[16:19], v[0:7], v[8:15], v[16:19] matrix_a_reuse ; encoding: [0x10,0x20,0x70,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f16_16x16x64_bf8_fp8 v[16:19], v[0:7], v[8:15], v[16:19] matrix_b_reuse
+// GFX1250: v_wmma_f16_16x16x64_bf8_fp8 v[16:19], v[0:7], v[8:15], v[16:19] matrix_b_reuse ; encoding: [0x10,0x40,0x70,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f16_16x16x64_bf8_bf8 v[16:19], v[0:7], v[8:15], v[16:19]
+// GFX1250: v_wmma_f16_16x16x64_bf8_bf8 v[16:19], v[0:7], v[8:15], v[16:19] ; encoding: [0x10,0x00,0x71,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f16_16x16x64_bf8_bf8 v[16:19], v[0:7], v[8:15], 1.0
+// GFX1250: v_wmma_f16_16x16x64_bf8_bf8 v[16:19], v[0:7], v[8:15], 1.0 ; encoding: [0x10,0x00,0x71,0xcc,0x00,0x11,0xca,0x1b]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f16_16x16x64_bf8_bf8 v[16:19], v[0:7], v[8:15], 1.0 neg_lo:[0,0,1]
+// GFX1250: v_wmma_f16_16x16x64_bf8_bf8 v[16:19], v[0:7], v[8:15], 1.0 neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x71,0xcc,0x00,0x11,0xca,0x9b]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f16_16x16x64_bf8_bf8 v[16:19], v[0:7], v[8:15], v[16:19] neg_lo:[0,0,1]
+// GFX1250: v_wmma_f16_16x16x64_bf8_bf8 v[16:19], v[0:7], v[8:15], v[16:19] neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x71,0xcc,0x00,0x11,0x42,0x9c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f16_16x16x64_bf8_bf8 v[16:19], v[0:7], v[8:15], v[16:19] neg_hi:[0,0,1]
+// GFX1250: v_wmma_f16_16x16x64_bf8_bf8 v[16:19], v[0:7], v[8:15], v[16:19] neg_hi:[0,0,1] ; encoding: [0x10,0x04,0x71,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f16_16x16x64_bf8_bf8 v[16:19], v[0:7], v[8:15], v[16:19] matrix_a_reuse
+// GFX1250: v_wmma_f16_16x16x64_bf8_bf8 v[16:19], v[0:7], v[8:15], v[16:19] matrix_a_reuse ; encoding: [0x10,0x20,0x71,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f16_16x16x64_bf8_bf8 v[16:19], v[0:7], v[8:15], v[16:19] matrix_b_reuse
+// GFX1250: v_wmma_f16_16x16x64_bf8_bf8 v[16:19], v[0:7], v[8:15], v[16:19] matrix_b_reuse ; encoding: [0x10,0x40,0x71,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_i32_16x16x64_iu8 v[16:23], v[0:7], v[8:15], v[16:23]
+// GFX1250: v_wmma_i32_16x16x64_iu8 v[16:23], v[0:7], v[8:15], v[16:23] ; encoding: [0x10,0x00,0x72,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_i32_16x16x64_iu8 v[16:23], v[0:7], v[8:15], 1
+// GFX1250: v_wmma_i32_16x16x64_iu8 v[16:23], v[0:7], v[8:15], 1 ; encoding: [0x10,0x00,0x72,0xcc,0x00,0x11,0x06,0x1a]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_i32_16x16x64_iu8 v[16:23], v[0:7], v[8:15], v[16:23] neg_lo:[1,0,0]
+// GFX1250: v_wmma_i32_16x16x64_iu8 v[16:23], v[0:7], v[8:15], v[16:23] neg_lo:[1,0,0] ; encoding: [0x10,0x00,0x72,0xcc,0x00,0x11,0x42,0x3c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_i32_16x16x64_iu8 v[16:23], v[0:7], v[8:15], v[16:23] neg_lo:[0,1,0]
+// GFX1250: v_wmma_i32_16x16x64_iu8 v[16:23], v[0:7], v[8:15], v[16:23] neg_lo:[0,1,0] ; encoding: [0x10,0x00,0x72,0xcc,0x00,0x11,0x42,0x5c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_i32_16x16x64_iu8 v[16:23], v[0:7], v[8:15], v[16:23] matrix_a_reuse
+// GFX1250: v_wmma_i32_16x16x64_iu8 v[16:23], v[0:7], v[8:15], v[16:23] matrix_a_reuse ; encoding: [0x10,0x20,0x72,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_i32_16x16x64_iu8 v[16:23], v[0:7], v[8:15], v[16:23] matrix_b_reuse
+// GFX1250: v_wmma_i32_16x16x64_iu8 v[16:23], v[0:7], v[8:15], v[16:23] matrix_b_reuse ; encoding: [0x10,0x40,0x72,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x32_f16 v[16:23], v[0:7], v[8:15], v[16:23]
+// GFX1250: v_wmma_f32_16x16x32_f16 v[16:23], v[0:7], v[8:15], v[16:23] ; encoding: [0x10,0x00,0x60,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x32_f16 v[16:23], v[0:7], v[8:15], 1.0
+// GFX1250: v_wmma_f32_16x16x32_f16 v[16:23], v[0:7], v[8:15], 1.0 ; encoding: [0x10,0x00,0x60,0xcc,0x00,0x11,0xca,0x1b]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x32_f16 v[16:23], v[0:7], v[8:15], 1.0 neg_lo:[0,0,1]
+// GFX1250: v_wmma_f32_16x16x32_f16 v[16:23], v[0:7], v[8:15], 1.0 neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x60,0xcc,0x00,0x11,0xca,0x9b]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x32_f16 v[16:23], v[0:7], v[8:15], v[16:23] neg_lo:[1,0,0] neg_hi:[1,0,0]
+// GFX1250: v_wmma_f32_16x16x32_f16 v[16:23], v[0:7], v[8:15], v[16:23] neg_lo:[1,0,0] neg_hi:[1,0,0] ; encoding: [0x10,0x01,0x60,0xcc,0x00,0x11,0x42,0x3c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x32_f16 v[16:23], v[0:7], v[8:15], v[16:23] neg_lo:[0,1,0] neg_hi:[0,1,0]
+// GFX1250: v_wmma_f32_16x16x32_f16 v[16:23], v[0:7], v[8:15], v[16:23] neg_lo:[0,1,0] neg_hi:[0,1,0] ; encoding: [0x10,0x02,0x60,0xcc,0x00,0x11,0x42,0x5c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x32_f16 v[16:23], v[0:7], v[8:15], v[16:23] neg_lo:[0,0,1]
+// GFX1250: v_wmma_f32_16x16x32_f16 v[16:23], v[0:7], v[8:15], v[16:23] neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x60,0xcc,0x00,0x11,0x42,0x9c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x32_f16 v[16:23], v[0:7], v[8:15], v[16:23] neg_hi:[0,0,1]
+// GFX1250: v_wmma_f32_16x16x32_f16 v[16:23], v[0:7], v[8:15], v[16:23] neg_hi:[0,0,1] ; encoding: [0x10,0x04,0x60,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x32_f16 v[16:23], v[0:7], v[8:15], v[16:23] matrix_a_reuse
+// GFX1250: v_wmma_f32_16x16x32_f16 v[16:23], v[0:7], v[8:15], v[16:23] matrix_a_reuse ; encoding: [0x10,0x20,0x60,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x32_f16 v[16:23], v[0:7], v[8:15], v[16:23] matrix_b_reuse
+// GFX1250: v_wmma_f32_16x16x32_f16 v[16:23], v[0:7], v[8:15], v[16:23] matrix_b_reuse ; encoding: [0x10,0x40,0x60,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f16_16x16x32_f16 v[16:19], v[0:7], v[8:15], v[16:19]
+// GFX1250: v_wmma_f16_16x16x32_f16 v[16:19], v[0:7], v[8:15], v[16:19] ; encoding: [0x10,0x00,0x61,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f16_16x16x32_f16 v[16:19], v[0:7], v[8:15], 1.0
+// GFX1250: v_wmma_f16_16x16x32_f16 v[16:19], v[0:7], v[8:15], 1.0 ; encoding: [0x10,0x00,0x61,0xcc,0x00,0x11,0xca,0x1b]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f16_16x16x32_f16 v[16:19], v[0:7], v[8:15], 1.0 neg_lo:[0,0,1]
+// GFX1250: v_wmma_f16_16x16x32_f16 v[16:19], v[0:7], v[8:15], 1.0 neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x61,0xcc,0x00,0x11,0xca,0x9b]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f16_16x16x32_f16 v[16:19], v[0:7], v[8:15], v[16:19] neg_lo:[1,0,0] neg_hi:[1,0,0]
+// GFX1250: v_wmma_f16_16x16x32_f16 v[16:19], v[0:7], v[8:15], v[16:19] neg_lo:[1,0,0] neg_hi:[1,0,0] ; encoding: [0x10,0x01,0x61,0xcc,0x00,0x11,0x42,0x3c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f16_16x16x32_f16 v[16:19], v[0:7], v[8:15], v[16:19] neg_lo:[0,1,0] neg_hi:[0,1,0]
+// GFX1250: v_wmma_f16_16x16x32_f16 v[16:19], v[0:7], v[8:15], v[16:19] neg_lo:[0,1,0] neg_hi:[0,1,0] ; encoding: [0x10,0x02,0x61,0xcc,0x00,0x11,0x42,0x5c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f16_16x16x32_f16 v[16:19], v[0:7], v[8:15], v[16:19] neg_lo:[0,0,1]
+// GFX1250: v_wmma_f16_16x16x32_f16 v[16:19], v[0:7], v[8:15], v[16:19] neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x61,0xcc,0x00,0x11,0x42,0x9c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f16_16x16x32_f16 v[16:19], v[0:7], v[8:15], v[16:19] neg_hi:[0,0,1]
+// GFX1250: v_wmma_f16_16x16x32_f16 v[16:19], v[0:7], v[8:15], v[16:19] neg_hi:[0,0,1] ; encoding: [0x10,0x04,0x61,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f16_16x16x32_f16 v[16:19], v[0:7], v[8:15], v[16:19] matrix_a_reuse
+// GFX1250: v_wmma_f16_16x16x32_f16 v[16:19], v[0:7], v[8:15], v[16:19] matrix_a_reuse ; encoding: [0x10,0x20,0x61,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f16_16x16x32_f16 v[16:19], v[0:7], v[8:15], v[16:19] matrix_b_reuse
+// GFX1250: v_wmma_f16_16x16x32_f16 v[16:19], v[0:7], v[8:15], v[16:19] matrix_b_reuse ; encoding: [0x10,0x40,0x61,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_swmmac_f32_16x16x64_bf16 v[24:31], v[0:7], v[8:23], v32
+// GFX1250: v_swmmac_f32_16x16x64_bf16 v[24:31], v[0:7], v[8:23], v32 ; encoding: [0x18,0x00,0x66,0xcc,0x00,0x11,0x82,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_swmmac_f32_16x16x64_bf16 v[24:31], v[0:7], v[8:23], v32 index_key:1
+// GFX1250: v_swmmac_f32_16x16x64_bf16 v[24:31], v[0:7], v[8:23], v32 index_key:1 ; encoding: [0x18,0x08,0x66,0xcc,0x00,0x11,0x82,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_swmmac_f32_16x16x64_bf16 v[24:31], v[0:7], v[8:23], v32 neg_lo:[1,0,0] neg_hi:[1,0,0]
+// GFX1250: v_swmmac_f32_16x16x64_bf16 v[24:31], v[0:7], v[8:23], v32 neg_lo:[1,0,0] neg_hi:[1,0,0] ; encoding: [0x18,0x01,0x66,0xcc,0x00,0x11,0x82,0x3c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_swmmac_f32_16x16x64_bf16 v[24:31], v[0:7], v[8:23], v32 neg_lo:[0,1,0] neg_hi:[0,1,0]
+// GFX1250: v_swmmac_f32_16x16x64_bf16 v[24:31], v[0:7], v[8:23], v32 neg_lo:[0,1,0] neg_hi:[0,1,0] ; encoding: [0x18,0x02,0x66,0xcc,0x00,0x11,0x82,0x5c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_swmmac_f32_16x16x64_bf16 v[24:31], v[0:7], v[8:23], v32 matrix_a_reuse
+// GFX1250: v_swmmac_f32_16x16x64_bf16 v[24:31], v[0:7], v[8:23], v32 matrix_a_reuse ; encoding: [0x18,0x20,0x66,0xcc,0x00,0x11,0x82,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_swmmac_f32_16x16x64_bf16 v[24:31], v[0:7], v[8:23], v32 matrix_b_reuse
+// GFX1250: v_swmmac_f32_16x16x64_bf16 v[24:31], v[0:7], v[8:23], v32 matrix_b_reuse ; encoding: [0x18,0x40,0x66,0xcc,0x00,0x11,0x82,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_swmmac_bf16_16x16x64_bf16 v[24:27], v[0:7], v[8:23], v28
+// GFX1250: v_swmmac_bf16_16x16x64_bf16 v[24:27], v[0:7], v[8:23], v28 ; encoding: [0x18,0x00,0x68,0xcc,0x00,0x11,0x72,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_swmmac_bf16_16x16x64_bf16 v[24:27], v[0:7], v[8:23], v28 index_key:1
+// GFX1250: v_swmmac_bf16_16x16x64_bf16 v[24:27], v[0:7], v[8:23], v28 index_key:1 ; encoding: [0x18,0x08,0x68,0xcc,0x00,0x11,0x72,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_swmmac_bf16_16x16x64_bf16 v[24:27], v[0:7], v[8:23], v28 neg_lo:[1,0,0] neg_hi:[1,0,0]
+// GFX1250: v_swmmac_bf16_16x16x64_bf16 v[24:27], v[0:7], v[8:23], v28 neg_lo:[1,0,0] neg_hi:[1,0,0] ; encoding: [0x18,0x01,0x68,0xcc,0x00,0x11,0x72,0x3c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_swmmac_bf16_16x16x64_bf16 v[24:27], v[0:7], v[8:23], v28 neg_lo:[0,1,0] neg_hi:[0,1,0]
+// GFX1250: v_swmmac_bf16_16x16x64_bf16 v[24:27], v[0:7], v[8:23], v28 neg_lo:[0,1,0] neg_hi:[0,1,0] ; encoding: [0x18,0x02,0x68,0xcc,0x00,0x11,0x72,0x5c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_swmmac_f32_16x16x64_bf16 v[24:31], v[0:7], v[8:23], v32 matrix_a_reuse
+// GFX1250: v_swmmac_f32_16x16x64_bf16 v[24:31], v[0:7], v[8:23], v32 matrix_a_reuse ; encoding: [0x18,0x20,0x66,0xcc,0x00,0x11,0x82,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_swmmac_f32_16x16x64_bf16 v[24:31], v[0:7], v[8:23], v32 matrix_b_reuse
+// GFX1250: v_swmmac_f32_16x16x64_bf16 v[24:31], v[0:7], v[8:23], v32 matrix_b_reuse ; encoding: [0x18,0x40,0x66,0xcc,0x00,0x11,0x82,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_swmmac_bf16f32_16x16x64_bf16 v[24:31], v[0:7], v[8:23], v32
+// GFX1250: v_swmmac_bf16f32_16x16x64_bf16 v[24:31], v[0:7], v[8:23], v32 ; encoding: [0x18,0x00,0x69,0xcc,0x00,0x11,0x82,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_swmmac_bf16f32_16x16x64_bf16 v[24:31], v[0:7], v[8:23], v32 index_key:1
+// GFX1250: v_swmmac_bf16f32_16x16x64_bf16 v[24:31], v[0:7], v[8:23], v32 index_key:1 ; encoding: [0x18,0x08,0x69,0xcc,0x00,0x11,0x82,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_swmmac_bf16f32_16x16x64_bf16 v[24:31], v[0:7], v[8:23], v32 neg_lo:[1,0,0] neg_hi:[1,0,0]
+// GFX1250: v_swmmac_bf16f32_16x16x64_bf16 v[24:31], v[0:7], v[8:23], v32 neg_lo:[1,0,0] neg_hi:[1,0,0] ; encoding: [0x18,0x01,0x69,0xcc,0x00,0x11,0x82,0x3c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_swmmac_bf16f32_16x16x64_bf16 v[24:31], v[0:7], v[8:23], v32 neg_lo:[0,1,0] neg_hi:[0,1,0]
+// GFX1250: v_swmmac_bf16f32_16x16x64_bf16 v[24:31], v[0:7], v[8:23], v32 neg_lo:[0,1,0] neg_hi:[0,1,0] ; encoding: [0x18,0x02,0x69,0xcc,0x00,0x11,0x82,0x5c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_swmmac_bf16f32_16x16x64_bf16 v[24:31], v[0:7], v[8:23], v32 matrix_a_reuse
+// GFX1250: v_swmmac_bf16f32_16x16x64_bf16 v[24:31], v[0:7], v[8:23], v32 matrix_a_reuse ; encoding: [0x18,0x20,0x69,0xcc,0x00,0x11,0x82,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_swmmac_bf16f32_16x16x64_bf16 v[24:31], v[0:7], v[8:23], v32 matrix_b_reuse
+// GFX1250: v_swmmac_bf16f32_16x16x64_bf16 v[24:31], v[0:7], v[8:23], v32 matrix_b_reuse ; encoding: [0x18,0x40,0x69,0xcc,0x00,0x11,0x82,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_swmmac_f32_16x16x128_fp8_fp8 v[24:31], v[0:7], v[8:23], v[32:33]
+// GFX1250: v_swmmac_f32_16x16x128_fp8_fp8 v[24:31], v[0:7], v[8:23], v[32:33] ; encoding: [0x18,0x00,0x73,0xcc,0x00,0x11,0x82,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_swmmac_f32_16x16x128_fp8_fp8 v[24:31], v[0:7], v[8:23], v[32:33] index_key:1
+// GFX1250: v_swmmac_f32_16x16x128_fp8_fp8 v[24:31], v[0:7], v[8:23], v[32:33] index_key:1 ; encoding: [0x18,0x08,0x73,0xcc,0x00,0x11,0x82,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_swmmac_f32_16x16x128_fp8_fp8 v[24:31], v[0:7], v[8:23], v[32:33] matrix_a_reuse
+// GFX1250: v_swmmac_f32_16x16x128_fp8_fp8 v[24:31], v[0:7], v[8:23], v[32:33] matrix_a_reuse ; encoding: [0x18,0x20,0x73,0xcc,0x00,0x11,0x82,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_swmmac_f32_16x16x128_fp8_fp8 v[24:31], v[0:7], v[8:23], v[32:33] matrix_b_reuse
+// GFX1250: v_swmmac_f32_16x16x128_fp8_fp8 v[24:31], v[0:7], v[8:23], v[32:33] matrix_b_reuse ; encoding: [0x18,0x40,0x73,0xcc,0x00,0x11,0x82,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_swmmac_f32_16x16x128_fp8_bf8 v[24:31], v[0:7], v[8:23], v[32:33]
+// GFX1250: v_swmmac_f32_16x16x128_fp8_bf8 v[24:31], v[0:7], v[8:23], v[32:33] ; encoding: [0x18,0x00,0x74,0xcc,0x00,0x11,0x82,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_swmmac_f32_16x16x128_fp8_bf8 v[24:31], v[0:7], v[8:23], v[32:33] index_key:1
+// GFX1250: v_swmmac_f32_16x16x128_fp8_bf8 v[24:31], v[0:7], v[8:23], v[32:33] index_key:1 ; encoding: [0x18,0x08,0x74,0xcc,0x00,0x11,0x82,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_swmmac_f32_16x16x128_fp8_bf8 v[24:31], v[0:7], v[8:23], v[32:33] matrix_a_reuse
+// GFX1250: v_swmmac_f32_16x16x128_fp8_bf8 v[24:31], v[0:7], v[8:23], v[32:33] matrix_a_reuse ; encoding: [0x18,0x20,0x74,0xcc,0x00,0x11,0x82,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_swmmac_f32_16x16x128_fp8_bf8 v[24:31], v[0:7], v[8:23], v[32:33] matrix_b_reuse
+// GFX1250: v_swmmac_f32_16x16x128_fp8_bf8 v[24:31], v[0:7], v[8:23], v[32:33] matrix_b_reuse ; encoding: [0x18,0x40,0x74,0xcc,0x00,0x11,0x82,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_swmmac_f32_16x16x128_bf8_fp8 v[24:31], v[0:7], v[8:23], v[32:33]
+// GFX1250: v_swmmac_f32_16x16x128_bf8_fp8 v[24:31], v[0:7], v[8:23], v[32:33] ; encoding: [0x18,0x00,0x75,0xcc,0x00,0x11,0x82,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_swmmac_f32_16x16x128_bf8_fp8 v[24:31], v[0:7], v[8:23], v[32:33] index_key:1
+// GFX1250: v_swmmac_f32_16x16x128_bf8_fp8 v[24:31], v[0:7], v[8:23], v[32:33] index_key:1 ; encoding: [0x18,0x08,0x75,0xcc,0x00,0x11,0x82,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_swmmac_f32_16x16x128_bf8_fp8 v[24:31], v[0:7], v[8:23], v[32:33] matrix_a_reuse
+// GFX1250: v_swmmac_f32_16x16x128_bf8_fp8 v[24:31], v[0:7], v[8:23], v[32:33] matrix_a_reuse ; encoding: [0x18,0x20,0x75,0xcc,0x00,0x11,0x82,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_swmmac_f32_16x16x128_bf8_fp8 v[24:31], v[0:7], v[8:23], v[32:33] matrix_b_reuse
+// GFX1250: v_swmmac_f32_16x16x128_bf8_fp8 v[24:31], v[0:7], v[8:23], v[32:33] matrix_b_reuse ; encoding: [0x18,0x40,0x75,0xcc,0x00,0x11,0x82,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_swmmac_f32_16x16x128_bf8_bf8 v[24:31], v[0:7], v[8:23], v[32:33]
+// GFX1250: v_swmmac_f32_16x16x128_bf8_bf8 v[24:31], v[0:7], v[8:23], v[32:33] ; encoding: [0x18,0x00,0x76,0xcc,0x00,0x11,0x82,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_swmmac_f32_16x16x128_bf8_bf8 v[24:31], v[0:7], v[8:23], v[32:33] index_key:1
+// GFX1250: v_swmmac_f32_16x16x128_bf8_bf8 v[24:31], v[0:7], v[8:23], v[32:33] index_key:1 ; encoding: [0x18,0x08,0x76,0xcc,0x00,0x11,0x82,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_swmmac_f32_16x16x128_bf8_bf8 v[24:31], v[0:7], v[8:23], v[32:33] matrix_a_reuse
+// GFX1250: v_swmmac_f32_16x16x128_bf8_bf8 v[24:31], v[0:7], v[8:23], v[32:33] matrix_a_reuse ; encoding: [0x18,0x20,0x76,0xcc,0x00,0x11,0x82,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_swmmac_f32_16x16x128_bf8_bf8 v[24:31], v[0:7], v[8:23], v[32:33] matrix_b_reuse
+// GFX1250: v_swmmac_f32_16x16x128_bf8_bf8 v[24:31], v[0:7], v[8:23], v[32:33] matrix_b_reuse ; encoding: [0x18,0x40,0x76,0xcc,0x00,0x11,0x82,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_swmmac_f16_16x16x128_fp8_fp8 v[24:27], v[0:7], v[8:23], v[28:29]
+// GFX1250: v_swmmac_f16_16x16x128_fp8_fp8 v[24:27], v[0:7], v[8:23], v[28:29] ; encoding: [0x18,0x00,0x77,0xcc,0x00,0x11,0x72,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_swmmac_f16_16x16x128_fp8_fp8 v[24:27], v[0:7], v[8:23], v[28:29] index_key:1
+// GFX1250: v_swmmac_f16_16x16x128_fp8_fp8 v[24:27], v[0:7], v[8:23], v[28:29] index_key:1 ; encoding: [0x18,0x08,0x77,0xcc,0x00,0x11,0x72,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_swmmac_f16_16x16x128_fp8_fp8 v[24:27], v[0:7], v[8:23], v[28:29] matrix_a_reuse
+// GFX1250: v_swmmac_f16_16x16x128_fp8_fp8 v[24:27], v[0:7], v[8:23], v[28:29] matrix_a_reuse ; encoding: [0x18,0x20,0x77,0xcc,0x00,0x11,0x72,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_swmmac_f16_16x16x128_fp8_fp8 v[24:27], v[0:7], v[8:23], v[28:29] matrix_b_reuse
+// GFX1250: v_swmmac_f16_16x16x128_fp8_fp8 v[24:27], v[0:7], v[8:23], v[28:29] matrix_b_reuse ; encoding: [0x18,0x40,0x77,0xcc,0x00,0x11,0x72,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_swmmac_f16_16x16x128_fp8_bf8 v[24:27], v[0:7], v[8:23], v[28:29]
+// GFX1250: v_swmmac_f16_16x16x128_fp8_bf8 v[24:27], v[0:7], v[8:23], v[28:29] ; encoding: [0x18,0x00,0x78,0xcc,0x00,0x11,0x72,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_swmmac_f16_16x16x128_fp8_bf8 v[24:27], v[0:7], v[8:23], v[28:29] index_key:1
+// GFX1250: v_swmmac_f16_16x16x128_fp8_bf8 v[24:27], v[0:7], v[8:23], v[28:29] index_key:1 ; encoding: [0x18,0x08,0x78,0xcc,0x00,0x11,0x72,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_swmmac_f16_16x16x128_fp8_bf8 v[24:27], v[0:7], v[8:23], v[28:29] matrix_a_reuse
+// GFX1250: v_swmmac_f16_16x16x128_fp8_bf8 v[24:27], v[0:7], v[8:23], v[28:29] matrix_a_reuse ; encoding: [0x18,0x20,0x78,0xcc,0x00,0x11,0x72,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_swmmac_f16_16x16x128_fp8_bf8 v[24:27], v[0:7], v[8:23], v[28:29] matrix_b_reuse
+// GFX1250: v_swmmac_f16_16x16x128_fp8_bf8 v[24:27], v[0:7], v[8:23], v[28:29] matrix_b_reuse ; encoding: [0x18,0x40,0x78,0xcc,0x00,0x11,0x72,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_swmmac_f16_16x16x128_bf8_fp8 v[24:27], v[0:7], v[8:23], v[28:29]
+// GFX1250: v_swmmac_f16_16x16x128_bf8_fp8 v[24:27], v[0:7], v[8:23], v[28:29] ; encoding: [0x18,0x00,0x79,0xcc,0x00,0x11,0x72,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_swmmac_f16_16x16x128_bf8_fp8 v[24:27], v[0:7], v[8:23], v[28:29] index_key:1
+// GFX1250: v_swmmac_f16_16x16x128_bf8_fp8 v[24:27], v[0:7], v[8:23], v[28:29] index_key:1 ; encoding: [0x18,0x08,0x79,0xcc,0x00,0x11,0x72,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_swmmac_f16_16x16x128_bf8_fp8 v[24:27], v[0:7], v[8:23], v[28:29] matrix_a_reuse
+// GFX1250: v_swmmac_f16_16x16x128_bf8_fp8 v[24:27], v[0:7], v[8:23], v[28:29] matrix_a_reuse ; encoding: [0x18,0x20,0x79,0xcc,0x00,0x11,0x72,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_swmmac_f16_16x16x128_bf8_fp8 v[24:27], v[0:7], v[8:23], v[28:29] matrix_b_reuse
+// GFX1250: v_swmmac_f16_16x16x128_bf8_fp8 v[24:27], v[0:7], v[8:23], v[28:29] matrix_b_reuse ; encoding: [0x18,0x40,0x79,0xcc,0x00,0x11,0x72,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_swmmac_f16_16x16x128_bf8_bf8 v[24:27], v[0:7], v[8:23], v[28:29]
+// GFX1250: v_swmmac_f16_16x16x128_bf8_bf8 v[24:27], v[0:7], v[8:23], v[28:29] ; encoding: [0x18,0x00,0x7a,0xcc,0x00,0x11,0x72,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_swmmac_f16_16x16x128_bf8_bf8 v[24:27], v[0:7], v[8:23], v[28:29] index_key:1
+// GFX1250: v_swmmac_f16_16x16x128_bf8_bf8 v[24:27], v[0:7], v[8:23], v[28:29] index_key:1 ; encoding: [0x18,0x08,0x7a,0xcc,0x00,0x11,0x72,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_swmmac_f16_16x16x128_bf8_bf8 v[24:27], v[0:7], v[8:23], v[28:29] matrix_a_reuse
+// GFX1250: v_swmmac_f16_16x16x128_bf8_bf8 v[24:27], v[0:7], v[8:23], v[28:29] matrix_a_reuse ; encoding: [0x18,0x20,0x7a,0xcc,0x00,0x11,0x72,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_swmmac_f16_16x16x128_bf8_bf8 v[24:27], v[0:7], v[8:23], v[28:29] matrix_b_reuse
+// GFX1250: v_swmmac_f16_16x16x128_bf8_bf8 v[24:27], v[0:7], v[8:23], v[28:29] matrix_b_reuse ; encoding: [0x18,0x40,0x7a,0xcc,0x00,0x11,0x72,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_swmmac_i32_16x16x128_iu8 v[24:31], v[0:7], v[8:23], v[32:33]
+// GFX1250: v_swmmac_i32_16x16x128_iu8 v[24:31], v[0:7], v[8:23], v[32:33] ; encoding: [0x18,0x00,0x7b,0xcc,0x00,0x11,0x82,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_swmmac_i32_16x16x128_iu8 v[24:31], v[0:7], v[8:23], v[32:33] index_key:1
+// GFX1250: v_swmmac_i32_16x16x128_iu8 v[24:31], v[0:7], v[8:23], v[32:33] index_key:1 ; encoding: [0x18,0x08,0x7b,0xcc,0x00,0x11,0x82,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_swmmac_i32_16x16x128_iu8 v[24:31], v[0:7], v[8:23], v[32:33] neg_lo:[1,0,0]
+// GFX1250: v_swmmac_i32_16x16x128_iu8 v[24:31], v[0:7], v[8:23], v[32:33] neg_lo:[1,0,0] ; encoding: [0x18,0x00,0x7b,0xcc,0x00,0x11,0x82,0x3c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_swmmac_i32_16x16x128_iu8 v[24:31], v[0:7], v[8:23], v[32:33] neg_lo:[0,1,0]
+// GFX1250: v_swmmac_i32_16x16x128_iu8 v[24:31], v[0:7], v[8:23], v[32:33] neg_lo:[0,1,0] ; encoding: [0x18,0x00,0x7b,0xcc,0x00,0x11,0x82,0x5c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_swmmac_i32_16x16x128_iu8 v[24:31], v[0:7], v[8:23], v[32:33] matrix_a_reuse
+// GFX1250: v_swmmac_i32_16x16x128_iu8 v[24:31], v[0:7], v[8:23], v[32:33] matrix_a_reuse ; encoding: [0x18,0x20,0x7b,0xcc,0x00,0x11,0x82,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_swmmac_i32_16x16x128_iu8 v[24:31], v[0:7], v[8:23], v[32:33] matrix_b_reuse
+// GFX1250: v_swmmac_i32_16x16x128_iu8 v[24:31], v[0:7], v[8:23], v[32:33] matrix_b_reuse ; encoding: [0x18,0x40,0x7b,0xcc,0x00,0x11,0x82,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_swmmac_f32_16x16x64_f16 v[24:31], v[0:7], v[8:23], v32
+// GFX1250: v_swmmac_f32_16x16x64_f16 v[24:31], v[0:7], v[8:23], v32 ; encoding: [0x18,0x00,0x65,0xcc,0x00,0x11,0x82,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_swmmac_f32_16x16x64_f16 v[24:31], v[0:7], v[8:23], v32 index_key:1
+// GFX1250: v_swmmac_f32_16x16x64_f16 v[24:31], v[0:7], v[8:23], v32 index_key:1 ; encoding: [0x18,0x08,0x65,0xcc,0x00,0x11,0x82,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_swmmac_f32_16x16x64_f16 v[24:31], v[0:7], v[8:23], v32 neg_lo:[1,0,0] neg_hi:[1,0,0]
+// GFX1250: v_swmmac_f32_16x16x64_f16 v[24:31], v[0:7], v[8:23], v32 neg_lo:[1,0,0] neg_hi:[1,0,0] ; encoding: [0x18,0x01,0x65,0xcc,0x00,0x11,0x82,0x3c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_swmmac_f32_16x16x64_f16 v[24:31], v[0:7], v[8:23], v32 neg_lo:[0,1,0] neg_hi:[0,1,0]
+// GFX1250: v_swmmac_f32_16x16x64_f16 v[24:31], v[0:7], v[8:23], v32 neg_lo:[0,1,0] neg_hi:[0,1,0] ; encoding: [0x18,0x02,0x65,0xcc,0x00,0x11,0x82,0x5c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_swmmac_f32_16x16x64_f16 v[24:31], v[0:7], v[8:23], v32 matrix_a_reuse
+// GFX1250: v_swmmac_f32_16x16x64_f16 v[24:31], v[0:7], v[8:23], v32 matrix_a_reuse ; encoding: [0x18,0x20,0x65,0xcc,0x00,0x11,0x82,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_swmmac_f32_16x16x64_f16 v[24:31], v[0:7], v[8:23], v32 matrix_b_reuse
+// GFX1250: v_swmmac_f32_16x16x64_f16 v[24:31], v[0:7], v[8:23], v32 matrix_b_reuse ; encoding: [0x18,0x40,0x65,0xcc,0x00,0x11,0x82,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_swmmac_f16_16x16x64_f16 v[24:27], v[0:7], v[8:23], v28
+// GFX1250: v_swmmac_f16_16x16x64_f16 v[24:27], v[0:7], v[8:23], v28 ; encoding: [0x18,0x00,0x67,0xcc,0x00,0x11,0x72,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_swmmac_f16_16x16x64_f16 v[24:27], v[0:7], v[8:23], v28 index_key:1
+// GFX1250: v_swmmac_f16_16x16x64_f16 v[24:27], v[0:7], v[8:23], v28 index_key:1 ; encoding: [0x18,0x08,0x67,0xcc,0x00,0x11,0x72,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_swmmac_f16_16x16x64_f16 v[24:27], v[0:7], v[8:23], v28 neg_lo:[1,0,0] neg_hi:[1,0,0]
+// GFX1250: v_swmmac_f16_16x16x64_f16 v[24:27], v[0:7], v[8:23], v28 neg_lo:[1,0,0] neg_hi:[1,0,0] ; encoding: [0x18,0x01,0x67,0xcc,0x00,0x11,0x72,0x3c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_swmmac_f16_16x16x64_f16 v[24:27], v[0:7], v[8:23], v28 neg_lo:[0,1,0] neg_hi:[0,1,0]
+// GFX1250: v_swmmac_f16_16x16x64_f16 v[24:27], v[0:7], v[8:23], v28 neg_lo:[0,1,0] neg_hi:[0,1,0] ; encoding: [0x18,0x02,0x67,0xcc,0x00,0x11,0x72,0x5c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_swmmac_f16_16x16x64_f16 v[24:27], v[0:7], v[8:23], v28 matrix_a_reuse
+// GFX1250: v_swmmac_f16_16x16x64_f16 v[24:27], v[0:7], v[8:23], v28 matrix_a_reuse ; encoding: [0x18,0x20,0x67,0xcc,0x00,0x11,0x72,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_swmmac_f16_16x16x64_f16 v[24:27], v[0:7], v[8:23], v28 matrix_b_reuse
+// GFX1250: v_swmmac_f16_16x16x64_f16 v[24:27], v[0:7], v[8:23], v28 matrix_b_reuse ; encoding: [0x18,0x40,0x67,0xcc,0x00,0x11,0x72,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f16_16x16x128_fp8_fp8 v[16:19], v[0:15], v[8:23], v[16:19]
+// GFX1250: v_wmma_f16_16x16x128_fp8_fp8 v[16:19], v[0:15], v[8:23], v[16:19] ; encoding: [0x10,0x00,0x84,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f16_16x16x128_fp8_fp8 v[16:19], v[0:15], v[8:23], 1.0
+// GFX1250: v_wmma_f16_16x16x128_fp8_fp8 v[16:19], v[0:15], v[8:23], 1.0 ; encoding: [0x10,0x00,0x84,0xcc,0x00,0x11,0xca,0x1b]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f16_16x16x128_fp8_fp8 v[16:19], v[0:15], v[8:23], 1.0 neg_lo:[0,0,1]
+// GFX1250: v_wmma_f16_16x16x128_fp8_fp8 v[16:19], v[0:15], v[8:23], 1.0 neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x84,0xcc,0x00,0x11,0xca,0x9b]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f16_16x16x128_fp8_fp8 v[16:19], v[0:15], v[8:23], v[16:19] neg_lo:[0,0,1]
+// GFX1250: v_wmma_f16_16x16x128_fp8_fp8 v[16:19], v[0:15], v[8:23], v[16:19] neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x84,0xcc,0x00,0x11,0x42,0x9c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f16_16x16x128_fp8_fp8 v[16:19], v[0:15], v[8:23], v[16:19] neg_hi:[0,0,1]
+// GFX1250: v_wmma_f16_16x16x128_fp8_fp8 v[16:19], v[0:15], v[8:23], v[16:19] neg_hi:[0,0,1] ; encoding: [0x10,0x04,0x84,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f16_16x16x128_fp8_fp8 v[16:19], v[0:15], v[8:23], v[16:19] matrix_a_reuse
+// GFX1250: v_wmma_f16_16x16x128_fp8_fp8 v[16:19], v[0:15], v[8:23], v[16:19] matrix_a_reuse ; encoding: [0x10,0x20,0x84,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f16_16x16x128_fp8_fp8 v[16:19], v[0:15], v[8:23], v[16:19] matrix_b_reuse
+// GFX1250: v_wmma_f16_16x16x128_fp8_fp8 v[16:19], v[0:15], v[8:23], v[16:19] matrix_b_reuse ; encoding: [0x10,0x40,0x84,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f16_16x16x128_fp8_bf8 v[16:19], v[0:15], v[8:23], v[16:19]
+// GFX1250: v_wmma_f16_16x16x128_fp8_bf8 v[16:19], v[0:15], v[8:23], v[16:19] ; encoding: [0x10,0x00,0x85,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f16_16x16x128_fp8_bf8 v[16:19], v[0:15], v[8:23], 1.0
+// GFX1250: v_wmma_f16_16x16x128_fp8_bf8 v[16:19], v[0:15], v[8:23], 1.0 ; encoding: [0x10,0x00,0x85,0xcc,0x00,0x11,0xca,0x1b]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f16_16x16x128_fp8_bf8 v[16:19], v[0:15], v[8:23], 1.0 neg_lo:[0,0,1]
+// GFX1250: v_wmma_f16_16x16x128_fp8_bf8 v[16:19], v[0:15], v[8:23], 1.0 neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x85,0xcc,0x00,0x11,0xca,0x9b]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f16_16x16x128_fp8_bf8 v[16:19], v[0:15], v[8:23], v[16:19] neg_lo:[0,0,1]
+// GFX1250: v_wmma_f16_16x16x128_fp8_bf8 v[16:19], v[0:15], v[8:23], v[16:19] neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x85,0xcc,0x00,0x11,0x42,0x9c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f16_16x16x128_fp8_bf8 v[16:19], v[0:15], v[8:23], v[16:19] neg_hi:[0,0,1]
+// GFX1250: v_wmma_f16_16x16x128_fp8_bf8 v[16:19], v[0:15], v[8:23], v[16:19] neg_hi:[0,0,1] ; encoding: [0x10,0x04,0x85,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f16_16x16x128_fp8_bf8 v[16:19], v[0:15], v[8:23], v[16:19] matrix_a_reuse
+// GFX1250: v_wmma_f16_16x16x128_fp8_bf8 v[16:19], v[0:15], v[8:23], v[16:19] matrix_a_reuse ; encoding: [0x10,0x20,0x85,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f16_16x16x128_fp8_bf8 v[16:19], v[0:15], v[8:23], v[16:19] matrix_b_reuse
+// GFX1250: v_wmma_f16_16x16x128_fp8_bf8 v[16:19], v[0:15], v[8:23], v[16:19] matrix_b_reuse ; encoding: [0x10,0x40,0x85,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f16_16x16x128_bf8_fp8 v[16:19], v[0:15], v[8:23], v[16:19]
+// GFX1250: v_wmma_f16_16x16x128_bf8_fp8 v[16:19], v[0:15], v[8:23], v[16:19] ; encoding: [0x10,0x00,0x86,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f16_16x16x128_bf8_fp8 v[16:19], v[0:15], v[8:23], 1.0
+// GFX1250: v_wmma_f16_16x16x128_bf8_fp8 v[16:19], v[0:15], v[8:23], 1.0 ; encoding: [0x10,0x00,0x86,0xcc,0x00,0x11,0xca,0x1b]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f16_16x16x128_bf8_fp8 v[16:19], v[0:15], v[8:23], 1.0 neg_lo:[0,0,1]
+// GFX1250: v_wmma_f16_16x16x128_bf8_fp8 v[16:19], v[0:15], v[8:23], 1.0 neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x86,0xcc,0x00,0x11,0xca,0x9b]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f16_16x16x128_bf8_fp8 v[16:19], v[0:15], v[8:23], v[16:19] neg_lo:[0,0,1]
+// GFX1250: v_wmma_f16_16x16x128_bf8_fp8 v[16:19], v[0:15], v[8:23], v[16:19] neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x86,0xcc,0x00,0x11,0x42,0x9c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f16_16x16x128_bf8_fp8 v[16:19], v[0:15], v[8:23], v[16:19] neg_hi:[0,0,1]
+// GFX1250: v_wmma_f16_16x16x128_bf8_fp8 v[16:19], v[0:15], v[8:23], v[16:19] neg_hi:[0,0,1] ; encoding: [0x10,0x04,0x86,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f16_16x16x128_bf8_fp8 v[16:19], v[0:15], v[8:23], v[16:19] matrix_a_reuse
+// GFX1250: v_wmma_f16_16x16x128_bf8_fp8 v[16:19], v[0:15], v[8:23], v[16:19] matrix_a_reuse ; encoding: [0x10,0x20,0x86,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f16_16x16x128_bf8_fp8 v[16:19], v[0:15], v[8:23], v[16:19] matrix_b_reuse
+// GFX1250: v_wmma_f16_16x16x128_bf8_fp8 v[16:19], v[0:15], v[8:23], v[16:19] matrix_b_reuse ; encoding: [0x10,0x40,0x86,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f16_16x16x128_bf8_bf8 v[16:19], v[0:15], v[8:23], v[16:19]
+// GFX1250: v_wmma_f16_16x16x128_bf8_bf8 v[16:19], v[0:15], v[8:23], v[16:19] ; encoding: [0x10,0x00,0x87,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f16_16x16x128_bf8_bf8 v[16:19], v[0:15], v[8:23], 1.0
+// GFX1250: v_wmma_f16_16x16x128_bf8_bf8 v[16:19], v[0:15], v[8:23], 1.0 ; encoding: [0x10,0x00,0x87,0xcc,0x00,0x11,0xca,0x1b]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f16_16x16x128_bf8_bf8 v[16:19], v[0:15], v[8:23], 1.0 neg_lo:[0,0,1]
+// GFX1250: v_wmma_f16_16x16x128_bf8_bf8 v[16:19], v[0:15], v[8:23], 1.0 neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x87,0xcc,0x00,0x11,0xca,0x9b]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f16_16x16x128_bf8_bf8 v[16:19], v[0:15], v[8:23], v[16:19] neg_lo:[0,0,1]
+// GFX1250: v_wmma_f16_16x16x128_bf8_bf8 v[16:19], v[0:15], v[8:23], v[16:19] neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x87,0xcc,0x00,0x11,0x42,0x9c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f16_16x16x128_bf8_bf8 v[16:19], v[0:15], v[8:23], v[16:19] neg_hi:[0,0,1]
+// GFX1250: v_wmma_f16_16x16x128_bf8_bf8 v[16:19], v[0:15], v[8:23], v[16:19] neg_hi:[0,0,1] ; encoding: [0x10,0x04,0x87,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f16_16x16x128_bf8_bf8 v[16:19], v[0:15], v[8:23], v[16:19] matrix_a_reuse
+// GFX1250: v_wmma_f16_16x16x128_bf8_bf8 v[16:19], v[0:15], v[8:23], v[16:19] matrix_a_reuse ; encoding: [0x10,0x20,0x87,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f16_16x16x128_bf8_bf8 v[16:19], v[0:15], v[8:23], v[16:19] matrix_b_reuse
+// GFX1250: v_wmma_f16_16x16x128_bf8_bf8 v[16:19], v[0:15], v[8:23], v[16:19] matrix_b_reuse ; encoding: [0x10,0x40,0x87,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x128_fp8_fp8 v[16:23], v[0:15], v[8:23], v[16:23]
+// GFX1250: v_wmma_f32_16x16x128_fp8_fp8 v[16:23], v[0:15], v[8:23], v[16:23] ; encoding: [0x10,0x00,0x80,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x128_fp8_fp8 v[16:23], v[0:15], v[8:23], 1.0
+// GFX1250: v_wmma_f32_16x16x128_fp8_fp8 v[16:23], v[0:15], v[8:23], 1.0 ; encoding: [0x10,0x00,0x80,0xcc,0x00,0x11,0xca,0x1b]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x128_fp8_fp8 v[16:23], v[0:15], v[8:23], 1.0 neg_lo:[0,0,1]
+// GFX1250: v_wmma_f32_16x16x128_fp8_fp8 v[16:23], v[0:15], v[8:23], 1.0 neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x80,0xcc,0x00,0x11,0xca,0x9b]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x128_fp8_fp8 v[16:23], v[0:15], v[8:23], v[16:23] neg_lo:[0,0,1]
+// GFX1250: v_wmma_f32_16x16x128_fp8_fp8 v[16:23], v[0:15], v[8:23], v[16:23] neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x80,0xcc,0x00,0x11,0x42,0x9c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x128_fp8_fp8 v[16:23], v[0:15], v[8:23], v[16:23] neg_hi:[0,0,1]
+// GFX1250: v_wmma_f32_16x16x128_fp8_fp8 v[16:23], v[0:15], v[8:23], v[16:23] neg_hi:[0,0,1] ; encoding: [0x10,0x04,0x80,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x128_fp8_fp8 v[16:23], v[0:15], v[8:23], v[16:23] matrix_a_reuse
+// GFX1250: v_wmma_f32_16x16x128_fp8_fp8 v[16:23], v[0:15], v[8:23], v[16:23] matrix_a_reuse ; encoding: [0x10,0x20,0x80,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x128_fp8_fp8 v[16:23], v[0:15], v[8:23], v[16:23] matrix_b_reuse
+// GFX1250: v_wmma_f32_16x16x128_fp8_fp8 v[16:23], v[0:15], v[8:23], v[16:23] matrix_b_reuse ; encoding: [0x10,0x40,0x80,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x128_fp8_bf8 v[16:23], v[0:15], v[8:23], v[16:23]
+// GFX1250: v_wmma_f32_16x16x128_fp8_bf8 v[16:23], v[0:15], v[8:23], v[16:23] ; encoding: [0x10,0x00,0x81,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x128_fp8_bf8 v[16:23], v[0:15], v[8:23], 1.0
+// GFX1250: v_wmma_f32_16x16x128_fp8_bf8 v[16:23], v[0:15], v[8:23], 1.0 ; encoding: [0x10,0x00,0x81,0xcc,0x00,0x11,0xca,0x1b]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x128_fp8_bf8 v[16:23], v[0:15], v[8:23], 1.0 neg_lo:[0,0,1]
+// GFX1250: v_wmma_f32_16x16x128_fp8_bf8 v[16:23], v[0:15], v[8:23], 1.0 neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x81,0xcc,0x00,0x11,0xca,0x9b]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x128_fp8_bf8 v[16:23], v[0:15], v[8:23], v[16:23] neg_lo:[0,0,1]
+// GFX1250: v_wmma_f32_16x16x128_fp8_bf8 v[16:23], v[0:15], v[8:23], v[16:23] neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x81,0xcc,0x00,0x11,0x42,0x9c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x128_fp8_bf8 v[16:23], v[0:15], v[8:23], v[16:23] neg_hi:[0,0,1]
+// GFX1250: v_wmma_f32_16x16x128_fp8_bf8 v[16:23], v[0:15], v[8:23], v[16:23] neg_hi:[0,0,1] ; encoding: [0x10,0x04,0x81,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x128_fp8_bf8 v[16:23], v[0:15], v[8:23], v[16:23] matrix_a_reuse
+// GFX1250: v_wmma_f32_16x16x128_fp8_bf8 v[16:23], v[0:15], v[8:23], v[16:23] matrix_a_reuse ; encoding: [0x10,0x20,0x81,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x128_fp8_bf8 v[16:23], v[0:15], v[8:23], v[16:23] matrix_b_reuse
+// GFX1250: v_wmma_f32_16x16x128_fp8_bf8 v[16:23], v[0:15], v[8:23], v[16:23] matrix_b_reuse ; encoding: [0x10,0x40,0x81,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x128_bf8_fp8 v[16:23], v[0:15], v[8:23], v[16:23]
+// GFX1250: v_wmma_f32_16x16x128_bf8_fp8 v[16:23], v[0:15], v[8:23], v[16:23] ; encoding: [0x10,0x00,0x82,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x128_bf8_fp8 v[16:23], v[0:15], v[8:23], 1.0
+// GFX1250: v_wmma_f32_16x16x128_bf8_fp8 v[16:23], v[0:15], v[8:23], 1.0 ; encoding: [0x10,0x00,0x82,0xcc,0x00,0x11,0xca,0x1b]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x128_bf8_fp8 v[16:23], v[0:15], v[8:23], 1.0 neg_lo:[0,0,1]
+// GFX1250: v_wmma_f32_16x16x128_bf8_fp8 v[16:23], v[0:15], v[8:23], 1.0 neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x82,0xcc,0x00,0x11,0xca,0x9b]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x128_bf8_fp8 v[16:23], v[0:15], v[8:23], v[16:23] neg_lo:[0,0,1]
+// GFX1250: v_wmma_f32_16x16x128_bf8_fp8 v[16:23], v[0:15], v[8:23], v[16:23] neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x82,0xcc,0x00,0x11,0x42,0x9c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x128_bf8_fp8 v[16:23], v[0:15], v[8:23], v[16:23] neg_hi:[0,0,1]
+// GFX1250: v_wmma_f32_16x16x128_bf8_fp8 v[16:23], v[0:15], v[8:23], v[16:23] neg_hi:[0,0,1] ; encoding: [0x10,0x04,0x82,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x128_bf8_fp8 v[16:23], v[0:15], v[8:23], v[16:23] matrix_a_reuse
+// GFX1250: v_wmma_f32_16x16x128_bf8_fp8 v[16:23], v[0:15], v[8:23], v[16:23] matrix_a_reuse ; encoding: [0x10,0x20,0x82,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x128_bf8_fp8 v[16:23], v[0:15], v[8:23], v[16:23] matrix_b_reuse
+// GFX1250: v_wmma_f32_16x16x128_bf8_fp8 v[16:23], v[0:15], v[8:23], v[16:23] matrix_b_reuse ; encoding: [0x10,0x40,0x82,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x128_bf8_bf8 v[16:23], v[0:15], v[8:23], v[16:23]
+// GFX1250: v_wmma_f32_16x16x128_bf8_bf8 v[16:23], v[0:15], v[8:23], v[16:23] ; encoding: [0x10,0x00,0x83,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x128_bf8_bf8 v[16:23], v[0:15], v[8:23], 1.0
+// GFX1250: v_wmma_f32_16x16x128_bf8_bf8 v[16:23], v[0:15], v[8:23], 1.0 ; encoding: [0x10,0x00,0x83,0xcc,0x00,0x11,0xca,0x1b]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x128_bf8_bf8 v[16:23], v[0:15], v[8:23], 1.0 neg_lo:[0,0,1]
+// GFX1250: v_wmma_f32_16x16x128_bf8_bf8 v[16:23], v[0:15], v[8:23], 1.0 neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x83,0xcc,0x00,0x11,0xca,0x9b]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x128_bf8_bf8 v[16:23], v[0:15], v[8:23], v[16:23] neg_lo:[0,0,1]
+// GFX1250: v_wmma_f32_16x16x128_bf8_bf8 v[16:23], v[0:15], v[8:23], v[16:23] neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x83,0xcc,0x00,0x11,0x42,0x9c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x128_bf8_bf8 v[16:23], v[0:15], v[8:23], v[16:23] neg_hi:[0,0,1]
+// GFX1250: v_wmma_f32_16x16x128_bf8_bf8 v[16:23], v[0:15], v[8:23], v[16:23] neg_hi:[0,0,1] ; encoding: [0x10,0x04,0x83,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x128_bf8_bf8 v[16:23], v[0:15], v[8:23], v[16:23] matrix_a_reuse
+// GFX1250: v_wmma_f32_16x16x128_bf8_bf8 v[16:23], v[0:15], v[8:23], v[16:23] matrix_a_reuse ; encoding: [0x10,0x20,0x83,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_16x16x128_bf8_bf8 v[16:23], v[0:15], v[8:23], v[16:23] matrix_b_reuse
+// GFX1250: v_wmma_f32_16x16x128_bf8_bf8 v[16:23], v[0:15], v[8:23], v[16:23] matrix_b_reuse ; encoding: [0x10,0x40,0x83,0xcc,0x00,0x11,0x42,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_32x16x128_f4 v[4:19], v[0:15], v[2:9], v[4:19]
+// GFX1250: v_wmma_f32_32x16x128_f4 v[4:19], v[0:15], v[2:9], v[4:19] ; encoding: [0x04,0x40,0x88,0xcc,0x00,0x05,0x12,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_32x16x128_f4 v[4:19], v[0:15], v[2:9], 1.0
+// GFX1250: v_wmma_f32_32x16x128_f4 v[4:19], v[0:15], v[2:9], 1.0 ; encoding: [0x04,0x40,0x88,0xcc,0x00,0x05,0xca,0x1b]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_32x16x128_f4 v[4:19], v[0:15], v[2:9], 1.0 neg_lo:[0,0,1]
+// GFX1250: v_wmma_f32_32x16x128_f4 v[4:19], v[0:15], v[2:9], 1.0 neg_lo:[0,0,1] ; encoding: [0x04,0x40,0x88,0xcc,0x00,0x05,0xca,0x9b]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_32x16x128_f4 v[4:19], v[0:15], v[2:9], v[4:19] neg_lo:[0,0,1]
+// GFX1250: v_wmma_f32_32x16x128_f4 v[4:19], v[0:15], v[2:9], v[4:19] neg_lo:[0,0,1] ; encoding: [0x04,0x40,0x88,0xcc,0x00,0x05,0x12,0x9c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_32x16x128_f4 v[4:19], v[0:15], v[2:9], v[4:19] neg_hi:[0,0,1]
+// GFX1250: v_wmma_f32_32x16x128_f4 v[4:19], v[0:15], v[2:9], v[4:19] neg_hi:[0,0,1] ; encoding: [0x04,0x44,0x88,0xcc,0x00,0x05,0x12,0x1c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+
+v_wmma_f32_32x16x128_f4 v[4:19], v[0:15], v[2:9], v[4:19] neg_lo:[0,0,1] neg_hi:[0,0,1]
+// GFX1250: v_wmma_f32_32x16x128_f4 v[4:19], v[0:15], v[2:9], v[4:19] neg_lo:[0,0,1] neg_hi:[0,0,1] ; encoding: [0x04,0x44,0x88,0xcc,0x00,0x05,0x12,0x9c]
+// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32
+// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_wmma_w32_err.s b/llvm/test/MC/AMDGPU/gfx1250_asm_wmma_w32_err.s
new file mode 100644
index 0000000000000..47445d368d3a0
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_wmma_w32_err.s
@@ -0,0 +1,384 @@
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1250 %s 2>&1 | FileCheck --check-prefix=GFX1250-ERR --implicit-check-not=error: %s
+
+v_wmma_f32_16x16x4_f32 v[4:11], v[0:1], v[2:3], s[4:11]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_f32_16x16x4_f32 v[4:11], v[0:1], v[2:3], 3.0
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_f32_16x16x32_bf16 v[16:23], v[0:7], v[8:15], s[16:23]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_f32_16x16x32_bf16 v[16:23], v[0:7], v[8:15], 3.0
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_bf16_16x16x32_bf16 v[16:19], v[0:7], v[8:15], s[16:19]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_bf16_16x16x32_bf16 v[16:19], v[0:7], v[8:15], 3.0
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_bf16f32_16x16x32_bf16 v[26:29], v[0:7], v[8:15], s[16:23]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_bf16f32_16x16x32_bf16 v[26:29], v[0:7], v[8:15], 3.0
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_f32_16x16x64_fp8_fp8 v[16:23], v[0:7], v[8:15], s[16:23]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_f32_16x16x64_fp8_fp8 v[16:23], v[0:7], v[8:15], 3.0
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_f32_16x16x64_fp8_fp8 v[16:23], v[0:7], v[8:15], v[16:23] neg_lo:[1,0,0]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid neg_lo operand
+
+v_wmma_f32_16x16x64_fp8_fp8 v[16:23], v[0:7], v[8:15], v[16:23] neg_lo:[0,1,0]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid neg_lo operand
+
+v_wmma_f32_16x16x64_fp8_bf8 v[16:23], v[0:7], v[8:15], s[16:23]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_f32_16x16x64_fp8_bf8 v[16:23], v[0:7], v[8:15], 3.0
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_f32_16x16x64_fp8_bf8 v[16:23], v[0:7], v[8:15], v[16:23] neg_lo:[1,0,0]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid neg_lo operand
+
+v_wmma_f32_16x16x64_fp8_bf8 v[16:23], v[0:7], v[8:15], v[16:23] neg_lo:[0,1,0]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid neg_lo operand
+
+v_wmma_f32_16x16x64_bf8_fp8 v[16:23], v[0:7], v[8:15], s[16:23]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_f32_16x16x64_bf8_fp8 v[16:23], v[0:7], v[8:15], 3.0
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_f32_16x16x64_bf8_fp8 v[16:23], v[0:7], v[8:15], v[16:23] neg_lo:[1,0,0]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid neg_lo operand
+
+v_wmma_f32_16x16x64_bf8_fp8 v[16:23], v[0:7], v[8:15], v[16:23] neg_lo:[0,1,0]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid neg_lo operand
+
+v_wmma_f32_16x16x64_bf8_bf8 v[16:23], v[0:7], v[8:15], s[16:23]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_f32_16x16x64_bf8_bf8 v[16:23], v[0:7], v[8:15], 3.0
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_f32_16x16x64_bf8_bf8 v[16:23], v[0:7], v[8:15], v[16:23] neg_lo:[1,0,0]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid neg_lo operand
+
+v_wmma_f32_16x16x64_bf8_bf8 v[16:23], v[0:7], v[8:15], v[16:23] neg_lo:[0,1,0]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid neg_lo operand
+
+v_wmma_f16_16x16x64_fp8_fp8 v[16:19], v[0:7], v[8:15], s[16:19]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_f16_16x16x64_fp8_fp8 v[16:19], v[0:7], v[8:15], 3.0
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_f16_16x16x64_fp8_fp8 v[16:19], v[0:7], v[8:15], v[16:19] neg_lo:[1,0,0]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid neg_lo operand
+
+v_wmma_f16_16x16x64_fp8_fp8 v[16:19], v[0:7], v[8:15], v[16:19] neg_lo:[0,1,0]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid neg_lo operand
+
+v_wmma_f16_16x16x64_fp8_bf8 v[16:19], v[0:7], v[8:15], s[16:19]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_f16_16x16x64_fp8_bf8 v[16:19], v[0:7], v[8:15], 3.0
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_f16_16x16x64_fp8_bf8 v[16:19], v[0:7], v[8:15], v[16:19] neg_lo:[1,0,0]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid neg_lo operand
+
+v_wmma_f16_16x16x64_fp8_bf8 v[16:19], v[0:7], v[8:15], v[16:19] neg_lo:[0,1,0]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid neg_lo operand
+
+v_wmma_f16_16x16x64_bf8_fp8 v[16:19], v[0:7], v[8:15], s[16:19]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_f16_16x16x64_bf8_fp8 v[16:19], v[0:7], v[8:15], 3.0
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_f16_16x16x64_bf8_fp8 v[16:19], v[0:7], v[8:15], v[16:19] neg_lo:[1,0,0]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid neg_lo operand
+
+v_wmma_f16_16x16x64_bf8_fp8 v[16:19], v[0:7], v[8:15], v[16:19] neg_lo:[0,1,0]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid neg_lo operand
+
+v_wmma_f16_16x16x64_bf8_bf8 v[16:19], v[0:7], v[8:15], s[16:19]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_f16_16x16x64_bf8_bf8 v[16:19], v[0:7], v[8:15], 3.0
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_f16_16x16x64_bf8_bf8 v[16:19], v[0:7], v[8:15], v[16:19] neg_lo:[1,0,0]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid neg_lo operand
+
+v_wmma_f16_16x16x64_bf8_bf8 v[16:19], v[0:7], v[8:15], v[16:19] neg_lo:[0,1,0]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid neg_lo operand
+
+v_wmma_i32_16x16x64_iu8 v[16:23], v[0:7], v[8:15], s[16:23]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_i32_16x16x64_iu8 v[16:23], v[0:7], v[8:15], 128
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_i32_16x16x64_iu8 v[16:23], v[0:7], v[8:15], v[16:23] clamp
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_i32_16x16x64_iu8 v[16:23], v[0:7], v[8:15], v[16:23] neg_lo:[0,0,1]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid neg_lo operand
+
+v_wmma_f32_16x16x32_f16 v[16:23], v[0:7], v[8:15], s[16:23]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_f32_16x16x32_f16 v[16:23], v[0:7], v[8:15], 3.0
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_f16_16x16x32_f16 v[16:19], v[0:7], v[8:15], s[16:19]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_f16_16x16x32_f16 v[16:19], v[0:7], v[8:15], 3.0
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f32_16x16x64_bf16 v[24:31], v[0:7], v[8:23], s32
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f32_16x16x64_bf16 v[24:31], v[0:7], 1.0, v32
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f32_16x16x64_bf16 v[24:31], v[0:7], v[8:23], v32 index_key:2
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: out of range index_key
+
+v_swmmac_f32_16x16x64_bf16 v[24:31], v[0:7], v[8:23], v32 neg_lo:[0,0,1]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid neg_lo operand
+
+v_swmmac_bf16_16x16x64_bf16 v[24:27], v[0:7], v[8:23], s28
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_bf16_16x16x64_bf16 v[24:27], v[0:7], 1.0, v28
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_bf16_16x16x64_bf16 v[24:27], v[0:7], v[8:23], v28 index_key:2
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: out of range index_key
+
+v_swmmac_bf16_16x16x64_bf16 v[24:27], v[0:7], v[8:23], v28 neg_lo:[0,0,1]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid neg_lo operand
+
+v_swmmac_bf16f32_16x16x64_bf16 v[24:31], v[0:7], v[8:23], s32
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_bf16f32_16x16x64_bf16 v[24:31], v[0:7], 1.0, v32
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_bf16f32_16x16x64_bf16 v[24:31], v[0:7], v[8:23], v32 index_key:2
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: out of range index_key
+
+v_swmmac_bf16f32_16x16x64_bf16 v[24:31], v[0:7], v[8:23], v32 neg_lo:[0,0,1]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid neg_lo operand
+
+v_swmmac_f32_16x16x128_fp8_fp8 v[24:31], v[0:7], v[8:23], s[32:33]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f32_16x16x128_fp8_fp8 v[24:31], v[0:7], 1.0, v[32:33]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f32_16x16x128_fp8_fp8 v[24:31], v[0:7], v[8:23], v[32:33] index_key:2
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: out of range index_key
+
+v_swmmac_f32_16x16x128_fp8_fp8 v[24:31], v[0:7], v[8:23], v[32:33] neg_lo:[1,0,0]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x128_fp8_fp8 v[24:31], v[0:7], v[8:23], v[32:33] neg_lo:[0,1,0]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x128_fp8_fp8 v[24:31], v[0:7], v[8:23], v[32:33] neg_lo:[0,0,1]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x128_fp8_bf8 v[24:31], v[0:7], v[8:23], s[32:33]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f32_16x16x128_fp8_bf8 v[24:31], v[0:7], 1.0, v[32:33]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f32_16x16x128_fp8_bf8 v[24:31], v[0:7], v[8:23], v[32:33] index_key:2
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: out of range index_key
+
+v_swmmac_f32_16x16x128_fp8_bf8 v[24:31], v[0:7], v[8:23], v[32:33] neg_lo:[1,0,0]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x128_fp8_bf8 v[24:31], v[0:7], v[8:23], v[32:33] neg_lo:[0,1,0]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x128_fp8_bf8 v[24:31], v[0:7], v[8:23], v[32:33] neg_lo:[0,0,1]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x128_bf8_fp8 v[24:31], v[0:7], v[8:23], s[32:33]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f32_16x16x128_bf8_fp8 v[24:31], v[0:7], 1.0, v[32:33]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f32_16x16x128_bf8_fp8 v[24:31], v[0:7], v[8:23], v[32:33] index_key:2
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: out of range index_key
+
+v_swmmac_f32_16x16x128_bf8_fp8 v[24:31], v[0:7], v[8:23], v[32:33] neg_lo:[1,0,0]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x128_bf8_fp8 v[24:31], v[0:7], v[8:23], v[32:33] neg_lo:[0,1,0]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x128_bf8_fp8 v[24:31], v[0:7], v[8:23], v[32:33] neg_lo:[0,0,1]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x128_bf8_bf8 v[24:31], v[0:7], v[8:23], s[32:33]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f32_16x16x128_bf8_bf8 v[24:31], v[0:7], 1.0, v[32:33]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f32_16x16x128_bf8_bf8 v[24:31], v[0:7], v[8:23], v[32:33] index_key:2
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: out of range index_key
+
+v_swmmac_f32_16x16x128_bf8_bf8 v[24:31], v[0:7], v[8:23], v[32:33] neg_lo:[1,0,0]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x128_bf8_bf8 v[24:31], v[0:7], v[8:23], v[32:33] neg_lo:[0,1,0]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x128_bf8_bf8 v[24:31], v[0:7], v[8:23], v[32:33] neg_lo:[0,0,1]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f16_16x16x128_fp8_fp8 v[24:27], v[0:7], v[8:23], s[28:29]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f16_16x16x128_fp8_fp8 v[24:27], v[0:7], 1.0, v[28:29]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f16_16x16x128_fp8_fp8 v[24:27], v[0:7], v[8:23], v[28:29] index_key:2
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: out of range index_key
+
+v_swmmac_f16_16x16x128_fp8_fp8 v[24:27], v[0:7], v[8:23], v[28:29] neg_lo:[1,0,0]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f16_16x16x128_fp8_fp8 v[24:27], v[0:7], v[8:23], v[28:29] neg_lo:[0,1,0]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f16_16x16x128_fp8_fp8 v[24:27], v[0:7], v[8:23], v[28:29] neg_lo:[0,0,1]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f16_16x16x128_fp8_bf8 v[24:27], v[0:7], v[8:23], s[28:29]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f16_16x16x128_fp8_bf8 v[24:27], v[0:7], 1.0, v[28:29]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f16_16x16x128_fp8_bf8 v[24:27], v[0:7], v[8:23], v[28:29] index_key:2
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: out of range index_key
+
+v_swmmac_f16_16x16x128_fp8_bf8 v[24:27], v[0:7], v[8:23], v[28:29] neg_lo:[1,0,0]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f16_16x16x128_fp8_bf8 v[24:27], v[0:7], v[8:23], v[28:29] neg_lo:[0,1,0]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f16_16x16x128_fp8_bf8 v[24:27], v[0:7], v[8:23], v[28:29] neg_lo:[0,0,1]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f16_16x16x128_bf8_fp8 v[24:27], v[0:7], v[8:23], s[28:29]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f16_16x16x128_bf8_fp8 v[24:27], v[0:7], 1.0, v[28:29]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f16_16x16x128_bf8_fp8 v[24:27], v[0:7], v[8:23], v[28:29] index_key:2
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: out of range index_key
+
+v_swmmac_f16_16x16x128_bf8_fp8 v[24:27], v[0:7], v[8:23], v[28:29] neg_lo:[1,0,0]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f16_16x16x128_bf8_fp8 v[24:27], v[0:7], v[8:23], v[28:29] neg_lo:[0,1,0]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f16_16x16x128_bf8_fp8 v[24:27], v[0:7], v[8:23], v[28:29] neg_lo:[0,0,1]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f16_16x16x128_bf8_bf8 v[24:27], v[0:7], v[8:23], s[28:29]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f16_16x16x128_bf8_bf8 v[24:27], v[0:7], 1.0, v[28:29]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f16_16x16x128_bf8_bf8 v[24:27], v[0:7], v[8:23], v[28:29] index_key:2
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: out of range index_key
+
+v_swmmac_f16_16x16x128_bf8_bf8 v[24:27], v[0:7], v[8:23], v[28:29] neg_lo:[1,0,0]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f16_16x16x128_bf8_bf8 v[24:27], v[0:7], v[8:23], v[28:29] neg_lo:[0,1,0]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f16_16x16x128_bf8_bf8 v[24:27], v[0:7], v[8:23], v[28:29] neg_lo:[0,0,1]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_i32_16x16x128_iu8 v[24:31], v[0:7], v[8:23], s[32:33]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_i32_16x16x128_iu8 v[24:31], v[0:7], 1, v[32:33]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_i32_16x16x128_iu8 v[24:31], v[0:7], v[8:23], v[32:33] clamp
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_i32_16x16x128_iu8 v[24:31], v[0:7], v[8:23], v[32:33] index_key:2
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: out of range index_key
+
+v_swmmac_i32_16x16x128_iu8 v[24:31], v[0:7], v[8:23], v[32:33] neg_lo:[0,0,1]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid neg_lo operand
+
+v_swmmac_f32_16x16x64_f16 v[24:31], v[0:7], v[8:23], s32
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f32_16x16x64_f16 v[24:31], v[0:7], 1.0, v32
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f32_16x16x64_f16 v[24:31], v[0:7], v[8:23], v32 index_key:2
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: out of range index_key
+
+v_swmmac_f32_16x16x64_f16 v[24:31], v[0:7], v[8:23], v32 neg_lo:[0,0,1]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid neg_lo operand
+
+v_swmmac_f16_16x16x64_f16 v[24:27], v[0:7], v[8:23], s28
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f16_16x16x64_f16 v[24:27], v[0:7], 1.0, v28
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f16_16x16x64_f16 v[24:27], v[0:7], v[8:23], v28 index_key:2
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: out of range index_key
+
+v_swmmac_f16_16x16x64_f16 v[24:27], v[0:7], v[8:23], v28 neg_lo:[0,0,1]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid neg_lo operand
+
+v_wmma_f32_32x16x128_f4 v[4:19], v[0:15], v[2:9], v[4:19] neg_lo:[1,0,0]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid neg_lo operand
+// GFX1250-ERR-NEXT: {{^}}v_wmma_f32_32x16x128_f4 v[4:19], v[0:15], v[2:9], v[4:19] neg_lo:[1,0,0]
+// GFX1250-ERR-NEXT: {{^}} ^
+
+v_wmma_f32_32x16x128_f4 v[4:19], v[0:15], v[2:9], v[4:19] neg_lo:[0,1,0]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid neg_lo operand
+// GFX1250-ERR-NEXT: {{^}}v_wmma_f32_32x16x128_f4 v[4:19], v[0:15], v[2:9], v[4:19] neg_lo:[0,1,0]
+// GFX1250-ERR-NEXT: {{^}} ^
+
+v_wmma_f32_32x16x128_f4 v[4:19], v[0:15], v[2:9], v[4:19] neg_hi:[1,0,0]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid neg_hi operand
+// GFX1250-ERR-NEXT: {{^}}v_wmma_f32_32x16x128_f4 v[4:19], v[0:15], v[2:9], v[4:19] neg_hi:[1,0,0]
+// GFX1250-ERR-NEXT: {{^}} ^
+
+v_wmma_f32_32x16x128_f4 v[4:19], v[0:15], v[2:9], v[4:19] neg_hi:[0,1,0]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid neg_hi operand
+// GFX1250-ERR-NEXT: {{^}}v_wmma_f32_32x16x128_f4 v[4:19], v[0:15], v[2:9], v[4:19] neg_hi:[0,1,0]
+// GFX1250-ERR-NEXT: {{^}} ^
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_wmma_w32.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_wmma_w32.txt
new file mode 100644
index 0000000000000..d76ec4c7e6185
--- /dev/null
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_wmma_w32.txt
@@ -0,0 +1,734 @@
+# NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5
+# RUN: llvm-mc -disassemble -triple=amdgcn -mcpu=gfx1250 -show-encoding %s | FileCheck --check-prefix=GFX1250 %s
+
+0x18,0x00,0x68,0xcc,0x00,0x11,0x72,0x1c
+# GFX1250: v_swmmac_bf16_16x16x64_bf16 v[24:27], v[0:7], v[8:23], v28 ; encoding: [0x18,0x00,0x68,0xcc,0x00,0x11,0x72,0x1c]
+
+0x18,0x08,0x68,0xcc,0x00,0x11,0x72,0x1c
+# GFX1250: v_swmmac_bf16_16x16x64_bf16 v[24:27], v[0:7], v[8:23], v28 index_key:1 ; encoding: [0x18,0x08,0x68,0xcc,0x00,0x11,0x72,0x1c]
+
+0x18,0x02,0x68,0xcc,0x00,0x11,0x72,0x5c
+# GFX1250: v_swmmac_bf16_16x16x64_bf16 v[24:27], v[0:7], v[8:23], v28 neg_lo:[0,1,0] neg_hi:[0,1,0] ; encoding: [0x18,0x02,0x68,0xcc,0x00,0x11,0x72,0x5c]
+
+0x18,0x01,0x68,0xcc,0x00,0x11,0x72,0x3c
+# GFX1250: v_swmmac_bf16_16x16x64_bf16 v[24:27], v[0:7], v[8:23], v28 neg_lo:[1,0,0] neg_hi:[1,0,0] ; encoding: [0x18,0x01,0x68,0xcc,0x00,0x11,0x72,0x3c]
+
+0x18,0x00,0x69,0xcc,0x00,0x11,0x82,0x1c
+# GFX1250: v_swmmac_bf16f32_16x16x64_bf16 v[24:31], v[0:7], v[8:23], v32 ; encoding: [0x18,0x00,0x69,0xcc,0x00,0x11,0x82,0x1c]
+
+0x18,0x08,0x69,0xcc,0x00,0x11,0x82,0x1c
+# GFX1250: v_swmmac_bf16f32_16x16x64_bf16 v[24:31], v[0:7], v[8:23], v32 index_key:1 ; encoding: [0x18,0x08,0x69,0xcc,0x00,0x11,0x82,0x1c]
+
+0x18,0x20,0x69,0xcc,0x00,0x11,0x82,0x1c
+# GFX1250: v_swmmac_bf16f32_16x16x64_bf16 v[24:31], v[0:7], v[8:23], v32 matrix_a_reuse ; encoding: [0x18,0x20,0x69,0xcc,0x00,0x11,0x82,0x1c]
+
+0x18,0x40,0x69,0xcc,0x00,0x11,0x82,0x1c
+# GFX1250: v_swmmac_bf16f32_16x16x64_bf16 v[24:31], v[0:7], v[8:23], v32 matrix_b_reuse ; encoding: [0x18,0x40,0x69,0xcc,0x00,0x11,0x82,0x1c]
+
+0x18,0x02,0x69,0xcc,0x00,0x11,0x82,0x5c
+# GFX1250: v_swmmac_bf16f32_16x16x64_bf16 v[24:31], v[0:7], v[8:23], v32 neg_lo:[0,1,0] neg_hi:[0,1,0] ; encoding: [0x18,0x02,0x69,0xcc,0x00,0x11,0x82,0x5c]
+
+0x18,0x01,0x69,0xcc,0x00,0x11,0x82,0x3c
+# GFX1250: v_swmmac_bf16f32_16x16x64_bf16 v[24:31], v[0:7], v[8:23], v32 neg_lo:[1,0,0] neg_hi:[1,0,0] ; encoding: [0x18,0x01,0x69,0xcc,0x00,0x11,0x82,0x3c]
+
+0x18,0x00,0x7a,0xcc,0x00,0x11,0x72,0x1c
+# GFX1250: v_swmmac_f16_16x16x128_bf8_bf8 v[24:27], v[0:7], v[8:23], v[28:29] ; encoding: [0x18,0x00,0x7a,0xcc,0x00,0x11,0x72,0x1c]
+
+0x18,0x08,0x7a,0xcc,0x00,0x11,0x72,0x1c
+# GFX1250: v_swmmac_f16_16x16x128_bf8_bf8 v[24:27], v[0:7], v[8:23], v[28:29] index_key:1 ; encoding: [0x18,0x08,0x7a,0xcc,0x00,0x11,0x72,0x1c]
+
+0x18,0x20,0x7a,0xcc,0x00,0x11,0x72,0x1c
+# GFX1250: v_swmmac_f16_16x16x128_bf8_bf8 v[24:27], v[0:7], v[8:23], v[28:29] matrix_a_reuse ; encoding: [0x18,0x20,0x7a,0xcc,0x00,0x11,0x72,0x1c]
+
+0x18,0x40,0x7a,0xcc,0x00,0x11,0x72,0x1c
+# GFX1250: v_swmmac_f16_16x16x128_bf8_bf8 v[24:27], v[0:7], v[8:23], v[28:29] matrix_b_reuse ; encoding: [0x18,0x40,0x7a,0xcc,0x00,0x11,0x72,0x1c]
+
+0x18,0x00,0x79,0xcc,0x00,0x11,0x72,0x1c
+# GFX1250: v_swmmac_f16_16x16x128_bf8_fp8 v[24:27], v[0:7], v[8:23], v[28:29] ; encoding: [0x18,0x00,0x79,0xcc,0x00,0x11,0x72,0x1c]
+
+0x18,0x08,0x79,0xcc,0x00,0x11,0x72,0x1c
+# GFX1250: v_swmmac_f16_16x16x128_bf8_fp8 v[24:27], v[0:7], v[8:23], v[28:29] index_key:1 ; encoding: [0x18,0x08,0x79,0xcc,0x00,0x11,0x72,0x1c]
+
+0x18,0x20,0x79,0xcc,0x00,0x11,0x72,0x1c
+# GFX1250: v_swmmac_f16_16x16x128_bf8_fp8 v[24:27], v[0:7], v[8:23], v[28:29] matrix_a_reuse ; encoding: [0x18,0x20,0x79,0xcc,0x00,0x11,0x72,0x1c]
+
+0x18,0x40,0x79,0xcc,0x00,0x11,0x72,0x1c
+# GFX1250: v_swmmac_f16_16x16x128_bf8_fp8 v[24:27], v[0:7], v[8:23], v[28:29] matrix_b_reuse ; encoding: [0x18,0x40,0x79,0xcc,0x00,0x11,0x72,0x1c]
+
+0x18,0x00,0x78,0xcc,0x00,0x11,0x72,0x1c
+# GFX1250: v_swmmac_f16_16x16x128_fp8_bf8 v[24:27], v[0:7], v[8:23], v[28:29] ; encoding: [0x18,0x00,0x78,0xcc,0x00,0x11,0x72,0x1c]
+
+0x18,0x08,0x78,0xcc,0x00,0x11,0x72,0x1c
+# GFX1250: v_swmmac_f16_16x16x128_fp8_bf8 v[24:27], v[0:7], v[8:23], v[28:29] index_key:1 ; encoding: [0x18,0x08,0x78,0xcc,0x00,0x11,0x72,0x1c]
+
+0x18,0x20,0x78,0xcc,0x00,0x11,0x72,0x1c
+# GFX1250: v_swmmac_f16_16x16x128_fp8_bf8 v[24:27], v[0:7], v[8:23], v[28:29] matrix_a_reuse ; encoding: [0x18,0x20,0x78,0xcc,0x00,0x11,0x72,0x1c]
+
+0x18,0x40,0x78,0xcc,0x00,0x11,0x72,0x1c
+# GFX1250: v_swmmac_f16_16x16x128_fp8_bf8 v[24:27], v[0:7], v[8:23], v[28:29] matrix_b_reuse ; encoding: [0x18,0x40,0x78,0xcc,0x00,0x11,0x72,0x1c]
+
+0x18,0x00,0x77,0xcc,0x00,0x11,0x72,0x1c
+# GFX1250: v_swmmac_f16_16x16x128_fp8_fp8 v[24:27], v[0:7], v[8:23], v[28:29] ; encoding: [0x18,0x00,0x77,0xcc,0x00,0x11,0x72,0x1c]
+
+0x18,0x08,0x77,0xcc,0x00,0x11,0x72,0x1c
+# GFX1250: v_swmmac_f16_16x16x128_fp8_fp8 v[24:27], v[0:7], v[8:23], v[28:29] index_key:1 ; encoding: [0x18,0x08,0x77,0xcc,0x00,0x11,0x72,0x1c]
+
+0x18,0x20,0x77,0xcc,0x00,0x11,0x72,0x1c
+# GFX1250: v_swmmac_f16_16x16x128_fp8_fp8 v[24:27], v[0:7], v[8:23], v[28:29] matrix_a_reuse ; encoding: [0x18,0x20,0x77,0xcc,0x00,0x11,0x72,0x1c]
+
+0x18,0x40,0x77,0xcc,0x00,0x11,0x72,0x1c
+# GFX1250: v_swmmac_f16_16x16x128_fp8_fp8 v[24:27], v[0:7], v[8:23], v[28:29] matrix_b_reuse ; encoding: [0x18,0x40,0x77,0xcc,0x00,0x11,0x72,0x1c]
+
+0x18,0x00,0x67,0xcc,0x00,0x11,0x72,0x1c
+# GFX1250: v_swmmac_f16_16x16x64_f16 v[24:27], v[0:7], v[8:23], v28 ; encoding: [0x18,0x00,0x67,0xcc,0x00,0x11,0x72,0x1c]
+
+0x18,0x08,0x67,0xcc,0x00,0x11,0x72,0x1c
+# GFX1250: v_swmmac_f16_16x16x64_f16 v[24:27], v[0:7], v[8:23], v28 index_key:1 ; encoding: [0x18,0x08,0x67,0xcc,0x00,0x11,0x72,0x1c]
+
+0x18,0x20,0x67,0xcc,0x00,0x11,0x72,0x1c
+# GFX1250: v_swmmac_f16_16x16x64_f16 v[24:27], v[0:7], v[8:23], v28 matrix_a_reuse ; encoding: [0x18,0x20,0x67,0xcc,0x00,0x11,0x72,0x1c]
+
+0x18,0x40,0x67,0xcc,0x00,0x11,0x72,0x1c
+# GFX1250: v_swmmac_f16_16x16x64_f16 v[24:27], v[0:7], v[8:23], v28 matrix_b_reuse ; encoding: [0x18,0x40,0x67,0xcc,0x00,0x11,0x72,0x1c]
+
+0x18,0x02,0x67,0xcc,0x00,0x11,0x72,0x5c
+# GFX1250: v_swmmac_f16_16x16x64_f16 v[24:27], v[0:7], v[8:23], v28 neg_lo:[0,1,0] neg_hi:[0,1,0] ; encoding: [0x18,0x02,0x67,0xcc,0x00,0x11,0x72,0x5c]
+
+0x18,0x01,0x67,0xcc,0x00,0x11,0x72,0x3c
+# GFX1250: v_swmmac_f16_16x16x64_f16 v[24:27], v[0:7], v[8:23], v28 neg_lo:[1,0,0] neg_hi:[1,0,0] ; encoding: [0x18,0x01,0x67,0xcc,0x00,0x11,0x72,0x3c]
+
+0x18,0x00,0x76,0xcc,0x00,0x11,0x82,0x1c
+# GFX1250: v_swmmac_f32_16x16x128_bf8_bf8 v[24:31], v[0:7], v[8:23], v[32:33] ; encoding: [0x18,0x00,0x76,0xcc,0x00,0x11,0x82,0x1c]
+
+0x18,0x08,0x76,0xcc,0x00,0x11,0x82,0x1c
+# GFX1250: v_swmmac_f32_16x16x128_bf8_bf8 v[24:31], v[0:7], v[8:23], v[32:33] index_key:1 ; encoding: [0x18,0x08,0x76,0xcc,0x00,0x11,0x82,0x1c]
+
+0x18,0x20,0x76,0xcc,0x00,0x11,0x82,0x1c
+# GFX1250: v_swmmac_f32_16x16x128_bf8_bf8 v[24:31], v[0:7], v[8:23], v[32:33] matrix_a_reuse ; encoding: [0x18,0x20,0x76,0xcc,0x00,0x11,0x82,0x1c]
+
+0x18,0x40,0x76,0xcc,0x00,0x11,0x82,0x1c
+# GFX1250: v_swmmac_f32_16x16x128_bf8_bf8 v[24:31], v[0:7], v[8:23], v[32:33] matrix_b_reuse ; encoding: [0x18,0x40,0x76,0xcc,0x00,0x11,0x82,0x1c]
+
+0x18,0x00,0x75,0xcc,0x00,0x11,0x82,0x1c
+# GFX1250: v_swmmac_f32_16x16x128_bf8_fp8 v[24:31], v[0:7], v[8:23], v[32:33] ; encoding: [0x18,0x00,0x75,0xcc,0x00,0x11,0x82,0x1c]
+
+0x18,0x08,0x75,0xcc,0x00,0x11,0x82,0x1c
+# GFX1250: v_swmmac_f32_16x16x128_bf8_fp8 v[24:31], v[0:7], v[8:23], v[32:33] index_key:1 ; encoding: [0x18,0x08,0x75,0xcc,0x00,0x11,0x82,0x1c]
+
+0x18,0x20,0x75,0xcc,0x00,0x11,0x82,0x1c
+# GFX1250: v_swmmac_f32_16x16x128_bf8_fp8 v[24:31], v[0:7], v[8:23], v[32:33] matrix_a_reuse ; encoding: [0x18,0x20,0x75,0xcc,0x00,0x11,0x82,0x1c]
+
+0x18,0x40,0x75,0xcc,0x00,0x11,0x82,0x1c
+# GFX1250: v_swmmac_f32_16x16x128_bf8_fp8 v[24:31], v[0:7], v[8:23], v[32:33] matrix_b_reuse ; encoding: [0x18,0x40,0x75,0xcc,0x00,0x11,0x82,0x1c]
+
+0x18,0x00,0x74,0xcc,0x00,0x11,0x82,0x1c
+# GFX1250: v_swmmac_f32_16x16x128_fp8_bf8 v[24:31], v[0:7], v[8:23], v[32:33] ; encoding: [0x18,0x00,0x74,0xcc,0x00,0x11,0x82,0x1c]
+
+0x18,0x08,0x74,0xcc,0x00,0x11,0x82,0x1c
+# GFX1250: v_swmmac_f32_16x16x128_fp8_bf8 v[24:31], v[0:7], v[8:23], v[32:33] index_key:1 ; encoding: [0x18,0x08,0x74,0xcc,0x00,0x11,0x82,0x1c]
+
+0x18,0x20,0x74,0xcc,0x00,0x11,0x82,0x1c
+# GFX1250: v_swmmac_f32_16x16x128_fp8_bf8 v[24:31], v[0:7], v[8:23], v[32:33] matrix_a_reuse ; encoding: [0x18,0x20,0x74,0xcc,0x00,0x11,0x82,0x1c]
+
+0x18,0x40,0x74,0xcc,0x00,0x11,0x82,0x1c
+# GFX1250: v_swmmac_f32_16x16x128_fp8_bf8 v[24:31], v[0:7], v[8:23], v[32:33] matrix_b_reuse ; encoding: [0x18,0x40,0x74,0xcc,0x00,0x11,0x82,0x1c]
+
+0x18,0x00,0x73,0xcc,0x00,0x11,0x82,0x1c
+# GFX1250: v_swmmac_f32_16x16x128_fp8_fp8 v[24:31], v[0:7], v[8:23], v[32:33] ; encoding: [0x18,0x00,0x73,0xcc,0x00,0x11,0x82,0x1c]
+
+0x18,0x08,0x73,0xcc,0x00,0x11,0x82,0x1c
+# GFX1250: v_swmmac_f32_16x16x128_fp8_fp8 v[24:31], v[0:7], v[8:23], v[32:33] index_key:1 ; encoding: [0x18,0x08,0x73,0xcc,0x00,0x11,0x82,0x1c]
+
+0x18,0x20,0x73,0xcc,0x00,0x11,0x82,0x1c
+# GFX1250: v_swmmac_f32_16x16x128_fp8_fp8 v[24:31], v[0:7], v[8:23], v[32:33] matrix_a_reuse ; encoding: [0x18,0x20,0x73,0xcc,0x00,0x11,0x82,0x1c]
+
+0x18,0x40,0x73,0xcc,0x00,0x11,0x82,0x1c
+# GFX1250: v_swmmac_f32_16x16x128_fp8_fp8 v[24:31], v[0:7], v[8:23], v[32:33] matrix_b_reuse ; encoding: [0x18,0x40,0x73,0xcc,0x00,0x11,0x82,0x1c]
+
+0x18,0x00,0x66,0xcc,0x00,0x11,0x82,0x1c
+# GFX1250: v_swmmac_f32_16x16x64_bf16 v[24:31], v[0:7], v[8:23], v32 ; encoding: [0x18,0x00,0x66,0xcc,0x00,0x11,0x82,0x1c]
+
+0x18,0x08,0x66,0xcc,0x00,0x11,0x82,0x1c
+# GFX1250: v_swmmac_f32_16x16x64_bf16 v[24:31], v[0:7], v[8:23], v32 index_key:1 ; encoding: [0x18,0x08,0x66,0xcc,0x00,0x11,0x82,0x1c]
+
+0x18,0x20,0x66,0xcc,0x00,0x11,0x82,0x1c
+# GFX1250: v_swmmac_f32_16x16x64_bf16 v[24:31], v[0:7], v[8:23], v32 matrix_a_reuse ; encoding: [0x18,0x20,0x66,0xcc,0x00,0x11,0x82,0x1c]
+
+0x18,0x40,0x66,0xcc,0x00,0x11,0x82,0x1c
+# GFX1250: v_swmmac_f32_16x16x64_bf16 v[24:31], v[0:7], v[8:23], v32 matrix_b_reuse ; encoding: [0x18,0x40,0x66,0xcc,0x00,0x11,0x82,0x1c]
+
+0x18,0x02,0x66,0xcc,0x00,0x11,0x82,0x5c
+# GFX1250: v_swmmac_f32_16x16x64_bf16 v[24:31], v[0:7], v[8:23], v32 neg_lo:[0,1,0] neg_hi:[0,1,0] ; encoding: [0x18,0x02,0x66,0xcc,0x00,0x11,0x82,0x5c]
+
+0x18,0x01,0x66,0xcc,0x00,0x11,0x82,0x3c
+# GFX1250: v_swmmac_f32_16x16x64_bf16 v[24:31], v[0:7], v[8:23], v32 neg_lo:[1,0,0] neg_hi:[1,0,0] ; encoding: [0x18,0x01,0x66,0xcc,0x00,0x11,0x82,0x3c]
+
+0x18,0x00,0x65,0xcc,0x00,0x11,0x82,0x1c
+# GFX1250: v_swmmac_f32_16x16x64_f16 v[24:31], v[0:7], v[8:23], v32 ; encoding: [0x18,0x00,0x65,0xcc,0x00,0x11,0x82,0x1c]
+
+0x18,0x08,0x65,0xcc,0x00,0x11,0x82,0x1c
+# GFX1250: v_swmmac_f32_16x16x64_f16 v[24:31], v[0:7], v[8:23], v32 index_key:1 ; encoding: [0x18,0x08,0x65,0xcc,0x00,0x11,0x82,0x1c]
+
+0x18,0x20,0x65,0xcc,0x00,0x11,0x82,0x1c
+# GFX1250: v_swmmac_f32_16x16x64_f16 v[24:31], v[0:7], v[8:23], v32 matrix_a_reuse ; encoding: [0x18,0x20,0x65,0xcc,0x00,0x11,0x82,0x1c]
+
+0x18,0x40,0x65,0xcc,0x00,0x11,0x82,0x1c
+# GFX1250: v_swmmac_f32_16x16x64_f16 v[24:31], v[0:7], v[8:23], v32 matrix_b_reuse ; encoding: [0x18,0x40,0x65,0xcc,0x00,0x11,0x82,0x1c]
+
+0x18,0x02,0x65,0xcc,0x00,0x11,0x82,0x5c
+# GFX1250: v_swmmac_f32_16x16x64_f16 v[24:31], v[0:7], v[8:23], v32 neg_lo:[0,1,0] neg_hi:[0,1,0] ; encoding: [0x18,0x02,0x65,0xcc,0x00,0x11,0x82,0x5c]
+
+0x18,0x01,0x65,0xcc,0x00,0x11,0x82,0x3c
+# GFX1250: v_swmmac_f32_16x16x64_f16 v[24:31], v[0:7], v[8:23], v32 neg_lo:[1,0,0] neg_hi:[1,0,0] ; encoding: [0x18,0x01,0x65,0xcc,0x00,0x11,0x82,0x3c]
+
+0x18,0x00,0x7b,0xcc,0x00,0x11,0x82,0x1c
+# GFX1250: v_swmmac_i32_16x16x128_iu8 v[24:31], v[0:7], v[8:23], v[32:33] ; encoding: [0x18,0x00,0x7b,0xcc,0x00,0x11,0x82,0x1c]
+
+0x18,0x08,0x7b,0xcc,0x00,0x11,0x82,0x1c
+# GFX1250: v_swmmac_i32_16x16x128_iu8 v[24:31], v[0:7], v[8:23], v[32:33] index_key:1 ; encoding: [0x18,0x08,0x7b,0xcc,0x00,0x11,0x82,0x1c]
+
+0x18,0x20,0x7b,0xcc,0x00,0x11,0x82,0x1c
+# GFX1250: v_swmmac_i32_16x16x128_iu8 v[24:31], v[0:7], v[8:23], v[32:33] matrix_a_reuse ; encoding: [0x18,0x20,0x7b,0xcc,0x00,0x11,0x82,0x1c]
+
+0x18,0x40,0x7b,0xcc,0x00,0x11,0x82,0x1c
+# GFX1250: v_swmmac_i32_16x16x128_iu8 v[24:31], v[0:7], v[8:23], v[32:33] matrix_b_reuse ; encoding: [0x18,0x40,0x7b,0xcc,0x00,0x11,0x82,0x1c]
+
+0x18,0x00,0x7b,0xcc,0x00,0x11,0x82,0x5c
+# GFX1250: v_swmmac_i32_16x16x128_iu8 v[24:31], v[0:7], v[8:23], v[32:33] neg_lo:[0,1,0] ; encoding: [0x18,0x00,0x7b,0xcc,0x00,0x11,0x82,0x5c]
+
+0x18,0x00,0x7b,0xcc,0x00,0x11,0x82,0x3c
+# GFX1250: v_swmmac_i32_16x16x128_iu8 v[24:31], v[0:7], v[8:23], v[32:33] neg_lo:[1,0,0] ; encoding: [0x18,0x00,0x7b,0xcc,0x00,0x11,0x82,0x3c]
+
+0x10,0x00,0x63,0xcc,0x00,0x11,0xca,0x1b
+# GFX1250: v_wmma_bf16_16x16x32_bf16 v[16:19], v[0:7], v[8:15], 1.0 ; encoding: [0x10,0x00,0x63,0xcc,0x00,0x11,0xca,0x1b]
+
+0x10,0x00,0x63,0xcc,0x00,0x11,0xca,0x9b
+# GFX1250: v_wmma_bf16_16x16x32_bf16 v[16:19], v[0:7], v[8:15], 1.0 neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x63,0xcc,0x00,0x11,0xca,0x9b]
+
+0x10,0x00,0x63,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_bf16_16x16x32_bf16 v[16:19], v[0:7], v[8:15], v[16:19] ; encoding: [0x10,0x00,0x63,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x20,0x63,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_bf16_16x16x32_bf16 v[16:19], v[0:7], v[8:15], v[16:19] matrix_a_reuse ; encoding: [0x10,0x20,0x63,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x40,0x63,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_bf16_16x16x32_bf16 v[16:19], v[0:7], v[8:15], v[16:19] matrix_b_reuse ; encoding: [0x10,0x40,0x63,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x04,0x63,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_bf16_16x16x32_bf16 v[16:19], v[0:7], v[8:15], v[16:19] neg_hi:[0,0,1] ; encoding: [0x10,0x04,0x63,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x00,0x63,0xcc,0x00,0x11,0x42,0x9c
+# GFX1250: v_wmma_bf16_16x16x32_bf16 v[16:19], v[0:7], v[8:15], v[16:19] neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x63,0xcc,0x00,0x11,0x42,0x9c]
+
+0x10,0x02,0x63,0xcc,0x00,0x11,0x42,0x5c
+# GFX1250: v_wmma_bf16_16x16x32_bf16 v[16:19], v[0:7], v[8:15], v[16:19] neg_lo:[0,1,0] neg_hi:[0,1,0] ; encoding: [0x10,0x02,0x63,0xcc,0x00,0x11,0x42,0x5c]
+
+0x10,0x01,0x63,0xcc,0x00,0x11,0x42,0x3c
+# GFX1250: v_wmma_bf16_16x16x32_bf16 v[16:19], v[0:7], v[8:15], v[16:19] neg_lo:[1,0,0] neg_hi:[1,0,0] ; encoding: [0x10,0x01,0x63,0xcc,0x00,0x11,0x42,0x3c]
+
+0x1a,0x00,0x64,0xcc,0x00,0x11,0xca,0x1b
+# GFX1250: v_wmma_bf16f32_16x16x32_bf16 v[26:29], v[0:7], v[8:15], 1.0 ; encoding: [0x1a,0x00,0x64,0xcc,0x00,0x11,0xca,0x1b]
+
+0x1a,0x00,0x64,0xcc,0x00,0x11,0xca,0x9b
+# GFX1250: v_wmma_bf16f32_16x16x32_bf16 v[26:29], v[0:7], v[8:15], 1.0 neg_lo:[0,0,1] ; encoding: [0x1a,0x00,0x64,0xcc,0x00,0x11,0xca,0x9b]
+
+0x1a,0x00,0x64,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_bf16f32_16x16x32_bf16 v[26:29], v[0:7], v[8:15], v[16:23] ; encoding: [0x1a,0x00,0x64,0xcc,0x00,0x11,0x42,0x1c]
+
+0x1a,0x20,0x64,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_bf16f32_16x16x32_bf16 v[26:29], v[0:7], v[8:15], v[16:23] matrix_a_reuse ; encoding: [0x1a,0x20,0x64,0xcc,0x00,0x11,0x42,0x1c]
+
+0x1a,0x40,0x64,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_bf16f32_16x16x32_bf16 v[26:29], v[0:7], v[8:15], v[16:23] matrix_b_reuse ; encoding: [0x1a,0x40,0x64,0xcc,0x00,0x11,0x42,0x1c]
+
+0x1a,0x04,0x64,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_bf16f32_16x16x32_bf16 v[26:29], v[0:7], v[8:15], v[16:23] neg_hi:[0,0,1] ; encoding: [0x1a,0x04,0x64,0xcc,0x00,0x11,0x42,0x1c]
+
+0x1a,0x00,0x64,0xcc,0x00,0x11,0x42,0x9c
+# GFX1250: v_wmma_bf16f32_16x16x32_bf16 v[26:29], v[0:7], v[8:15], v[16:23] neg_lo:[0,0,1] ; encoding: [0x1a,0x00,0x64,0xcc,0x00,0x11,0x42,0x9c]
+
+0x1a,0x02,0x64,0xcc,0x00,0x11,0x42,0x5c
+# GFX1250: v_wmma_bf16f32_16x16x32_bf16 v[26:29], v[0:7], v[8:15], v[16:23] neg_lo:[0,1,0] neg_hi:[0,1,0] ; encoding: [0x1a,0x02,0x64,0xcc,0x00,0x11,0x42,0x5c]
+
+0x1a,0x01,0x64,0xcc,0x00,0x11,0x42,0x3c
+# GFX1250: v_wmma_bf16f32_16x16x32_bf16 v[26:29], v[0:7], v[8:15], v[16:23] neg_lo:[1,0,0] neg_hi:[1,0,0] ; encoding: [0x1a,0x01,0x64,0xcc,0x00,0x11,0x42,0x3c]
+
+0x10,0x00,0x61,0xcc,0x00,0x11,0xca,0x1b
+# GFX1250: v_wmma_f16_16x16x32_f16 v[16:19], v[0:7], v[8:15], 1.0 ; encoding: [0x10,0x00,0x61,0xcc,0x00,0x11,0xca,0x1b]
+
+0x10,0x00,0x61,0xcc,0x00,0x11,0xca,0x9b
+# GFX1250: v_wmma_f16_16x16x32_f16 v[16:19], v[0:7], v[8:15], 1.0 neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x61,0xcc,0x00,0x11,0xca,0x9b]
+
+0x10,0x00,0x61,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f16_16x16x32_f16 v[16:19], v[0:7], v[8:15], v[16:19] ; encoding: [0x10,0x00,0x61,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x20,0x61,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f16_16x16x32_f16 v[16:19], v[0:7], v[8:15], v[16:19] matrix_a_reuse ; encoding: [0x10,0x20,0x61,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x40,0x61,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f16_16x16x32_f16 v[16:19], v[0:7], v[8:15], v[16:19] matrix_b_reuse ; encoding: [0x10,0x40,0x61,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x04,0x61,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f16_16x16x32_f16 v[16:19], v[0:7], v[8:15], v[16:19] neg_hi:[0,0,1] ; encoding: [0x10,0x04,0x61,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x00,0x61,0xcc,0x00,0x11,0x42,0x9c
+# GFX1250: v_wmma_f16_16x16x32_f16 v[16:19], v[0:7], v[8:15], v[16:19] neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x61,0xcc,0x00,0x11,0x42,0x9c]
+
+0x10,0x02,0x61,0xcc,0x00,0x11,0x42,0x5c
+# GFX1250: v_wmma_f16_16x16x32_f16 v[16:19], v[0:7], v[8:15], v[16:19] neg_lo:[0,1,0] neg_hi:[0,1,0] ; encoding: [0x10,0x02,0x61,0xcc,0x00,0x11,0x42,0x5c]
+
+0x10,0x01,0x61,0xcc,0x00,0x11,0x42,0x3c
+# GFX1250: v_wmma_f16_16x16x32_f16 v[16:19], v[0:7], v[8:15], v[16:19] neg_lo:[1,0,0] neg_hi:[1,0,0] ; encoding: [0x10,0x01,0x61,0xcc,0x00,0x11,0x42,0x3c]
+
+0x10,0x00,0x71,0xcc,0x00,0x11,0xca,0x1b
+# GFX1250: v_wmma_f16_16x16x64_bf8_bf8 v[16:19], v[0:7], v[8:15], 1.0 ; encoding: [0x10,0x00,0x71,0xcc,0x00,0x11,0xca,0x1b]
+
+0x10,0x00,0x71,0xcc,0x00,0x11,0xca,0x9b
+# GFX1250: v_wmma_f16_16x16x64_bf8_bf8 v[16:19], v[0:7], v[8:15], 1.0 neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x71,0xcc,0x00,0x11,0xca,0x9b]
+
+0x10,0x00,0x71,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f16_16x16x64_bf8_bf8 v[16:19], v[0:7], v[8:15], v[16:19] ; encoding: [0x10,0x00,0x71,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x20,0x71,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f16_16x16x64_bf8_bf8 v[16:19], v[0:7], v[8:15], v[16:19] matrix_a_reuse ; encoding: [0x10,0x20,0x71,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x40,0x71,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f16_16x16x64_bf8_bf8 v[16:19], v[0:7], v[8:15], v[16:19] matrix_b_reuse ; encoding: [0x10,0x40,0x71,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x04,0x71,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f16_16x16x64_bf8_bf8 v[16:19], v[0:7], v[8:15], v[16:19] neg_hi:[0,0,1] ; encoding: [0x10,0x04,0x71,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x00,0x71,0xcc,0x00,0x11,0x42,0x9c
+# GFX1250: v_wmma_f16_16x16x64_bf8_bf8 v[16:19], v[0:7], v[8:15], v[16:19] neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x71,0xcc,0x00,0x11,0x42,0x9c]
+
+0x10,0x00,0x70,0xcc,0x00,0x11,0xca,0x1b
+# GFX1250: v_wmma_f16_16x16x64_bf8_fp8 v[16:19], v[0:7], v[8:15], 1.0 ; encoding: [0x10,0x00,0x70,0xcc,0x00,0x11,0xca,0x1b]
+
+0x10,0x00,0x70,0xcc,0x00,0x11,0xca,0x9b
+# GFX1250: v_wmma_f16_16x16x64_bf8_fp8 v[16:19], v[0:7], v[8:15], 1.0 neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x70,0xcc,0x00,0x11,0xca,0x9b]
+
+0x10,0x00,0x70,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f16_16x16x64_bf8_fp8 v[16:19], v[0:7], v[8:15], v[16:19] ; encoding: [0x10,0x00,0x70,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x20,0x70,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f16_16x16x64_bf8_fp8 v[16:19], v[0:7], v[8:15], v[16:19] matrix_a_reuse ; encoding: [0x10,0x20,0x70,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x40,0x70,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f16_16x16x64_bf8_fp8 v[16:19], v[0:7], v[8:15], v[16:19] matrix_b_reuse ; encoding: [0x10,0x40,0x70,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x04,0x70,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f16_16x16x64_bf8_fp8 v[16:19], v[0:7], v[8:15], v[16:19] neg_hi:[0,0,1] ; encoding: [0x10,0x04,0x70,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x00,0x70,0xcc,0x00,0x11,0x42,0x9c
+# GFX1250: v_wmma_f16_16x16x64_bf8_fp8 v[16:19], v[0:7], v[8:15], v[16:19] neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x70,0xcc,0x00,0x11,0x42,0x9c]
+
+0x10,0x00,0x6f,0xcc,0x00,0x11,0xca,0x1b
+# GFX1250: v_wmma_f16_16x16x64_fp8_bf8 v[16:19], v[0:7], v[8:15], 1.0 ; encoding: [0x10,0x00,0x6f,0xcc,0x00,0x11,0xca,0x1b]
+
+0x10,0x00,0x6f,0xcc,0x00,0x11,0xca,0x9b
+# GFX1250: v_wmma_f16_16x16x64_fp8_bf8 v[16:19], v[0:7], v[8:15], 1.0 neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x6f,0xcc,0x00,0x11,0xca,0x9b]
+
+0x10,0x00,0x6f,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f16_16x16x64_fp8_bf8 v[16:19], v[0:7], v[8:15], v[16:19] ; encoding: [0x10,0x00,0x6f,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x20,0x6f,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f16_16x16x64_fp8_bf8 v[16:19], v[0:7], v[8:15], v[16:19] matrix_a_reuse ; encoding: [0x10,0x20,0x6f,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x40,0x6f,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f16_16x16x64_fp8_bf8 v[16:19], v[0:7], v[8:15], v[16:19] matrix_b_reuse ; encoding: [0x10,0x40,0x6f,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x04,0x6f,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f16_16x16x64_fp8_bf8 v[16:19], v[0:7], v[8:15], v[16:19] neg_hi:[0,0,1] ; encoding: [0x10,0x04,0x6f,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x00,0x6f,0xcc,0x00,0x11,0x42,0x9c
+# GFX1250: v_wmma_f16_16x16x64_fp8_bf8 v[16:19], v[0:7], v[8:15], v[16:19] neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x6f,0xcc,0x00,0x11,0x42,0x9c]
+
+0x10,0x00,0x6e,0xcc,0x00,0x11,0xca,0x1b
+# GFX1250: v_wmma_f16_16x16x64_fp8_fp8 v[16:19], v[0:7], v[8:15], 1.0 ; encoding: [0x10,0x00,0x6e,0xcc,0x00,0x11,0xca,0x1b]
+
+0x10,0x00,0x6e,0xcc,0x00,0x11,0xca,0x9b
+# GFX1250: v_wmma_f16_16x16x64_fp8_fp8 v[16:19], v[0:7], v[8:15], 1.0 neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x6e,0xcc,0x00,0x11,0xca,0x9b]
+
+0x10,0x00,0x6e,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f16_16x16x64_fp8_fp8 v[16:19], v[0:7], v[8:15], v[16:19] ; encoding: [0x10,0x00,0x6e,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x20,0x6e,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f16_16x16x64_fp8_fp8 v[16:19], v[0:7], v[8:15], v[16:19] matrix_a_reuse ; encoding: [0x10,0x20,0x6e,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x40,0x6e,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f16_16x16x64_fp8_fp8 v[16:19], v[0:7], v[8:15], v[16:19] matrix_b_reuse ; encoding: [0x10,0x40,0x6e,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x04,0x6e,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f16_16x16x64_fp8_fp8 v[16:19], v[0:7], v[8:15], v[16:19] neg_hi:[0,0,1] ; encoding: [0x10,0x04,0x6e,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x00,0x6e,0xcc,0x00,0x11,0x42,0x9c
+# GFX1250: v_wmma_f16_16x16x64_fp8_fp8 v[16:19], v[0:7], v[8:15], v[16:19] neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x6e,0xcc,0x00,0x11,0x42,0x9c]
+
+0x10,0x00,0x62,0xcc,0x00,0x11,0xca,0x1b
+# GFX1250: v_wmma_f32_16x16x32_bf16 v[16:23], v[0:7], v[8:15], 1.0 ; encoding: [0x10,0x00,0x62,0xcc,0x00,0x11,0xca,0x1b]
+
+0x10,0x00,0x62,0xcc,0x00,0x11,0xca,0x9b
+# GFX1250: v_wmma_f32_16x16x32_bf16 v[16:23], v[0:7], v[8:15], 1.0 neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x62,0xcc,0x00,0x11,0xca,0x9b]
+
+0x10,0x00,0x62,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f32_16x16x32_bf16 v[16:23], v[0:7], v[8:15], v[16:23] ; encoding: [0x10,0x00,0x62,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x20,0x62,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f32_16x16x32_bf16 v[16:23], v[0:7], v[8:15], v[16:23] matrix_a_reuse ; encoding: [0x10,0x20,0x62,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x40,0x62,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f32_16x16x32_bf16 v[16:23], v[0:7], v[8:15], v[16:23] matrix_b_reuse ; encoding: [0x10,0x40,0x62,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x04,0x62,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f32_16x16x32_bf16 v[16:23], v[0:7], v[8:15], v[16:23] neg_hi:[0,0,1] ; encoding: [0x10,0x04,0x62,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x00,0x62,0xcc,0x00,0x11,0x42,0x9c
+# GFX1250: v_wmma_f32_16x16x32_bf16 v[16:23], v[0:7], v[8:15], v[16:23] neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x62,0xcc,0x00,0x11,0x42,0x9c]
+
+0x10,0x02,0x62,0xcc,0x00,0x11,0x42,0x5c
+# GFX1250: v_wmma_f32_16x16x32_bf16 v[16:23], v[0:7], v[8:15], v[16:23] neg_lo:[0,1,0] neg_hi:[0,1,0] ; encoding: [0x10,0x02,0x62,0xcc,0x00,0x11,0x42,0x5c]
+
+0x10,0x01,0x62,0xcc,0x00,0x11,0x42,0x3c
+# GFX1250: v_wmma_f32_16x16x32_bf16 v[16:23], v[0:7], v[8:15], v[16:23] neg_lo:[1,0,0] neg_hi:[1,0,0] ; encoding: [0x10,0x01,0x62,0xcc,0x00,0x11,0x42,0x3c]
+
+0x10,0x00,0x60,0xcc,0x00,0x11,0xca,0x1b
+# GFX1250: v_wmma_f32_16x16x32_f16 v[16:23], v[0:7], v[8:15], 1.0 ; encoding: [0x10,0x00,0x60,0xcc,0x00,0x11,0xca,0x1b]
+
+0x10,0x00,0x60,0xcc,0x00,0x11,0xca,0x9b
+# GFX1250: v_wmma_f32_16x16x32_f16 v[16:23], v[0:7], v[8:15], 1.0 neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x60,0xcc,0x00,0x11,0xca,0x9b]
+
+0x10,0x00,0x60,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f32_16x16x32_f16 v[16:23], v[0:7], v[8:15], v[16:23] ; encoding: [0x10,0x00,0x60,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x20,0x60,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f32_16x16x32_f16 v[16:23], v[0:7], v[8:15], v[16:23] matrix_a_reuse ; encoding: [0x10,0x20,0x60,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x40,0x60,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f32_16x16x32_f16 v[16:23], v[0:7], v[8:15], v[16:23] matrix_b_reuse ; encoding: [0x10,0x40,0x60,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x04,0x60,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f32_16x16x32_f16 v[16:23], v[0:7], v[8:15], v[16:23] neg_hi:[0,0,1] ; encoding: [0x10,0x04,0x60,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x00,0x60,0xcc,0x00,0x11,0x42,0x9c
+# GFX1250: v_wmma_f32_16x16x32_f16 v[16:23], v[0:7], v[8:15], v[16:23] neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x60,0xcc,0x00,0x11,0x42,0x9c]
+
+0x10,0x02,0x60,0xcc,0x00,0x11,0x42,0x5c
+# GFX1250: v_wmma_f32_16x16x32_f16 v[16:23], v[0:7], v[8:15], v[16:23] neg_lo:[0,1,0] neg_hi:[0,1,0] ; encoding: [0x10,0x02,0x60,0xcc,0x00,0x11,0x42,0x5c]
+
+0x10,0x01,0x60,0xcc,0x00,0x11,0x42,0x3c
+# GFX1250: v_wmma_f32_16x16x32_f16 v[16:23], v[0:7], v[8:15], v[16:23] neg_lo:[1,0,0] neg_hi:[1,0,0] ; encoding: [0x10,0x01,0x60,0xcc,0x00,0x11,0x42,0x3c]
+
+0x04,0x00,0x5d,0xcc,0x00,0x05,0xca,0x1b
+# GFX1250: v_wmma_f32_16x16x4_f32 v[4:11], v[0:1], v[2:3], 1.0 ; encoding: [0x04,0x00,0x5d,0xcc,0x00,0x05,0xca,0x1b]
+
+0x04,0x00,0x5d,0xcc,0x00,0x05,0xca,0x9b
+# GFX1250: v_wmma_f32_16x16x4_f32 v[4:11], v[0:1], v[2:3], 1.0 neg_lo:[0,0,1] ; encoding: [0x04,0x00,0x5d,0xcc,0x00,0x05,0xca,0x9b]
+
+0x04,0x00,0x5d,0xcc,0x00,0x05,0x12,0x1c
+# GFX1250: v_wmma_f32_16x16x4_f32 v[4:11], v[0:1], v[2:3], v[4:11] ; encoding: [0x04,0x00,0x5d,0xcc,0x00,0x05,0x12,0x1c]
+
+0x04,0x20,0x5d,0xcc,0x00,0x05,0x12,0x1c
+# GFX1250: v_wmma_f32_16x16x4_f32 v[4:11], v[0:1], v[2:3], v[4:11] matrix_a_reuse ; encoding: [0x04,0x20,0x5d,0xcc,0x00,0x05,0x12,0x1c]
+
+0x04,0x40,0x5d,0xcc,0x00,0x05,0x12,0x1c
+# GFX1250: v_wmma_f32_16x16x4_f32 v[4:11], v[0:1], v[2:3], v[4:11] matrix_b_reuse ; encoding: [0x04,0x40,0x5d,0xcc,0x00,0x05,0x12,0x1c]
+
+0x04,0x04,0x5d,0xcc,0x00,0x05,0x12,0x1c
+# GFX1250: v_wmma_f32_16x16x4_f32 v[4:11], v[0:1], v[2:3], v[4:11] neg_hi:[0,0,1] ; encoding: [0x04,0x04,0x5d,0xcc,0x00,0x05,0x12,0x1c]
+
+0x04,0x00,0x5d,0xcc,0x00,0x05,0x12,0x9c
+# GFX1250: v_wmma_f32_16x16x4_f32 v[4:11], v[0:1], v[2:3], v[4:11] neg_lo:[0,0,1] ; encoding: [0x04,0x00,0x5d,0xcc,0x00,0x05,0x12,0x9c]
+
+0x04,0x00,0x5d,0xcc,0x00,0x05,0x12,0x5c
+# GFX1250: v_wmma_f32_16x16x4_f32 v[4:11], v[0:1], v[2:3], v[4:11] neg_lo:[0,1,0] ; encoding: [0x04,0x00,0x5d,0xcc,0x00,0x05,0x12,0x5c]
+
+0x04,0x00,0x5d,0xcc,0x00,0x05,0x12,0x3c
+# GFX1250: v_wmma_f32_16x16x4_f32 v[4:11], v[0:1], v[2:3], v[4:11] neg_lo:[1,0,0] ; encoding: [0x04,0x00,0x5d,0xcc,0x00,0x05,0x12,0x3c]
+
+0x10,0x00,0x6d,0xcc,0x00,0x11,0xca,0x1b
+# GFX1250: v_wmma_f32_16x16x64_bf8_bf8 v[16:23], v[0:7], v[8:15], 1.0 ; encoding: [0x10,0x00,0x6d,0xcc,0x00,0x11,0xca,0x1b]
+
+0x10,0x00,0x6d,0xcc,0x00,0x11,0xca,0x9b
+# GFX1250: v_wmma_f32_16x16x64_bf8_bf8 v[16:23], v[0:7], v[8:15], 1.0 neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x6d,0xcc,0x00,0x11,0xca,0x9b]
+
+0x10,0x00,0x6d,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f32_16x16x64_bf8_bf8 v[16:23], v[0:7], v[8:15], v[16:23] ; encoding: [0x10,0x00,0x6d,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x20,0x6d,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f32_16x16x64_bf8_bf8 v[16:23], v[0:7], v[8:15], v[16:23] matrix_a_reuse ; encoding: [0x10,0x20,0x6d,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x40,0x6d,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f32_16x16x64_bf8_bf8 v[16:23], v[0:7], v[8:15], v[16:23] matrix_b_reuse ; encoding: [0x10,0x40,0x6d,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x04,0x6d,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f32_16x16x64_bf8_bf8 v[16:23], v[0:7], v[8:15], v[16:23] neg_hi:[0,0,1] ; encoding: [0x10,0x04,0x6d,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x00,0x6d,0xcc,0x00,0x11,0x42,0x9c
+# GFX1250: v_wmma_f32_16x16x64_bf8_bf8 v[16:23], v[0:7], v[8:15], v[16:23] neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x6d,0xcc,0x00,0x11,0x42,0x9c]
+
+0x10,0x00,0x6c,0xcc,0x00,0x11,0xca,0x1b
+# GFX1250: v_wmma_f32_16x16x64_bf8_fp8 v[16:23], v[0:7], v[8:15], 1.0 ; encoding: [0x10,0x00,0x6c,0xcc,0x00,0x11,0xca,0x1b]
+
+0x10,0x00,0x6c,0xcc,0x00,0x11,0xca,0x9b
+# GFX1250: v_wmma_f32_16x16x64_bf8_fp8 v[16:23], v[0:7], v[8:15], 1.0 neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x6c,0xcc,0x00,0x11,0xca,0x9b]
+
+0x10,0x00,0x6c,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f32_16x16x64_bf8_fp8 v[16:23], v[0:7], v[8:15], v[16:23] ; encoding: [0x10,0x00,0x6c,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x20,0x6c,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f32_16x16x64_bf8_fp8 v[16:23], v[0:7], v[8:15], v[16:23] matrix_a_reuse ; encoding: [0x10,0x20,0x6c,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x40,0x6c,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f32_16x16x64_bf8_fp8 v[16:23], v[0:7], v[8:15], v[16:23] matrix_b_reuse ; encoding: [0x10,0x40,0x6c,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x04,0x6c,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f32_16x16x64_bf8_fp8 v[16:23], v[0:7], v[8:15], v[16:23] neg_hi:[0,0,1] ; encoding: [0x10,0x04,0x6c,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x00,0x6c,0xcc,0x00,0x11,0x42,0x9c
+# GFX1250: v_wmma_f32_16x16x64_bf8_fp8 v[16:23], v[0:7], v[8:15], v[16:23] neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x6c,0xcc,0x00,0x11,0x42,0x9c]
+
+0x10,0x00,0x6b,0xcc,0x00,0x11,0xca,0x1b
+# GFX1250: v_wmma_f32_16x16x64_fp8_bf8 v[16:23], v[0:7], v[8:15], 1.0 ; encoding: [0x10,0x00,0x6b,0xcc,0x00,0x11,0xca,0x1b]
+
+0x10,0x00,0x6b,0xcc,0x00,0x11,0xca,0x9b
+# GFX1250: v_wmma_f32_16x16x64_fp8_bf8 v[16:23], v[0:7], v[8:15], 1.0 neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x6b,0xcc,0x00,0x11,0xca,0x9b]
+
+0x10,0x00,0x6b,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f32_16x16x64_fp8_bf8 v[16:23], v[0:7], v[8:15], v[16:23] ; encoding: [0x10,0x00,0x6b,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x20,0x6b,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f32_16x16x64_fp8_bf8 v[16:23], v[0:7], v[8:15], v[16:23] matrix_a_reuse ; encoding: [0x10,0x20,0x6b,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x40,0x6b,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f32_16x16x64_fp8_bf8 v[16:23], v[0:7], v[8:15], v[16:23] matrix_b_reuse ; encoding: [0x10,0x40,0x6b,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x04,0x6b,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f32_16x16x64_fp8_bf8 v[16:23], v[0:7], v[8:15], v[16:23] neg_hi:[0,0,1] ; encoding: [0x10,0x04,0x6b,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x00,0x6b,0xcc,0x00,0x11,0x42,0x9c
+# GFX1250: v_wmma_f32_16x16x64_fp8_bf8 v[16:23], v[0:7], v[8:15], v[16:23] neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x6b,0xcc,0x00,0x11,0x42,0x9c]
+
+0x10,0x00,0x6a,0xcc,0x00,0x11,0xca,0x1b
+# GFX1250: v_wmma_f32_16x16x64_fp8_fp8 v[16:23], v[0:7], v[8:15], 1.0 ; encoding: [0x10,0x00,0x6a,0xcc,0x00,0x11,0xca,0x1b]
+
+0x10,0x00,0x6a,0xcc,0x00,0x11,0xca,0x9b
+# GFX1250: v_wmma_f32_16x16x64_fp8_fp8 v[16:23], v[0:7], v[8:15], 1.0 neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x6a,0xcc,0x00,0x11,0xca,0x9b]
+
+0x10,0x00,0x6a,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f32_16x16x64_fp8_fp8 v[16:23], v[0:7], v[8:15], v[16:23] ; encoding: [0x10,0x00,0x6a,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x20,0x6a,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f32_16x16x64_fp8_fp8 v[16:23], v[0:7], v[8:15], v[16:23] matrix_a_reuse ; encoding: [0x10,0x20,0x6a,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x40,0x6a,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f32_16x16x64_fp8_fp8 v[16:23], v[0:7], v[8:15], v[16:23] matrix_b_reuse ; encoding: [0x10,0x40,0x6a,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x04,0x6a,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f32_16x16x64_fp8_fp8 v[16:23], v[0:7], v[8:15], v[16:23] neg_hi:[0,0,1] ; encoding: [0x10,0x04,0x6a,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x00,0x6a,0xcc,0x00,0x11,0x42,0x9c
+# GFX1250: v_wmma_f32_16x16x64_fp8_fp8 v[16:23], v[0:7], v[8:15], v[16:23] neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x6a,0xcc,0x00,0x11,0x42,0x9c]
+
+0x10,0x00,0x72,0xcc,0x00,0x11,0x06,0x1a
+# GFX1250: v_wmma_i32_16x16x64_iu8 v[16:23], v[0:7], v[8:15], 1 ; encoding: [0x10,0x00,0x72,0xcc,0x00,0x11,0x06,0x1a]
+
+0x10,0x00,0x72,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_i32_16x16x64_iu8 v[16:23], v[0:7], v[8:15], v[16:23] ; encoding: [0x10,0x00,0x72,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x20,0x72,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_i32_16x16x64_iu8 v[16:23], v[0:7], v[8:15], v[16:23] matrix_a_reuse ; encoding: [0x10,0x20,0x72,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x40,0x72,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_i32_16x16x64_iu8 v[16:23], v[0:7], v[8:15], v[16:23] matrix_b_reuse ; encoding: [0x10,0x40,0x72,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x00,0x72,0xcc,0x00,0x11,0x42,0x5c
+# GFX1250: v_wmma_i32_16x16x64_iu8 v[16:23], v[0:7], v[8:15], v[16:23] neg_lo:[0,1,0] ; encoding: [0x10,0x00,0x72,0xcc,0x00,0x11,0x42,0x5c]
+
+0x10,0x00,0x72,0xcc,0x00,0x11,0x42,0x3c
+# GFX1250: v_wmma_i32_16x16x64_iu8 v[16:23], v[0:7], v[8:15], v[16:23] neg_lo:[1,0,0] ; encoding: [0x10,0x00,0x72,0xcc,0x00,0x11,0x42,0x3c]
+
+0x10,0x00,0x87,0xcc,0x00,0x11,0xca,0x1b
+# GFX1250: v_wmma_f16_16x16x128_bf8_bf8 v[16:19], v[0:15], v[8:23], 1.0 ; encoding: [0x10,0x00,0x87,0xcc,0x00,0x11,0xca,0x1b]
+
+0x10,0x00,0x87,0xcc,0x00,0x11,0xca,0x9b
+# GFX1250: v_wmma_f16_16x16x128_bf8_bf8 v[16:19], v[0:15], v[8:23], 1.0 neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x87,0xcc,0x00,0x11,0xca,0x9b]
+
+0x10,0x00,0x87,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f16_16x16x128_bf8_bf8 v[16:19], v[0:15], v[8:23], v[16:19] ; encoding: [0x10,0x00,0x87,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x20,0x87,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f16_16x16x128_bf8_bf8 v[16:19], v[0:15], v[8:23], v[16:19] matrix_a_reuse ; encoding: [0x10,0x20,0x87,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x40,0x87,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f16_16x16x128_bf8_bf8 v[16:19], v[0:15], v[8:23], v[16:19] matrix_b_reuse ; encoding: [0x10,0x40,0x87,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x04,0x87,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f16_16x16x128_bf8_bf8 v[16:19], v[0:15], v[8:23], v[16:19] neg_hi:[0,0,1] ; encoding: [0x10,0x04,0x87,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x00,0x87,0xcc,0x00,0x11,0x42,0x9c
+# GFX1250: v_wmma_f16_16x16x128_bf8_bf8 v[16:19], v[0:15], v[8:23], v[16:19] neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x87,0xcc,0x00,0x11,0x42,0x9c]
+
+0x10,0x00,0x86,0xcc,0x00,0x11,0xca,0x1b
+# GFX1250: v_wmma_f16_16x16x128_bf8_fp8 v[16:19], v[0:15], v[8:23], 1.0 ; encoding: [0x10,0x00,0x86,0xcc,0x00,0x11,0xca,0x1b]
+
+0x10,0x00,0x86,0xcc,0x00,0x11,0xca,0x9b
+# GFX1250: v_wmma_f16_16x16x128_bf8_fp8 v[16:19], v[0:15], v[8:23], 1.0 neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x86,0xcc,0x00,0x11,0xca,0x9b]
+
+0x10,0x00,0x86,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f16_16x16x128_bf8_fp8 v[16:19], v[0:15], v[8:23], v[16:19] ; encoding: [0x10,0x00,0x86,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x20,0x86,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f16_16x16x128_bf8_fp8 v[16:19], v[0:15], v[8:23], v[16:19] matrix_a_reuse ; encoding: [0x10,0x20,0x86,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x40,0x86,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f16_16x16x128_bf8_fp8 v[16:19], v[0:15], v[8:23], v[16:19] matrix_b_reuse ; encoding: [0x10,0x40,0x86,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x04,0x86,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f16_16x16x128_bf8_fp8 v[16:19], v[0:15], v[8:23], v[16:19] neg_hi:[0,0,1] ; encoding: [0x10,0x04,0x86,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x00,0x86,0xcc,0x00,0x11,0x42,0x9c
+# GFX1250: v_wmma_f16_16x16x128_bf8_fp8 v[16:19], v[0:15], v[8:23], v[16:19] neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x86,0xcc,0x00,0x11,0x42,0x9c]
+
+0x10,0x00,0x85,0xcc,0x00,0x11,0xca,0x1b
+# GFX1250: v_wmma_f16_16x16x128_fp8_bf8 v[16:19], v[0:15], v[8:23], 1.0 ; encoding: [0x10,0x00,0x85,0xcc,0x00,0x11,0xca,0x1b]
+
+0x10,0x00,0x85,0xcc,0x00,0x11,0xca,0x9b
+# GFX1250: v_wmma_f16_16x16x128_fp8_bf8 v[16:19], v[0:15], v[8:23], 1.0 neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x85,0xcc,0x00,0x11,0xca,0x9b]
+
+0x10,0x00,0x85,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f16_16x16x128_fp8_bf8 v[16:19], v[0:15], v[8:23], v[16:19] ; encoding: [0x10,0x00,0x85,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x20,0x85,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f16_16x16x128_fp8_bf8 v[16:19], v[0:15], v[8:23], v[16:19] matrix_a_reuse ; encoding: [0x10,0x20,0x85,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x40,0x85,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f16_16x16x128_fp8_bf8 v[16:19], v[0:15], v[8:23], v[16:19] matrix_b_reuse ; encoding: [0x10,0x40,0x85,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x04,0x85,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f16_16x16x128_fp8_bf8 v[16:19], v[0:15], v[8:23], v[16:19] neg_hi:[0,0,1] ; encoding: [0x10,0x04,0x85,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x00,0x85,0xcc,0x00,0x11,0x42,0x9c
+# GFX1250: v_wmma_f16_16x16x128_fp8_bf8 v[16:19], v[0:15], v[8:23], v[16:19] neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x85,0xcc,0x00,0x11,0x42,0x9c]
+
+0x10,0x00,0x84,0xcc,0x00,0x11,0xca,0x1b
+# GFX1250: v_wmma_f16_16x16x128_fp8_fp8 v[16:19], v[0:15], v[8:23], 1.0 ; encoding: [0x10,0x00,0x84,0xcc,0x00,0x11,0xca,0x1b]
+
+0x10,0x00,0x84,0xcc,0x00,0x11,0xca,0x9b
+# GFX1250: v_wmma_f16_16x16x128_fp8_fp8 v[16:19], v[0:15], v[8:23], 1.0 neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x84,0xcc,0x00,0x11,0xca,0x9b]
+
+0x10,0x00,0x84,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f16_16x16x128_fp8_fp8 v[16:19], v[0:15], v[8:23], v[16:19] ; encoding: [0x10,0x00,0x84,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x20,0x84,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f16_16x16x128_fp8_fp8 v[16:19], v[0:15], v[8:23], v[16:19] matrix_a_reuse ; encoding: [0x10,0x20,0x84,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x40,0x84,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f16_16x16x128_fp8_fp8 v[16:19], v[0:15], v[8:23], v[16:19] matrix_b_reuse ; encoding: [0x10,0x40,0x84,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x04,0x84,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f16_16x16x128_fp8_fp8 v[16:19], v[0:15], v[8:23], v[16:19] neg_hi:[0,0,1] ; encoding: [0x10,0x04,0x84,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x00,0x84,0xcc,0x00,0x11,0x42,0x9c
+# GFX1250: v_wmma_f16_16x16x128_fp8_fp8 v[16:19], v[0:15], v[8:23], v[16:19] neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x84,0xcc,0x00,0x11,0x42,0x9c]
+
+0x10,0x00,0x83,0xcc,0x00,0x11,0xca,0x1b
+# GFX1250: v_wmma_f32_16x16x128_bf8_bf8 v[16:23], v[0:15], v[8:23], 1.0 ; encoding: [0x10,0x00,0x83,0xcc,0x00,0x11,0xca,0x1b]
+
+0x10,0x00,0x83,0xcc,0x00,0x11,0xca,0x9b
+# GFX1250: v_wmma_f32_16x16x128_bf8_bf8 v[16:23], v[0:15], v[8:23], 1.0 neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x83,0xcc,0x00,0x11,0xca,0x9b]
+
+0x10,0x00,0x83,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f32_16x16x128_bf8_bf8 v[16:23], v[0:15], v[8:23], v[16:23] ; encoding: [0x10,0x00,0x83,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x20,0x83,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f32_16x16x128_bf8_bf8 v[16:23], v[0:15], v[8:23], v[16:23] matrix_a_reuse ; encoding: [0x10,0x20,0x83,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x40,0x83,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f32_16x16x128_bf8_bf8 v[16:23], v[0:15], v[8:23], v[16:23] matrix_b_reuse ; encoding: [0x10,0x40,0x83,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x04,0x83,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f32_16x16x128_bf8_bf8 v[16:23], v[0:15], v[8:23], v[16:23] neg_hi:[0,0,1] ; encoding: [0x10,0x04,0x83,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x00,0x83,0xcc,0x00,0x11,0x42,0x9c
+# GFX1250: v_wmma_f32_16x16x128_bf8_bf8 v[16:23], v[0:15], v[8:23], v[16:23] neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x83,0xcc,0x00,0x11,0x42,0x9c]
+
+0x10,0x00,0x82,0xcc,0x00,0x11,0xca,0x1b
+# GFX1250: v_wmma_f32_16x16x128_bf8_fp8 v[16:23], v[0:15], v[8:23], 1.0 ; encoding: [0x10,0x00,0x82,0xcc,0x00,0x11,0xca,0x1b]
+
+0x10,0x00,0x82,0xcc,0x00,0x11,0xca,0x9b
+# GFX1250: v_wmma_f32_16x16x128_bf8_fp8 v[16:23], v[0:15], v[8:23], 1.0 neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x82,0xcc,0x00,0x11,0xca,0x9b]
+
+0x10,0x00,0x82,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f32_16x16x128_bf8_fp8 v[16:23], v[0:15], v[8:23], v[16:23] ; encoding: [0x10,0x00,0x82,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x20,0x82,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f32_16x16x128_bf8_fp8 v[16:23], v[0:15], v[8:23], v[16:23] matrix_a_reuse ; encoding: [0x10,0x20,0x82,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x40,0x82,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f32_16x16x128_bf8_fp8 v[16:23], v[0:15], v[8:23], v[16:23] matrix_b_reuse ; encoding: [0x10,0x40,0x82,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x04,0x82,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f32_16x16x128_bf8_fp8 v[16:23], v[0:15], v[8:23], v[16:23] neg_hi:[0,0,1] ; encoding: [0x10,0x04,0x82,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x00,0x82,0xcc,0x00,0x11,0x42,0x9c
+# GFX1250: v_wmma_f32_16x16x128_bf8_fp8 v[16:23], v[0:15], v[8:23], v[16:23] neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x82,0xcc,0x00,0x11,0x42,0x9c]
+
+0x10,0x00,0x81,0xcc,0x00,0x11,0xca,0x1b
+# GFX1250: v_wmma_f32_16x16x128_fp8_bf8 v[16:23], v[0:15], v[8:23], 1.0 ; encoding: [0x10,0x00,0x81,0xcc,0x00,0x11,0xca,0x1b]
+
+0x10,0x00,0x81,0xcc,0x00,0x11,0xca,0x9b
+# GFX1250: v_wmma_f32_16x16x128_fp8_bf8 v[16:23], v[0:15], v[8:23], 1.0 neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x81,0xcc,0x00,0x11,0xca,0x9b]
+
+0x10,0x00,0x81,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f32_16x16x128_fp8_bf8 v[16:23], v[0:15], v[8:23], v[16:23] ; encoding: [0x10,0x00,0x81,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x20,0x81,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f32_16x16x128_fp8_bf8 v[16:23], v[0:15], v[8:23], v[16:23] matrix_a_reuse ; encoding: [0x10,0x20,0x81,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x40,0x81,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f32_16x16x128_fp8_bf8 v[16:23], v[0:15], v[8:23], v[16:23] matrix_b_reuse ; encoding: [0x10,0x40,0x81,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x04,0x81,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f32_16x16x128_fp8_bf8 v[16:23], v[0:15], v[8:23], v[16:23] neg_hi:[0,0,1] ; encoding: [0x10,0x04,0x81,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x00,0x81,0xcc,0x00,0x11,0x42,0x9c
+# GFX1250: v_wmma_f32_16x16x128_fp8_bf8 v[16:23], v[0:15], v[8:23], v[16:23] neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x81,0xcc,0x00,0x11,0x42,0x9c]
+
+0x10,0x00,0x80,0xcc,0x00,0x11,0xca,0x1b
+# GFX1250: v_wmma_f32_16x16x128_fp8_fp8 v[16:23], v[0:15], v[8:23], 1.0 ; encoding: [0x10,0x00,0x80,0xcc,0x00,0x11,0xca,0x1b]
+
+0x10,0x00,0x80,0xcc,0x00,0x11,0xca,0x9b
+# GFX1250: v_wmma_f32_16x16x128_fp8_fp8 v[16:23], v[0:15], v[8:23], 1.0 neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x80,0xcc,0x00,0x11,0xca,0x9b]
+
+0x10,0x00,0x80,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f32_16x16x128_fp8_fp8 v[16:23], v[0:15], v[8:23], v[16:23] ; encoding: [0x10,0x00,0x80,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x20,0x80,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f32_16x16x128_fp8_fp8 v[16:23], v[0:15], v[8:23], v[16:23] matrix_a_reuse ; encoding: [0x10,0x20,0x80,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x40,0x80,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f32_16x16x128_fp8_fp8 v[16:23], v[0:15], v[8:23], v[16:23] matrix_b_reuse ; encoding: [0x10,0x40,0x80,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x04,0x80,0xcc,0x00,0x11,0x42,0x1c
+# GFX1250: v_wmma_f32_16x16x128_fp8_fp8 v[16:23], v[0:15], v[8:23], v[16:23] neg_hi:[0,0,1] ; encoding: [0x10,0x04,0x80,0xcc,0x00,0x11,0x42,0x1c]
+
+0x10,0x00,0x80,0xcc,0x00,0x11,0x42,0x9c
+# GFX1250: v_wmma_f32_16x16x128_fp8_fp8 v[16:23], v[0:15], v[8:23], v[16:23] neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x80,0xcc,0x00,0x11,0x42,0x9c]
+
+0x04,0x40,0x88,0xcc,0x00,0x05,0xca,0x1b
+# GFX1250: v_wmma_f32_32x16x128_f4 v[4:19], v[0:15], v[2:9], 1.0 ; encoding: [0x04,0x40,0x88,0xcc,0x00,0x05,0xca,0x1b]
+
+0x04,0x40,0x88,0xcc,0x00,0x05,0xca,0x9b
+# GFX1250: v_wmma_f32_32x16x128_f4 v[4:19], v[0:15], v[2:9], 1.0 neg_lo:[0,0,1] ; encoding: [0x04,0x40,0x88,0xcc,0x00,0x05,0xca,0x9b]
+
+0x04,0x40,0x88,0xcc,0x00,0x05,0x12,0x1c
+# GFX1250: v_wmma_f32_32x16x128_f4 v[4:19], v[0:15], v[2:9], v[4:19] ; encoding: [0x04,0x40,0x88,0xcc,0x00,0x05,0x12,0x1c]
+
+0x04,0x44,0x88,0xcc,0x00,0x05,0x12,0x1c
+# GFX1250: v_wmma_f32_32x16x128_f4 v[4:19], v[0:15], v[2:9], v[4:19] neg_hi:[0,0,1] ; encoding: [0x04,0x44,0x88,0xcc,0x00,0x05,0x12,0x1c]
+
+0x04,0x40,0x88,0xcc,0x00,0x05,0x12,0x9c
+# GFX1250: v_wmma_f32_32x16x128_f4 v[4:19], v[0:15], v[2:9], v[4:19] neg_lo:[0,0,1] ; encoding: [0x04,0x40,0x88,0xcc,0x00,0x05,0x12,0x9c]
+
+0x04,0x44,0x88,0xcc,0x00,0x05,0x12,0x9c
+# GFX1250: v_wmma_f32_32x16x128_f4 v[4:19], v[0:15], v[2:9], v[4:19] neg_lo:[0,0,1] neg_hi:[0,0,1] ; encoding: [0x04,0x44,0x88,0xcc,0x00,0x05,0x12,0x9c]
More information about the llvm-commits
mailing list