[llvm] e12cbd8 - [AMDGPU] Fix scale opsel flags for scaled MFMA operations (#140183)
via llvm-commits
llvm-commits at lists.llvm.org
Wed May 21 10:30:26 PDT 2025
Author: Vigneshwar Jayakumar
Date: 2025-05-21T12:30:22-05:00
New Revision: e12cbd8339b89563059c2bb2a312579b652560d0
URL: https://github.com/llvm/llvm-project/commit/e12cbd8339b89563059c2bb2a312579b652560d0
DIFF: https://github.com/llvm/llvm-project/commit/e12cbd8339b89563059c2bb2a312579b652560d0.diff
LOG: [AMDGPU] Fix scale opsel flags for scaled MFMA operations (#140183)
Fix for src scale opsel flags encoding and ASM parsing for gfx950 scaled MFMA.
Added:
Modified:
llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
llvm/lib/Target/AMDGPU/VOP3PInstructions.td
llvm/lib/Target/AMDGPU/VOPInstructions.td
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.scale.f32.16x16x128.f8f6f4.ll
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.scale.f32.32x32x64.f8f6f4.ll
llvm/test/MC/AMDGPU/mai-gfx950.s
llvm/test/MC/Disassembler/AMDGPU/gfx950_mai.txt
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 28370b8670f05..dc5bb00a15274 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -1878,6 +1878,7 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
OptionalImmIndexMap &OptionalIdx);
+ void cvtScaledMFMA(MCInst &Inst, const OperandVector &Operands);
void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
@@ -6784,17 +6785,25 @@ ParseStatus AMDGPUAsmParser::parseTH(OperandVector &Operands, int64_t &TH) {
return ParseStatus::Success;
}
-static void addOptionalImmOperand(
- MCInst& Inst, const OperandVector& Operands,
- AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
- AMDGPUOperand::ImmTy ImmT,
- int64_t Default = 0) {
+static void
+addOptionalImmOperand(MCInst &Inst, const OperandVector &Operands,
+ AMDGPUAsmParser::OptionalImmIndexMap &OptionalIdx,
+ AMDGPUOperand::ImmTy ImmT, int64_t Default = 0,
+ std::optional<unsigned> InsertAt = std::nullopt) {
auto i = OptionalIdx.find(ImmT);
if (i != OptionalIdx.end()) {
unsigned Idx = i->second;
- ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
+ const AMDGPUOperand &Op =
+ static_cast<const AMDGPUOperand &>(*Operands[Idx]);
+ if (InsertAt)
+ Inst.insert(Inst.begin() + *InsertAt, MCOperand::createImm(Op.getImm()));
+ else
+ Op.addImmOperands(Inst, 1);
} else {
- Inst.addOperand(MCOperand::createImm(Default));
+ if (InsertAt.has_value())
+ Inst.insert(Inst.begin() + *InsertAt, MCOperand::createImm(Default));
+ else
+ Inst.addOperand(MCOperand::createImm(Default));
}
}
@@ -8811,6 +8820,70 @@ void AMDGPUAsmParser::cvtVINTERP(MCInst &Inst, const OperandVector &Operands)
Inst.getOperand(ModIdx).setImm(ModVal);
}
}
+void AMDGPUAsmParser::cvtScaledMFMA(MCInst &Inst,
+ const OperandVector &Operands) {
+ OptionalImmIndexMap OptionalIdx;
+ unsigned Opc = Inst.getOpcode();
+ unsigned I = 1;
+
+ const MCInstrDesc &Desc = MII.get(Opc);
+
+ for (unsigned J = 0; J < Desc.getNumDefs(); ++J)
+ static_cast<AMDGPUOperand &>(*Operands[I++]).addRegOperands(Inst, 1);
+
+ for (unsigned E = Operands.size(); I != E; ++I) {
+ AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[I]);
+
+ if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
+ Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
+ } else if (Op.isImmModifier()) {
+ OptionalIdx[Op.getImmTy()] = I;
+ } else {
+ Op.addRegOrImmOperands(Inst, 1);
+ }
+ }
+
+ // Insert CBSZ and BLGP operands for F8F6F4 variants
+ int InsertPos = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::cbsz);
+ addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCBSZ,
+ 0, InsertPos);
+ InsertPos = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
+ addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyBLGP,
+ 0, InsertPos);
+
+ // Add dummy src_modifiers
+ Inst.addOperand(MCOperand::createImm(0));
+ Inst.addOperand(MCOperand::createImm(0));
+
+ // Handle op_sel fields
+
+ unsigned OpSel = 0;
+ auto OpselIdx = OptionalIdx.find(AMDGPUOperand::ImmTyOpSel);
+ if (OpselIdx != OptionalIdx.end()) {
+ OpSel = static_cast<const AMDGPUOperand &>(*Operands[OpselIdx->second])
+ .getImm();
+ }
+
+ unsigned OpSelHi = 0;
+ auto OpselHiIdx = OptionalIdx.find(AMDGPUOperand::ImmTyOpSelHi);
+ if (OpselHiIdx != OptionalIdx.end()) {
+ OpSelHi = static_cast<const AMDGPUOperand &>(*Operands[OpselHiIdx->second])
+ .getImm();
+ }
+ const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
+ AMDGPU::OpName::src1_modifiers};
+
+ for (unsigned J = 0; J < 2; ++J) {
+ unsigned ModVal = 0;
+ if (OpSel & (1 << J))
+ ModVal |= SISrcMods::OP_SEL_0;
+ if (OpSelHi & (1 << J))
+ ModVal |= SISrcMods::OP_SEL_1;
+
+ const int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
+ Inst.getOperand(ModIdx).setImm(ModVal);
+ }
+}
void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
OptionalImmIndexMap &OptionalIdx) {
diff --git a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
index a3be49bf02648..06ee41acf41ac 100644
--- a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
@@ -829,12 +829,12 @@ class MFMA_F8F6F4_WithSizeTable_Helper<VOP3_Pseudo ps, string F8F8Op> :
// Currently assumes scaled instructions never have abid
class MAIFrag<SDPatternOperator Op, code pred, bit HasAbid = true, bit Scaled = false> : PatFrag <
!if(Scaled, (ops node:$src0, node:$src1, node:$src2, node:$cbsz, node:$blgp,
- node:$scale_src0_opsel, node:$scale_src0,
- node:$scale_src1_opsel, node:$scale_src1),
+ node:$src0_modifiers, node:$scale_src0,
+ node:$src1_modifiers, node:$scale_src1),
!con((ops node:$src0, node:$src1, node:$src2, node:$cbsz),
!if(HasAbid, (ops node:$abid), (ops)),
(ops node:$blgp))),
- !if(Scaled, (Op $src0, $src1, $src2, $cbsz, $blgp, $scale_src0_opsel, $scale_src0, $scale_src1_opsel, $scale_src1),
+ !if(Scaled, (Op $src0, $src1, $src2, $cbsz, $blgp, $src0_modifiers, $scale_src0, $src1_modifiers, $scale_src1),
!if(HasAbid, (Op $src0, $src1, $src2, $cbsz, $abid, $blgp),
(Op $src0, $src1, $src2, $cbsz, $blgp))),
pred
@@ -895,12 +895,12 @@ class ScaledMAIInst<string OpName, MAIInst BaseInst, SDPatternOperator node> :
let InOperandList = !con(BaseInst.InOperandList,
(ins VSrc_b32:$scale_src0,
VSrc_b32:$scale_src1,
- op_sel0:$scale_src0_opsel,
- op_sel_hi0:$scale_src1_opsel));
+ op_sel0:$src0_modifiers,
+ op_sel_hi0:$src1_modifiers));
let AsmOperands =
"$vdst, $src0, $src1, $src2, $scale_src0, $scale_src1"
- "$scale_src0_opsel$scale_src1_opsel$cbsz$blgp";
-
+ "$src0_modifiers$src1_modifiers$cbsz$blgp";
+ let AsmMatchConverter = "cvtScaledMFMA";
let FixedSize = 1;
let Size = 16;
}
@@ -2041,7 +2041,6 @@ multiclass VOP3PX_Real_ScaledMFMA<bits<7> op> {
defvar PS_VCD = !cast<VOP3_Pseudo>(NAME # "_vgprcd" # "_e64");
defvar Name = PS_ACD.Mnemonic;
defvar F8F8Name = !substr(NAME, 0, !sub(!size(NAME), !size("_fN_fM")))#"_f8_f8";
-
let SubtargetPredicate = HasGFX950Insts,
DecoderNamespace = "GFX940",
AsmString = Name # PS_ACD.AsmOperands, Constraints = "" in {
diff --git a/llvm/lib/Target/AMDGPU/VOPInstructions.td b/llvm/lib/Target/AMDGPU/VOPInstructions.td
index 24032353a00e9..952ee2fe2c955 100644
--- a/llvm/lib/Target/AMDGPU/VOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOPInstructions.td
@@ -526,14 +526,16 @@ class VOP3PXe <bits<7> op, VOPProfile MFMAPfl, bit acc_cd = 0> : Enc128, VOP3Pe_
bits<9> scale_src0;
bits<9> scale_src1;
- bits<2> scale_src0_opsel;
- bits<2> scale_src1_opsel;
+ //MFMALdScaleModifierOp transforms 2 bit opsel input to 4 bit value
+ //where opsel and opselHi are in 3rd and 4th bit.
+ bits<4> src0_modifiers;
+ bits<4> src1_modifiers;
// Inst{7-0} = unused
// Inst{10-8} = neg_hi;
// Inst{13-11} = op_sel
- let Inst{11} = scale_src0_opsel{0};
- let Inst{12} = scale_src1_opsel{0};
+ let Inst{11} = src0_modifiers{2}; //opsel[0]
+ let Inst{12} = src1_modifiers{2}; //opsel[1]
// Inst{13} = unused op_sel
// Inst{14} = unused op_sel_hi2
@@ -542,8 +544,8 @@ class VOP3PXe <bits<7> op, VOPProfile MFMAPfl, bit acc_cd = 0> : Enc128, VOP3Pe_
let Inst{49-41} = scale_src1;
// Inst{50-58} = unused
// Inst{60-59} = op_sel_hi;
- let Inst{59} = scale_src0_opsel{1};
- let Inst{60} = scale_src1_opsel{1};
+ let Inst{59} = src0_modifiers{3}; //opsel_hi[0]
+ let Inst{60} = src1_modifiers{3}; //opsel_hi[1]
// Inst{63-61} = neg;
// The high half of the encoding is the unscaled mfma op.
@@ -1437,17 +1439,17 @@ class getVOP3MAIScaledPat<VOPProfile P, SDPatternOperator node> {
// mfma
[(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1, P.Src2VT:$src2,
timm:$cbsz, timm:$blgp,
- MFMALdScaleModifierOp:$scale_src0_opsel,
+ MFMALdScaleModifierOp:$src0_modifiers,
i32:$scale_src0,
- MFMALdScaleModifierOp:$scale_src1_opsel,
+ MFMALdScaleModifierOp:$src1_modifiers,
i32:$scale_src1
))],
// smfmac
[(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1, P.Src2VT:$src2, i32:$idx,
timm:$cbsz, timm:$abid,
- MFMALdScaleModifierOp:$scale_src0_opsel,
+ MFMALdScaleModifierOp:$src0_modifiers,
i32:$scale_src0,
- MFMALdScaleModifierOp:$scale_src1_opsel,
+ MFMALdScaleModifierOp:$src1_modifiers,
i32:$scale_src1))]);
}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.scale.f32.16x16x128.f8f6f4.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.scale.f32.16x16x128.f8f6f4.ll
index 7cc726a3bd79c..e027dda957a6d 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.scale.f32.16x16x128.f8f6f4.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.scale.f32.16x16x128.f8f6f4.ll
@@ -46,7 +46,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_1_1__cbsz1__blgp1(<8 x
; GCN-NEXT: v_accvgpr_write_b32 a2, v18
; GCN-NEXT: v_accvgpr_write_b32 a3, v19
; GCN-NEXT: s_nop 1
-; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel_hi:[0,0,0]
+; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel:[1,1,0] op_sel_hi:[0,0,0]
; GCN-NEXT: s_nop 7
; GCN-NEXT: s_nop 3
; GCN-NEXT: v_accvgpr_read_b32 v0, a0
@@ -70,7 +70,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_2_2__cbsz1__blgp1(<8 x
; GCN-NEXT: v_accvgpr_write_b32 a2, v18
; GCN-NEXT: v_accvgpr_write_b32 a3, v19
; GCN-NEXT: s_nop 1
-; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel_hi:[0,0,0]
+; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel_hi:[1,1,0]
; GCN-NEXT: s_nop 7
; GCN-NEXT: s_nop 3
; GCN-NEXT: v_accvgpr_read_b32 v0, a0
@@ -94,7 +94,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_3_3__cbsz1__blgp1(<8 x
; GCN-NEXT: v_accvgpr_write_b32 a2, v18
; GCN-NEXT: v_accvgpr_write_b32 a3, v19
; GCN-NEXT: s_nop 1
-; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel_hi:[0,0,0]
+; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel:[1,1,0] op_sel_hi:[1,1,0]
; GCN-NEXT: s_nop 7
; GCN-NEXT: s_nop 3
; GCN-NEXT: v_accvgpr_read_b32 v0, a0
@@ -118,7 +118,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_3__cbsz1__blgp1(<8 x
; GCN-NEXT: v_accvgpr_write_b32 a2, v18
; GCN-NEXT: v_accvgpr_write_b32 a3, v19
; GCN-NEXT: s_nop 1
-; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel_hi:[0,0,0]
+; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel:[0,1,0] op_sel_hi:[0,1,0]
; GCN-NEXT: s_nop 7
; GCN-NEXT: s_nop 3
; GCN-NEXT: v_accvgpr_read_b32 v0, a0
@@ -142,7 +142,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_3_0__cbsz1__blgp1(<8 x
; GCN-NEXT: v_accvgpr_write_b32 a2, v18
; GCN-NEXT: v_accvgpr_write_b32 a3, v19
; GCN-NEXT: s_nop 1
-; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel_hi:[0,0,0]
+; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel:[1,0,0] op_sel_hi:[1,0,0]
; GCN-NEXT: s_nop 7
; GCN-NEXT: s_nop 3
; GCN-NEXT: v_accvgpr_read_b32 v0, a0
@@ -166,7 +166,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_2_3__cbsz1__blgp1(<8 x
; GCN-NEXT: v_accvgpr_write_b32 a2, v18
; GCN-NEXT: v_accvgpr_write_b32 a3, v19
; GCN-NEXT: s_nop 1
-; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel_hi:[0,0,0]
+; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel:[0,1,0] op_sel_hi:[1,1,0]
; GCN-NEXT: s_nop 7
; GCN-NEXT: s_nop 3
; GCN-NEXT: v_accvgpr_read_b32 v0, a0
@@ -190,7 +190,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_3_2__cbsz1__blgp1(<8 x
; GCN-NEXT: v_accvgpr_write_b32 a2, v18
; GCN-NEXT: v_accvgpr_write_b32 a3, v19
; GCN-NEXT: s_nop 1
-; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel_hi:[0,0,0]
+; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel:[1,0,0] op_sel_hi:[1,1,0]
; GCN-NEXT: s_nop 7
; GCN-NEXT: s_nop 3
; GCN-NEXT: v_accvgpr_read_b32 v0, a0
@@ -1797,7 +1797,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__scaleA_inlineimm__
; GCN-NEXT: v_accvgpr_write_b32 a2, v18
; GCN-NEXT: v_accvgpr_write_b32 a3, v19
; GCN-NEXT: s_nop 1
-; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], 33, -2 op_sel_hi:[0,0,0]
+; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], 33, -2 op_sel_hi:[1,1,0]
; GCN-NEXT: s_nop 7
; GCN-NEXT: s_nop 3
; GCN-NEXT: v_accvgpr_read_b32 v0, a0
@@ -1819,7 +1819,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__scaleA_kimm__scale
; SDAG-NEXT: v_accvgpr_write_b32 a2, v18
; SDAG-NEXT: v_accvgpr_write_b32 a3, v19
; SDAG-NEXT: s_nop 1
-; SDAG-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], s0, -2 op_sel_hi:[0,0,0]
+; SDAG-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], s0, -2 op_sel_hi:[1,1,0]
; SDAG-NEXT: s_nop 7
; SDAG-NEXT: s_nop 3
; SDAG-NEXT: v_accvgpr_read_b32 v0, a0
@@ -1837,7 +1837,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__scaleA_kimm__scale
; GISEL-NEXT: v_accvgpr_write_b32 a3, v19
; GISEL-NEXT: v_mov_b32_e32 v16, 0x41
; GISEL-NEXT: s_nop 1
-; GISEL-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v16, -2 op_sel_hi:[0,0,0]
+; GISEL-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v16, -2 op_sel_hi:[1,1,0]
; GISEL-NEXT: s_nop 7
; GISEL-NEXT: s_nop 3
; GISEL-NEXT: v_accvgpr_read_b32 v0, a0
@@ -1860,7 +1860,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__scaleA_kimm__scale
; SDAG-NEXT: v_accvgpr_write_b32 a3, v19
; SDAG-NEXT: v_mov_b32_e32 v16, 0x4d
; SDAG-NEXT: s_nop 1
-; SDAG-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], s0, v16 op_sel_hi:[0,0,0]
+; SDAG-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], s0, v16 op_sel_hi:[1,1,0]
; SDAG-NEXT: s_nop 7
; SDAG-NEXT: s_nop 3
; SDAG-NEXT: v_accvgpr_read_b32 v0, a0
@@ -1879,7 +1879,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__scaleA_kimm__scale
; GISEL-NEXT: v_mov_b32_e32 v16, 0x41
; GISEL-NEXT: v_mov_b32_e32 v17, 0x4d
; GISEL-NEXT: s_nop 1
-; GISEL-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v16, v17 op_sel_hi:[0,0,0]
+; GISEL-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v16, v17 op_sel_hi:[1,1,0]
; GISEL-NEXT: s_nop 7
; GISEL-NEXT: s_nop 3
; GISEL-NEXT: v_accvgpr_read_b32 v0, a0
@@ -1921,7 +1921,7 @@ define amdgpu_kernel void @test_mfma_scale_f32_16x16x128_f8f6f4__vgprcd(<8 x i32
; SDAG-NEXT: v_accvgpr_write_b32 a3, s11
; SDAG-NEXT: v_mov_b32_e32 v17, s13
; SDAG-NEXT: s_nop 1
-; SDAG-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], s12, v17 op_sel_hi:[0,0,0] blgp:2
+; SDAG-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], s12, v17 op_sel:[1,1,0] op_sel_hi:[1,0,0] blgp:2
; SDAG-NEXT: s_nop 7
; SDAG-NEXT: s_nop 3
; SDAG-NEXT: global_store_dwordx4 v16, a[0:3], s[14:15]
@@ -1946,7 +1946,7 @@ define amdgpu_kernel void @test_mfma_scale_f32_16x16x128_f8f6f4__vgprcd(<8 x i32
; GISEL-NEXT: v_accvgpr_write_b32 a3, s27
; GISEL-NEXT: v_mov_b32_e32 v16, s29
; GISEL-NEXT: s_nop 1
-; GISEL-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], s28, v16 op_sel_hi:[0,0,0] blgp:2
+; GISEL-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], s28, v16 op_sel:[1,1,0] op_sel_hi:[1,0,0] blgp:2
; GISEL-NEXT: v_mov_b32_e32 v0, 0
; GISEL-NEXT: s_nop 7
; GISEL-NEXT: s_nop 2
@@ -1987,7 +1987,7 @@ define amdgpu_kernel void @test_mfma_scale_f32_16x16x128_f8f6f4__vgprcd___scaleA
; SDAG-NEXT: v_accvgpr_write_b32 a2, s2
; SDAG-NEXT: v_accvgpr_write_b32 a3, s3
; SDAG-NEXT: s_nop 1
-; SDAG-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], s6, -2 op_sel_hi:[0,0,0]
+; SDAG-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], s6, -2 op_sel:[1,1,0] op_sel_hi:[1,0,0]
; SDAG-NEXT: s_nop 7
; SDAG-NEXT: s_nop 3
; SDAG-NEXT: global_store_dwordx4 v16, a[0:3], s[4:5]
@@ -2013,7 +2013,7 @@ define amdgpu_kernel void @test_mfma_scale_f32_16x16x128_f8f6f4__vgprcd___scaleA
; GISEL-NEXT: v_accvgpr_write_b32 a2, s2
; GISEL-NEXT: v_accvgpr_write_b32 a3, s3
; GISEL-NEXT: s_nop 1
-; GISEL-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v16, -2 op_sel_hi:[0,0,0]
+; GISEL-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v16, -2 op_sel:[1,1,0] op_sel_hi:[1,0,0]
; GISEL-NEXT: v_mov_b32_e32 v0, 0
; GISEL-NEXT: s_nop 7
; GISEL-NEXT: s_nop 2
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.scale.f32.32x32x64.f8f6f4.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.scale.f32.32x32x64.f8f6f4.ll
index dac54c9f85e96..5574313f22a47 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.scale.f32.32x32x64.f8f6f4.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.scale.f32.32x32x64.f8f6f4.ll
@@ -134,7 +134,7 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_1_1__cbsz1__blgp1(<8 x
; SDAG-NEXT: v_accvgpr_write_b32 a14, v30
; SDAG-NEXT: s_waitcnt vmcnt(0)
; SDAG-NEXT: s_nop 0
-; SDAG-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v32, v31 op_sel_hi:[0,0,0]
+; SDAG-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v32, v31 op_sel:[1,1,0] op_sel_hi:[0,0,0]
; SDAG-NEXT: s_nop 7
; SDAG-NEXT: s_nop 7
; SDAG-NEXT: s_nop 3
@@ -179,7 +179,7 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_1_1__cbsz1__blgp1(<8 x
; GISEL-NEXT: v_accvgpr_write_b32 a14, v30
; GISEL-NEXT: s_waitcnt vmcnt(0)
; GISEL-NEXT: s_nop 0
-; GISEL-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v31, v32 op_sel_hi:[0,0,0]
+; GISEL-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v31, v32 op_sel:[1,1,0] op_sel_hi:[0,0,0]
; GISEL-NEXT: s_nop 7
; GISEL-NEXT: s_nop 7
; GISEL-NEXT: s_nop 3
@@ -231,7 +231,7 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_2_2__cbsz1__blgp1(<8 x
; SDAG-NEXT: v_accvgpr_write_b32 a14, v30
; SDAG-NEXT: s_waitcnt vmcnt(0)
; SDAG-NEXT: s_nop 0
-; SDAG-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v32, v31 op_sel_hi:[0,0,0]
+; SDAG-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v32, v31 op_sel_hi:[1,1,0]
; SDAG-NEXT: s_nop 7
; SDAG-NEXT: s_nop 7
; SDAG-NEXT: s_nop 3
@@ -276,7 +276,7 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_2_2__cbsz1__blgp1(<8 x
; GISEL-NEXT: v_accvgpr_write_b32 a14, v30
; GISEL-NEXT: s_waitcnt vmcnt(0)
; GISEL-NEXT: s_nop 0
-; GISEL-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v31, v32 op_sel_hi:[0,0,0]
+; GISEL-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v31, v32 op_sel_hi:[1,1,0]
; GISEL-NEXT: s_nop 7
; GISEL-NEXT: s_nop 7
; GISEL-NEXT: s_nop 3
@@ -328,7 +328,7 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_3_3__cbsz1__blgp1(<8 x
; SDAG-NEXT: v_accvgpr_write_b32 a14, v30
; SDAG-NEXT: s_waitcnt vmcnt(0)
; SDAG-NEXT: s_nop 0
-; SDAG-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v32, v31 op_sel_hi:[0,0,0]
+; SDAG-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v32, v31 op_sel:[1,1,0] op_sel_hi:[1,1,0]
; SDAG-NEXT: s_nop 7
; SDAG-NEXT: s_nop 7
; SDAG-NEXT: s_nop 3
@@ -373,7 +373,7 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_3_3__cbsz1__blgp1(<8 x
; GISEL-NEXT: v_accvgpr_write_b32 a14, v30
; GISEL-NEXT: s_waitcnt vmcnt(0)
; GISEL-NEXT: s_nop 0
-; GISEL-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v31, v32 op_sel_hi:[0,0,0]
+; GISEL-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v31, v32 op_sel:[1,1,0] op_sel_hi:[1,1,0]
; GISEL-NEXT: s_nop 7
; GISEL-NEXT: s_nop 7
; GISEL-NEXT: s_nop 3
@@ -425,7 +425,7 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_3__cbsz1__blgp1(<8 x
; SDAG-NEXT: v_accvgpr_write_b32 a14, v30
; SDAG-NEXT: s_waitcnt vmcnt(0)
; SDAG-NEXT: s_nop 0
-; SDAG-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v32, v31 op_sel_hi:[0,0,0]
+; SDAG-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v32, v31 op_sel:[0,1,0] op_sel_hi:[0,1,0]
; SDAG-NEXT: s_nop 7
; SDAG-NEXT: s_nop 7
; SDAG-NEXT: s_nop 3
@@ -470,7 +470,7 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_3__cbsz1__blgp1(<8 x
; GISEL-NEXT: v_accvgpr_write_b32 a14, v30
; GISEL-NEXT: s_waitcnt vmcnt(0)
; GISEL-NEXT: s_nop 0
-; GISEL-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v31, v32 op_sel_hi:[0,0,0]
+; GISEL-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v31, v32 op_sel:[0,1,0] op_sel_hi:[0,1,0]
; GISEL-NEXT: s_nop 7
; GISEL-NEXT: s_nop 7
; GISEL-NEXT: s_nop 3
@@ -522,7 +522,7 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_3_0__cbsz1__blgp1(<8 x
; SDAG-NEXT: v_accvgpr_write_b32 a14, v30
; SDAG-NEXT: s_waitcnt vmcnt(0)
; SDAG-NEXT: s_nop 0
-; SDAG-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v32, v31 op_sel_hi:[0,0,0]
+; SDAG-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v32, v31 op_sel:[1,0,0] op_sel_hi:[1,0,0]
; SDAG-NEXT: s_nop 7
; SDAG-NEXT: s_nop 7
; SDAG-NEXT: s_nop 3
@@ -567,7 +567,7 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_3_0__cbsz1__blgp1(<8 x
; GISEL-NEXT: v_accvgpr_write_b32 a14, v30
; GISEL-NEXT: s_waitcnt vmcnt(0)
; GISEL-NEXT: s_nop 0
-; GISEL-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v31, v32 op_sel_hi:[0,0,0]
+; GISEL-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v31, v32 op_sel:[1,0,0] op_sel_hi:[1,0,0]
; GISEL-NEXT: s_nop 7
; GISEL-NEXT: s_nop 7
; GISEL-NEXT: s_nop 3
@@ -619,7 +619,7 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_2_3__cbsz1__blgp1(<8 x
; SDAG-NEXT: v_accvgpr_write_b32 a14, v30
; SDAG-NEXT: s_waitcnt vmcnt(0)
; SDAG-NEXT: s_nop 0
-; SDAG-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v32, v31 op_sel_hi:[0,0,0]
+; SDAG-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v32, v31 op_sel:[0,1,0] op_sel_hi:[1,1,0]
; SDAG-NEXT: s_nop 7
; SDAG-NEXT: s_nop 7
; SDAG-NEXT: s_nop 3
@@ -664,7 +664,7 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_2_3__cbsz1__blgp1(<8 x
; GISEL-NEXT: v_accvgpr_write_b32 a14, v30
; GISEL-NEXT: s_waitcnt vmcnt(0)
; GISEL-NEXT: s_nop 0
-; GISEL-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v31, v32 op_sel_hi:[0,0,0]
+; GISEL-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v31, v32 op_sel:[0,1,0] op_sel_hi:[1,1,0]
; GISEL-NEXT: s_nop 7
; GISEL-NEXT: s_nop 7
; GISEL-NEXT: s_nop 3
@@ -716,7 +716,7 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_3_2__cbsz1__blgp1(<8 x
; SDAG-NEXT: v_accvgpr_write_b32 a14, v30
; SDAG-NEXT: s_waitcnt vmcnt(0)
; SDAG-NEXT: s_nop 0
-; SDAG-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v32, v31 op_sel_hi:[0,0,0]
+; SDAG-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v32, v31 op_sel:[1,0,0] op_sel_hi:[1,1,0]
; SDAG-NEXT: s_nop 7
; SDAG-NEXT: s_nop 7
; SDAG-NEXT: s_nop 3
@@ -761,7 +761,7 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_3_2__cbsz1__blgp1(<8 x
; GISEL-NEXT: v_accvgpr_write_b32 a14, v30
; GISEL-NEXT: s_waitcnt vmcnt(0)
; GISEL-NEXT: s_nop 0
-; GISEL-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v31, v32 op_sel_hi:[0,0,0]
+; GISEL-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v31, v32 op_sel:[1,0,0] op_sel_hi:[1,1,0]
; GISEL-NEXT: s_nop 7
; GISEL-NEXT: s_nop 7
; GISEL-NEXT: s_nop 3
@@ -4135,7 +4135,7 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__scaleA_inlineimm__
; GCN-NEXT: v_accvgpr_write_b32 a14, v30
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_nop 0
-; GCN-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], 33, -2 op_sel_hi:[0,0,0]
+; GCN-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], 33, -2 op_sel_hi:[1,1,0]
; GCN-NEXT: s_nop 7
; GCN-NEXT: s_nop 7
; GCN-NEXT: s_nop 3
@@ -4183,7 +4183,7 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__scaleA_kimm__scale
; SDAG-NEXT: v_accvgpr_write_b32 a14, v30
; SDAG-NEXT: s_waitcnt vmcnt(0)
; SDAG-NEXT: s_nop 0
-; SDAG-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], s0, -2 op_sel_hi:[0,0,0]
+; SDAG-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], s0, -2 op_sel_hi:[1,1,0]
; SDAG-NEXT: s_nop 7
; SDAG-NEXT: s_nop 7
; SDAG-NEXT: s_nop 3
@@ -4227,7 +4227,7 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__scaleA_kimm__scale
; GISEL-NEXT: v_accvgpr_write_b32 a14, v30
; GISEL-NEXT: s_waitcnt vmcnt(0)
; GISEL-NEXT: s_nop 0
-; GISEL-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v31, -2 op_sel_hi:[0,0,0]
+; GISEL-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v31, -2 op_sel_hi:[1,1,0]
; GISEL-NEXT: s_nop 7
; GISEL-NEXT: s_nop 7
; GISEL-NEXT: s_nop 3
@@ -4276,7 +4276,7 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__scaleA_kimm__scale
; SDAG-NEXT: v_accvgpr_write_b32 a14, v30
; SDAG-NEXT: s_waitcnt vmcnt(0)
; SDAG-NEXT: s_nop 0
-; SDAG-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], s0, v31 op_sel_hi:[0,0,0]
+; SDAG-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], s0, v31 op_sel_hi:[1,1,0]
; SDAG-NEXT: s_nop 7
; SDAG-NEXT: s_nop 7
; SDAG-NEXT: s_nop 3
@@ -4321,7 +4321,7 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__scaleA_kimm__scale
; GISEL-NEXT: v_accvgpr_write_b32 a14, v30
; GISEL-NEXT: s_waitcnt vmcnt(0)
; GISEL-NEXT: s_nop 0
-; GISEL-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v31, v32 op_sel_hi:[0,0,0]
+; GISEL-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v31, v32 op_sel_hi:[1,1,0]
; GISEL-NEXT: s_nop 7
; GISEL-NEXT: s_nop 7
; GISEL-NEXT: s_nop 3
@@ -4387,7 +4387,7 @@ define amdgpu_kernel void @test_mfma_scale_f32_32x32x64_f8f6f4__vgprcd(<8 x i32>
; SDAG-NEXT: v_accvgpr_write_b32 a15, s51
; SDAG-NEXT: v_mov_b32_e32 v16, s1
; SDAG-NEXT: s_nop 1
-; SDAG-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], s0, v16 op_sel_hi:[0,0,0] blgp:2
+; SDAG-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], s0, v16 op_sel:[1,1,0] op_sel_hi:[1,0,0] blgp:2
; SDAG-NEXT: v_mov_b32_e32 v0, 0
; SDAG-NEXT: s_nop 7
; SDAG-NEXT: s_nop 7
@@ -4430,7 +4430,7 @@ define amdgpu_kernel void @test_mfma_scale_f32_32x32x64_f8f6f4__vgprcd(<8 x i32>
; GISEL-NEXT: v_accvgpr_write_b32 a15, s51
; GISEL-NEXT: v_mov_b32_e32 v16, s1
; GISEL-NEXT: s_nop 1
-; GISEL-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], s0, v16 op_sel_hi:[0,0,0] blgp:2
+; GISEL-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], s0, v16 op_sel:[1,1,0] op_sel_hi:[1,0,0] blgp:2
; GISEL-NEXT: v_mov_b32_e32 v0, 0
; GISEL-NEXT: s_nop 7
; GISEL-NEXT: s_nop 7
@@ -4486,7 +4486,7 @@ define amdgpu_kernel void @test_mfma_scale_f32_32x32x64_f8f6f4__vgprcd___scaleA_
; SDAG-NEXT: v_accvgpr_write_b32 a14, s50
; SDAG-NEXT: v_accvgpr_write_b32 a15, s51
; SDAG-NEXT: s_nop 1
-; SDAG-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], s2, -2 op_sel_hi:[0,0,0] blgp:2
+; SDAG-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], s2, -2 op_sel:[1,1,0] op_sel_hi:[1,0,0] blgp:2
; SDAG-NEXT: v_mov_b32_e32 v0, 0
; SDAG-NEXT: s_nop 7
; SDAG-NEXT: s_nop 7
@@ -4529,7 +4529,7 @@ define amdgpu_kernel void @test_mfma_scale_f32_32x32x64_f8f6f4__vgprcd___scaleA_
; GISEL-NEXT: v_accvgpr_write_b32 a14, s50
; GISEL-NEXT: v_accvgpr_write_b32 a15, s51
; GISEL-NEXT: s_nop 1
-; GISEL-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v16, -2 op_sel_hi:[0,0,0] blgp:2
+; GISEL-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v16, -2 op_sel:[1,1,0] op_sel_hi:[1,0,0] blgp:2
; GISEL-NEXT: v_mov_b32_e32 v0, 0
; GISEL-NEXT: s_nop 7
; GISEL-NEXT: s_nop 7
diff --git a/llvm/test/MC/AMDGPU/mai-gfx950.s b/llvm/test/MC/AMDGPU/mai-gfx950.s
index dd090cb73e56d..cf8ca70c07a3f 100644
--- a/llvm/test/MC/AMDGPU/mai-gfx950.s
+++ b/llvm/test/MC/AMDGPU/mai-gfx950.s
@@ -375,7 +375,7 @@ v_mfma_f32_32x32x64_f8f6f4 a[0:15], a[4:11], a[4:9], a[0:15] cbsz:1 blgp:3
//===----------------------------------------------------------------------===//
// v_mfma_scale_f32_16x16x128_f8f6f4
//===----------------------------------------------------------------------===//
-// FIXME: Test op_sel, neg, clamp
+// FIXME: Test neg, clamp
// GFX950: v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[12:19], v[20:23], v24, v25 op_sel_hi:[0,0,0] ; encoding: [0x00,0x00,0xac,0xd3,0x18,0x33,0x02,0x00,0x00,0x08,0xad,0xd3,0x04,0x19,0x52,0x04]
// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
@@ -445,30 +445,81 @@ v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[12:19], v[20:23], 33, 9
// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:9], v[12:19], v[20:23], v24, v25 cbsz:3 blgp:1
-// GFX950: v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:9], v[12:19], v[20:23], v24, v25 op_sel_hi:[0,0,0] cbsz:3 blgp:1 ; encoding: [0x00,0x00,0xac,0xd3,0x18,0x33,0x02,0x08,0x00,0x0b,0xad,0xd3,0x04,0x19,0x52,0x24]
+// GFX950: v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:9], v[12:19], v[20:23], v24, v25 op_sel:[0,1,0] op_sel_hi:[0,0,0] cbsz:3 blgp:1 ; encoding: [0x00,0x10,0xac,0xd3,0x18,0x33,0x02,0x00,0x00,0x0b,0xad,0xd3,0x04,0x19,0x52,0x24]
// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:9], v[12:19], v[20:23], v24, v25 op_sel:[0,1,0] cbsz:3 blgp:1
-// GFX950: v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:9], v[12:19], v[20:23], v24, v25 op_sel_hi:[0,0,0] cbsz:3 blgp:1 ; encoding: [0x00,0x00,0xac,0xd3,0x18,0x33,0x02,0x18,0x00,0x0b,0xad,0xd3,0x04,0x19,0x52,0x24]
+// GFX950: v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:9], v[12:19], v[20:23], v24, v25 op_sel:[0,1,0] op_sel_hi:[0,1,0] cbsz:3 blgp:1 ; encoding: [0x00,0x10,0xac,0xd3,0x18,0x33,0x02,0x10,0x00,0x0b,0xad,0xd3,0x04,0x19,0x52,0x24]
// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:9], v[12:19], v[20:23], v24, v25 op_sel:[0,1,0] op_sel_hi:[0,1,0] cbsz:3 blgp:1
-// GFX950: v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[12:17], v[20:23], v24, v25 op_sel_hi:[0,0,0] blgp:2 ; encoding: [0x00,0x00,0xac,0xd3,0x18,0x33,0x02,0x18,0x00,0x08,0xad,0xd3,0x04,0x19,0x52,0x44]
+// GFX950: v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[12:17], v[20:23], v24, v25 op_sel:[0,1,0] op_sel_hi:[0,1,0] blgp:2 ; encoding: [0x00,0x10,0xac,0xd3,0x18,0x33,0x02,0x10,0x00,0x08,0xad,0xd3,0x04,0x19,0x52,0x44]
// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[12:17], v[20:23], v24, v25 op_sel:[0,1,0] op_sel_hi:[0,1,0] blgp:2
-// GFX950: v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[12:19], v[20:23], v24, v25 op_sel_hi:[0,0,0] ; encoding: [0x00,0x00,0xac,0xd3,0x18,0x33,0x02,0x18,0x00,0x08,0xad,0xd3,0x04,0x19,0x52,0x04]
+// GFX950: v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[12:19], v[20:23], v24, v25 op_sel:[0,1,0] op_sel_hi:[0,1,0] ; encoding: [0x00,0x10,0xac,0xd3,0x18,0x33,0x02,0x10,0x00,0x08,0xad,0xd3,0x04,0x19,0x52,0x04]
// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[12:19], v[20:23], v24, v25 op_sel:[0,1,0] op_sel_hi:[0,1,0]
-// GFX950: v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:9], v[12:19], v[20:23], v24, v25 op_sel_hi:[0,0,0] cbsz:3 ; encoding: [0x00,0x00,0xac,0xd3,0x18,0x33,0x02,0x18,0x00,0x0b,0xad,0xd3,0x04,0x19,0x52,0x04]
+// GFX950: v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:9], v[12:19], v[20:23], v24, v25 op_sel:[0,1,0] op_sel_hi:[0,1,0] cbsz:3 ; encoding: [0x00,0x10,0xac,0xd3,0x18,0x33,0x02,0x10,0x00,0x0b,0xad,0xd3,0x04,0x19,0x52,0x04]
// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:9], v[12:19], v[20:23], v24, v25 op_sel:[0,1,0] op_sel_hi:[0,1,0] cbsz:3
+// op_sel combinations
+
+// GFX950: v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:9], v[12:19], v[20:23], v24, v25 op_sel:[0,1,0] op_sel_hi:[0,0,0] cbsz:3 blgp:1 ; encoding: [0x00,0x10,0xac,0xd3,0x18,0x33,0x02,0x00,0x00,0x0b,0xad,0xd3,0x04,0x19,0x52,0x24]
+// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:9], v[12:19], v[20:23], v24, v25 op_sel:[0,1,0] cbsz:3 blgp:1
+
+// GFX950: v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:9], v[12:19], v[20:23], v24, v25 op_sel:[1,0,0] op_sel_hi:[0,0,0] cbsz:3 blgp:1 ; encoding: [0x00,0x08,0xac,0xd3,0x18,0x33,0x02,0x00,0x00,0x0b,0xad,0xd3,0x04,0x19,0x52,0x24]
+// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:9], v[12:19], v[20:23], v24, v25 op_sel:[1,0,0] cbsz:3 blgp:1
+
+// GFX950: v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:9], v[12:19], v[20:23], v24, v25 op_sel:[1,1,0] op_sel_hi:[0,0,0] cbsz:3 blgp:1 ; encoding: [0x00,0x18,0xac,0xd3,0x18,0x33,0x02,0x00,0x00,0x0b,0xad,0xd3,0x04,0x19,0x52,0x24]
+// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:9], v[12:19], v[20:23], v24, v25 op_sel:[1,1,0] cbsz:3 blgp:1
+
+// GFX950: v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:9], v[12:19], v[20:23], v24, v25 op_sel_hi:[0,1,0] cbsz:3 blgp:1 ; encoding: [0x00,0x00,0xac,0xd3,0x18,0x33,0x02,0x10,0x00,0x0b,0xad,0xd3,0x04,0x19,0x52,0x24]
+// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:9], v[12:19], v[20:23], v24, v25 op_sel_hi:[0,1,0] cbsz:3 blgp:1
+
+// GFX950: v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:9], v[12:19], v[20:23], v24, v25 op_sel_hi:[1,0,0] cbsz:3 blgp:1 ; encoding: [0x00,0x00,0xac,0xd3,0x18,0x33,0x02,0x08,0x00,0x0b,0xad,0xd3,0x04,0x19,0x52,0x24]
+// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:9], v[12:19], v[20:23], v24, v25 op_sel_hi:[1,0,0] cbsz:3 blgp:1
+
+// GFX950: v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:9], v[12:19], v[20:23], v24, v25 op_sel_hi:[1,1,0] cbsz:3 blgp:1 ; encoding: [0x00,0x00,0xac,0xd3,0x18,0x33,0x02,0x18,0x00,0x0b,0xad,0xd3,0x04,0x19,0x52,0x24]
+// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:9], v[12:19], v[20:23], v24, v25 op_sel_hi:[1,1,0] cbsz:3 blgp:1
+
+// GFX950: v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:9], v[12:19], v[20:23], v24, v25 op_sel:[0,1,0] op_sel_hi:[1,1,0] cbsz:3 blgp:1 ; encoding: [0x00,0x10,0xac,0xd3,0x18,0x33,0x02,0x18,0x00,0x0b,0xad,0xd3,0x04,0x19,0x52,0x24]
+// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:9], v[12:19], v[20:23], v24, v25 op_sel:[0,1,0] op_sel_hi:[1,1,0] cbsz:3 blgp:1
+
+// GFX950: v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:9], v[12:19], v[20:23], v24, v25 op_sel:[0,1,0] op_sel_hi:[1,0,0] cbsz:3 blgp:1 ; encoding: [0x00,0x10,0xac,0xd3,0x18,0x33,0x02,0x08,0x00,0x0b,0xad,0xd3,0x04,0x19,0x52,0x24]
+// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:9], v[12:19], v[20:23], v24, v25 op_sel:[0,1,0] op_sel_hi:[1,0,0] cbsz:3 blgp:1
+
+// GFX950: v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:9], v[12:19], v[20:23], v24, v25 op_sel:[0,1,0] op_sel_hi:[0,1,0] cbsz:3 blgp:1 ; encoding: [0x00,0x10,0xac,0xd3,0x18,0x33,0x02,0x10,0x00,0x0b,0xad,0xd3,0x04,0x19,0x52,0x24]
+// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:9], v[12:19], v[20:23], v24, v25 op_sel:[0,1,0] op_sel_hi:[0,1,0] cbsz:3 blgp:1
+
+// GFX950: v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:9], v[12:19], v[20:23], v24, v25 op_sel:[1,0,0] op_sel_hi:[0,1,0] cbsz:3 blgp:1 ; encoding: [0x00,0x08,0xac,0xd3,0x18,0x33,0x02,0x10,0x00,0x0b,0xad,0xd3,0x04,0x19,0x52,0x24]
+// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:9], v[12:19], v[20:23], v24, v25 op_sel:[1,0,0] op_sel_hi:[0,1,0] cbsz:3 blgp:1
+
+// GFX950: v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:9], v[12:19], v[20:23], v24, v25 op_sel:[1,0,0] op_sel_hi:[1,0,0] cbsz:3 blgp:1 ; encoding: [0x00,0x08,0xac,0xd3,0x18,0x33,0x02,0x08,0x00,0x0b,0xad,0xd3,0x04,0x19,0x52,0x24]
+// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:9], v[12:19], v[20:23], v24, v25 op_sel:[1,0,0] op_sel_hi:[1,0,0] cbsz:3 blgp:1
+
+// GFX950: v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:9], v[12:19], v[20:23], v24, v25 op_sel:[1,0,0] op_sel_hi:[1,1,0] cbsz:3 blgp:1 ; encoding: [0x00,0x08,0xac,0xd3,0x18,0x33,0x02,0x18,0x00,0x0b,0xad,0xd3,0x04,0x19,0x52,0x24]
+// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:9], v[12:19], v[20:23], v24, v25 op_sel:[1,0,0] op_sel_hi:[1,1,0] cbsz:3 blgp:1
+
+
//===----------------------------------------------------------------------===//
// v_mfma_scale_f32_32x32x64_f8f6f4
//===----------------------------------------------------------------------===//
-// FIXME: Test op_sel, neg, clamp
+// FIXME: Test neg, clamp
// GFX950: v_mfma_scale_f32_32x32x64_f8f6f4 v[0:15], v[16:23], v[24:31], v[32:47], v48, v49 op_sel_hi:[0,0,0] ; encoding: [0x00,0x00,0xac,0xd3,0x30,0x63,0x02,0x00,0x00,0x08,0xae,0xd3,0x10,0x31,0x82,0x04]
// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
@@ -514,10 +565,61 @@ v_mfma_scale_f32_32x32x64_f8f6f4 v[0:15], v[16:23], v[24:31], v[32:47], v48, v49
// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
v_mfma_scale_f32_32x32x64_f8f6f4 v[0:15], v[16:23], v[24:29], v[32:47], v48, v49 blgp:2
-// GFX950: v_mfma_scale_f32_32x32x64_f8f6f4 v[0:15], v[16:21], v[24:29], v[32:47], v48, v49 op_sel_hi:[0,0,0] cbsz:2 blgp:3 ; encoding: [0x00,0x00,0xac,0xd3,0x30,0x63,0x02,0x18,0x00,0x0a,0xae,0xd3,0x10,0x31,0x82,0x64]
+// GFX950: v_mfma_scale_f32_32x32x64_f8f6f4 v[0:15], v[16:21], v[24:29], v[32:47], v48, v49 op_sel:[0,1,0] op_sel_hi:[0,1,0] cbsz:2 blgp:3 ; encoding: [0x00,0x10,0xac,0xd3,0x30,0x63,0x02,0x10,0x00,0x0a,0xae,0xd3,0x10,0x31,0x82,0x64]
// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
v_mfma_scale_f32_32x32x64_f8f6f4 v[0:15], v[16:21], v[24:29], v[32:47], v48, v49 op_sel:[0,1,0] op_sel_hi:[0,1,0] cbsz:2 blgp:3
+// op_sel combinations
+
+// GFX950: v_mfma_scale_f32_32x32x64_f8f6f4 v[0:15], v[16:23], v[24:31], v[32:47], v48, v49 op_sel:[0,1,0] op_sel_hi:[0,0,0] ; encoding: [0x00,0x10,0xac,0xd3,0x30,0x63,0x02,0x00,0x00,0x08,0xae,0xd3,0x10,0x31,0x82,0x04]
+// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+v_mfma_scale_f32_32x32x64_f8f6f4 v[0:15], v[16:23], v[24:31], v[32:47], v48, v49 op_sel:[0,1,0]
+
+// GFX950: v_mfma_scale_f32_32x32x64_f8f6f4 v[0:15], v[16:23], v[24:31], v[32:47], v48, v49 op_sel:[1,0,0] op_sel_hi:[0,0,0] ; encoding: [0x00,0x08,0xac,0xd3,0x30,0x63,0x02,0x00,0x00,0x08,0xae,0xd3,0x10,0x31,0x82,0x04]
+// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+v_mfma_scale_f32_32x32x64_f8f6f4 v[0:15], v[16:23], v[24:31], v[32:47], v48, v49 op_sel:[1,0,0]
+
+// GFX950: v_mfma_scale_f32_32x32x64_f8f6f4 v[0:15], v[16:23], v[24:31], v[32:47], v48, v49 op_sel:[1,1,0] op_sel_hi:[0,0,0] ; encoding: [0x00,0x18,0xac,0xd3,0x30,0x63,0x02,0x00,0x00,0x08,0xae,0xd3,0x10,0x31,0x82,0x04]
+// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+v_mfma_scale_f32_32x32x64_f8f6f4 v[0:15], v[16:23], v[24:31], v[32:47], v48, v49 op_sel:[1,1,0]
+
+// GFX950: v_mfma_scale_f32_32x32x64_f8f6f4 v[0:15], v[16:23], v[24:31], v[32:47], v48, v49 op_sel_hi:[0,1,0] ; encoding: [0x00,0x00,0xac,0xd3,0x30,0x63,0x02,0x10,0x00,0x08,0xae,0xd3,0x10,0x31,0x82,0x04]
+// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+v_mfma_scale_f32_32x32x64_f8f6f4 v[0:15], v[16:23], v[24:31], v[32:47], v48, v49 op_sel_hi:[0,1,0]
+
+// GFX950: v_mfma_scale_f32_32x32x64_f8f6f4 v[0:15], v[16:23], v[24:31], v[32:47], v48, v49 op_sel_hi:[1,0,0] ; encoding: [0x00,0x00,0xac,0xd3,0x30,0x63,0x02,0x08,0x00,0x08,0xae,0xd3,0x10,0x31,0x82,0x04]
+// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+v_mfma_scale_f32_32x32x64_f8f6f4 v[0:15], v[16:23], v[24:31], v[32:47], v48, v49 op_sel_hi:[1,0,0]
+
+// GFX950: v_mfma_scale_f32_32x32x64_f8f6f4 v[0:15], v[16:23], v[24:31], v[32:47], v48, v49 op_sel_hi:[1,1,0] ; encoding: [0x00,0x00,0xac,0xd3,0x30,0x63,0x02,0x18,0x00,0x08,0xae,0xd3,0x10,0x31,0x82,0x04]
+// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+v_mfma_scale_f32_32x32x64_f8f6f4 v[0:15], v[16:23], v[24:31], v[32:47], v48, v49 op_sel_hi:[1,1,0]
+
+// GFX950: v_mfma_scale_f32_32x32x64_f8f6f4 v[0:15], v[16:23], v[24:31], v[32:47], v48, v49 op_sel:[0,1,0] op_sel_hi:[0,1,0] ; encoding: [0x00,0x10,0xac,0xd3,0x30,0x63,0x02,0x10,0x00,0x08,0xae,0xd3,0x10,0x31,0x82,0x04]
+// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+v_mfma_scale_f32_32x32x64_f8f6f4 v[0:15], v[16:23], v[24:31], v[32:47], v48, v49 op_sel:[0,1,0] op_sel_hi:[0,1,0]
+
+// GFX950: v_mfma_scale_f32_32x32x64_f8f6f4 v[0:15], v[16:23], v[24:31], v[32:47], v48, v49 op_sel:[0,1,0] op_sel_hi:[1,1,0] ; encoding: [0x00,0x10,0xac,0xd3,0x30,0x63,0x02,0x18,0x00,0x08,0xae,0xd3,0x10,0x31,0x82,0x04]
+// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+v_mfma_scale_f32_32x32x64_f8f6f4 v[0:15], v[16:23], v[24:31], v[32:47], v48, v49 op_sel:[0,1,0] op_sel_hi:[1,1,0]
+
+// GFX950: v_mfma_scale_f32_32x32x64_f8f6f4 v[0:15], v[16:23], v[24:31], v[32:47], v48, v49 op_sel:[0,1,0] op_sel_hi:[1,0,0] ; encoding: [0x00,0x10,0xac,0xd3,0x30,0x63,0x02,0x08,0x00,0x08,0xae,0xd3,0x10,0x31,0x82,0x04]
+// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+v_mfma_scale_f32_32x32x64_f8f6f4 v[0:15], v[16:23], v[24:31], v[32:47], v48, v49 op_sel:[0,1,0] op_sel_hi:[1,0,0]
+
+// GFX950: v_mfma_scale_f32_32x32x64_f8f6f4 v[0:15], v[16:23], v[24:31], v[32:47], v48, v49 op_sel:[1,0,0] op_sel_hi:[0,1,0] ; encoding: [0x00,0x08,0xac,0xd3,0x30,0x63,0x02,0x10,0x00,0x08,0xae,0xd3,0x10,0x31,0x82,0x04]
+// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+v_mfma_scale_f32_32x32x64_f8f6f4 v[0:15], v[16:23], v[24:31], v[32:47], v48, v49 op_sel:[1,0,0] op_sel_hi:[0,1,0]
+
+// GFX950: v_mfma_scale_f32_32x32x64_f8f6f4 v[0:15], v[16:23], v[24:31], v[32:47], v48, v49 op_sel:[1,0,0] op_sel_hi:[1,0,0] ; encoding: [0x00,0x08,0xac,0xd3,0x30,0x63,0x02,0x08,0x00,0x08,0xae,0xd3,0x10,0x31,0x82,0x04]
+// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+v_mfma_scale_f32_32x32x64_f8f6f4 v[0:15], v[16:23], v[24:31], v[32:47], v48, v49 op_sel:[1,0,0] op_sel_hi:[1,0,0]
+
+// GFX950: v_mfma_scale_f32_32x32x64_f8f6f4 v[0:15], v[16:23], v[24:31], v[32:47], v48, v49 op_sel:[1,0,0] op_sel_hi:[1,1,0] ; encoding: [0x00,0x08,0xac,0xd3,0x30,0x63,0x02,0x18,0x00,0x08,0xae,0xd3,0x10,0x31,0x82,0x04]
+// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+v_mfma_scale_f32_32x32x64_f8f6f4 v[0:15], v[16:23], v[24:31], v[32:47], v48, v49 op_sel:[1,0,0] op_sel_hi:[1,1,0]
+
+
//===----------------------------------------------------------------------===//
// v_mfma_f32_16x16x128_f8f6f4 with appropriate register widths
//===----------------------------------------------------------------------===//
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx950_mai.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx950_mai.txt
index 8adc8b79fbbf5..accd1cc8db03d 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx950_mai.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx950_mai.txt
@@ -386,7 +386,7 @@
# GFX950: v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[12:15], v[20:23], v24, v25 op_sel_hi:[0,0,0] cbsz:1 blgp:4 ; encoding: [0x00,0x00,0xac,0xd3,0x18,0x33,0x02,0x00,0x00,0x09,0xad,0xd3,0x04,0x19,0x52,0x84]
0x00,0x00,0xac,0xd3,0x18,0x33,0x02,0x00,0x00,0x09,0xad,0xd3,0x04,0x19,0x52,0x84
-# GFX950: v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[12:17], v[20:23], v24, v25 op_sel_hi:[0,0,0] blgp:2 ; encoding: [0x00,0x00,0xac,0xd3,0x18,0x33,0x02,0x18,0x00,0x08,0xad,0xd3,0x04,0x19,0x52,0x44]
+# GFX950: v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[12:17], v[20:23], v24, v25 op_sel_hi:[1,1,0] blgp:2 ; encoding: [0x00,0x00,0xac,0xd3,0x18,0x33,0x02,0x18,0x00,0x08,0xad,0xd3,0x04,0x19,0x52,0x44]
0x00,0x00,0xac,0xd3,0x18,0x33,0x02,0x18,0x00,0x08,0xad,0xd3,0x04,0x19,0x52,0x44
# GFX950: v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[12:17], v[20:23], v24, v25 op_sel_hi:[0,0,0] cbsz:1 blgp:3 ; encoding: [0x00,0x00,0xac,0xd3,0x18,0x33,0x02,0x00,0x00,0x09,0xad,0xd3,0x04,0x19,0x52,0x64]
@@ -416,7 +416,7 @@
# GFX950: v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[12:19], v[20:23], v24, v25 op_sel_hi:[0,0,0] ; encoding: [0x00,0x00,0xac,0xd3,0x18,0x33,0x02,0x00,0x00,0x08,0xad,0xd3,0x04,0x19,0x52,0x04]
0x00,0x00,0xac,0xd3,0x18,0x33,0x02,0x00,0x00,0x08,0xad,0xd3,0x04,0x19,0x52,0x04
-# GFX950: v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[12:19], v[20:23], v24, v25 op_sel_hi:[0,0,0] ; encoding: [0x00,0x00,0xac,0xd3,0x18,0x33,0x02,0x18,0x00,0x08,0xad,0xd3,0x04,0x19,0x52,0x04]
+# GFX950: v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[12:19], v[20:23], v24, v25 op_sel_hi:[1,1,0] ; encoding: [0x00,0x00,0xac,0xd3,0x18,0x33,0x02,0x18,0x00,0x08,0xad,0xd3,0x04,0x19,0x52,0x04]
0x00,0x00,0xac,0xd3,0x18,0x33,0x02,0x18,0x00,0x08,0xad,0xd3,0x04,0x19,0x52,0x04
# GFX950: v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[12:19], v[20:23], v24, v25 op_sel_hi:[0,0,0] blgp:1 ; encoding: [0x00,0x00,0xac,0xd3,0x18,0x33,0x02,0x00,0x00,0x08,0xad,0xd3,0x04,0x19,0x52,0x24]
@@ -452,16 +452,16 @@
# GFX950: v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:9], v[12:17], v[20:23], v24, v25 op_sel_hi:[0,0,0] cbsz:3 blgp:3 ; encoding: [0x00,0x00,0xac,0xd3,0x18,0x33,0x02,0x00,0x00,0x0b,0xad,0xd3,0x04,0x19,0x52,0x64]
0x00,0x00,0xac,0xd3,0x18,0x33,0x02,0x00,0x00,0x0b,0xad,0xd3,0x04,0x19,0x52,0x64
-# GFX950: v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:9], v[12:19], v[20:23], v24, v25 op_sel_hi:[0,0,0] cbsz:3 ; encoding: [0x00,0x00,0xac,0xd3,0x18,0x33,0x02,0x18,0x00,0x0b,0xad,0xd3,0x04,0x19,0x52,0x04]
+# GFX950: v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:9], v[12:19], v[20:23], v24, v25 op_sel_hi:[1,1,0] cbsz:3 ; encoding: [0x00,0x00,0xac,0xd3,0x18,0x33,0x02,0x18,0x00,0x0b,0xad,0xd3,0x04,0x19,0x52,0x04]
0x00,0x00,0xac,0xd3,0x18,0x33,0x02,0x18,0x00,0x0b,0xad,0xd3,0x04,0x19,0x52,0x04
# GFX950: v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:9], v[12:19], v[20:23], v24, v25 op_sel_hi:[0,0,0] cbsz:3 blgp:1 ; encoding: [0x00,0x00,0xac,0xd3,0x18,0x33,0x02,0x00,0x00,0x0b,0xad,0xd3,0x04,0x19,0x52,0x24]
0x00,0x00,0xac,0xd3,0x18,0x33,0x02,0x00,0x00,0x0b,0xad,0xd3,0x04,0x19,0x52,0x24
-# GFX950: v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:9], v[12:19], v[20:23], v24, v25 op_sel_hi:[0,0,0] cbsz:3 blgp:1 ; encoding: [0x00,0x00,0xac,0xd3,0x18,0x33,0x02,0x08,0x00,0x0b,0xad,0xd3,0x04,0x19,0x52,0x24]
+# GFX950: v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:9], v[12:19], v[20:23], v24, v25 op_sel_hi:[1,0,0] cbsz:3 blgp:1 ; encoding: [0x00,0x00,0xac,0xd3,0x18,0x33,0x02,0x08,0x00,0x0b,0xad,0xd3,0x04,0x19,0x52,0x24]
0x00,0x00,0xac,0xd3,0x18,0x33,0x02,0x08,0x00,0x0b,0xad,0xd3,0x04,0x19,0x52,0x24
-# GFX950: v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:9], v[12:19], v[20:23], v24, v25 op_sel_hi:[0,0,0] cbsz:3 blgp:1 ; encoding: [0x00,0x00,0xac,0xd3,0x18,0x33,0x02,0x18,0x00,0x0b,0xad,0xd3,0x04,0x19,0x52,0x24]
+# GFX950: v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:9], v[12:19], v[20:23], v24, v25 op_sel_hi:[1,1,0] cbsz:3 blgp:1 ; encoding: [0x00,0x00,0xac,0xd3,0x18,0x33,0x02,0x18,0x00,0x0b,0xad,0xd3,0x04,0x19,0x52,0x24]
0x00,0x00,0xac,0xd3,0x18,0x33,0x02,0x18,0x00,0x0b,0xad,0xd3,0x04,0x19,0x52,0x24
# GFX950: v_mfma_scale_f32_16x16x128_f8f6f4 v[50:53], v[4:11], v[12:19], v[20:23], v24, v25 op_sel_hi:[0,0,0] ; encoding: [0x00,0x00,0xac,0xd3,0x18,0x33,0x02,0x00,0x32,0x08,0xad,0xd3,0x04,0x19,0x52,0x04]
@@ -530,7 +530,7 @@
# GFX950: v_mfma_scale_f32_32x32x64_f8f6f4 v[0:15], v[16:21], v[24:29], v[32:47], v48, v49 op_sel_hi:[0,0,0] cbsz:2 blgp:3 ; encoding: [0x00,0x00,0xac,0xd3,0x30,0x63,0x02,0x00,0x00,0x0a,0xae,0xd3,0x10,0x31,0x82,0x64]
0x00,0x00,0xac,0xd3,0x30,0x63,0x02,0x00,0x00,0x0a,0xae,0xd3,0x10,0x31,0x82,0x64
-# GFX950: v_mfma_scale_f32_32x32x64_f8f6f4 v[0:15], v[16:21], v[24:29], v[32:47], v48, v49 op_sel_hi:[0,0,0] cbsz:2 blgp:3 ; encoding: [0x00,0x00,0xac,0xd3,0x30,0x63,0x02,0x18,0x00,0x0a,0xae,0xd3,0x10,0x31,0x82,0x64]
+# GFX950: v_mfma_scale_f32_32x32x64_f8f6f4 v[0:15], v[16:21], v[24:29], v[32:47], v48, v49 op_sel_hi:[1,1,0] cbsz:2 blgp:3 ; encoding: [0x00,0x00,0xac,0xd3,0x30,0x63,0x02,0x18,0x00,0x0a,0xae,0xd3,0x10,0x31,0x82,0x64]
0x00,0x00,0xac,0xd3,0x30,0x63,0x02,0x18,0x00,0x0a,0xae,0xd3,0x10,0x31,0x82,0x64
# GFX950: v_mfma_scale_f32_32x32x64_f8f6f4 v[0:15], v[16:21], v[24:29], v[32:47], v48, v49 op_sel_hi:[0,0,0] cbsz:3 blgp:2 ; encoding: [0x00,0x00,0xac,0xd3,0x30,0x63,0x02,0x00,0x00,0x0b,0xae,0xd3,0x10,0x31,0x82,0x44]
More information about the llvm-commits
mailing list