[llvm] 8f8e4e3 - [AMDGPU][MC][GFX11] Correct v_fmac_.*_e64_dpp
Dmitry Preobrazhensky via llvm-commits
llvm-commits at lists.llvm.org
Fri Oct 7 06:22:11 PDT 2022
Author: Dmitry Preobrazhensky
Date: 2022-10-07T16:21:55+03:00
New Revision: 8f8e4e3b38c4fdb2bc5336bf324af14c2ab61509
URL: https://github.com/llvm/llvm-project/commit/8f8e4e3b38c4fdb2bc5336bf324af14c2ab61509
DIFF: https://github.com/llvm/llvm-project/commit/8f8e4e3b38c4fdb2bc5336bf324af14c2ab61509.diff
LOG: [AMDGPU][MC][GFX11] Correct v_fmac_.*_e64_dpp
Differential Revision: https://reviews.llvm.org/D134961
Added:
Modified:
llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
llvm/lib/Target/AMDGPU/VOP2Instructions.td
llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s
llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8.s
llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16.txt
llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8.txt
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index db2e331ddd37..a4e9871fd4e1 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -8863,18 +8863,43 @@ AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const {
return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi);
}
-void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
+void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
+ bool IsDPP8) {
OptionalImmIndexMap OptionalIdx;
unsigned Opc = Inst.getOpcode();
- bool HasModifiers = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1;
- unsigned I = 1;
const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
+ bool HasModifiers =
+ AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1;
+
+ // MAC instructions are special because they have 'old'
+ // operand which is not tied to dst (but assumed to be).
+ // They also have dummy unused src2_modifiers.
+ int OldIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::old);
+ int Src2ModIdx =
+ AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers);
+ bool IsMAC = OldIdx != -1 && Src2ModIdx != -1 &&
+ Desc.getOperandConstraint(OldIdx, MCOI::TIED_TO) == -1;
+
+ unsigned I = 1;
for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
}
int Fi = 0;
for (unsigned E = Operands.size(); I != E; ++I) {
+
+ if (IsMAC) {
+ int NumOperands = Inst.getNumOperands();
+ if (OldIdx == NumOperands) {
+ // Handle old operand
+ constexpr int DST_IDX = 0;
+ Inst.addOperand(Inst.getOperand(DST_IDX));
+ } else if (Src2ModIdx == NumOperands) {
+ // Add unused dummy src2_modifiers
+ Inst.addOperand(MCOperand::createImm(0));
+ }
+ }
+
auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
MCOI::TIED_TO);
if (TiedTo != -1) {
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index fdfb2beb16e9..7e11cd42c806 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -794,37 +794,73 @@ static VOPModifiers collectVOPModifiers(const MCInst &MI,
return Modifiers;
}
+// MAC opcodes have special old and src2 operands.
+// src2 is tied to dst, while old is not tied (but assumed to be).
+bool AMDGPUDisassembler::isMacDPP(MCInst &MI) const {
+ constexpr int DST_IDX = 0;
+ auto Opcode = MI.getOpcode();
+ const auto &Desc = MCII->get(Opcode);
+ auto OldIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::old);
+
+ if (OldIdx != -1 && Desc.getOperandConstraint(
+ OldIdx, MCOI::OperandConstraint::TIED_TO) == -1) {
+ assert(AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2) != -1);
+ assert(Desc.getOperandConstraint(
+ AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2),
+ MCOI::OperandConstraint::TIED_TO) == DST_IDX);
+ return true;
+ }
+
+ return false;
+}
+
+// Create dummy old operand and insert dummy unused src2_modifiers
+void AMDGPUDisassembler::convertMacDPPInst(MCInst &MI) const {
+ assert(MI.getNumOperands() + 1 < MCII->get(MI.getOpcode()).getNumOperands());
+ insertNamedMCOperand(MI, MCOperand::createReg(0), AMDGPU::OpName::old);
+ insertNamedMCOperand(MI, MCOperand::createImm(0),
+ AMDGPU::OpName::src2_modifiers);
+}
+
// We must check FI == literal to reject not genuine dpp8 insts, and we must
// first add optional MI operands to check FI
DecodeStatus AMDGPUDisassembler::convertDPP8Inst(MCInst &MI) const {
unsigned Opc = MI.getOpcode();
- unsigned DescNumOps = MCII->get(Opc).getNumOperands();
if (MCII->get(Opc).TSFlags & SIInstrFlags::VOP3P) {
convertVOP3PDPPInst(MI);
} else if ((MCII->get(Opc).TSFlags & SIInstrFlags::VOPC) ||
AMDGPU::isVOPC64DPP(Opc)) {
convertVOPCDPPInst(MI);
- } else if (MI.getNumOperands() < DescNumOps &&
- AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel) != -1) {
- auto Mods = collectVOPModifiers(MI);
- insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSel),
- AMDGPU::OpName::op_sel);
} else {
- // Insert dummy unused src modifiers.
- if (MI.getNumOperands() < DescNumOps &&
- AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1)
- insertNamedMCOperand(MI, MCOperand::createImm(0),
- AMDGPU::OpName::src0_modifiers);
+ if (isMacDPP(MI))
+ convertMacDPPInst(MI);
+ unsigned DescNumOps = MCII->get(Opc).getNumOperands();
if (MI.getNumOperands() < DescNumOps &&
- AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1_modifiers) != -1)
- insertNamedMCOperand(MI, MCOperand::createImm(0),
- AMDGPU::OpName::src1_modifiers);
+ AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel) != -1) {
+ auto Mods = collectVOPModifiers(MI);
+ insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSel),
+ AMDGPU::OpName::op_sel);
+ } else {
+ // Insert dummy unused src modifiers.
+ if (MI.getNumOperands() < DescNumOps &&
+ AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1)
+ insertNamedMCOperand(MI, MCOperand::createImm(0),
+ AMDGPU::OpName::src0_modifiers);
+
+ if (MI.getNumOperands() < DescNumOps &&
+ AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1_modifiers) != -1)
+ insertNamedMCOperand(MI, MCOperand::createImm(0),
+ AMDGPU::OpName::src1_modifiers);
+ }
}
return isValidDPP8(MI) ? MCDisassembler::Success : MCDisassembler::SoftFail;
}
DecodeStatus AMDGPUDisassembler::convertVOP3DPPInst(MCInst &MI) const {
+ if (isMacDPP(MI))
+ convertMacDPPInst(MI);
+
unsigned Opc = MI.getOpcode();
unsigned DescNumOps = MCII->get(Opc).getNumOperands();
if (MI.getNumOperands() < DescNumOps &&
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
index e987778d667b..d0aef9cdf79d 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
@@ -165,6 +165,7 @@ class AMDGPUDisassembler : public MCDisassembler {
DecodeStatus convertVOP3DPPInst(MCInst &MI) const;
DecodeStatus convertVOP3PDPPInst(MCInst &MI) const;
DecodeStatus convertVOPCDPPInst(MCInst &MI) const;
+ void convertMacDPPInst(MCInst &MI) const;
MCOperand decodeOperand_VGPR_32(unsigned Val) const;
MCOperand decodeOperand_VGPR_32_Lo128(unsigned Val) const;
@@ -260,6 +261,8 @@ class AMDGPUDisassembler : public MCDisassembler {
bool isGFX11Plus() const;
bool hasArchitectedFlatScratch() const;
+
+ bool isMacDPP(MCInst &MI) const;
};
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
index 5710e06f6632..98bc16464a56 100644
--- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
@@ -441,6 +441,11 @@ class VOP_MAC <ValueType vt0, ValueType vt1=vt0> : VOPProfile <[vt0, vt1, vt1, v
let AsmDPP8 = getAsmDPP8<1, 2, 0, vt0>.ret;
let AsmSDWA = getAsmSDWA<1, 2, vt0>.ret;
let AsmSDWA9 = getAsmSDWA9<1, 1, 2, vt0>.ret;
+ let AsmVOP3DPPBase =
+ getAsmVOP3DPPBase<2 /*NumSrcArgs*/, HasDst, HasClamp,
+ HasOpSel, HasOMod, IsVOP3P, HasModifiers,
+ HasModifiers, HasModifiers,
+ 0 /*Src2HasMods*/, DstVT>.ret;
let HasSrc2 = 0;
let HasSrc2Mods = 0;
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s
index 52198e9f2acf..0ae183f50487 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s
@@ -9157,6 +9157,90 @@ v_fma_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x
v_fma_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1
// GFX11: [0xff,0xc7,0x48,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30]
+v_fmac_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0]
+// GFX11: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_fmac_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3]
+// GFX11: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_fmac_f16_e64_dpp v5, v1, v2 row_mirror
+// GFX11: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_fmac_f16_e64_dpp v5, v1, v2 row_half_mirror
+// GFX11: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_fmac_f16_e64_dpp v5, v1, v2 row_shl:1
+// GFX11: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_fmac_f16_e64_dpp v5, v1, v2 row_shl:15
+// GFX11: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_fmac_f16_e64_dpp v5, v1, v2 row_shr:1
+// GFX11: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_fmac_f16_e64_dpp v5, v1, v2 row_shr:15
+// GFX11: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_fmac_f16_e64_dpp v5, v1, v2 row_ror:1
+// GFX11: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+
+v_fmac_f16_e64_dpp v5, v1, v2 row_ror:15
+// GFX11: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+
+v_fmac_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_fmac_f16_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x01,0x36,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01]
+
+v_fmac_f16_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: [0x05,0x02,0x36,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x09,0x13]
+
+v_fmac_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: [0xff,0x83,0x36,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30]
+
+v_fmac_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0]
+// GFX11: [0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_fmac_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3]
+// GFX11: [0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_fmac_f32_e64_dpp v5, v1, v2 row_mirror
+// GFX11: [0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_fmac_f32_e64_dpp v5, v1, v2 row_half_mirror
+// GFX11: [0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_fmac_f32_e64_dpp v5, v1, v2 row_shl:1
+// GFX11: [0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_fmac_f32_e64_dpp v5, v1, v2 row_shl:15
+// GFX11: [0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_fmac_f32_e64_dpp v5, v1, v2 row_shr:1
+// GFX11: [0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_fmac_f32_e64_dpp v5, v1, v2 row_shr:15
+// GFX11: [0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_fmac_f32_e64_dpp v5, v1, v2 row_ror:1
+// GFX11: [0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+
+v_fmac_f32_e64_dpp v5, v1, v2 row_ror:15
+// GFX11: [0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+
+v_fmac_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_fmac_f32_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x01,0x2b,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01]
+
+v_fmac_f32_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: [0x05,0x02,0x2b,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x09,0x13]
+
+v_fmac_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: [0xff,0x83,0x2b,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30]
+
v_mad_i16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf
// GFX11: [0x05,0x78,0x53,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff]
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8.s
index bf96f7e648d4..e9e5321a0ade 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8.s
@@ -3957,6 +3957,30 @@ v_fma_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0]
v_fma_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1
// GFX11: [0xff,0xc7,0x48,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00]
+v_fmac_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x36,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_fmac_f16_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x01,0x36,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05]
+
+v_fmac_f16_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: [0x05,0x02,0x36,0xd5,0xea,0x04,0x02,0x30,0x01,0x77,0x39,0x05]
+
+v_fmac_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: [0xff,0x83,0x36,0xd5,0xe9,0xfe,0x03,0x78,0xff,0x00,0x00,0x00]
+
+v_fmac_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x2b,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_fmac_f32_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x01,0x2b,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05]
+
+v_fmac_f32_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: [0x05,0x02,0x2b,0xd5,0xea,0x04,0x02,0x30,0x01,0x77,0x39,0x05]
+
+v_fmac_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: [0xff,0x83,0x2b,0xd5,0xe9,0xfe,0x03,0x78,0xff,0x00,0x00,0x00]
+
v_mad_i16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0]
// GFX11: [0x05,0x78,0x53,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16.txt
index 938440641641..5f55d30709c8 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16.txt
@@ -7623,6 +7623,90 @@
# GFX11: v_fma_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x48,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30]
0xff,0xc7,0x48,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30
+# GFX11: v_fmac_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff
+
+# GFX11: v_fmac_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff
+
+# GFX11: v_fmac_f16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff
+
+# GFX11: v_fmac_f16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff
+
+# GFX11: v_fmac_f16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff
+
+# GFX11: v_fmac_f16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff
+
+# GFX11: v_fmac_f16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff
+
+# GFX11: v_fmac_f16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff
+
+# GFX11: v_fmac_f16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff
+
+# GFX11: v_fmac_f16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff
+
+# GFX11: v_fmac_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff
+
+# GFX11: v_fmac_f16_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x36,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01]
+0x05,0x01,0x36,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01
+
+# GFX11: v_fmac_f16_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x02,0x36,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x01,0x13]
+0x05,0x02,0x36,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x01,0x13
+
+# GFX11: v_fmac_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x83,0x36,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30]
+0xff,0x83,0x36,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30
+
+# GFX11: v_fmac_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff
+
+# GFX11: v_fmac_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff
+
+# GFX11: v_fmac_f32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff
+
+# GFX11: v_fmac_f32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff
+
+# GFX11: v_fmac_f32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff
+
+# GFX11: v_fmac_f32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff
+
+# GFX11: v_fmac_f32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff
+
+# GFX11: v_fmac_f32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff
+
+# GFX11: v_fmac_f32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff
+
+# GFX11: v_fmac_f32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff
+
+# GFX11: v_fmac_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff
+
+# GFX11: v_fmac_f32_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x2b,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01]
+0x05,0x01,0x2b,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01
+
+# GFX11: v_fmac_f32_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x02,0x2b,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x01,0x13]
+0x05,0x02,0x2b,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x01,0x13
+
+# GFX11: v_fmac_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x83,0x2b,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30]
+0xff,0x83,0x2b,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30
+
# GFX11: v_mad_i16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
0x05,0x00,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8.txt
index 95403ef155a1..e883b565942e 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8.txt
@@ -3003,6 +3003,30 @@
# GFX11: v_fma_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x48,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00]
0xff,0xc7,0x48,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00
+# GFX11: v_fmac_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x36,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+0x05,0x00,0x36,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
+
+# GFX11: v_fmac_f16_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x36,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05]
+0x05,0x01,0x36,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05
+
+# GFX11: v_fmac_f16_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x36,0xd5,0xe9,0x04,0x02,0x30,0x01,0x77,0x39,0x05]
+0x05,0x02,0x36,0xd5,0xe9,0x04,0x02,0x30,0x01,0x77,0x39,0x05
+
+# GFX11: v_fmac_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x83,0x36,0xd5,0xea,0xfe,0x03,0x78,0xff,0x00,0x00,0x00]
+0xff,0x83,0x36,0xd5,0xea,0xfe,0x03,0x78,0xff,0x00,0x00,0x00
+
+# GFX11: v_fmac_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x2b,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+0x05,0x00,0x2b,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
+
+# GFX11: v_fmac_f32_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x2b,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05]
+0x05,0x01,0x2b,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05
+
+# GFX11: v_fmac_f32_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x2b,0xd5,0xe9,0x04,0x02,0x30,0x01,0x77,0x39,0x05]
+0x05,0x02,0x2b,0xd5,0xe9,0x04,0x02,0x30,0x01,0x77,0x39,0x05
+
+# GFX11: v_fmac_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x83,0x2b,0xd5,0xea,0xfe,0x03,0x78,0xff,0x00,0x00,0x00]
+0xff,0x83,0x2b,0xd5,0xea,0xfe,0x03,0x78,0xff,0x00,0x00,0x00
+
# GFX11: v_mad_i16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x53,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
0x05,0x00,0x53,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05
More information about the llvm-commits mailing list