[llvm] 07b7fad - [AMDGPU] gfx11 VOPD instructions MC support
Joe Nash via llvm-commits
llvm-commits at lists.llvm.org
Fri Jun 24 08:38:18 PDT 2022
Author: Joe Nash
Date: 2022-06-24T11:08:39-04:00
New Revision: 07b7fada73da5e371607cf42b60b5d1a2706ad1c
URL: https://github.com/llvm/llvm-project/commit/07b7fada73da5e371607cf42b60b5d1a2706ad1c
DIFF: https://github.com/llvm/llvm-project/commit/07b7fada73da5e371607cf42b60b5d1a2706ad1c.diff
LOG: [AMDGPU] gfx11 VOPD instructions MC support
VOPD is a new encoding for dual-issue instructions for use in wave32.
This patch includes MC layer support only.
A VOPD instruction is constituted of an X component (for which there are
13 possible opcodes) and a Y component (for which there are the 13 X
opcodes plus 3 more). Most of the complexity in defining and parsing
a VOPD operation arises from the possible different total numbers of
operands and deferred parsing of certain operands depending on the
constituent X and Y opcodes.
Reviewed By: dp
Differential Revision: https://reviews.llvm.org/D128218
Added:
llvm/lib/Target/AMDGPU/VOPDInstructions.td
llvm/test/MC/AMDGPU/vopd.s
Modified:
llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
llvm/lib/Target/AMDGPU/SIInstrInfo.td
llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
llvm/lib/Target/AMDGPU/VOP1Instructions.td
llvm/lib/Target/AMDGPU/VOP2Instructions.td
llvm/lib/Target/AMDGPU/VOPInstructions.td
llvm/test/MC/AMDGPU/gfx11_err.s
llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_all.txt
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
index 69fa94ed5b476..31012915457b0 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
@@ -87,6 +87,17 @@ class PredConcat<list<Predicate> lst, Predicate pred> {
!listconcat([pred], !filter(item, lst, !ne(item, pred)));
}
+// Add a Register to the list if does not already exist
+class RegAppend<list<Register> lst, Register reg> {
+ list<Register> ret =
+ !listconcat([reg], !filter(item, lst, !ne(item, reg)));
+}
+// Get the union of two Register lists
+class RegListUnion<list<Register> lstA, list<Register> lstB> {
+ list<Register> ret =
+ !foldl(lstA, lstB, temp, item, RegAppend<temp, item>.ret);
+}
+
class PredicateControl {
Predicate SubtargetPredicate = TruePredicate;
Predicate AssemblerPredicate = TruePredicate;
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index db77b0eb3dc17..e12d0ffef35c8 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -1680,7 +1680,7 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
bool parseExpr(int64_t &Imm, StringRef Expected = "");
bool parseExpr(OperandVector &Operands);
StringRef getTokenStr() const;
- AsmToken peekToken();
+ AsmToken peekToken(bool ShouldSkipSpace = true);
AsmToken getToken() const;
SMLoc getLoc() const;
void lex();
@@ -1738,6 +1738,7 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
+ void cvtVOPD(MCInst &Inst, const OperandVector &Operands);
void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
OptionalImmIndexMap &OptionalIdx);
@@ -1804,6 +1805,7 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
AMDGPUOperand::Ptr defaultWaitVDST() const;
AMDGPUOperand::Ptr defaultWaitEXP() const;
+ OperandMatchResultTy parseVOPD(OperandVector &Operands);
};
struct OptionalOperand {
@@ -2909,7 +2911,8 @@ OperandMatchResultTy
AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
// TODO: add syntactic sugar for 1/(2*PI)
- assert(!isRegister());
+ if (isRegister())
+ return MatchOperand_NoMatch;
assert(!isModifier());
const auto& Tok = getToken();
@@ -5671,8 +5674,13 @@ bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
OperandMatchResultTy
AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
OperandMode Mode) {
+ OperandMatchResultTy ResTy = parseVOPD(Operands);
+ if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
+ isToken(AsmToken::EndOfStatement))
+ return ResTy;
+
// Try to parse with a custom parser
- OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
+ ResTy = MatchOperandParserImpl(Operands, Mnemonic);
// If we successfully parsed the operand or if there as an error parsing,
// we are done.
@@ -7108,9 +7116,10 @@ AMDGPUAsmParser::getToken() const {
return Parser.getTok();
}
-AsmToken
-AMDGPUAsmParser::peekToken() {
- return isToken(AsmToken::EndOfStatement) ? getToken() : getLexer().peekTok();
+AsmToken AMDGPUAsmParser::peekToken(bool ShouldSkipSpace) {
+ return isToken(AsmToken::EndOfStatement)
+ ? getToken()
+ : getLexer().peekTok(ShouldSkipSpace);
}
void
@@ -8316,6 +8325,118 @@ void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
cvtVOP3P(Inst, Operands, OptIdx);
}
+//===----------------------------------------------------------------------===//
+// VOPD
+//===----------------------------------------------------------------------===//
+
+OperandMatchResultTy AMDGPUAsmParser::parseVOPD(OperandVector &Operands) {
+ if (!hasVOPD(getSTI()))
+ return MatchOperand_NoMatch;
+
+ if (isToken(AsmToken::Colon) && peekToken(false).is(AsmToken::Colon)) {
+ SMLoc S = getLoc();
+ lex();
+ lex();
+ Operands.push_back(AMDGPUOperand::CreateToken(this, "::", S));
+ const MCExpr *Expr;
+ if (isToken(AsmToken::Identifier) && !Parser.parseExpression(Expr)) {
+ Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
+ return MatchOperand_Success;
+ }
+ Error(S, "invalid VOPD :: usage");
+ return MatchOperand_ParseFail;
+ }
+ return MatchOperand_NoMatch;
+}
+
+// Create VOPD MCInst operands using parsed assembler operands.
+// Parsed VOPD operands are ordered as follows:
+// OpXMnemo dstX src0X [vsrc1X|imm vsrc1X|vsrc1X imm] '::'
+// OpYMnemo dstY src0Y [vsrc1Y|imm vsrc1Y|vsrc1Y imm]
+// If both OpX and OpY have an imm, the first imm has a different name:
+// OpXMnemo dstX src0X [vsrc1X|immDeferred vsrc1X|vsrc1X immDeferred] '::'
+// OpYMnemo dstY src0Y [vsrc1Y|imm vsrc1Y|vsrc1Y imm]
+// MCInst operands have the following order:
+// dstX, dstY, src0X [, other OpX operands], src0Y [, other OpY operands]
+void AMDGPUAsmParser::cvtVOPD(MCInst &Inst, const OperandVector &Operands) {
+ auto addOp = [&](uint16_t i) { // NOLINT:function pointer
+ AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
+ if (Op.isReg()) {
+ Op.addRegOperands(Inst, 1);
+ return;
+ }
+ if (Op.isImm()) {
+ Op.addImmOperands(Inst, 1);
+ return;
+ }
+ // Handle tokens like 'offen' which are sometimes hard-coded into the
+ // asm string. There are no MCInst operands for these.
+ if (Op.isToken()) {
+ return;
+ }
+ llvm_unreachable("Unhandled operand type in cvtVOPD");
+ };
+
+ // Indices into MCInst.Operands
+ const auto FmamkOpXImmMCIndex = 3; // dstX, dstY, src0X, imm, ...
+ const auto FmaakOpXImmMCIndex = 4; // dstX, dstY, src0X, src1X, imm, ...
+ const auto MinOpYImmMCIndex = 4; // dstX, dstY, src0X, src0Y, imm, ...
+
+ unsigned Opc = Inst.getOpcode();
+ bool HasVsrc1X =
+ AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vsrc1X) != -1;
+ bool HasImmX =
+ AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::immDeferred) != -1 ||
+ (HasVsrc1X && (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::imm) ==
+ FmamkOpXImmMCIndex ||
+ AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::imm) ==
+ FmaakOpXImmMCIndex));
+
+ bool HasVsrc1Y =
+ AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vsrc1Y) != -1;
+ bool HasImmY =
+ AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::immDeferred) != -1 ||
+ AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::imm) >=
+ MinOpYImmMCIndex + HasVsrc1X;
+
+ // Indices of parsed operands relative to dst
+ const auto DstIdx = 0;
+ const auto Src0Idx = 1;
+ const auto Vsrc1OrImmIdx = 2;
+
+ const auto OpXOperandsSize = 2 + HasImmX + HasVsrc1X;
+ const auto BridgeTokensSize = 2; // Special VOPD tokens ('::' and OpYMnemo)
+
+ // Offsets into parsed operands
+ const auto OpXFirstOperandOffset = 1;
+ const auto OpYFirstOperandOffset =
+ OpXFirstOperandOffset + OpXOperandsSize + BridgeTokensSize;
+
+ // Order of addOp calls determines MC operand order
+ addOp(OpXFirstOperandOffset + DstIdx); // vdstX
+ addOp(OpYFirstOperandOffset + DstIdx); // vdstY
+
+ addOp(OpXFirstOperandOffset + Src0Idx); // src0X
+ if (HasImmX) {
+ // immX then vsrc1X for fmamk, vsrc1X then immX for fmaak
+ addOp(OpXFirstOperandOffset + Vsrc1OrImmIdx);
+ addOp(OpXFirstOperandOffset + Vsrc1OrImmIdx + 1);
+ } else {
+ if (HasVsrc1X) // all except v_mov
+ addOp(OpXFirstOperandOffset + Vsrc1OrImmIdx); // vsrc1X
+ }
+
+ addOp(OpYFirstOperandOffset + Src0Idx); // src0Y
+ if (HasImmY) {
+ // immY then vsrc1Y for fmamk, vsrc1Y then immY for fmaak
+ addOp(OpYFirstOperandOffset + Vsrc1OrImmIdx);
+ addOp(OpYFirstOperandOffset + Vsrc1OrImmIdx + 1);
+ } else {
+ if (HasVsrc1Y) // all except v_mov
+ addOp(OpYFirstOperandOffset + Vsrc1OrImmIdx); // vsrc1Y
+ }
+}
+
//===----------------------------------------------------------------------===//
// dpp
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index 804d299a32470..7f8397adcfb82 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -288,6 +288,12 @@ decodeOperand_VS_32_Deferred(MCInst &Inst, unsigned Imm, uint64_t Addr,
Inst, DAsm->decodeSrcOp(llvm::AMDGPUDisassembler::OPW32, Imm, true));
}
+static DecodeStatus decodeOperandVOPDDstY(MCInst &Inst, unsigned Val,
+ uint64_t Addr, const void *Decoder) {
+ const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
+ return addOperand(Inst, DAsm->decodeVOPDDstYOp(Inst, Val));
+}
+
static bool IsAGPROperand(const MCInst &Inst, int OpIdx,
const MCRegisterInfo *MRI) {
if (OpIdx < 0)
@@ -448,6 +454,9 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
convertVOPCDPPInst(MI);
break;
}
+ Res = tryDecodeInst(DecoderTableGFX1196, MI, DecW, Address);
+ if (Res)
+ break;
}
// Reinitialize Bytes
Bytes = Bytes_.slice(0, MaxInstBytesNum);
@@ -971,6 +980,8 @@ DecodeStatus AMDGPUDisassembler::convertFMAanyK(MCInst &MI,
assert(HasLiteral && "Should have decoded a literal");
const MCInstrDesc &Desc = MCII->get(MI.getOpcode());
unsigned DescNumOps = Desc.getNumOperands();
+ insertNamedMCOperand(MI, MCOperand::createImm(Literal),
+ AMDGPU::OpName::immDeferred);
assert(DescNumOps == MI.getNumOperands());
for (unsigned I = 0; I < DescNumOps; ++I) {
auto &Op = MI.getOperand(I);
@@ -1213,6 +1224,9 @@ MCOperand AMDGPUDisassembler::decodeOperand_SReg_512(unsigned Val) const {
MCOperand
AMDGPUDisassembler::decodeMandatoryLiteralConstant(unsigned Val) const {
if (HasLiteral) {
+ assert(
+ AMDGPU::hasVOPD(STI) &&
+ "Should only decode multiple kimm with VOPD, check VSrc operand types");
if (Literal != Val)
return errOperand(Val, "More than one unique literal is illegal");
}
@@ -1505,6 +1519,20 @@ MCOperand AMDGPUDisassembler::decodeDstOp(const OpWidthTy Width, unsigned Val) c
llvm_unreachable("unknown dst register");
}
+// Bit 0 of DstY isn't stored in the instruction, because it's always the
+// opposite of bit 0 of DstX.
+MCOperand AMDGPUDisassembler::decodeVOPDDstYOp(MCInst &Inst,
+ unsigned Val) const {
+ int VDstXInd =
+ AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::vdstX);
+ assert(VDstXInd != -1);
+ assert(Inst.getOperand(VDstXInd).isReg());
+ unsigned XDstReg = MRI.getEncodingValue(Inst.getOperand(VDstXInd).getReg());
+ Val |= ~XDstReg & 1;
+ auto Width = llvm::AMDGPUDisassembler::OPW32;
+ return createRegOperand(getVgprClassId(Width), Val);
+}
+
MCOperand AMDGPUDisassembler::decodeSpecialReg32(unsigned Val) const {
using namespace AMDGPU;
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
index c6717ef50ac37..31869f0917ae3 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
@@ -233,6 +233,7 @@ class AMDGPUDisassembler : public MCDisassembler {
MCOperand decodeSrcOp(const OpWidthTy Width, unsigned Val,
bool MandatoryLiteral = false) const;
MCOperand decodeDstOp(const OpWidthTy Width, unsigned Val) const;
+ MCOperand decodeVOPDDstYOp(MCInst &Inst, unsigned Val) const;
MCOperand decodeSpecialReg32(unsigned Val) const;
MCOperand decodeSpecialReg64(unsigned Val) const;
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index 7ce2a90073256..806ce7415f038 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -2074,6 +2074,15 @@ class getAsm32 <bit HasDst, int NumSrcArgs, ValueType DstVT = i32> {
!if(!eq(NumSrcArgs, 3), src0#src1#src2, "");
}
+class getAsmVOPDPart <int NumSrcArgs, string XorY> {
+ string dst = "$vdst" # XorY;
+ string src0 = ", $src0" # XorY;
+ string src1 = ", $vsrc1" # XorY;
+ string ret = dst #
+ !if(!ge(NumSrcArgs, 1), src0, "") #
+ !if(!ge(NumSrcArgs, 2), src1, "");
+}
+
// Returns the assembly string for the inputs and outputs of a VOP3
// instruction.
class getAsm64 <bit HasDst, int NumSrcArgs, bit HasIntClamp, bit HasModifiers,
@@ -2513,6 +2522,14 @@ class VOPProfile <list<ValueType> _ArgVT, bit _EnableF32SrcMods = 0,
field dag InsSDWA = getInsSDWA<Src0SDWA, Src1SDWA, NumSrcArgs,
HasSDWAOMod, Src0ModSDWA, Src1ModSDWA,
DstVT>.ret;
+ field dag InsVOPDX = (ins Src0RC32:$src0X, Src1RC32:$vsrc1X);
+ // It is a slight misnomer to use the deferred f32 operand type for non-float
+ // operands, but this operand type will only be used if the other dual
+ // component is FMAAK or FMAMK
+ field dag InsVOPDXDeferred = (ins !if(!eq(Src0VT.Size, 32), VSrc_f32_Deferred, VSrc_f16_Deferred):$src0X, VGPR_32:$vsrc1X);
+ field dag InsVOPDY = (ins Src0RC32:$src0Y, Src1RC32:$vsrc1Y);
+ field dag InsVOPDYDeferred = (ins !if(!eq(Src1VT.Size, 32), VSrc_f32_Deferred, VSrc_f16_Deferred):$src0Y, VGPR_32:$vsrc1Y);
+
field string Asm32 = getAsm32<HasDst, NumSrcArgs, DstVT>.ret;
field string Asm64 = getAsm64<HasDst, NumSrcArgs, HasIntClamp, HasModifiers, HasOMod, DstVT>.ret;
@@ -2536,6 +2553,8 @@ class VOPProfile <list<ValueType> _ArgVT, bit _EnableF32SrcMods = 0,
field string AsmVOP3DPP8 = getAsmVOP3DPP8<AsmVOP3DPPBase>.ret;
field string AsmSDWA = getAsmSDWA<HasDst, NumSrcArgs, DstVT>.ret;
field string AsmSDWA9 = getAsmSDWA9<HasDst, HasSDWAOMod, NumSrcArgs, DstVT>.ret;
+ field string AsmVOPDX = getAsmVOPDPart<NumSrcArgs, "X">.ret;
+ field string AsmVOPDY = getAsmVOPDPart<NumSrcArgs, "Y">.ret;
field string TieRegDPP = "$old";
}
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 435259e91206f..9776abf84305f 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -1783,6 +1783,10 @@ bool hasMAIInsts(const MCSubtargetInfo &STI) {
return STI.getFeatureBits()[AMDGPU::FeatureMAIInsts];
}
+bool hasVOPD(const MCSubtargetInfo &STI) {
+ return STI.getFeatureBits()[AMDGPU::FeatureVOPD];
+}
+
int32_t getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR,
int32_t ArgNumVGPR) {
if (has90AInsts && ArgNumAGPR)
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index b6019175c2ba6..5a64c4e6b255d 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -823,6 +823,7 @@ bool isGFX90A(const MCSubtargetInfo &STI);
bool isGFX940(const MCSubtargetInfo &STI);
bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI);
bool hasMAIInsts(const MCSubtargetInfo &STI);
+bool hasVOPD(const MCSubtargetInfo &STI);
int getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR, int32_t ArgNumVGPR);
/// Is Reg - scalar register
diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
index f307155df6fee..1d374a9f90ba9 100644
--- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -110,13 +110,17 @@ class getVOP1Pat64 <SDPatternOperator node, VOPProfile P> : LetDummies {
}
multiclass VOP1Inst <string opName, VOPProfile P,
- SDPatternOperator node = null_frag> {
+ SDPatternOperator node = null_frag, int VOPDOp = -1> {
// We only want to set this on the basic, non-SDWA or DPP forms.
defvar should_mov_imm = !or(!eq(opName, "v_mov_b32"),
!eq(opName, "v_mov_b64"));
let isMoveImm = should_mov_imm in {
- def _e32 : VOP1_Pseudo <opName, P>;
+ if !eq(VOPDOp, -1) then
+ def _e32 : VOP1_Pseudo <opName, P>;
+ else
+ // Only for V_MOV_B32
+ def _e32 : VOP1_Pseudo <opName, P>, VOPD_Component<VOPDOp, "v_mov_b32">;
def _e64 : VOP3InstBase <opName, P, node>;
}
@@ -182,8 +186,15 @@ let VOPAsmPrefer32Bit = 1 in {
defm V_NOP : VOP1Inst <"v_nop", VOP_NOP_PROFILE>;
}
+def VOPProfile_MOV : VOPProfile <[i32, i32, untyped, untyped]> {
+ let InsVOPDX = (ins Src0RC32:$src0X);
+ let InsVOPDXDeferred = (ins VSrc_f32_Deferred:$src0X);
+ let InsVOPDY = (ins Src0RC32:$src0Y);
+ let InsVOPDYDeferred = (ins VSrc_f32_Deferred:$src0Y);
+}
+
let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
-defm V_MOV_B32 : VOP1Inst <"v_mov_b32", VOP_I32_I32>;
+defm V_MOV_B32 : VOP1Inst <"v_mov_b32", VOPProfile_MOV, null_frag, 0x8>;
let SubtargetPredicate = isGFX940Plus in
defm V_MOV_B64 : VOP1Inst <"v_mov_b64", VOP_I64_I64>;
diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
index 2b3444eaac74b..5f6f22c0ff06f 100644
--- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
@@ -140,7 +140,13 @@ multiclass VOP2Inst_e32<string opName,
Commutable_REV<revOp#"_e32", !eq(revOp, opName)>;
} // End renamedInGFX9 = GFX9Renamed
}
-
+multiclass
+ VOP2Inst_e32_VOPD<string opName, VOPProfile P, bits<5> VOPDOp,
+ string VOPDName, SDPatternOperator node = null_frag,
+ string revOp = opName, bit GFX9Renamed = 0> {
+ defm NAME : VOP2Inst_e32<opName, P, node, revOp, GFX9Renamed>,
+ VOPD_Component<VOPDOp, VOPDName>;
+}
multiclass VOP2Inst_e64<string opName,
VOPProfile P,
SDPatternOperator node = null_frag,
@@ -180,6 +186,22 @@ multiclass VOP2Inst<string opName,
}
}
+multiclass VOP2Inst_VOPD<string opName,
+ VOPProfile P,
+ bits<5> VOPDOp,
+ string VOPDName,
+ SDPatternOperator node = null_frag,
+ string revOp = opName,
+ bit GFX9Renamed = 0> :
+ VOP2Inst_e32_VOPD<opName, P, VOPDOp, VOPDName, node, revOp, GFX9Renamed>,
+ VOP2Inst_e64<opName, P, node, revOp, GFX9Renamed>,
+ VOP2Inst_sdwa<opName, P, GFX9Renamed> {
+ let renamedInGFX9 = GFX9Renamed in {
+ foreach _ = BoolToList<P.HasExtDPP>.ret in
+ def _dpp : VOP2_DPP_Pseudo <opName, P>;
+ }
+}
+
multiclass VOP2bInst <string opName,
VOPProfile P,
SDPatternOperator node = null_frag,
@@ -230,16 +252,19 @@ multiclass VOP2bInstAliases<VOP2_Pseudo ps, VOP2_Real inst, string OpName> {
}
}
-multiclass VOP2eInst <string opName,
- VOPProfile P,
- SDPatternOperator node = null_frag,
- string revOp = opName,
- bit useSGPRInput = !eq(P.NumSrcArgs, 3)> {
+multiclass
+ VOP2eInst_Base<string opName, VOPProfile P, bits<5> VOPDOp, string VOPDName,
+ SDPatternOperator node, string revOp, bit useSGPRInput> {
let SchedRW = [Write32Bit] in {
let Uses = !if(useSGPRInput, [VCC, EXEC], [EXEC]) in {
- def _e32 : VOP2_Pseudo <opName, P>,
- Commutable_REV<revOp#"_e32", !eq(revOp, opName)>;
+ if !eq(VOPDOp, -1) then
+ def _e32 : VOP2_Pseudo <opName, P>,
+ Commutable_REV<revOp#"_e32", !eq(revOp, opName)>;
+ else
+ def _e32 : VOP2_Pseudo <opName, P>,
+ Commutable_REV<revOp#"_e32", !eq(revOp, opName)>,
+ VOPD_Component<VOPDOp, VOPDName>;
foreach _ = BoolToList<P.HasExtSDWA>.ret in
def _sdwa : VOP2_SDWA_Pseudo <opName, P> {
@@ -262,6 +287,16 @@ multiclass VOP2eInst <string opName,
}
}
+multiclass
+ VOP2eInst<string opName, VOPProfile P, SDPatternOperator node = null_frag,
+ string revOp = opName, bit useSGPRInput = !eq(P.NumSrcArgs, 3)>
+ : VOP2eInst_Base<opName, P, -1, "", node, revOp, useSGPRInput>;
+
+multiclass
+ VOP2eInst_VOPD<string opName, VOPProfile P, bits<5> VOPDOp, string VOPDName,
+ SDPatternOperator node = null_frag, string revOp = opName,
+ bit useSGPRInput = !eq(P.NumSrcArgs, 3)>
+ : VOP2eInst_Base<opName, P, VOPDOp, VOPDName, node, revOp, useSGPRInput>;
class VOP2eInstAlias <VOP2_Pseudo ps, Instruction inst, string opnd = ""> :
InstAlias <ps.OpName#" "#ps.Pfl.Asm32#", "#opnd,
@@ -283,12 +318,24 @@ multiclass VOP2eInstAliases<VOP2_Pseudo ps, VOP2_Real inst> {
}
}
-class VOP_MADAK <ValueType vt> : VOPProfile <[vt, vt, vt, vt]> {
+class VOP_MADK_Base<ValueType vt> : VOPProfile <[vt, vt, vt, vt]> {
+ string AsmVOPDXDeferred = ?;
+}
+
+class VOP_MADAK <ValueType vt> : VOP_MADK_Base<vt> {
field Operand ImmOpType = !if(!eq(vt.Size, 32), f32kimm, f16kimm);
field dag Ins32 = !if(!eq(vt.Size, 32),
(ins VSrc_f32_Deferred:$src0, VGPR_32:$src1, ImmOpType:$imm),
(ins VSrc_f16_Deferred:$src0, VGPR_32:$src1, ImmOpType:$imm));
+ field dag InsVOPDX = (ins VSrc_f32_Deferred:$src0X, VGPR_32:$vsrc1X, ImmOpType:$imm);
+ // Note that both src0X and imm are deferred
+ let InsVOPDXDeferred = (ins VSrc_f32_Deferred:$src0X, VGPR_32:$vsrc1X, ImmOpType:$immDeferred);
+ field dag InsVOPDY = (ins VSrc_f32_Deferred:$src0Y, VGPR_32:$vsrc1Y, ImmOpType:$imm);
+
field string Asm32 = "$vdst, $src0, $src1, $imm";
+ field string AsmVOPDX = "$vdstX, $src0X, $vsrc1X, $imm";
+ let AsmVOPDXDeferred = "$vdstX, $src0X, $vsrc1X, $immDeferred";
+ field string AsmVOPDY = "$vdstY, $src0Y, $vsrc1Y, $imm";
field bit HasExt = 0;
let IsSingle = 1;
}
@@ -296,10 +343,17 @@ class VOP_MADAK <ValueType vt> : VOPProfile <[vt, vt, vt, vt]> {
def VOP_MADAK_F16 : VOP_MADAK <f16>;
def VOP_MADAK_F32 : VOP_MADAK <f32>;
-class VOP_MADMK <ValueType vt> : VOPProfile <[vt, vt, vt, vt]> {
+class VOP_MADMK <ValueType vt> : VOP_MADK_Base<vt> {
field Operand ImmOpType = !if(!eq(vt.Size, 32), f32kimm, f16kimm);
field dag Ins32 = (ins VSrc_f32_Deferred:$src0, ImmOpType:$imm, VGPR_32:$src1);
+ field dag InsVOPDX = (ins VSrc_f32_Deferred:$src0X, ImmOpType:$imm, VGPR_32:$vsrc1X);
+ let InsVOPDXDeferred = (ins VSrc_f32_Deferred:$src0X, ImmOpType:$immDeferred, VGPR_32:$vsrc1X);
+ field dag InsVOPDY = (ins VSrc_f32_Deferred:$src0Y, ImmOpType:$imm, VGPR_32:$vsrc1Y);
+
field string Asm32 = "$vdst, $src0, $imm, $src1";
+ field string AsmVOPDX = "$vdstX, $src0X, $imm, $vsrc1X";
+ let AsmVOPDXDeferred = "$vdstX, $src0X, $immDeferred, $vsrc1X";
+ field string AsmVOPDY = "$vdstY, $src0Y, $imm, $vsrc1Y";
field bit HasExt = 0;
let IsSingle = 1;
}
@@ -537,31 +591,31 @@ def VOP_WRITELANE : VOPProfile<[i32, i32, i32, i32]> {
let SubtargetPredicate = isGFX11Plus in
defm V_CNDMASK_B16 : VOP2eInst <"v_cndmask_b16", VOP2e_I16_I16_I16_I1>;
-defm V_CNDMASK_B32 : VOP2eInst <"v_cndmask_b32", VOP2e_I32_I32_I32_I1>;
+defm V_CNDMASK_B32 : VOP2eInst_VOPD <"v_cndmask_b32", VOP2e_I32_I32_I32_I1, 0x9, "v_cndmask_b32">;
let SubtargetPredicate = HasMadMacF32Insts, isReMaterializable = 1 in
def V_MADMK_F32 : VOP2_Pseudo <"v_madmk_f32", VOP_MADMK_F32, []>;
let isCommutable = 1 in {
let isReMaterializable = 1 in {
-defm V_ADD_F32 : VOP2Inst <"v_add_f32", VOP_F32_F32_F32, any_fadd>;
-defm V_SUB_F32 : VOP2Inst <"v_sub_f32", VOP_F32_F32_F32, any_fsub>;
-defm V_SUBREV_F32 : VOP2Inst <"v_subrev_f32", VOP_F32_F32_F32, null_frag, "v_sub_f32">;
-defm V_MUL_LEGACY_F32 : VOP2Inst <"v_mul_legacy_f32", VOP_F32_F32_F32, AMDGPUfmul_legacy>;
-defm V_MUL_F32 : VOP2Inst <"v_mul_f32", VOP_F32_F32_F32, any_fmul>;
+defm V_ADD_F32 : VOP2Inst_VOPD <"v_add_f32", VOP_F32_F32_F32, 0x4, "v_add_f32", any_fadd>;
+defm V_SUB_F32 : VOP2Inst_VOPD <"v_sub_f32", VOP_F32_F32_F32, 0x5, "v_sub_f32", any_fsub>;
+defm V_SUBREV_F32 : VOP2Inst_VOPD <"v_subrev_f32", VOP_F32_F32_F32, 0x6, "v_subrev_f32", null_frag, "v_sub_f32">;
+defm V_MUL_LEGACY_F32 : VOP2Inst_VOPD <"v_mul_legacy_f32", VOP_F32_F32_F32, 0x7, "v_mul_dx9_zero_f32", AMDGPUfmul_legacy>;
+defm V_MUL_F32 : VOP2Inst_VOPD <"v_mul_f32", VOP_F32_F32_F32, 0x3, "v_mul_f32", any_fmul>;
defm V_MUL_I32_I24 : VOP2Inst <"v_mul_i32_i24", VOP_I32_I32_I32_ARITH, AMDGPUmul_i24>;
defm V_MUL_HI_I32_I24 : VOP2Inst <"v_mul_hi_i32_i24", VOP_I32_I32_I32, AMDGPUmulhi_i24>;
defm V_MUL_U32_U24 : VOP2Inst <"v_mul_u32_u24", VOP_I32_I32_I32_ARITH, AMDGPUmul_u24>;
defm V_MUL_HI_U32_U24 : VOP2Inst <"v_mul_hi_u32_u24", VOP_I32_I32_I32, AMDGPUmulhi_u24>;
-defm V_MIN_F32 : VOP2Inst <"v_min_f32", VOP_F32_F32_F32, fminnum_like>;
-defm V_MAX_F32 : VOP2Inst <"v_max_f32", VOP_F32_F32_F32, fmaxnum_like>;
+defm V_MIN_F32 : VOP2Inst_VOPD <"v_min_f32", VOP_F32_F32_F32, 0xb, "v_min_f32", fminnum_like>;
+defm V_MAX_F32 : VOP2Inst_VOPD <"v_max_f32", VOP_F32_F32_F32, 0xa, "v_max_f32", fmaxnum_like>;
defm V_MIN_I32 : VOP2Inst <"v_min_i32", VOP_PAT_GEN<VOP_I32_I32_I32>, smin>;
defm V_MAX_I32 : VOP2Inst <"v_max_i32", VOP_PAT_GEN<VOP_I32_I32_I32>, smax>;
defm V_MIN_U32 : VOP2Inst <"v_min_u32", VOP_PAT_GEN<VOP_I32_I32_I32>, umin>;
defm V_MAX_U32 : VOP2Inst <"v_max_u32", VOP_PAT_GEN<VOP_I32_I32_I32>, umax>;
defm V_LSHRREV_B32 : VOP2Inst <"v_lshrrev_b32", VOP_I32_I32_I32, clshr_rev_32, "v_lshr_b32">;
defm V_ASHRREV_I32 : VOP2Inst <"v_ashrrev_i32", VOP_I32_I32_I32, cashr_rev_32, "v_ashr_i32">;
-defm V_LSHLREV_B32 : VOP2Inst <"v_lshlrev_b32", VOP_I32_I32_I32, clshl_rev_32, "v_lshl_b32">;
-defm V_AND_B32 : VOP2Inst <"v_and_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, and>;
+defm V_LSHLREV_B32 : VOP2Inst_VOPD <"v_lshlrev_b32", VOP_I32_I32_I32, 0x11, "v_lshlrev_b32", clshl_rev_32, "v_lshl_b32">;
+defm V_AND_B32 : VOP2Inst_VOPD <"v_and_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, 0x12, "v_and_b32", and>;
defm V_OR_B32 : VOP2Inst <"v_or_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, or>;
defm V_XOR_B32 : VOP2Inst <"v_xor_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, xor>;
} // End isReMaterializable = 1
@@ -593,7 +647,7 @@ defm V_SUBBREV_U32 : VOP2bInst <"v_subbrev_u32", VOP2b_I32_I1_I32_I32_I1, null_f
let SubtargetPredicate = HasAddNoCarryInsts, isReMaterializable = 1 in {
-defm V_ADD_U32 : VOP2Inst <"v_add_u32", VOP_I32_I32_I32_ARITH, null_frag, "v_add_u32", 1>;
+defm V_ADD_U32 : VOP2Inst_VOPD <"v_add_u32", VOP_I32_I32_I32_ARITH, 0x10, "v_add_nc_u32", null_frag, "v_add_u32", 1>;
defm V_SUB_U32 : VOP2Inst <"v_sub_u32", VOP_I32_I32_I32_ARITH, null_frag, "v_sub_u32", 1>;
defm V_SUBREV_U32 : VOP2Inst <"v_subrev_u32", VOP_I32_I32_I32_ARITH, null_frag, "v_sub_u32", 1>;
}
@@ -783,7 +837,7 @@ let Constraints = "$vdst = $src2",
DisableEncoding = "$src2",
isConvertibleToThreeAddress = 1,
isCommutable = 1 in
-defm V_FMAC_F32 : VOP2Inst <"v_fmac_f32", VOP_MAC_F32>;
+defm V_FMAC_F32 : VOP2Inst_VOPD <"v_fmac_f32", VOP_MAC_F32, 0x0, "v_fmac_f32">;
} // End SubtargetPredicate = HasDLInsts
@@ -811,7 +865,7 @@ let Constraints = "$vdst = $src2",
isCommutable = 1,
IsDOT = 1 in {
let SubtargetPredicate = HasDot5Insts in
- defm V_DOT2C_F32_F16 : VOP2Inst<"v_dot2c_f32_f16", VOP_DOT_ACC_F32_V2F16>;
+ defm V_DOT2C_F32_F16 : VOP2Inst_VOPD<"v_dot2c_f32_f16", VOP_DOT_ACC_F32_V2F16, 0xc, "v_dot2acc_f32_f16">;
let SubtargetPredicate = HasDot6Insts in
defm V_DOT4C_I32_I8 : VOP2Inst<"v_dot4c_i32_i8", VOP_DOT_ACC_I32_I32>;
@@ -849,10 +903,10 @@ let AddedComplexity = 30 in {
} // End AddedComplexity = 30
let SubtargetPredicate = HasFmaakFmamkF32Insts, isReMaterializable = 1 in {
-def V_FMAMK_F32 : VOP2_Pseudo<"v_fmamk_f32", VOP_MADMK_F32, [], "">;
+def V_FMAMK_F32 : VOP2_Pseudo<"v_fmamk_f32", VOP_MADMK_F32, [], "">, VOPD_Component<0x2, "v_fmamk_f32">;
let isCommutable = 1 in
-def V_FMAAK_F32 : VOP2_Pseudo<"v_fmaak_f32", VOP_MADAK_F32, [], "">;
+def V_FMAAK_F32 : VOP2_Pseudo<"v_fmaak_f32", VOP_MADAK_F32, [], "">, VOPD_Component<0x1, "v_fmaak_f32">;
}
let SubtargetPredicate = isGFX10Plus in {
diff --git a/llvm/lib/Target/AMDGPU/VOPDInstructions.td b/llvm/lib/Target/AMDGPU/VOPDInstructions.td
new file mode 100644
index 0000000000000..420f184360957
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/VOPDInstructions.td
@@ -0,0 +1,159 @@
+//===-- VOPDInstructions.td - Vector Instruction Definitions --------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Encodings
+//===----------------------------------------------------------------------===//
+
+class VOPDe<bits<4> opX, bits<5> opY> : Enc64 {
+ bits<9> src0X;
+ bits<8> vsrc1X;
+ bits<8> vdstX;
+ bits<9> src0Y;
+ bits<8> vsrc1Y;
+ bits<8> vdstY;
+
+ let Inst{8-0} = src0X;
+ let Inst{16-9} = vsrc1X;
+ let Inst{21-17} = opY;
+ let Inst{25-22} = opX;
+ let Inst{31-26} = 0x32; // encoding
+ let Inst{40-32} = src0Y;
+ let Inst{48-41} = vsrc1Y;
+ let Inst{55-49} = vdstY{7-1};
+ let Inst{63-56} = vdstX;
+}
+
+class VOPD_MADKe<bits<4> opX, bits<5> opY> : Enc96 {
+ bits<9> src0X;
+ bits<8> vsrc1X;
+ bits<8> vdstX;
+ bits<9> src0Y;
+ bits<8> vsrc1Y;
+ bits<8> vdstY;
+ bits<32> imm;
+
+ let Inst{8-0} = src0X;
+ let Inst{16-9} = vsrc1X;
+ let Inst{21-17} = opY;
+ let Inst{25-22} = opX;
+ let Inst{31-26} = 0x32; // encoding
+ let Inst{40-32} = src0Y;
+ let Inst{48-41} = vsrc1Y;
+ let Inst{55-49} = vdstY{7-1};
+ let Inst{63-56} = vdstX;
+ let Inst{95-64} = imm;
+}
+
+//===----------------------------------------------------------------------===//
+// VOPD classes
+//===----------------------------------------------------------------------===//
+
+class VOPD_Base<dag outs, dag ins, string asm, VOP_Pseudo VDX, VOP_Pseudo VDY,
+ VOPD_Component XasVC, VOPD_Component YasVC>
+ : VOPAnyCommon<outs, ins, asm, []>,
+ VOP<NAME>,
+ SIMCInstr<NAME, SIEncodingFamily.GFX11> {
+ // Fields for table indexing
+ Instruction Opcode = !cast<Instruction>(NAME);
+ bits<5> OpX = XasVC.VOPDOp;
+ bits<5> OpY = YasVC.VOPDOp;
+
+ let VALU = 1;
+
+ let DecoderNamespace = "GFX11";
+ let AssemblerPredicate = isGFX11Plus;
+ let WaveSizePredicate = isWave32;
+ let isCodeGenOnly = 0;
+ let SubtargetPredicate = isGFX11Plus;
+ let AsmMatchConverter = "cvtVOPD";
+ let Size = 8;
+ let ReadsModeReg = !or(VDX.ReadsModeReg, VDY.ReadsModeReg);
+ let mayRaiseFPException = ReadsModeReg;
+
+ let Uses = RegListUnion<VDX.Uses, VDY.Uses>.ret;
+ let Defs = RegListUnion<VDX.Defs, VDY.Defs>.ret;
+ let SchedRW = !listconcat(VDX.SchedRW, VDY.SchedRW);
+}
+
+class VOPD<dag outs, dag ins, string asm, VOP_Pseudo VDX, VOP_Pseudo VDY,
+ VOPD_Component XasVC, VOPD_Component YasVC>
+ : VOPD_Base<outs, ins, asm, VDX, VDY, XasVC, YasVC>,
+ VOPDe<XasVC.VOPDOp{3-0}, YasVC.VOPDOp> {
+ let Inst{16-9} = !if (!eq(VDX.Mnemonic, "v_mov_b32"), 0x0, vsrc1X);
+ let Inst{48-41} = !if (!eq(VDY.Mnemonic, "v_mov_b32"), 0x0, vsrc1Y);
+}
+
+class VOPD_MADK<dag outs, dag ins, string asm, VOP_Pseudo VDX, VOP_Pseudo VDY,
+ VOPD_Component XasVC, VOPD_Component YasVC>
+ : VOPD_Base<outs, ins, asm, VDX, VDY, XasVC, YasVC>,
+ VOPD_MADKe<XasVC.VOPDOp{3-0}, YasVC.VOPDOp> {
+ let Inst{16-9} = !if (!eq(VDX.Mnemonic, "v_mov_b32"), 0x0, vsrc1X);
+ let Inst{48-41} = !if (!eq(VDY.Mnemonic, "v_mov_b32"), 0x0, vsrc1Y);
+ let Size = 12;
+}
+
+// V_DUAL_DOT2ACC_F32_BF16 is a legal instruction, but V_DOT2ACC_F32_BF16 is
+// not. Since we generate the DUAL form by converting from the normal form we
+// will never generate it.
+defvar VOPDYPseudos = [
+ "V_FMAC_F32_e32", "V_FMAAK_F32", "V_FMAMK_F32", "V_MUL_F32_e32",
+ "V_ADD_F32_e32", "V_SUB_F32_e32", "V_SUBREV_F32_e32", "V_MUL_LEGACY_F32_e32",
+ "V_MOV_B32_e32", "V_CNDMASK_B32_e32", "V_MAX_F32_e32", "V_MIN_F32_e32",
+ "V_DOT2C_F32_F16_e32", "V_ADD_U32_e32", "V_LSHLREV_B32_e32", "V_AND_B32_e32"
+];
+defvar VOPDXPseudos = VOPDYPseudos[0...VOPDX_Max_Index];
+
+def VOPDDstYOperand : RegisterOperand<VGPR_32, "printRegularOperand"> {
+ let DecoderMethod = "decodeOperandVOPDDstY";
+}
+
+foreach x = VOPDXPseudos in {
+ foreach y = VOPDYPseudos in {
+ defvar xInst = !cast<VOP_Pseudo>(x);
+ defvar yInst = !cast<VOP_Pseudo>(y);
+ defvar XasVC = !cast<VOPD_Component>(x);
+ defvar YasVC = !cast<VOPD_Component>(y);
+ defvar isMADK = !or(!eq(x, "V_FMAAK_F32"), !eq(x, "V_FMAMK_F32"),
+ !eq(y, "V_FMAAK_F32"), !eq(y, "V_FMAMK_F32"));
+ // If X or Y is MADK (have a mandatory immediate), all src operands which
+ // may contain an optional literal must use the VSrc_*_Deferred operand
+ // type. Optional literal operands in MADK VOPD components always use this
+ // operand form. If Both X and Y are MADK, the mandatory literal of X
+ // additionally must use an alternate operand format which defers to the
+ // 'real' Y literal
+ defvar isOpXMADK = !or(!eq(x, "V_FMAAK_F32"), !eq(x, "V_FMAMK_F32"));
+ defvar isOpYMADK = !or(!eq(y, "V_FMAAK_F32"), !eq(y, "V_FMAMK_F32"));
+ defvar OpName = "V_DUAL_" # !substr(x,2) # "_X_" # !substr(y,2);
+ defvar outs = (outs VGPRSrc_32:$vdstX, VOPDDstYOperand:$vdstY);
+ if !or(isOpXMADK, isOpYMADK) then {
+ if !and(isOpXMADK, isOpYMADK) then {
+ defvar X_MADK_Pfl = !cast<VOP_MADK_Base>(xInst.Pfl);
+ defvar ins = !con(xInst.Pfl.InsVOPDXDeferred, yInst.Pfl.InsVOPDY);
+ defvar asm = XasVC.VOPDName #" "# X_MADK_Pfl.AsmVOPDXDeferred #" :: "# YasVC.VOPDName #" "# yInst.Pfl.AsmVOPDY;
+ def OpName : VOPD_MADK<outs, ins, asm, xInst, yInst, XasVC, YasVC>;
+ } else {
+ defvar asm = XasVC.VOPDName #" "# xInst.Pfl.AsmVOPDX #" :: "# YasVC.VOPDName #" "# yInst.Pfl.AsmVOPDY;
+ if isOpXMADK then {
+ assert !not(isOpYMADK), "Expected only OpX as MADK";
+ defvar ins = !con(xInst.Pfl.InsVOPDX, yInst.Pfl.InsVOPDYDeferred);
+ def OpName : VOPD_MADK<outs, ins, asm, xInst, yInst, XasVC, YasVC>;
+ } else {
+ assert !not(isOpXMADK), "Expected only OpY as MADK";
+ defvar ins = !con(xInst.Pfl.InsVOPDXDeferred, yInst.Pfl.InsVOPDY);
+ def OpName : VOPD_MADK<outs, ins, asm, xInst, yInst, XasVC, YasVC>;
+ }
+ }
+ } else {
+ defvar ins = !con(xInst.Pfl.InsVOPDX, yInst.Pfl.InsVOPDY);
+ defvar asm = XasVC.VOPDName #" "# xInst.Pfl.AsmVOPDX #" :: "# YasVC.VOPDName #" "# yInst.Pfl.AsmVOPDY;
+ def OpName : VOPD<outs, ins, asm, xInst, yInst, XasVC, YasVC>;
+ }
+ }
+}
+
diff --git a/llvm/lib/Target/AMDGPU/VOPInstructions.td b/llvm/lib/Target/AMDGPU/VOPInstructions.td
index d59e165e9e9dd..87e299dd67380 100644
--- a/llvm/lib/Target/AMDGPU/VOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOPInstructions.td
@@ -30,6 +30,16 @@ class VOP <string opName> {
string OpName = opName;
}
+// First 13 insts from VOPDY are also VOPDX. DOT2ACC_F32_BF16 is omitted
+defvar VOPDX_Max_Index = 12;
+
+class VOPD_Component<bits<5> OpIn, string vOPDName> {
+ Instruction BaseVOP = !cast<Instruction>(NAME);
+ string VOPDName = "v_dual_" # !substr(vOPDName, 2);
+ bits<5> VOPDOp = OpIn;
+ bit CanBeVOPDX = !le(VOPDOp, VOPDX_Max_Index);
+}
+
class VOPAnyCommon <dag outs, dag ins, string asm, list<dag> pattern> :
InstSI <outs, ins, asm, pattern> {
@@ -1417,6 +1427,7 @@ include "VOP1Instructions.td"
include "VOP2Instructions.td"
include "VOP3Instructions.td"
include "VOP3PInstructions.td"
+include "VOPDInstructions.td"
class VOPInfoTable <string Format> : GenericTable {
diff --git a/llvm/test/MC/AMDGPU/gfx11_err.s b/llvm/test/MC/AMDGPU/gfx11_err.s
index fd48b067145f2..82ebbb26f5e8b 100644
--- a/llvm/test/MC/AMDGPU/gfx11_err.s
+++ b/llvm/test/MC/AMDGPU/gfx11_err.s
@@ -57,6 +57,10 @@ v_cvt_f16_u16_e64_dpp v5, s1 dpp8:[7,6,5,4,3,2,1,0]
v_cvt_f16_u16_e64_dpp v5, s1 row_shl:1 row_mask:0xf bank_mask:0xf
// GFX11: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+; disallow space between colons
+v_dual_mul_f32 v0, v0, v2 : : v_dual_mul_f32 v1, v1, v3
+// GFX11: [[@LINE-1]]:{{[0-9]+}}: error: unknown token in expression
+
// On GFX11, v_dot8_i32_i4 is a valid SP3 alias for v_dot8_i32_iu4.
// However, we intentionally leave it unimplemented because on other
// processors v_dot8_i32_i4 denotes an instruction of a
different
diff --git a/llvm/test/MC/AMDGPU/vopd.s b/llvm/test/MC/AMDGPU/vopd.s
new file mode 100644
index 0000000000000..9399d8ed4242f
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/vopd.s
@@ -0,0 +1,86 @@
+// RUN: llvm-mc -arch=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,-wavefrontsize64 -show-encoding %s | FileCheck --check-prefixes=GFX11 %s
+// RUN: llvm-mc -arch=amdgcn -mcpu=gfx1100 -show-encoding %s | FileCheck --check-prefixes=GFX11 %s
+// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 -show-encoding %s 2>&1 | FileCheck --check-prefixes=W64-ERR --implicit-check-not=error: %s
+
+v_dual_mul_f32 v0, v0, v2 :: v_dual_mul_f32 v1, v1, v3
+// GFX11: encoding: [0x00,0x05,0xc6,0xc8,0x01,0x07,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error
+
+v_dual_mul_f32 v0, s1, v2 :: v_dual_mul_f32 v3, s4, v5
+// GFX11: encoding: [0x01,0x04,0xc6,0xc8,0x04,0x0a,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error
+
+v_dual_mul_f32 v11, v1, v2 :: v_dual_mul_f32 v10, 0x24681357, v5
+// GFX11: encoding: [0x01,0x05,0xc6,0xc8,0xff,0x0a,0x0a,0x0b,0x57,0x13,0x68,0x24]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error
+
+v_dual_mul_f32 v11, 0x24681357, v2 :: v_dual_mul_f32 v10, 0x24681357, v5
+// GFX11: encoding: [0xff,0x04,0xc6,0xc8,0xff,0x0a,0x0a,0x0b,0x57,0x13,0x68,0x24]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error
+
+v_dual_min_f32 v0, v1 , v2 :: v_dual_max_f32 v3, v4, v5
+// GFX11: encoding: [0x01,0x05,0xd4,0xca,0x04,0x0b,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error
+
+v_dual_cndmask_b32 v20, v21, v22 :: v_dual_mov_b32 v41, v42
+// GFX11: encoding: [0x15,0x2d,0x50,0xca,0x2a,0x01,0x28,0x14]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error
+
+v_dual_fmac_f32 v0, v1, v2 :: v_dual_fmamk_f32 v3, v6, 0x3f700000, v1
+// GFX11: encoding: [0x01,0x05,0x04,0xc8,0x06,0x03,0x02,0x00,0x00,0x00,0x70,0x3f]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error
+
+v_dual_fmamk_f32 v122, v74, 0xa0172923, v161 :: v_dual_lshlrev_b32 v247, v160, v99
+// GFX11: encoding: [0x4a,0x43,0xa3,0xc8,0xa0,0xc7,0xf6,0x7a,0x23,0x29,0x17,0xa0]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error
+
+v_dual_fmaak_f32 v122, s74, v161, 2.741 :: v_dual_and_b32 v247, v160, v98
+// GFX11: encoding: [0x4a,0x42,0x65,0xc8,0xa0,0xc5,0xf6,0x7a,0x8b,0x6c,0x2f,0x40]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error
+
+v_dual_fmaak_f32 v122, s74, v161, 2.741 :: v_dual_fmamk_f32 v3, v6, 2.741, v1
+// GFX11: encoding: [0x4a,0x42,0x45,0xc8,0x06,0x03,0x02,0x7a,0x8b,0x6c,0x2f,0x40]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error
+
+v_dual_mov_b32 v247, v160 :: v_dual_fmaak_f32 v122, s74, v161, 2.741
+// GFX11: encoding: [0xa0,0x01,0x02,0xca,0x4a,0x42,0x7b,0xf7,0x8b,0x6c,0x2f,0x40]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error
+
+v_dual_fmaak_f32 v122, s74, v161, 2.741 :: v_dual_mov_b32 v247, v160
+// GFX11: encoding: [0x4a,0x42,0x51,0xc8,0xa0,0x01,0xf6,0x7a,0x8b,0x6c,0x2f,0x40]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error
+
+v_dual_fmaak_f32 v122, s74, v161, 2.741 :: v_dual_mov_b32 v247, 2.741
+// GFX11: encoding: [0x4a,0x42,0x51,0xc8,0xff,0x00,0xf6,0x7a,0x8b,0x6c,0x2f,0x40]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error
+
+v_dual_fmaak_f32 v122, s74, v161, 2.741 :: v_dual_mov_b32 v247, 2
+// GFX11: encoding: [0x4a,0x42,0x51,0xc8,0x82,0x00,0xf6,0x7a,0x8b,0x6c,0x2f,0x40]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error
+
+v_dual_subrev_f32 v0, v1 , v2 :: v_dual_add_nc_u32 v3, v4, v5
+// GFX11: encoding: [0x01,0x05,0xa0,0xc9,0x04,0x0b,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error
+
+v_dual_mul_dx9_zero_f32 v11, 0x24681357, v2 :: v_dual_dot2acc_f32_f16 v10, 0x24681357, v5
+// GFX11: encoding: [0xff,0x04,0xd8,0xc9,0xff,0x0a,0x0a,0x0b,0x57,0x13,0x68,0x24]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error
+
+v_dual_fmamk_f32 v122, 0xdeadbeef, 0xdeadbeef, v161 :: v_dual_fmamk_f32 v123, 0xdeadbeef, 0xdeadbeef, v162
+// GFX11: encoding: [0xff,0x42,0x85,0xc8,0xff,0x44,0x7b,0x7a,0xef,0xbe,0xad,0xde]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error
+
+v_dual_fmamk_f32 v122, 255, 255, v161 :: v_dual_fmamk_f32 v123, 255, 255, v162
+// GFX11: encoding: [0xff,0x42,0x85,0xc8,0xff,0x44,0x7b,0x7a,0xff,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error
+
+v_dual_mov_b32 v255, v1 :: v_dual_fmamk_f32 v6, v255, 0xaf123456, v3
+// GFX11: encoding: [0x01,0x01,0x04,0xca,0xff,0x07,0x06,0xff,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error
+
+v_dual_add_f32 v5, 0xaf123456, v2 :: v_dual_fmaak_f32 v6, v3, v1, 0xaf123456 ;
+// GFX11: encoding: [0xff,0x04,0x02,0xc9,0x03,0x03,0x06,0x05,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error
+
+;Illegal, but assembler does not check register or literal constraints for VOPD
+;v_dual_fmamk_f32 v122, v74, 0xdeadbeef, v161 :: v_dual_fmamk_f32 v122, v74, 0xa0172923, v161
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_all.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_all.txt
index 56b373aac41df..5ae54762dbe4a 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_all.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_all.txt
@@ -14571,6 +14571,66 @@
# GFX11: v_dot8_u32_u4 v0, v1, v2, v3 ; encoding: [0x00,0x40,0x19,0xcc,0x01,0x05,0x0e,0x1c]
0x00,0x40,0x19,0xcc,0x01,0x05,0x0e,0x1c
+# W32: v_dual_add_f32 v5, 0xaf123456, v2 :: v_dual_fmaak_f32 v6, v3, v1, 0xaf123456 ; encoding: [0xff,0x04,0x02,0xc9,0x03,0x03,0x06,0x05,0x56,0x34,0x12,0xaf]
+0xff,0x04,0x02,0xc9,0x03,0x03,0x06,0x05,0x56,0x34,0x12,0xaf
+
+# W32: v_dual_cndmask_b32 v20, v21, v22 :: v_dual_mov_b32 v41, v42 ; encoding: [0x15,0x2d,0x50,0xca,0x2a,0x01,0x28,0x14]
+0x15,0x2d,0x50,0xca,0x2a,0x01,0x28,0x14
+
+# W32: v_dual_fmaak_f32 v122, s74, v161, 0x402f6c8b :: v_dual_and_b32 v247, v160, v98 ; encoding: [0x4a,0x42,0x65,0xc8,0xa0,0xc5,0xf6,0x7a,0x8b,0x6c,0x2f,0x40]
+0x4a,0x42,0x65,0xc8,0xa0,0xc5,0xf6,0x7a,0x8b,0x6c,0x2f,0x40
+
+# W32: v_dual_fmaak_f32 v122, s74, v161, 0x402f6c8b :: v_dual_fmamk_f32 v3, v6, 0x402f6c8b, v1 ; encoding: [0x4a,0x42,0x45,0xc8,0x06,0x03,0x02,0x7a,0x8b,0x6c,0x2f,0x40]
+0x4a,0x42,0x45,0xc8,0x06,0x03,0x02,0x7a,0x8b,0x6c,0x2f,0x40
+
+# W32: v_dual_fmaak_f32 v6, v3, v1, 0xaf123456 :: v_dual_add_f32 v5, 0xaf123456, v2 ; encoding: [0x03,0x03,0x48,0xc8,0xff,0x04,0x04,0x06,0x56,0x34,0x12,0xaf]
+0x03,0x03,0x48,0xc8,0xff,0x04,0x04,0x06,0x56,0x34,0x12,0xaf
+
+# W32: v_dual_fmac_f32 v0, v1, v2 :: v_dual_fmamk_f32 v3, v6, 0x3f700000, v1 ; encoding: [0x01,0x05,0x04,0xc8,0x06,0x03,0x02,0x00,0x00,0x00,0x70,0x3f]
+0x01,0x05,0x04,0xc8,0x06,0x03,0x02,0x00,0x00,0x00,0x70,0x3f
+
+# W32: v_dual_fmamk_f32 v122, 0xdeadbeef, 0xdeadbeef, v161 :: v_dual_fmamk_f32 v123, 0xdeadbeef, 0xdeadbeef, v162 ; encoding: [0xff,0x42,0x85,0xc8,0xff,0x44,0x7b,0x7a,0xef,0xbe,0xad,0xde]
+0xff,0x42,0x85,0xc8,0xff,0x44,0x7b,0x7a,0xef,0xbe,0xad,0xde
+
+# W32: v_dual_fmamk_f32 v122, 0xff, 0xff, v161 :: v_dual_fmamk_f32 v123, 0xff, 0xff, v162 ; encoding: [0xff,0x42,0x85,0xc8,0xff,0x44,0x7b,0x7a,0xff,0x00,0x00,0x00]
+0xff,0x42,0x85,0xc8,0xff,0x44,0x7b,0x7a,0xff,0x00,0x00,0x00
+
+# W32: v_dual_fmamk_f32 v122, v74, 0xa0172923, v161 :: v_dual_lshlrev_b32 v247, v160, v99 ; encoding: [0x4a,0x43,0xa3,0xc8,0xa0,0xc7,0xf6,0x7a,0x23,0x29,0x17,0xa0]
+0x4a,0x43,0xa3,0xc8,0xa0,0xc7,0xf6,0x7a,0x23,0x29,0x17,0xa0
+
+# W32: v_dual_fmaak_f32 v122, s74, v161, 0x402f6c8b :: v_dual_mov_b32 v247, 0x402f6c8b ; encoding: [0x4a,0x42,0x51,0xc8,0xff,0x00,0xf6,0x7a,0x8b,0x6c,0x2f,0x40]
+0x4a,0x42,0x51,0xc8,0xff,0x00,0xf6,0x7a,0x8b,0x6c,0x2f,0x40
+
+# W32: v_dual_fmaak_f32 v122, s74, v161, 0x402f6c8b :: v_dual_mov_b32 v247, 2 ; encoding: [0x4a,0x42,0x51,0xc8,0x82,0x00,0xf6,0x7a,0x8b,0x6c,0x2f,0x40]
+0x4a,0x42,0x51,0xc8,0x82,0x00,0xf6,0x7a,0x8b,0x6c,0x2f,0x40
+
+# W32: v_dual_min_f32 v0, v1, v2 :: v_dual_max_f32 v3, v4, v5 ; encoding: [0x01,0x05,0xd4,0xca,0x04,0x0b,0x02,0x00]
+0x01,0x05,0xd4,0xca,0x04,0x0b,0x02,0x00
+
+# W32: v_dual_mov_b32 v247, v160 :: v_dual_fmaak_f32 v122, s74, v161, 0x402f6c8b ; encoding: [0xa0,0x01,0x02,0xca,0x4a,0x42,0x7b,0xf7,0x8b,0x6c,0x2f,0x40]
+0xa0,0x01,0x02,0xca,0x4a,0x42,0x7b,0xf7,0x8b,0x6c,0x2f,0x40
+
+# W32: v_dual_mov_b32 v255, v1 :: v_dual_fmamk_f32 v6, v255, 0xaf123456, v3 ; encoding: [0x01,0x01,0x04,0xca,0xff,0x07,0x06,0xff,0x56,0x34,0x12,0xaf]
+0x01,0x01,0x04,0xca,0xff,0x07,0x06,0xff,0x56,0x34,0x12,0xaf
+
+# W32: v_dual_mul_dx9_zero_f32 v11, 0x24681357, v2 :: v_dual_dot2acc_f32_f16 v10, 0x24681357, v5 ; encoding: [0xff,0x04,0xd8,0xc9,0xff,0x0a,0x0a,0x0b,0x57,0x13,0x68,0x24]
+0xff,0x04,0xd8,0xc9,0xff,0x0a,0x0a,0x0b,0x57,0x13,0x68,0x24
+
+# W32: v_dual_mul_f32 v0, s1, v2 :: v_dual_mul_f32 v3, s4, v5 ; encoding: [0x01,0x04,0xc6,0xc8,0x04,0x0a,0x02,0x00]
+0x01,0x04,0xc6,0xc8,0x04,0x0a,0x02,0x00
+
+# W32: v_dual_mul_f32 v0, v0, v2 :: v_dual_mul_f32 v1, v1, v3 ; encoding: [0x00,0x05,0xc6,0xc8,0x01,0x07,0x00,0x00]
+0x00,0x05,0xc6,0xc8,0x01,0x07,0x00,0x00
+
+# W32: v_dual_mul_f32 v11, 0x24681357, v2 :: v_dual_mul_f32 v10, 0x24681357, v5 ; encoding: [0xff,0x04,0xc6,0xc8,0xff,0x0a,0x0a,0x0b,0x57,0x13,0x68,0x24]
+0xff,0x04,0xc6,0xc8,0xff,0x0a,0x0a,0x0b,0x57,0x13,0x68,0x24
+
+# W32: v_dual_mul_f32 v11, v1, v2 :: v_dual_mul_f32 v10, 0x24681357, v5 ; encoding: [0x01,0x05,0xc6,0xc8,0xff,0x0a,0x0a,0x0b,0x57,0x13,0x68,0x24]
+0x01,0x05,0xc6,0xc8,0xff,0x0a,0x0a,0x0b,0x57,0x13,0x68,0x24
+
+# W32: v_dual_subrev_f32 v0, v1, v2 :: v_dual_add_nc_u32 v3, v4, v5 ; encoding: [0x01,0x05,0xa0,0xc9,0x04,0x0b,0x02,0x00]
+0x01,0x05,0xa0,0xc9,0x04,0x0b,0x02,0x00
+
# GFX11: v_exp_f32_e32 v255, v1 ; encoding: [0x01,0x4b,0xfe,0x7f]
0x01,0x4b,0xfe,0x7f
More information about the llvm-commits
mailing list