[llvm] [AMDGPU] MC support for v_fmaak_f64/v_fmamk_f64 gfx1250 instructions (PR #148282)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Jul 11 12:52:56 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Stanislav Mekhanoshin (rampitec)
<details>
<summary>Changes</summary>
---
Patch is 42.84 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/148282.diff
17 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/AMDGPU.td (+4)
- (modified) llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp (+26-4)
- (modified) llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp (+27)
- (modified) llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h (+1)
- (modified) llvm/lib/Target/AMDGPU/GCNSubtarget.h (+2)
- (modified) llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp (+12)
- (modified) llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h (+2)
- (modified) llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp (+17-7)
- (modified) llvm/lib/Target/AMDGPU/SIDefines.h (+2-1)
- (modified) llvm/lib/Target/AMDGPU/SIInstrInfo.cpp (+2)
- (modified) llvm/lib/Target/AMDGPU/SIInstrInfo.td (+6)
- (modified) llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h (+1)
- (modified) llvm/lib/Target/AMDGPU/VOP2Instructions.td (+47-4)
- (added) llvm/test/MC/AMDGPU/gfx1250_asm_vop2.s (+163)
- (added) llvm/test/MC/AMDGPU/gfx1250_asm_vop2_err.s (+21)
- (modified) llvm/test/MC/AMDGPU/gfx1250_err.s (+40)
- (added) llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop2.txt (+110)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index 91ace4d2b7f16..31420caca0899 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -2488,6 +2488,10 @@ def HasFmaakFmamkF32Insts :
Predicate<"Subtarget->hasFmaakFmamkF32Insts()">,
AssemblerPredicate<(any_of FeatureGFX10Insts, FeatureGFX940Insts)>;
+def HasFmaakFmamkF64Insts :
+ Predicate<"Subtarget->hasFmaakFmamkF64Insts()">,
+ AssemblerPredicate<(any_of FeatureGFX1250Insts)>;
+
def HasImageInsts : Predicate<"Subtarget->hasImageInsts()">,
AssemblerPredicate<(all_of FeatureImageInsts)>;
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 35de49c27b32a..886de501dadac 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -951,6 +951,10 @@ class AMDGPUOperand : public MCParsedAsmOperand {
return isLiteralImm(MVT::f16);
}
+ bool isKImmFP64() const {
+ return isLiteralImm(MVT::f64);
+ }
+
bool isMem() const override {
return false;
}
@@ -2003,6 +2007,7 @@ static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
case AMDGPU::OPERAND_REG_INLINE_C_INT64:
case AMDGPU::OPERAND_REG_INLINE_C_FP64:
case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
+ case AMDGPU::OPERAND_KIMM64:
return &APFloat::IEEEdouble();
case AMDGPU::OPERAND_REG_IMM_FP16:
case AMDGPU::OPERAND_REG_INLINE_C_FP16:
@@ -2343,6 +2348,11 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
// in predicate methods (isLiteralImm())
llvm_unreachable("fp literal in 64-bit integer instruction.");
+ case AMDGPU::OPERAND_KIMM64:
+ Inst.addOperand(MCOperand::createImm(Val));
+ setImmKindMandatoryLiteral();
+ return;
+
case AMDGPU::OPERAND_REG_IMM_BF16:
case AMDGPU::OPERAND_REG_INLINE_C_BF16:
case AMDGPU::OPERAND_REG_INLINE_C_V2BF16:
@@ -2548,6 +2558,13 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue()));
setImmKindMandatoryLiteral();
return;
+ case AMDGPU::OPERAND_KIMM64:
+ if ((isInt<32>(Val) || isUInt<32>(Val)) && !getModifiers().Lit64)
+ Val <<= 32;
+
+ Inst.addOperand(MCOperand::createImm(Val));
+ setImmKindMandatoryLiteral();
+ return;
default:
llvm_unreachable("invalid operand size");
}
@@ -4992,7 +5009,7 @@ bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst,
unsigned NumExprs = 0;
unsigned NumLiterals = 0;
- uint32_t LiteralValue;
+ uint64_t LiteralValue;
for (int OpIdx : OpIndices) {
if (OpIdx == -1)
@@ -5006,16 +5023,21 @@ bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst,
if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
uint64_t Value = static_cast<uint64_t>(MO.getImm());
- bool IsFP64 = AMDGPU::isSISrcFPOperand(Desc, OpIdx) &&
+ bool IsForcedFP64 =
+ Desc.operands()[OpIdx].OperandType == AMDGPU::OPERAND_KIMM64 ||
+ (Desc.operands()[OpIdx].OperandType == AMDGPU::OPERAND_REG_IMM_FP64 &&
+ HasMandatoryLiteral);
+ bool IsFP64 = (IsForcedFP64 || AMDGPU::isSISrcFPOperand(Desc, OpIdx)) &&
AMDGPU::getOperandSize(Desc.operands()[OpIdx]) == 8;
bool IsValid32Op = AMDGPU::isValid32BitLiteral(Value, IsFP64);
- if (!IsValid32Op && !isInt<32>(Value) && !isUInt<32>(Value)) {
+ if (!IsValid32Op && !isInt<32>(Value) && !isUInt<32>(Value) &&
+ !IsForcedFP64 && (!has64BitLiterals() || Desc.getSize() != 4)) {
Error(getLitLoc(Operands), "invalid operand for instruction");
return false;
}
- if (IsFP64 && IsValid32Op)
+ if (IsFP64 && IsValid32Op && !IsForcedFP64)
Value = Hi_32(Value);
if (NumLiterals == 0 || LiteralValue != Value) {
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index 7b1ea11d58168..a19927b2c2f0c 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -353,6 +353,13 @@ static DecodeStatus decodeOperand_KImmFP(MCInst &Inst, unsigned Imm,
return addOperand(Inst, DAsm->decodeMandatoryLiteralConstant(Imm));
}
+static DecodeStatus decodeOperand_KImmFP64(MCInst &Inst, uint64_t Imm,
+ uint64_t Addr,
+ const MCDisassembler *Decoder) {
+ const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
+ return addOperand(Inst, DAsm->decodeMandatoryLiteral64Constant(Imm));
+}
+
static DecodeStatus decodeOperandVOPDDstY(MCInst &Inst, unsigned Val,
uint64_t Addr, const void *Decoder) {
const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
@@ -613,6 +620,15 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
tryDecodeInst(DecoderTableGFX12W6496, MI, DecW, Address, CS))
break;
+ if (STI.hasFeature(AMDGPU::Feature64BitLiterals)) {
+ // Return 8 bytes for a potential literal.
+ Bytes = Bytes_.slice(4, MaxInstBytesNum - 4);
+
+ if (isGFX1250() &&
+ tryDecodeInst(DecoderTableGFX125096, MI, DecW, Address, CS))
+ break;
+ }
+
// Reinitialize Bytes
Bytes = Bytes_.slice(0, MaxInstBytesNum);
@@ -1467,6 +1483,17 @@ AMDGPUDisassembler::decodeMandatoryLiteralConstant(unsigned Val) const {
return MCOperand::createImm(Literal);
}
+MCOperand
+AMDGPUDisassembler::decodeMandatoryLiteral64Constant(uint64_t Val) const {
+ if (HasLiteral) {
+ if (Literal64 != Val)
+ return errOperand(Val, "More than one unique literal is illegal");
+ }
+ HasLiteral = true;
+ Literal = Literal64 = Val;
+ return MCOperand::createImm(Literal64);
+}
+
MCOperand AMDGPUDisassembler::decodeLiteralConstant(bool ExtendFP64) const {
// For now all literal constants are supposed to be unsigned integer
// ToDo: deal with signed/unsigned 64-bit integer constants
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
index 8927f208fd2af..84041001b6ba7 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
@@ -178,6 +178,7 @@ class AMDGPUDisassembler : public MCDisassembler {
static MCOperand decodeIntImmed(unsigned Imm);
MCOperand decodeMandatoryLiteralConstant(unsigned Imm) const;
+ MCOperand decodeMandatoryLiteral64Constant(uint64_t Imm) const;
MCOperand decodeLiteralConstant(bool ExtendFP64) const;
MCOperand decodeLiteral64Constant() const;
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index 8ea60871b6613..e6dd98a104209 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -1100,6 +1100,8 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
return getGeneration() >= GFX10 || hasGFX940Insts();
}
+ bool hasFmaakFmamkF64Insts() const { return hasGFX1250Insts(); }
+
bool hasImageInsts() const {
return HasImageInsts;
}
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
index 8ce12dfeda779..cb6319ed627ca 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
@@ -76,6 +76,18 @@ void AMDGPUInstPrinter::printU32ImmOperand(const MCInst *MI, unsigned OpNo,
O << formatHex(MI->getOperand(OpNo).getImm() & 0xffffffff);
}
+void AMDGPUInstPrinter::printFP64ImmOperand(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ // KIMM64
+ // This part needs to align with AMDGPUInstPrinter::printImmediate64.
+ uint64_t Imm = MI->getOperand(OpNo).getImm();
+ if (STI.hasFeature(AMDGPU::Feature64BitLiterals) && Lo_32(Imm))
+ O << "lit64(" << formatHex(static_cast<uint64_t>(Imm)) << ')';
+ else
+ O << formatHex(static_cast<uint64_t>(Hi_32(Imm)));
+}
+
void AMDGPUInstPrinter::printNamedBit(const MCInst *MI, unsigned OpNo,
raw_ostream &O, StringRef BitName) {
if (MI->getOperand(OpNo).getImm()) {
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
index 071e0a9d0fee6..fb803b1f81342 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
@@ -42,6 +42,8 @@ class AMDGPUInstPrinter : public MCInstPrinter {
void printU16ImmDecOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printU32ImmOperand(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O);
+ void printFP64ImmOperand(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
void printNamedBit(const MCInst *MI, unsigned OpNo, raw_ostream &O,
StringRef BitName);
void printOffset(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp
index f0f655e93f4cc..2cd8af918f324 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp
@@ -87,9 +87,10 @@ class AMDGPUMCCodeEmitter : public MCCodeEmitter {
const MCSubtargetInfo &STI) const;
/// Encode an fp or int literal.
- std::optional<uint32_t> getLitEncoding(const MCOperand &MO,
- const MCOperandInfo &OpInfo,
- const MCSubtargetInfo &STI) const;
+ std::optional<uint64_t>
+ getLitEncoding(const MCOperand &MO, const MCOperandInfo &OpInfo,
+ const MCSubtargetInfo &STI,
+ bool HasMandatoryLiteral = false) const;
void getBinaryCodeForInstr(const MCInst &MI, SmallVectorImpl<MCFixup> &Fixups,
APInt &Inst, APInt &Scratch,
@@ -265,10 +266,11 @@ static uint32_t getLit64Encoding(uint64_t Val, const MCSubtargetInfo &STI,
: 255;
}
-std::optional<uint32_t>
+std::optional<uint64_t>
AMDGPUMCCodeEmitter::getLitEncoding(const MCOperand &MO,
const MCOperandInfo &OpInfo,
- const MCSubtargetInfo &STI) const {
+ const MCSubtargetInfo &STI,
+ bool HasMandatoryLiteral) const {
int64_t Imm;
if (MO.isExpr()) {
if (!MO.getExpr()->evaluateAsAbsolute(Imm))
@@ -303,9 +305,13 @@ AMDGPUMCCodeEmitter::getLitEncoding(const MCOperand &MO,
case AMDGPU::OPERAND_REG_INLINE_C_FP64:
case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
- case AMDGPU::OPERAND_REG_IMM_FP64:
return getLit64Encoding(static_cast<uint64_t>(Imm), STI, true);
+ case AMDGPU::OPERAND_REG_IMM_FP64: {
+ auto Enc = getLit64Encoding(static_cast<uint64_t>(Imm), STI, true);
+ return (HasMandatoryLiteral && Enc == 255) ? 254 : Enc;
+ }
+
case AMDGPU::OPERAND_REG_IMM_INT16:
case AMDGPU::OPERAND_REG_INLINE_C_INT16:
return getLit16IntEncoding(static_cast<uint32_t>(Imm), STI);
@@ -339,6 +345,7 @@ AMDGPUMCCodeEmitter::getLitEncoding(const MCOperand &MO,
case AMDGPU::OPERAND_KIMM32:
case AMDGPU::OPERAND_KIMM16:
+ case AMDGPU::OPERAND_KIMM64:
return MO.getImm();
default:
llvm_unreachable("invalid operand size");
@@ -685,7 +692,10 @@ void AMDGPUMCCodeEmitter::getMachineOpValueCommon(
const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
if (AMDGPU::isSISrcOperand(Desc, OpNo)) {
- if (auto Enc = getLitEncoding(MO, Desc.operands()[OpNo], STI)) {
+ bool HasMandatoryLiteral =
+ AMDGPU::hasNamedOperand(MI.getOpcode(), AMDGPU::OpName::imm);
+ if (auto Enc = getLitEncoding(MO, Desc.operands()[OpNo], STI,
+ HasMandatoryLiteral)) {
Op = *Enc;
return;
}
diff --git a/llvm/lib/Target/AMDGPU/SIDefines.h b/llvm/lib/Target/AMDGPU/SIDefines.h
index 7875b2812fe63..a8649970aa825 100644
--- a/llvm/lib/Target/AMDGPU/SIDefines.h
+++ b/llvm/lib/Target/AMDGPU/SIDefines.h
@@ -229,6 +229,7 @@ enum OperandType : unsigned {
/// Operand with 32-bit immediate that uses the constant bus.
OPERAND_KIMM32,
OPERAND_KIMM16,
+ OPERAND_KIMM64,
/// Operands with an AccVGPR register or inline constant
OPERAND_REG_INLINE_AC_INT32,
@@ -254,7 +255,7 @@ enum OperandType : unsigned {
OPERAND_SRC_LAST = OPERAND_REG_INLINE_C_LAST,
OPERAND_KIMM_FIRST = OPERAND_KIMM32,
- OPERAND_KIMM_LAST = OPERAND_KIMM16
+ OPERAND_KIMM_LAST = OPERAND_KIMM64
};
}
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index ec7ef66f2c1aa..ca3af3b48a600 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -4442,6 +4442,7 @@ bool SIInstrInfo::isInlineConstant(int64_t Imm, uint8_t OperandType) const {
}
case AMDGPU::OPERAND_KIMM32:
case AMDGPU::OPERAND_KIMM16:
+ case AMDGPU::OPERAND_KIMM64:
return false;
case AMDGPU::OPERAND_INPUT_MODS:
case MCOI::OPERAND_IMMEDIATE:
@@ -4867,6 +4868,7 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
break;
case MCOI::OPERAND_IMMEDIATE:
case AMDGPU::OPERAND_KIMM32:
+ case AMDGPU::OPERAND_KIMM64:
// Check if this operand is an immediate.
// FrameIndex operands will be replaced by immediates, so they are
// allowed.
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index aa0e1fe529d70..5e41f875d980a 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -1316,6 +1316,12 @@ def KImmFP32 : KImmFPOperand<i32>;
// constant bus.
def KImmFP16 : KImmFPOperand<i16>;
+// 64-bit VALU immediate operand that uses the constant bus.
+def KImmFP64 : KImmFPOperand<i64> {
+ let DecoderMethod = "decodeOperand_KImmFP64";
+ let PrintMethod = "printFP64ImmOperand";
+}
+
class FPInputModsMatchClass <int opSize> : AsmOperandClass {
let Name = "RegOrImmWithFP"#opSize#"InputMods";
let ParserMethod = "parseRegOrImmWithFPInputMods";
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index e6840d97e3f3d..6708e0a3f4549 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -1603,6 +1603,7 @@ inline unsigned getOperandSize(const MCOperandInfo &OpInfo) {
case AMDGPU::OPERAND_REG_INLINE_C_INT64:
case AMDGPU::OPERAND_REG_INLINE_C_FP64:
case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
+ case AMDGPU::OPERAND_KIMM64:
return 8;
case AMDGPU::OPERAND_REG_IMM_INT16:
diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
index 834dc945e64f6..25c6cbc3e1ab5 100644
--- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
@@ -36,6 +36,20 @@ class VOP2_MADKe <bits<6> op, VOPProfile P> : Enc64 {
let Inst{63-32} = imm;
}
+class VOP2_MADK64e <bits<6> op, VOPProfile P> : Enc96 {
+ bits<8> vdst;
+ bits<9> src0;
+ bits<8> src1;
+ bits<64> imm;
+
+ let Inst{8-0} = !if(P.HasSrc0, src0, 0);
+ let Inst{16-9} = !if(P.HasSrc1, src1, 0);
+ let Inst{24-17} = !if(P.EmitDst, vdst, 0);
+ let Inst{30-25} = op;
+ let Inst{31} = 0x0; // encoding
+ let Inst{95-32} = imm;
+}
+
class VOP2_SDWAe <bits<6> op, VOPProfile P> : VOP_SDWAe <P> {
bits<8> vdst;
bits<8> src1;
@@ -375,10 +389,14 @@ class VOP_MADK_Base<ValueType vt> : VOPProfile <[vt, vt, vt, vt]> {
}
class VOP_MADAK <ValueType vt> : VOP_MADK_Base<vt> {
- field Operand ImmOpType = !if(!eq(vt.Size, 32), KImmFP32, KImmFP16);
+ field Operand ImmOpType = !if(!eq(vt.Size, 32), KImmFP32,
+ !if(!eq(vt.Size, 64), KImmFP64,
+ KImmFP16));
field dag Ins32 = !if(!eq(vt.Size, 32),
(ins VSrc_f32:$src0, VGPR_32:$src1, ImmOpType:$imm),
- (ins VSrc_f16:$src0, VGPR_32:$src1, ImmOpType:$imm));
+ !if(!eq(vt.Size, 64),
+ (ins VSrc_f64:$src0, VReg_64:$src1, ImmOpType:$imm),
+ (ins VSrc_f16:$src0, VGPR_32:$src1, ImmOpType:$imm)));
field dag InsVOPDX = (ins VSrc_f32:$src0X, VGPR_32:$vsrc1X, ImmOpType:$imm);
let InsVOPDX_immX = (ins VSrc_f32:$src0X, VGPR_32:$vsrc1X, ImmOpType:$immX);
field dag InsVOPDY = (ins VSrc_f32:$src0Y, VGPR_32:$vsrc1Y, ImmOpType:$imm);
@@ -404,12 +422,17 @@ def VOP_MADAK_F16_fake16 : VOP_MADAK <f16> {
let Ins32 = (ins VSrcFake16_f16_Lo128:$src0, VGPRSrc_32_Lo128:$src1, ImmOpType:$imm);
}
def VOP_MADAK_F32 : VOP_MADAK <f32>;
+def VOP_MADAK_F64 : VOP_MADAK <f64>;
class VOP_MADMK <ValueType vt> : VOP_MADK_Base<vt> {
- field Operand ImmOpType = !if(!eq(vt.Size, 32), KImmFP32, KImmFP16);
+ field Operand ImmOpType = !if(!eq(vt.Size, 32), KImmFP32,
+ !if(!eq(vt.Size, 64), KImmFP64,
+ KImmFP16));
field dag Ins32 = !if(!eq(vt.Size, 32),
(ins VSrc_f32:$src0, ImmOpType:$imm, VGPR_32:$src1),
- (ins VSrc_f16:$src0, ImmOpType:$imm, VGPR_32:$src1));
+ !if(!eq(vt.Size, 64),
+ (ins VSrc_f64:$src0, ImmOpType:$imm, VReg_64:$src1),
+ (ins VSrc_f16:$src0, ImmOpType:$imm, VGPR_32:$src1)));
field dag InsVOPDX = (ins VSrc_f32:$src0X, ImmOpType:$imm, VGPR_32:$vsrc1X);
let InsVOPDX_immX = (ins VSrc_f32:$src0X, ImmOpType:$immX, VGPR_32:$vsrc1X);
field dag InsVOPDY = (ins VSrc_f32:$src0Y, ImmOpType:$imm, VGPR_32:$vsrc1Y);
@@ -435,6 +458,7 @@ def VOP_MADMK_F16_fake16 : VOP_MADMK <f16> {
let Ins32 = (ins VSrcFake16_f16_Lo128:$src0, ImmOpType:$imm, VGPRSrc_32_Lo128:$src1);
}
def VOP_MADMK_F32 : VOP_MADMK <f32>;
+def VOP_MADMK_F64 : VOP_MADMK <f64>;
// Returns the vreg register class to use for sources of VOP3 instructions for the
// given VT.
@@ -1296,6 +1320,14 @@ let isCommutable = 1 in
def V_FMAAK_F32 : VOP2_Pseudo<"v_fmaak_f32", VOP_MADAK_F32, [], "">, VOPD_Component<0x1, "v_fmaak_f32">;
} // End SubtargetPredicate = HasFmaakFmamkF32Insts, isReMaterializable = 1, CanBeVOPD3X = 0, FixedSize = 1
+let SubtargetPredicate = HasFmaakFmamkF64Insts, isReMaterializable = 1,
+ FixedSize = 1, Size = 12, SchedRW = [Write64Bit] in {
+def V_FMAMK_F64 : VOP2_Pseudo<"v_fmamk_f64", VOP_MADMK_F64, [], "">;
+
+let isCommutable = 1 in
+def V_FMAAK_F64 : VOP2_Pseudo<"v_fmaak_f64", VOP_MADAK_F64, [], "">;
+} // End SubtargetPredicate = HasFmaakFmamkF64Insts, isReMaterializable = 1, FixedSize = 1, Size = 12, SchedRW = [Write64Bit]
+
let SubtargetPredicate = HasPkFmacF16Inst in {
defm V_PK_FMAC_F16 : VOP2Inst<"v_pk_fmac_f16", VOP_V2F16_V2F16_V2F16>;
} // End SubtargetPredicate = HasPkFmacF16Inst
@@ -1518,6 +1550,14 @@ multiclass VOP2Only_Real_MADK<GFXGen Gen, bits<6> op> {
VOP2_MADKe<op{5-0}, !cast<VOP2_Pseudo>(NAME).Pfl>;
}
+multiclass VOP2Only_Real_MADK64<GFXGen Gen, bits<6> op> {
+ def Gen.Suffix :
+ VOP2_Real_Gen<!cast<VOP2_Pseudo>(NAME), Gen>,
+ VOP2_MADK64e<op{5-0}, !cast<VOP2_Pseudo>(NAME).Pfl> {
+ let DecoderNamespace = Gen.DecoderNamespace;
+ }
+}
+
multiclass VOP2Only_Real_MADK_with_name<GFXGen Gen, bits<6> op, string asmName,
string opName = NAME> {
def Gen.Suffix :
@@ -1792,6 +1832,9 @@ let SubtargetPredicate = isGFX12Plus in {
V_SUBBREV_U32_e32, V_SUBREV_CO_CI_U32_e32_gfx12, "v_subrev_co_ci_u32">;
} // End SubtargetPredicate = isGFX12Plus
+defm V_FMAMK_F64 : VOP2Only_Real_MADK64<GFX1250Gen, 0x23>;
+defm V_FMAAK_F64 : VOP2Only_Real_MADK64<GFX1250Gen, 0x24>;
+...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/148282
More information about the llvm-commits
mailing list