[llvm] [AMDGPU][GFX12] VOP encoding and codegen - add support for v_cvt fp8/… (PR #78414)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Jan 17 01:14:03 PST 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-mc
@llvm/pr-subscribers-backend-amdgpu
Author: Mariusz Sikora (mariusz-sikora-at-amd)
<details>
<summary>Changes</summary>
…bf8 instructions
Add VOP1, VOP1_DPP8, VOP1_DPP16, VOP3, VOP3_DPP8, VOP3_DPP16
instructions that were supported on GFX940 (MI300):
- V_CVT_F32_FP8
- V_CVT_F32_BF8
- V_CVT_PK_F32_FP8
- V_CVT_PK_F32_BF8
- V_CVT_PK_FP8_F32
- V_CVT_PK_BF8_F32
- V_CVT_SR_FP8_F32
- V_CVT_SR_BF8_F32
---
Patch is 106.83 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/78414.diff
29 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/AMDGPU.td (+3-1)
- (modified) llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp (+66-2)
- (modified) llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp (+49-1)
- (modified) llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp (+3-1)
- (modified) llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp (+11)
- (modified) llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h (+3)
- (modified) llvm/lib/Target/AMDGPU/VOP1Instructions.td (+93-5)
- (modified) llvm/lib/Target/AMDGPU/VOP3Instructions.td (+49-4)
- (added) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.dpp.ll (+105)
- (added) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.dpp.mir (+197)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.ll (+309-61)
- (modified) llvm/test/MC/AMDGPU/gfx12_asm_vop1.s (+45)
- (modified) llvm/test/MC/AMDGPU/gfx12_asm_vop1_dpp16.s (+12)
- (modified) llvm/test/MC/AMDGPU/gfx12_asm_vop1_dpp8.s (+12)
- (modified) llvm/test/MC/AMDGPU/gfx12_asm_vop3.s (+36)
- (modified) llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp16.s (+108)
- (modified) llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp8.s (+48)
- (modified) llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop1.s (+138)
- (modified) llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop1_dpp16.s (+12)
- (modified) llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop1_dpp8.s (+12)
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop1.txt (+36)
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop1_dpp16.txt (+12)
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop1_dpp8.txt (+12)
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3.txt (+36)
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp16.txt (+108)
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp8.txt (+48)
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1.txt (+36)
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1_dpp16.txt (+12)
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1_dpp8.txt (+12)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index c1c863d885c3a9e..852a99786efffd6 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -1495,6 +1495,7 @@ def FeatureISAVersion12 : FeatureSet<
FeatureFlatAtomicFaddF32Inst,
FeatureImageInsts,
FeatureExtendedImageInsts,
+ FeatureFP8Insts,
FeaturePackedTID,
FeatureVcmpxPermlaneHazard,
FeatureSALUFloatInsts,
@@ -1502,7 +1503,8 @@ def FeatureISAVersion12 : FeatureSet<
FeatureHasRestrictedSOffset,
FeatureVGPRSingleUseHintInsts,
FeatureMADIntraFwdBug,
- FeatureScalarDwordx3Loads]>;
+ FeatureScalarDwordx3Loads,
+ FeatureDPPSrc1SGPR]>;
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index ba79affe683d6f7..b2b81446016ec71 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -3500,6 +3500,9 @@ bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
return !isInlineConstant(Inst, OpIdx);
} else if (MO.isReg()) {
auto Reg = MO.getReg();
+ if (!Reg) {
+ return false;
+ }
const MCRegisterInfo *TRI = getContext().getRegisterInfo();
auto PReg = mc2PseudoReg(Reg);
return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
@@ -8273,6 +8276,16 @@ void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
}
+ if (isVOP1Cvt_F32_Fp8_Bf8_e64(Opc) &&
+ Opc != AMDGPU::V_CVT_PK_F32_BF8_e64_gfx12 &&
+ Opc != AMDGPU::V_CVT_PK_F32_FP8_e64_gfx12) {
+ AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I++]);
+ Op.addRegOrImmWithFPInputModsOperands(Inst, 1); // src0
+ // Add dummy src1
+ Inst.addOperand(MCOperand::createImm(0));
+ Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(0, getSTI())));
+ }
+
for (unsigned E = Operands.size(); I != E; ++I) {
AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
@@ -8321,12 +8334,20 @@ void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
if (Opc == AMDGPU::V_CVT_SR_BF8_F32_vi ||
- Opc == AMDGPU::V_CVT_SR_FP8_F32_vi) {
+ Opc == AMDGPU::V_CVT_SR_FP8_F32_vi ||
+ Opc == AMDGPU::V_CVT_SR_BF8_F32_e64_gfx12 ||
+ Opc == AMDGPU::V_CVT_SR_FP8_F32_e64_gfx12) {
Inst.addOperand(MCOperand::createImm(0)); // Placeholder for src2_mods
Inst.addOperand(Inst.getOperand(0));
}
- if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in)) {
+ // Adding vdst_in operand is already covered for these DPP instructions in
+ // cvtVOP3DPP.
+ if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in) &&
+ !(Opc == AMDGPU::V_CVT_PK_BF8_F32_e64_dpp_gfx12 ||
+ Opc == AMDGPU::V_CVT_PK_FP8_F32_e64_dpp_gfx12 ||
+ Opc == AMDGPU::V_CVT_PK_BF8_F32_e64_dpp8_gfx12 ||
+ Opc == AMDGPU::V_CVT_PK_FP8_F32_e64_dpp8_gfx12)) {
assert(!IsPacked);
Inst.addOperand(Inst.getOperand(0));
}
@@ -8765,6 +8786,11 @@ void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
int OldIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::old);
int Src2ModIdx =
AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers);
+ int VdstInIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in);
+ bool IsVOP3CvtSrDpp = Opc == AMDGPU::V_CVT_SR_BF8_F32_e64_dpp8_gfx12 ||
+ Opc == AMDGPU::V_CVT_SR_FP8_F32_e64_dpp8_gfx12 ||
+ Opc == AMDGPU::V_CVT_SR_BF8_F32_e64_dpp_gfx12 ||
+ Opc == AMDGPU::V_CVT_SR_FP8_F32_e64_dpp_gfx12;
bool IsMAC = OldIdx != -1 && Src2ModIdx != -1 &&
Desc.getOperandConstraint(OldIdx, MCOI::TIED_TO) == -1;
@@ -8788,6 +8814,20 @@ void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
}
}
+ if (VdstInIdx != -1) {
+ int NumOperands = Inst.getNumOperands();
+ if (VdstInIdx == NumOperands)
+ Inst.addOperand(Inst.getOperand(0));
+ }
+
+ if (IsVOP3CvtSrDpp) {
+ int NumOperands = Inst.getNumOperands();
+ if (Src2ModIdx == NumOperands) {
+ Inst.addOperand(MCOperand::createImm(0));
+ Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(0, getSTI())));
+ }
+ }
+
auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
MCOI::TIED_TO);
if (TiedTo != -1) {
@@ -8801,6 +8841,13 @@ void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
Fi = Op.getImm();
} else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
+ if (isVOP1Cvt_F32_Fp8_Bf8_e64(Inst.getOpcode()) &&
+ Inst.getOpcode() != AMDGPU::V_CVT_PK_F32_BF8_e64_gfx12 &&
+ Inst.getOpcode() != AMDGPU::V_CVT_PK_F32_FP8_e64_gfx12) {
+ // Add dummy src1
+ Inst.addOperand(MCOperand::createImm(0));
+ Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(0, getSTI())));
+ }
} else if (Op.isReg()) {
Op.addRegOperands(Inst, 1);
} else if (Op.isImm() &&
@@ -8847,6 +8894,7 @@ void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool I
OptionalImmIndexMap OptionalIdx;
unsigned I = 1;
+ const unsigned Opc = Inst.getOpcode();
const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
@@ -8874,6 +8922,14 @@ void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool I
Op.addImmOperands(Inst, 1);
} else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
Op.addRegWithFPInputModsOperands(Inst, 2);
+ if (Opc == AMDGPU::V_CVT_F32_BF8_dpp_gfx12 ||
+ Opc == AMDGPU::V_CVT_F32_FP8_dpp_gfx12 ||
+ Opc == AMDGPU::V_CVT_F32_BF8_dpp8_gfx12 ||
+ Opc == AMDGPU::V_CVT_F32_FP8_dpp8_gfx12) {
+ // Add dummy src1
+ Inst.addOperand(MCOperand::createImm(0));
+ Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(0, getSTI())));
+ }
} else if (Op.isDppFI()) {
Fi = Op.getImm();
} else if (Op.isReg()) {
@@ -8884,6 +8940,14 @@ void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool I
} else {
if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
Op.addRegWithFPInputModsOperands(Inst, 2);
+ if (Opc == AMDGPU::V_CVT_F32_BF8_dpp_gfx12 ||
+ Opc == AMDGPU::V_CVT_F32_FP8_dpp_gfx12 ||
+ Opc == AMDGPU::V_CVT_F32_BF8_dpp8_gfx12 ||
+ Opc == AMDGPU::V_CVT_F32_FP8_dpp8_gfx12) {
+ // Add dummy src1
+ Inst.addOperand(MCOperand::createImm(0));
+ Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(0, getSTI())));
+ }
} else if (Op.isReg()) {
Op.addRegOperands(Inst, 1);
} else if (Op.isDPPCtrl()) {
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index 9dff3f6c2efd025..75d0511b567bbef 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -522,6 +522,15 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
convertVOPCDPPInst(MI); // Special VOP3 case
} else {
assert(MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3);
+
+ if (AMDGPU::isVOP1Cvt_F32_Fp8_Bf8_e64(MI.getOpcode())) {
+ // Add omod and clamp modifiers.
+ insertNamedMCOperand(MI, MCOperand::createImm(0),
+ AMDGPU::OpName::omod);
+ insertNamedMCOperand(MI, MCOperand::createImm(0),
+ AMDGPU::OpName::clamp);
+ }
+
convertVOP3DPPInst(MI); // Regular VOP3 case
}
};
@@ -691,8 +700,15 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
Res = tryDecodeInst(DecoderTableGFX1264, DecoderTableGFX12_FAKE1664, MI, QW,
Address, CS);
- if (Res)
+ if (Res) {
+ if (AMDGPU::isVOP1Cvt_F32_Fp8_Bf8_e64(MI.getOpcode())) {
+ // Add omod and clamp modifiers.
+ insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::omod);
+ insertNamedMCOperand(MI, MCOperand::createImm(0),
+ AMDGPU::OpName::clamp);
+ }
break;
+ }
Res = tryDecodeInst(DecoderTableGFX1164, DecoderTableGFX11_FAKE1664, MI, QW,
Address, CS);
@@ -708,6 +724,13 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
AMDGPU::OpName::src2_modifiers);
}
+ if (Res && (MI.getOpcode() == AMDGPU::V_CVT_SR_BF8_F32_e64_dpp ||
+ MI.getOpcode() == AMDGPU::V_CVT_SR_FP8_F32_e64_dpp)) {
+ // Insert dummy unused src2_modifiers.
+ insertNamedMCOperand(MI, MCOperand::createImm(0),
+ AMDGPU::OpName::src2_modifiers);
+ }
+
if (Res && (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::DS) &&
!AMDGPU::hasGDS(STI)) {
insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::gds);
@@ -938,6 +961,13 @@ void AMDGPUDisassembler::convertMacDPPInst(MCInst &MI) const {
// first add optional MI operands to check FI
DecodeStatus AMDGPUDisassembler::convertDPP8Inst(MCInst &MI) const {
unsigned Opc = MI.getOpcode();
+
+ if (AMDGPU::isVOP1Cvt_F32_Fp8_Bf8_e64(Opc)) {
+ // Add omod and clamp modifiers.
+ insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::omod);
+ insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::clamp);
+ }
+
if (MCII->get(Opc).TSFlags & SIInstrFlags::VOP3P) {
convertVOP3PDPPInst(MI);
} else if ((MCII->get(Opc).TSFlags & SIInstrFlags::VOPC) ||
@@ -947,6 +977,15 @@ DecodeStatus AMDGPUDisassembler::convertDPP8Inst(MCInst &MI) const {
if (isMacDPP(MI))
convertMacDPPInst(MI);
+ int VDstInIdx =
+ AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdst_in);
+ if (VDstInIdx != -1)
+ insertNamedMCOperand(MI, MI.getOperand(0), AMDGPU::OpName::vdst_in);
+
+ if (MI.getOpcode() == AMDGPU::V_CVT_SR_BF8_F32_e64_dpp8_gfx12 ||
+ MI.getOpcode() == AMDGPU::V_CVT_SR_FP8_F32_e64_dpp8_gfx12)
+ insertNamedMCOperand(MI, MI.getOperand(0), AMDGPU::OpName::src2);
+
unsigned DescNumOps = MCII->get(Opc).getNumOperands();
if (MI.getNumOperands() < DescNumOps &&
AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
@@ -973,6 +1012,15 @@ DecodeStatus AMDGPUDisassembler::convertVOP3DPPInst(MCInst &MI) const {
if (isMacDPP(MI))
convertMacDPPInst(MI);
+ int VDstInIdx =
+ AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdst_in);
+ if (VDstInIdx != -1)
+ insertNamedMCOperand(MI, MI.getOperand(0), AMDGPU::OpName::vdst_in);
+
+ if (MI.getOpcode() == AMDGPU::V_CVT_SR_BF8_F32_e64_dpp_gfx12 ||
+ MI.getOpcode() == AMDGPU::V_CVT_SR_FP8_F32_e64_dpp_gfx12)
+ insertNamedMCOperand(MI, MI.getOperand(0), AMDGPU::OpName::src2);
+
unsigned Opc = MI.getOpcode();
unsigned DescNumOps = MCII->get(Opc).getNumOperands();
if (MI.getNumOperands() < DescNumOps &&
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
index 6c7977e22599c6e..1fc70f0bbbd2d9a 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
@@ -1300,7 +1300,9 @@ void AMDGPUInstPrinter::printOpSel(const MCInst *MI, unsigned,
const MCSubtargetInfo &STI,
raw_ostream &O) {
unsigned Opc = MI->getOpcode();
- if (isPermlane16(Opc)) {
+ if (isPermlane16(Opc) || (isVOP1Cvt_F32_Fp8_Bf8_e64(Opc) &&
+ Opc != AMDGPU::V_CVT_PK_F32_BF8_e64_gfx12 &&
+ Opc != AMDGPU::V_CVT_PK_F32_FP8_e64_gfx12)) {
auto FIN = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
auto BCN = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1_modifiers);
unsigned FI = !!(MI->getOperand(FIN).getImm() & SISrcMods::OP_SEL_0);
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 26ba2575ff34ac0..ae197ee83acc053 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -503,6 +503,17 @@ bool isPermlane16(unsigned Opc) {
Opc == AMDGPU::V_PERMLANEX16_VAR_B32_e64_gfx12;
}
+bool isVOP1Cvt_F32_Fp8_Bf8_e64(unsigned Opc) {
+ return Opc == AMDGPU::V_CVT_F32_BF8_e64_gfx12 ||
+ Opc == AMDGPU::V_CVT_F32_FP8_e64_gfx12 ||
+ Opc == AMDGPU::V_CVT_F32_BF8_e64_dpp_gfx12 ||
+ Opc == AMDGPU::V_CVT_F32_FP8_e64_dpp_gfx12 ||
+ Opc == AMDGPU::V_CVT_F32_BF8_e64_dpp8_gfx12 ||
+ Opc == AMDGPU::V_CVT_F32_FP8_e64_dpp8_gfx12 ||
+ Opc == AMDGPU::V_CVT_PK_F32_BF8_e64_gfx12 ||
+ Opc == AMDGPU::V_CVT_PK_F32_FP8_e64_gfx12;
+}
+
bool isGenericAtomic(unsigned Opc) {
return Opc == AMDGPU::G_AMDGPU_ATOMIC_FMIN ||
Opc == AMDGPU::G_AMDGPU_ATOMIC_FMAX ||
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index 50c741760d7143e..9d0bac084feabe2 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -542,6 +542,9 @@ bool isPermlane16(unsigned Opc);
LLVM_READNONE
bool isGenericAtomic(unsigned Opc);
+LLVM_READNONE
+bool isVOP1Cvt_F32_Fp8_Bf8_e64(unsigned Opc);
+
namespace VOPD {
enum Component : unsigned {
diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
index d604990dc88c207..48202e2250c8500 100644
--- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -571,6 +571,7 @@ let SubtargetPredicate = isGFX9Only in {
} // End SubtargetPredicate = isGFX9Only
class VOPProfile_Base_CVT_F32_F8<ValueType vt> : VOPProfileI2F <vt, i32> {
+ let HasExtDPP = 1;
let HasExtSDWA = 1;
let HasExtSDWA9 = 1;
let HasExt = 1;
@@ -599,6 +600,7 @@ class Cvt_F32_F8_Pat<SDPatternOperator node, int index,
(inst_sdwa 0, $src, 0, 0, index)
>;
+let SubtargetPredicate = isGFX9Only in {
let OtherPredicates = [HasCvtFP8VOP1Bug] in {
def : GCNPat<(f32 (int_amdgcn_cvt_f32_fp8 i32:$src, 0)),
(V_CVT_F32_FP8_sdwa 0, $src, 0, 0, 0)>;
@@ -617,6 +619,7 @@ foreach Index = [1, 2, 3] in {
def : Cvt_F32_F8_Pat<int_amdgcn_cvt_f32_fp8, Index, V_CVT_F32_FP8_sdwa>;
def : Cvt_F32_F8_Pat<int_amdgcn_cvt_f32_bf8, Index, V_CVT_F32_BF8_sdwa>;
}
+} // End SubtargetPredicate = isGFX9Only
class Cvt_PK_F32_F8_Pat<SDPatternOperator node, int index,
VOP1_Pseudo inst_e32, VOP1_SDWA_Pseudo inst_sdwa> : GCNPat<
@@ -626,11 +629,82 @@ class Cvt_PK_F32_F8_Pat<SDPatternOperator node, int index,
(inst_e32 $src))
>;
-foreach Index = [0, -1] in {
- def : Cvt_PK_F32_F8_Pat<int_amdgcn_cvt_pk_f32_fp8, Index,
- V_CVT_PK_F32_FP8_e32, V_CVT_PK_F32_FP8_sdwa>;
- def : Cvt_PK_F32_F8_Pat<int_amdgcn_cvt_pk_f32_bf8, Index,
- V_CVT_PK_F32_BF8_e32, V_CVT_PK_F32_BF8_sdwa>;
+let SubtargetPredicate = isGFX9Only in {
+ foreach Index = [0, -1] in {
+ def : Cvt_PK_F32_F8_Pat<int_amdgcn_cvt_pk_f32_fp8, Index,
+ V_CVT_PK_F32_FP8_e32, V_CVT_PK_F32_FP8_sdwa>;
+ def : Cvt_PK_F32_F8_Pat<int_amdgcn_cvt_pk_f32_bf8, Index,
+ V_CVT_PK_F32_BF8_e32, V_CVT_PK_F32_BF8_sdwa>;
+ }
+}
+
+
+// Similar to VOPProfile_Base_CVT_F32_F8, but for VOP3 instructions.
+def VOPProfile_Base_CVT_PK_F32_F8_OpSel : VOPProfileI2F <v2f32, i32> {
+ let InsVOP3OpSel = (ins Src0Mod:$src0_modifiers, Src0RC64:$src0,
+ clampmod:$clamp, omod:$omod, op_sel0:$op_sel);
+
+ let HasOpSel = 1;
+ let HasExtVOP3DPP = 0;
+}
+
+def VOPProfile_Base_CVT_F32_F8_OpSel : VOPProfile<[f32, i32, i32, untyped]> {
+ let InsVOP3OpSel = (ins Src0Mod:$src0_modifiers, Src0RC64:$src0,
+ Src1Mod:$src1_modifiers, Src1RC64:$src1,
+ clampmod:$clamp, omod:$omod, op_sel0:$op_sel);
+ let AsmVOP3OpSel = !subst(", $src1_modifiers", "", getAsmVOP3OpSel<2, 0, 0, 1, 1, 0>.ret);
+
+ let HasOpSel = 1;
+ let HasExtDPP = 1;
+ let HasExtVOP3DPP = 1;
+
+ let Src1VOP3DPP = Src1RC64;
+ let AsmVOP3DPP8 = getAsmVOP3DPP8<AsmVOP3OpSel>.ret;
+ let AsmVOP3DPP16 = getAsmVOP3DPP16<AsmVOP3OpSel>.ret;
+}
+
+let SubtargetPredicate = isGFX12Plus, mayRaiseFPException = 0,
+ SchedRW = [WriteFloatCvt] in {
+ defm V_CVT_F32_FP8_OP_SEL : VOP1Inst<"v_cvt_f32_fp8_op_sel", VOPProfile_Base_CVT_F32_F8_OpSel>;
+ defm V_CVT_F32_BF8_OP_SEL : VOP1Inst<"v_cvt_f32_bf8_op_sel", VOPProfile_Base_CVT_F32_F8_OpSel>;
+ defm V_CVT_PK_F32_FP8_OP_SEL : VOP1Inst<"v_cvt_pk_f32_fp8_op_sel", VOPProfile_Base_CVT_PK_F32_F8_OpSel>;
+ defm V_CVT_PK_F32_BF8_OP_SEL : VOP1Inst<"v_cvt_pk_f32_bf8_op_sel", VOPProfile_Base_CVT_PK_F32_F8_OpSel>;
+}
+
+class Cvt_F32_F8_Pat_OpSel<SDPatternOperator node, bits<2> index,
+ VOP1_Pseudo inst_e32, VOP3_Pseudo inst_e64> : GCNPat<
+ (f32 (node i32:$src, index)),
+ !if (index,
+ (inst_e64 !if(index{0}, SRCMODS.OP_SEL_0, SRCMODS.OP_SEL_1), $src,
+ !if(index{1}, SRCMODS.OP_SEL_0, SRCMODS.OP_SEL_1), (i32 0),
+ 0, 0, 0),
+ (inst_e32 $src))
+>;
+
+let SubtargetPredicate = isGFX12Plus in {
+ foreach Index = [0, 1, 2, 3] in {
+ def : Cvt_F32_F8_Pat_OpSel<int_amdgcn_cvt_f32_fp8, Index,
+ V_CVT_F32_FP8_e32, V_CVT_F32_FP8_OP_SEL_e64>;
+ def : Cvt_F32_F8_Pat_OpSel<int_amdgcn_cvt_f32_bf8, Index,
+ V_CVT_F32_BF8_e32, V_CVT_F32_BF8_OP_SEL_e64>;
+ }
+}
+
+class Cvt_PK_F32_F8_Pat_OpSel<SDPatternOperator node, int index,
+ VOP1_Pseudo inst_e32, VOP3_Pseudo inst_e64> : GCNPat<
+ (v2f32 (node i32:$src, index)),
+ !if (index,
+ (inst_e64 SRCMODS.OP_SEL_0, $src, 0, 0, SRCMODS.NONE),
+ (inst_e32 $src))
+>;
+
+let SubtargetPredicate = isGFX12Plus in {
+ foreach Index = [0, -1] in {
+ def : Cvt_PK_F32_F8_Pat_OpSel<int_amdgcn_cvt_pk_f32_fp8, Index,
+ V_CVT_PK_F32_FP8_e32, V_CVT_PK_F32_FP8_OP_SEL_e64>;
+ def : Cvt_PK_F32_F8_Pat_OpSel<int_amdgcn_cvt_pk_f32_bf8, Index,
+ V_CVT_PK_F32_BF8_e32, V_CVT_PK_F32_BF8_OP_SEL_e64>;
+ }
}
let SubtargetPredicate = isGFX10Plus in {
@@ -854,6 +928,20 @@ multiclass VOP1_Real_NO_DPP_OP_SEL_with_name<GFXGen Gen, bits<9> op,
VOP3_Real_with_name<Gen, {0, 1, 1, op{6-0}}, opName, asmName>;
+// Define VOP1 instructions using the pseudo instruction with its old profile and
+// VOP3 using the OpSel profile for the pseudo instruction.
+defm V_CVT_F32_FP8 : VOP1_Real_NO_VOP3_with_name_gfx12<0x06c, "V_CVT_F32_FP8", "v_cvt_f32_fp8">;
+defm V_CVT_F32_FP8 : VOP1_Realtriple_e64_with_name<GFX12Gen, 0x06c, "V_CVT_F32_FP8_OP_SEL", "v_cvt_f32_fp8">;
+
+defm V_CVT_F32_BF8 : VOP1_Real_NO_VOP3_with_name_gfx12<0x06d, "V_CVT_F32_BF8", "v_cvt_f32_bf8">;
+defm V_CVT_F32_BF8 : VOP1_Realtriple_e64_with_name<GFX12Gen, 0x06d, "V_CVT_F32_BF8_OP_SEL", "v_cvt_f32_bf8">;
+
+defm V_CVT_PK_F32_FP8 : VOP1_Real_e32_with_name<GFX12Gen, 0x06e, "V_CVT_PK_F32_FP8", "v_cvt_pk_f32_fp8">;
+defm V_CVT_PK_F32_FP8 : VOP3_Real_with_name<GFX12Gen, 0x1ee, "V_CVT_PK_F32_FP8_OP_SEL", "v_cvt_pk_f32_fp8">;
+
+defm V_CVT_PK_F32_BF8 : VOP1_Real_e32_with_name<GFX12Gen, 0x06f, "V_CVT_PK_F32_BF8", "v_cvt_pk_f32_bf8">;
+defm V_CVT_PK_F32_BF8 : VOP3_Real_with_name<GFX12Gen, 0x1ef, "V_CVT_PK_F32_BF8_OP_SEL", "v_cvt_pk_f32_bf8">;
+
defm V_CVT_NEAREST_I32_F32 : VOP1_Real_FULL_with_name_gfx11_gfx12<0x00c,
"V_CVT_RPI_I32_F...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/78414
More information about the llvm-commits
mailing list