[llvm] [AMDGPU] VOPD/VOPD3 changes for gfx1250 (PR #147602)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Jul 8 15:02:13 PDT 2025
llvmbot wrote:
@llvm/pr-subscribers-mc
@llvm/pr-subscribers-backend-amdgpu
Author: Stanislav Mekhanoshin (rampitec)
Patch is 4.24 MiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/147602.diff
24 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp (+186-20)
- (modified) llvm/lib/Target/AMDGPU/GCNCreateVOPD.cpp (+52-17)
- (modified) llvm/lib/Target/AMDGPU/GCNSubtarget.h (+2)
- (modified) llvm/lib/Target/AMDGPU/GCNVOPDUtils.cpp (+88-16)
- (modified) llvm/lib/Target/AMDGPU/GCNVOPDUtils.h (+2-1)
- (modified) llvm/lib/Target/AMDGPU/SIDefines.h (+2)
- (modified) llvm/lib/Target/AMDGPU/SIInstrFormats.td (+3)
- (modified) llvm/lib/Target/AMDGPU/SIInstrInfo.td (+110-22)
- (modified) llvm/lib/Target/AMDGPU/SIRegisterInfo.td (+2)
- (modified) llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp (+129-24)
- (modified) llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h (+86-20)
- (modified) llvm/lib/Target/AMDGPU/VOP2Instructions.td (+54-30)
- (modified) llvm/lib/Target/AMDGPU/VOP3Instructions.td (+10-3)
- (modified) llvm/lib/Target/AMDGPU/VOPCInstructions.td (+16-16)
- (modified) llvm/lib/Target/AMDGPU/VOPDInstructions.td (+112-17)
- (modified) llvm/lib/Target/AMDGPU/VOPInstructions.td (+7-2)
- (modified) llvm/test/CodeGen/AMDGPU/global-load-xcnt.ll (+35-40)
- (added) llvm/test/CodeGen/AMDGPU/vopd-combine-gfx1250.mir (+3243)
- (added) llvm/test/MC/AMDGPU/gfx1250_asm_vopd.s (+16275)
- (added) llvm/test/MC/AMDGPU/gfx1250_asm_vopd3.s (+19063)
- (added) llvm/test/MC/AMDGPU/gfx1250_asm_vopd_errs.s (+326)
- (added) llvm/test/MC/AMDGPU/gfx1250_asm_vopd_features.s (+109)
- (added) llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vopd.txt (+12205)
- (added) llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vopd3.txt (+14278)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 3af140461afdb..41d06b42c32a8 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -335,6 +335,22 @@ class AMDGPUOperand : public MCParsedAsmOperand {
return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f32);
}
+ bool isRegOrInlineImmWithFP64InputMods() const {
+ return isRegOrInline(AMDGPU::VS_64RegClassID, MVT::f64);
+ }
+
+ bool isVRegWithInputMods(unsigned RCID) const {
+ return isRegClass(RCID);
+ }
+
+ bool isVRegWithFP32InputMods() const {
+ return isVRegWithInputMods(AMDGPU::VGPR_32RegClassID);
+ }
+
+ bool isVRegWithFP64InputMods() const {
+ return isVRegWithInputMods(AMDGPU::VReg_64RegClassID);
+ }
+
bool isPackedFP16InputMods() const {
return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::v2f16);
}
@@ -527,7 +543,7 @@ class AMDGPUOperand : public MCParsedAsmOperand {
return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
}
- bool isVCSrcB64() const {
+ bool isVCSrc_b64() const {
return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
}
@@ -553,7 +569,7 @@ class AMDGPUOperand : public MCParsedAsmOperand {
return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
}
- bool isVCSrcF64() const {
+ bool isVCSrc_f64() const {
return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
}
@@ -601,7 +617,7 @@ class AMDGPUOperand : public MCParsedAsmOperand {
return isVCSrc_f32() || isLiteralImm(MVT::i32) || isExpr();
}
- bool isVSrc_b64() const { return isVCSrcF64() || isLiteralImm(MVT::i64); }
+ bool isVSrc_b64() const { return isVCSrc_f64() || isLiteralImm(MVT::i64); }
bool isVSrcT_b16() const { return isVCSrcT_b16() || isLiteralImm(MVT::i16); }
@@ -617,15 +633,11 @@ class AMDGPUOperand : public MCParsedAsmOperand {
bool isVSrc_v2b16() const { return isVSrc_b16() || isLiteralImm(MVT::v2i16); }
- bool isVCSrcV2FP32() const {
- return isVCSrcF64();
- }
+ bool isVCSrcV2FP32() const { return isVCSrc_f64(); }
bool isVSrc_v2f32() const { return isVSrc_f64() || isLiteralImm(MVT::v2f32); }
- bool isVCSrcV2INT32() const {
- return isVCSrcB64();
- }
+ bool isVCSrc_v2b32() const { return isVCSrc_b64(); }
bool isVSrc_v2b32() const { return isVSrc_b64() || isLiteralImm(MVT::v2i32); }
@@ -633,7 +645,7 @@ class AMDGPUOperand : public MCParsedAsmOperand {
return isVCSrc_f32() || isLiteralImm(MVT::f32) || isExpr();
}
- bool isVSrc_f64() const { return isVCSrcF64() || isLiteralImm(MVT::f64); }
+ bool isVSrc_f64() const { return isVCSrc_f64() || isLiteralImm(MVT::f64); }
bool isVSrcT_bf16() const { return isVCSrcTBF16() || isLiteralImm(MVT::bf16); }
@@ -1527,6 +1539,8 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
bool isGFX12Plus() const { return AMDGPU::isGFX12Plus(getSTI()); }
+ bool isGFX1250() const { return AMDGPU::isGFX1250(getSTI()); }
+
bool isGFX10_AEncoding() const { return AMDGPU::isGFX10_AEncoding(getSTI()); }
bool isGFX10_BEncoding() const {
@@ -1774,8 +1788,13 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
bool validateSOPLiteral(const MCInst &Inst) const;
bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
- bool validateVOPDRegBankConstraints(const MCInst &Inst,
- const OperandVector &Operands);
+ std::optional<unsigned>
+ checkVOPDRegBankConstraints(const MCInst &Inst, bool AsVOPD3);
+ bool validateVOPD(const MCInst &Inst, const OperandVector &Operands);
+ bool tryVOPD(const MCInst &Inst);
+ bool tryVOPD3(const MCInst &Inst);
+ bool tryAnotherVOPDEncoding(const MCInst &Inst);
+
bool validateIntClampSupported(const MCInst &Inst);
bool validateMIMGAtomicDMask(const MCInst &Inst);
bool validateMIMGGatherDMask(const MCInst &Inst);
@@ -3505,6 +3524,13 @@ unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
}
}
+ // Asm can first try to match VOPD or VOPD3. By failing early here with
+ // Match_InvalidOperand, the parser will retry parsing as VOPD3 or VOPD.
+ // Checking later during validateInstruction does not give a chance to retry
+ // parsing as a different encoding.
+ if (tryAnotherVOPDEncoding(Inst))
+ return Match_InvalidOperand;
+
return Match_Success;
}
@@ -3685,8 +3711,10 @@ static OperandIndices getSrcOperandIndices(unsigned Opcode,
return {getNamedOperandIdx(Opcode, OpName::src0X),
getNamedOperandIdx(Opcode, OpName::vsrc1X),
+ getNamedOperandIdx(Opcode, OpName::vsrc2X),
getNamedOperandIdx(Opcode, OpName::src0Y),
getNamedOperandIdx(Opcode, OpName::vsrc1Y),
+ getNamedOperandIdx(Opcode, OpName::vsrc2Y),
ImmXIdx,
ImmIdx};
}
@@ -3816,12 +3844,12 @@ bool AMDGPUAsmParser::validateConstantBusLimitations(
return false;
}
-bool AMDGPUAsmParser::validateVOPDRegBankConstraints(
- const MCInst &Inst, const OperandVector &Operands) {
+std::optional<unsigned> AMDGPUAsmParser::checkVOPDRegBankConstraints(
+ const MCInst &Inst, bool AsVOPD3) {
const unsigned Opcode = Inst.getOpcode();
if (!isVOPD(Opcode))
- return true;
+ return {};
const MCRegisterInfo *TRI = getContext().getRegisterInfo();
@@ -3833,15 +3861,64 @@ bool AMDGPUAsmParser::validateVOPDRegBankConstraints(
};
// On GFX12 if both OpX and OpY are V_MOV_B32 then OPY uses SRC2 source-cache.
- bool SkipSrc = Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx12;
+ bool SkipSrc = Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx12 ||
+ Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx1250 ||
+ Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_e96_gfx1250;
+ bool AllowSameVGPR = isGFX1250();
+
+ if (AsVOPD3) { // Literal constants are not allowed with VOPD3.
+ for (auto OpName : {OpName::src0X, OpName::src0Y}) {
+ int I = getNamedOperandIdx(Opcode, OpName);
+ const MCOperand &Op = Inst.getOperand(I);
+ if (!Op.isImm())
+ continue;
+ int64_t Imm = Op.getImm();
+ if (!AMDGPU::isInlinableLiteral32(Imm, hasInv2PiInlineImm()) &&
+ !AMDGPU::isInlinableLiteral64(Imm, hasInv2PiInlineImm()))
+ return I;
+ }
+
+ for (auto OpName : {OpName::vsrc1X, OpName::vsrc1Y,
+ OpName::vsrc2X, OpName::vsrc2Y,
+ OpName::imm}) {
+ int I = getNamedOperandIdx(Opcode, OpName);
+ if (I == -1)
+ continue;
+ const MCOperand &Op = Inst.getOperand(I);
+ if (Op.isImm())
+ return I;
+ }
+ }
const auto &InstInfo = getVOPDInstInfo(Opcode, &MII);
auto InvalidCompOprIdx =
- InstInfo.getInvalidCompOperandIndex(getVRegIdx, SkipSrc);
- if (!InvalidCompOprIdx)
+ InstInfo.getInvalidCompOperandIndex(getVRegIdx, *TRI, SkipSrc,
+ AllowSameVGPR, AsVOPD3);
+
+ return InvalidCompOprIdx;
+}
+
+bool AMDGPUAsmParser::validateVOPD(
+ const MCInst &Inst, const OperandVector &Operands) {
+
+ unsigned Opcode = Inst.getOpcode();
+ bool AsVOPD3 = MII.get(Opcode).TSFlags & SIInstrFlags::VOPD3;
+
+ if (AsVOPD3) {
+ for (unsigned I = 0, E = Operands.size(); I != E; ++I) {
+ AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
+ if ((Op.isRegKind() || Op.isImmTy(AMDGPUOperand::ImmTyNone)) &&
+ (Op.getModifiers().getFPModifiersOperand() & SISrcMods::ABS))
+ Error(Op.getStartLoc(), "ABS not allowed in VOPD3 instructions");
+ }
+ }
+
+ auto InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst, AsVOPD3);
+ if (!InvalidCompOprIdx.has_value())
return true;
auto CompOprIdx = *InvalidCompOprIdx;
+ const auto &InstInfo = getVOPDInstInfo(Opcode, &MII);
auto ParsedIdx =
std::max(InstInfo[VOPD::X].getIndexInParsedOperands(CompOprIdx),
InstInfo[VOPD::Y].getIndexInParsedOperands(CompOprIdx));
@@ -3849,7 +3926,10 @@ bool AMDGPUAsmParser::validateVOPDRegBankConstraints(
auto Loc = ((AMDGPUOperand &)*Operands[ParsedIdx]).getStartLoc();
if (CompOprIdx == VOPD::Component::DST) {
- Error(Loc, "one dst register must be even and the other odd");
+ if (AsVOPD3)
+ Error(Loc, "dst registers must be distinct");
+ else
+ Error(Loc, "one dst register must be even and the other odd");
} else {
auto CompSrcIdx = CompOprIdx - VOPD::Component::DST_NUM;
Error(Loc, Twine("src") + Twine(CompSrcIdx) +
@@ -3859,6 +3939,75 @@ bool AMDGPUAsmParser::validateVOPDRegBankConstraints(
return false;
}
+// \returns true if \p Inst does not satisfy VOPD constraints, but can
+// potentially be used as VOPD3 with the same operands.
+bool AMDGPUAsmParser::tryVOPD3(const MCInst &Inst) {
+ // First check if it fits VOPD
+ auto InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst, false);
+ if (!InvalidCompOprIdx.has_value())
+ return false;
+
+ // Then if it fits VOPD3
+ InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst, true);
+ if (InvalidCompOprIdx.has_value()) {
+ // If the failed operand is the dst, it is better to report the error
+ // against the VOPD3 instruction, as it has more capabilities and the error
+ // message will be more informative. If the dst is not legal for VOPD3,
+ // then it is not legal for VOPD either.
+ if (*InvalidCompOprIdx == VOPD::Component::DST)
+ return true;
+
+ // Otherwise prefer VOPD, as we may find ourselves in an awkward situation
+ // with a conflict in the tied implicit src2 of fmac and no asm operand
+ // to point to.
+ return false;
+ }
+ return true;
+}
+
+// \returns true if a VOPD3 instruction can also be represented as a shorter
+// VOPD encoding.
+bool AMDGPUAsmParser::tryVOPD(const MCInst &Inst) {
+ const unsigned Opcode = Inst.getOpcode();
+ const auto &II = getVOPDInstInfo(Opcode, &MII);
+ unsigned EncodingFamily = AMDGPU::getVOPDEncodingFamily(getSTI());
+ if (!getCanBeVOPD(II[VOPD::X].getOpcode(), EncodingFamily, false).X ||
+ !getCanBeVOPD(II[VOPD::Y].getOpcode(), EncodingFamily, false).Y)
+ return false;
+
+ // This is an awkward exception: the VOPD3 variant of V_DUAL_CNDMASK_B32 has
+ // an explicit src2 even if it is vcc_lo. If it was parsed as VOPD3 it cannot
+ // be parsed as VOPD, which does not accept src2.
+ if (II[VOPD::X].getOpcode() == AMDGPU::V_CNDMASK_B32_e32 ||
+ II[VOPD::Y].getOpcode() == AMDGPU::V_CNDMASK_B32_e32)
+ return false;
+
+ // If any modifiers are set this cannot be VOPD.
+ for (auto OpName : {OpName::src0X_modifiers, OpName::src0Y_modifiers,
+ OpName::vsrc1X_modifiers, OpName::vsrc1Y_modifiers,
+ OpName::vsrc2X_modifiers, OpName::vsrc2Y_modifiers}) {
+ int I = getNamedOperandIdx(Opcode, OpName);
+ if (I == -1)
+ continue;
+ if (Inst.getOperand(I).getImm())
+ return false;
+ }
+
+ return !tryVOPD3(Inst);
+}
+
+// VOPD3 has more relaxed register constraints than VOPD. We prefer the
+// shorter VOPD form but switch to VOPD3 otherwise.
+bool AMDGPUAsmParser::tryAnotherVOPDEncoding(const MCInst &Inst) {
+ const unsigned Opcode = Inst.getOpcode();
+ if (!isGFX1250() || !isVOPD(Opcode))
+ return false;
+
+ if (MII.get(Opcode).TSFlags & SIInstrFlags::VOPD3)
+ return tryVOPD(Inst);
+ return tryVOPD3(Inst);
+}
+
bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
const unsigned Opc = Inst.getOpcode();
@@ -5179,7 +5328,7 @@ bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
if (!validateConstantBusLimitations(Inst, Operands)) {
return false;
}
- if (!validateVOPDRegBankConstraints(Inst, Operands)) {
+ if (!validateVOPD(Inst, Operands)) {
return false;
}
if (!validateIntClampSupported(Inst)) {
@@ -9180,8 +9329,14 @@ ParseStatus AMDGPUAsmParser::parseVOPD(OperandVector &Operands) {
// Create VOPD MCInst operands using parsed assembler operands.
void AMDGPUAsmParser::cvtVOPD(MCInst &Inst, const OperandVector &Operands) {
+ const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
+
auto addOp = [&](uint16_t ParsedOprIdx) { // NOLINT:function pointer
AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[ParsedOprIdx]);
+ if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
+ Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
+ return;
+ }
if (Op.isReg()) {
Op.addRegOperands(Inst, 1);
return;
@@ -9210,6 +9365,17 @@ void AMDGPUAsmParser::cvtVOPD(MCInst &Inst, const OperandVector &Operands) {
if (CInfo.hasSrc2Acc())
addOp(CInfo.getIndexOfDstInParsedOperands());
}
+
+ int BitOp3Idx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
+ AMDGPU::OpName::bitop3);
+ if (BitOp3Idx != -1) {
+ OptionalImmIndexMap OptIdx;
+ AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands.back());
+ if (Op.isImm())
+ OptIdx[Op.getImmTy()] = Operands.size() - 1;
+
+ addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyBitOp3);
+ }
}
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AMDGPU/GCNCreateVOPD.cpp b/llvm/lib/Target/AMDGPU/GCNCreateVOPD.cpp
index ccc711a0bcc4e..26322a4de54fc 100644
--- a/llvm/lib/Target/AMDGPU/GCNCreateVOPD.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNCreateVOPD.cpp
@@ -42,11 +42,13 @@ class GCNCreateVOPD {
class VOPDCombineInfo {
public:
VOPDCombineInfo() = default;
- VOPDCombineInfo(MachineInstr *First, MachineInstr *Second)
- : FirstMI(First), SecondMI(Second) {}
+ VOPDCombineInfo(MachineInstr *First, MachineInstr *Second,
+ bool VOPD3 = false)
+ : FirstMI(First), SecondMI(Second), IsVOPD3(VOPD3) {}
MachineInstr *FirstMI;
MachineInstr *SecondMI;
+ bool IsVOPD3;
};
public:
@@ -59,9 +61,9 @@ class GCNCreateVOPD {
unsigned Opc2 = SecondMI->getOpcode();
unsigned EncodingFamily =
AMDGPU::getVOPDEncodingFamily(SII->getSubtarget());
- int NewOpcode =
- AMDGPU::getVOPDFull(AMDGPU::getVOPDOpcode(Opc1),
- AMDGPU::getVOPDOpcode(Opc2), EncodingFamily);
+ int NewOpcode = AMDGPU::getVOPDFull(AMDGPU::getVOPDOpcode(Opc1, CI.IsVOPD3),
+ AMDGPU::getVOPDOpcode(Opc2, CI.IsVOPD3),
+ EncodingFamily, CI.IsVOPD3);
assert(NewOpcode != -1 &&
"Should have previously determined this as a possible VOPD\n");
@@ -79,12 +81,36 @@ class GCNCreateVOPD {
VOPDInst.add(MI[CompIdx]->getOperand(MCOprIdx));
}
+ const AMDGPU::OpName Mods[2][3] = {
+ {AMDGPU::OpName::src0X_modifiers, AMDGPU::OpName::vsrc1X_modifiers,
+ AMDGPU::OpName::vsrc2X_modifiers},
+ {AMDGPU::OpName::src0Y_modifiers, AMDGPU::OpName::vsrc1Y_modifiers,
+ AMDGPU::OpName::vsrc2Y_modifiers}};
+ const AMDGPU::OpName SrcMods[3] = {AMDGPU::OpName::src0_modifiers,
+ AMDGPU::OpName::src1_modifiers,
+ AMDGPU::OpName::src2_modifiers};
+ const unsigned VOPDOpc = VOPDInst->getOpcode();
+
for (auto CompIdx : VOPD::COMPONENTS) {
auto CompSrcOprNum = InstInfo[CompIdx].getCompSrcOperandsNum();
+ bool IsVOP3 = SII->isVOP3(*MI[CompIdx]);
for (unsigned CompSrcIdx = 0; CompSrcIdx < CompSrcOprNum; ++CompSrcIdx) {
- auto MCOprIdx = InstInfo[CompIdx].getIndexOfSrcInMCOperands(CompSrcIdx);
+ if (AMDGPU::hasNamedOperand(VOPDOpc, Mods[CompIdx][CompSrcIdx])) {
+ const MachineOperand *Mod =
+ SII->getNamedOperand(*MI[CompIdx], SrcMods[CompSrcIdx]);
+ VOPDInst.addImm(Mod ? Mod->getImm() : 0);
+ }
+ auto MCOprIdx =
+ InstInfo[CompIdx].getIndexOfSrcInMCOperands(CompSrcIdx, IsVOP3);
VOPDInst.add(MI[CompIdx]->getOperand(MCOprIdx));
}
+ if (MI[CompIdx]->getOpcode() == AMDGPU::V_CNDMASK_B32_e32 && CI.IsVOPD3)
+ VOPDInst.addReg(AMDGPU::VCC_LO);
+ }
+
+ if (CI.IsVOPD3) {
+ if (unsigned BitOp2 = AMDGPU::getBitOp2(Opc2))
+ VOPDInst.addImm(BitOp2);
}
SII->fixImplicitOperands(*VOPDInst);
@@ -109,6 +135,8 @@ class GCNCreateVOPD {
const SIInstrInfo *SII = ST->getInstrInfo();
bool Changed = false;
+ unsigned EncodingFamily = AMDGPU::getVOPDEncodingFamily(*ST);
+ bool HasVOPD3 = ST->hasVOPD3();
SmallVector<VOPDCombineInfo> ReplaceCandidates;
@@ -124,19 +152,26 @@ class GCNCreateVOPD {
auto *SecondMI = &*MII;
unsigned Opc = FirstMI->getOpcode();
unsigned Opc2 = SecondMI->getOpcode();
- llvm::AMDGPU::CanBeVOPD FirstCanBeVOPD = AMDGPU::getCanBeVOPD(Opc);
- llvm::AMDGPU::CanBeVOPD SecondCanBeVOPD = AMDGPU::getCanBeVOPD(Opc2);
VOPDCombineInfo CI;
- if (FirstCanBeVOPD.X && SecondCanBeVOPD.Y)
- CI = VOPDCombineInfo(FirstMI, SecondMI);
- else if (FirstCanBeVOPD.Y && SecondCanBeVOPD.X)
- CI = VOPDCombineInfo(SecondMI, FirstMI);
- else
- continue;
- // checkVOPDRegConstraints cares about program order, but doReplace
- // cares about X-Y order in the constituted VOPD
- if (llvm::checkVOPDRegConstraints(*SII, *FirstMI, *SecondMI)) {
+ const auto checkVOPD = [&](bool VOPD3) -> bool {
+ llvm::AMDGPU::CanBeVOPD FirstCanBeVOPD =
+ AMDGPU::getCanBeVOPD(Opc, EncodingFamily, VOPD3);
+ llvm::AMDGPU::CanBeVOPD SecondCanBeVOPD =
+ AMDGPU::getCanBeVOPD(Opc2, EncodingFamily, VOPD3);
+
+ if (FirstCanBeVOPD.X && SecondCanBeVOPD.Y)
+ CI = VOPDCombineInfo(FirstMI, SecondMI, VOPD3);
+ else if (FirstCanBeVOPD.Y && SecondCanBeVOPD.X)
+ CI = VOPDCombineInfo(SecondMI, FirstMI, VOPD3);
+ else
+ return false;
+ // checkVOPDRegConstraints cares about program order, but doReplace
+ // cares about X-Y order in the constituted VOPD
+ return llvm::checkVOPDRegConstraints(*SII, *FirstMI, *SecondMI, VOPD3);
+ };
+
+ if (checkVOPD(false) || (HasVOPD3 && checkVOPD(true))) {
ReplaceCandidates.push_back(CI);
++MII;
}
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index fa1209db2fa07..80fd830d10aa4 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -1478,6 +1478,8 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
bool hasGFX1250Insts() const { return GFX1250Insts; }
+ bool hasVOPD3() const { return GFX1250Insts; }
+
// \returns true if target has S_SETPRIO_INC_WG instruction.
bool hasSetPrioIncWgInst() const { return HasSetPrioIncWgInst; }
diff --git a/llvm/lib/Target/AMDGPU/GCNVOPDUtils.cpp b/llvm/lib/Target/AMDGPU/GCNVOPDUtils.cpp
index 33c208495c500..ae5db01f6e119 100644
--- a/llvm/lib/Target/AMDGPU/GCNVOPDUtils.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNVOPDUtils.cpp
@@ -36,11 +36,20 @@ using namespace llvm;
bool llvm::checkVOPDRegConstraints(const SIInstrInfo &TII,
const MachineInstr &FirstMI,
- const MachineInstr &SecondMI) {
+ const MachineInstr &SecondMI,
+ bool IsVOPD3) {
namespace VOPD = AMDGPU::VOPD;
const MachineFunction *MF = FirstMI.getMF();
const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
+
+ if (IsVOPD3 && !ST.hasVOPD3())
+ return false;
+ if (!IsVOPD3 && (TII.isVOP3(FirstMI) || TII.isVOP3(SecondMI)))
+ return false;
+ if (TII.isDPP(FirstMI) || TII.isDPP(SecondMI))
+ return false;
+
const SIRegisterInfo *TRI = dyn_cast<SIRegisterInfo>(ST.getRegisterInfo());
const MachineRegisterInfo &MRI = MF->getRegInfo();
// Literals also count against scalar bus limit
@@ -80,23 +89,61 @@ bool llvm::checkVOPDRegConstraints(const SIInstrInfo &TII,
for (auto CompIdx : VOPD::COMPONENTS) {
const MachineInstr &MI = (CompIdx == VOPD::X) ? FirstMI : SecondMI;
- const MachineOperand &Src0 = MI.getOperand(VOPD::Component::SRC0);
+ const MachineOperand &Src0 = *TII.getNam...
[truncated]
``````````
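The interplay of the two validators in the hunks above is easiest to see in isolation. Below is a minimal standalone sketch, not the LLVM API: it models only the dst-register rules spelled out in the new diagnostics (VOPD wants one even and one odd vdst, VOPD3 merely wants them distinct) and the stated preference for the shorter VOPD form. All names are illustrative.

```cpp
#include <cassert>

// Dst rule for the short VOPD encoding, per the diagnostic in the diff:
// "one dst register must be even and the other odd".
static bool dstOkVOPD(unsigned VdstX, unsigned VdstY) {
  return (VdstX & 1) != (VdstY & 1);
}

// Dst rule for VOPD3, per "dst registers must be distinct".
static bool dstOkVOPD3(unsigned VdstX, unsigned VdstY) {
  return VdstX != VdstY;
}

enum class Encoding { VOPD, VOPD3, Invalid };

// Mirrors the preference in tryAnotherVOPDEncoding(): take the shorter
// VOPD form when it is legal, fall back to the relaxed VOPD3 form.
static Encoding pickEncoding(unsigned VdstX, unsigned VdstY) {
  if (dstOkVOPD(VdstX, VdstY))
    return Encoding::VOPD;
  if (dstOkVOPD3(VdstX, VdstY))
    return Encoding::VOPD3;
  return Encoding::Invalid;
}

int main() {
  assert(pickEncoding(0, 1) == Encoding::VOPD);    // even/odd: short form
  assert(pickEncoding(0, 2) == Encoding::VOPD3);   // both even: relaxed form
  assert(pickEncoding(4, 4) == Encoding::Invalid); // same vdst: neither fits
}
```

This is also why checkTargetMatchPredicate() fails a match with Match_InvalidOperand when the other encoding is the better fit: the match driver then retries the same parsed operands against the other opcode table, which a later check in validateInstruction could no longer do.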
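The VOPD3 literal check added to checkVOPDRegBankConstraints() splits operands into two groups: src0X/src0Y may carry an immediate only if it is an inline constant, while the vsrc1/vsrc2 operands may not be immediates at all. A rough standalone model follows; it covers only the integer inline-constant range, whereas the real isInlinableLiteral32/64 also accepts a small set of FP values.

```cpp
#include <cstdint>
#include <optional>

// Integer inline constants on AMDGPU span -16..64; anything outside that
// range needs a literal slot, which the VOPD3 encoding does not provide.
static bool isInlineInt(int64_t Imm) { return Imm >= -16 && Imm <= 64; }

struct Operand {
  std::optional<int64_t> Imm; // empty means a register operand
};

// Models the two loops added for VOPD3: src0 operands tolerate inline
// constants, vsrc1/vsrc2 (and the trailing imm) tolerate no immediates.
static bool vopd3LiteralsOk(const Operand Src0[2], const Operand VSrc[4]) {
  for (int I = 0; I < 2; ++I)
    if (Src0[I].Imm && !isInlineInt(*Src0[I].Imm))
      return false; // a real literal: reject
  for (int I = 0; I < 4; ++I)
    if (VSrc[I].Imm)
      return false; // any immediate here is illegal for VOPD3
  return true;
}

int main() {
  Operand Src0[2] = {{64}, {std::nullopt}}; // 64 is inlinable
  Operand VSrc[4] = {};                     // all registers
  return vopd3LiteralsOk(Src0, VSrc) ? 0 : 1;
}
```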
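On the codegen side, GCNCreateVOPD applies the same preference when merging two MachineInstrs: try the short encoding first and consider VOPD3 only on subtargets that have it. Schematically, with CheckVOPD standing in for the lambda in the diff that tests both the X/Y role assignment and the register constraints:

```cpp
#include <functional>

// Mirrors the candidate test added to GCNCreateVOPD: the short VOPD
// encoding is tried first; VOPD3 is only a fallback when hasVOPD3() holds
// (GFX1250Insts in this patch). Short-circuit evaluation leaves the
// combine info describing whichever variant matched.
static bool canPair(bool HasVOPD3,
                    const std::function<bool(bool /*VOPD3*/)> &CheckVOPD) {
  return CheckVOPD(/*VOPD3=*/false) || (HasVOPD3 && CheckVOPD(/*VOPD3=*/true));
}
```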
https://github.com/llvm/llvm-project/pull/147602