[llvm] [AMDGPU] Add iglp_opt(2) to provide initial MFMA/Exp interleaving (PR #80370)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Feb 1 17:23:58 PST 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Jeffrey Byrnes (jrbyrnes)
<details>
<summary>Changes</summary>
This adds the iglp_opt(2) builtin, which provides specialized mutations to produce custom scheduling.
It was designed against and specifically targets a subset of fused attention V_EXP -> V_MFMA kernels.
The intent is to provide some initial support and performance improvements on a subset of kernels, with a more robust, behavior-maintaining implementation to follow in a later change.
---
Patch is 332.59 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/80370.diff
7 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp (+889-32)
- (modified) llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.h (+5-1)
- (modified) llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp (+3-3)
- (modified) llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp (+4-3)
- (added) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.iglp.opt.exp.large.mir (+2055)
- (added) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.iglp.opt.exp.small.mir (+900)
- (added) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.iglp.opt.exp.tiny.mir (+646)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp b/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp
index 4462cd8a31f13..175356b55a931 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp
@@ -140,8 +140,6 @@ class SchedGroup {
// Count of the number of created SchedGroups, used to initialize SGID.
static unsigned NumSchedGroups;
- const SIInstrInfo *TII;
-
// Try to add an edge from SU A to SU B.
bool tryAddEdge(SUnit *A, SUnit *B);
@@ -154,6 +152,7 @@ class SchedGroup {
SmallVector<SUnit *, 32> Collection;
ScheduleDAGInstrs *DAG;
+ const SIInstrInfo *TII;
// Returns true if SU can be added to this SchedGroup.
bool canAddSU(SUnit &SU) const;
@@ -234,13 +233,13 @@ class SchedGroup {
SchedGroup(SchedGroupMask SGMask, std::optional<unsigned> MaxSize,
ScheduleDAGInstrs *DAG, const SIInstrInfo *TII)
- : SGMask(SGMask), MaxSize(MaxSize), TII(TII), DAG(DAG) {
+ : SGMask(SGMask), MaxSize(MaxSize), DAG(DAG), TII(TII) {
SGID = NumSchedGroups++;
}
SchedGroup(SchedGroupMask SGMask, std::optional<unsigned> MaxSize, int SyncID,
ScheduleDAGInstrs *DAG, const SIInstrInfo *TII)
- : SGMask(SGMask), MaxSize(MaxSize), SyncID(SyncID), TII(TII), DAG(DAG) {
+ : SGMask(SGMask), MaxSize(MaxSize), SyncID(SyncID), DAG(DAG), TII(TII) {
SGID = NumSchedGroups++;
}
};
@@ -442,7 +441,8 @@ void PipelineSolver::convertSyncMapsToArrays() {
template <typename T> void PipelineSolver::linkSchedGroups(T I, T E) {
for (; I != E; ++I) {
auto &GroupA = *I;
- for (auto J = std::next(I); J != E; ++J) {
+ auto J = std::next(I);
+ for (; J != E; ++J) {
auto &GroupB = *J;
GroupA.link(GroupB);
}
@@ -488,7 +488,9 @@ int PipelineSolver::linkSUnit(
continue;
}
auto Group = *I;
- AddedCost += Group.link(*SU, MakePred, AddedEdges);
+ auto Temp = Group.link(*SU, MakePred, AddedEdges);
+
+ AddedCost += Temp;
assert(AddedCost >= 0);
}
return AddedCost;
@@ -633,6 +635,7 @@ bool PipelineSolver::solveExact() {
assert(static_cast<size_t>(CurrConflInstNo) <
PipelineInstrs[CurrSyncGroupIdx].size());
SUToCandSGsPair CurrSU = PipelineInstrs[CurrSyncGroupIdx][CurrConflInstNo];
+
LLVM_DEBUG(dbgs() << "Fitting SU(" << CurrSU.first->NodeNum
<< ") in Pipeline # " << CurrSyncGroupIdx << "\n");
@@ -785,6 +788,7 @@ bool PipelineSolver::solveGreedy() {
while (static_cast<size_t>(CurrSyncGroupIdx) < PipelineInstrs.size()) {
SUToCandSGsPair CurrSU = PipelineInstrs[CurrSyncGroupIdx][CurrConflInstNo];
+
IsBottomUp
? greedyFind(AddedEdges, CurrSU.second.rbegin(), CurrSU.second.rend())
: greedyFind(AddedEdges, CurrSU.second.begin(), CurrSU.second.end());
@@ -838,6 +842,7 @@ void PipelineSolver::solve() {
enum IGLPStrategyID : int {
MFMASmallGemmOptID = 0,
MFMASmallGemmSingleWaveOptID = 1,
+ MFMAExpInterleave = 2
};
// Implement a IGLP scheduling strategy.
@@ -852,7 +857,7 @@ class IGLPStrategy {
virtual void applyIGLPStrategy(
DenseMap<int, SUnitsToCandidateSGsMap> &SyncedInstrs,
DenseMap<int, SmallVector<SchedGroup, 4>> &SyncedSchedGroups,
- bool IsReentry) = 0;
+ IGLPPhase Phase) = 0;
// Returns true if this strategy should be applied to a ScheduleDAG.
virtual bool shouldApplyStrategy(ScheduleDAGInstrs *DAG) = 0;
@@ -871,7 +876,7 @@ class MFMASmallGemmOpt final : public IGLPStrategy {
void applyIGLPStrategy(
DenseMap<int, SUnitsToCandidateSGsMap> &SyncedInstrs,
DenseMap<int, SmallVector<SchedGroup, 4>> &SyncedSchedGroups,
- bool IsReentry) override;
+ IGLPPhase Phase) override;
bool shouldApplyStrategy(ScheduleDAGInstrs *DAG) override { return true; }
@@ -884,7 +889,7 @@ class MFMASmallGemmOpt final : public IGLPStrategy {
void MFMASmallGemmOpt::applyIGLPStrategy(
DenseMap<int, SUnitsToCandidateSGsMap> &SyncedInstrs,
DenseMap<int, SmallVector<SchedGroup, 4>> &SyncedSchedGroups,
- bool IsReentry) {
+ IGLPPhase Phase) {
// Count the number of MFMA instructions.
unsigned MFMACount = 0;
for (const MachineInstr &I : *DAG)
@@ -904,6 +909,854 @@ void MFMASmallGemmOpt::applyIGLPStrategy(
}
}
+class MFMAExpInterleaveOpt final : public IGLPStrategy {
+private:
+ /// Whether or not the instruction is a transitive predecessor of an MFMA
+ /// instruction
+ class IsPipeExp final : public InstructionRule {
+ public:
+ bool apply(const SUnit *SU, const ArrayRef<SUnit *> Collection,
+ SmallVectorImpl<SchedGroup> &SyncPipe) override {
+
+ auto DAG = SyncPipe[0].DAG;
+ auto TII = SyncPipe[0].TII;
+
+ if (Cache->empty()) {
+ auto I = DAG->SUnits.rbegin();
+ auto E = DAG->SUnits.rend();
+ for (; I != E; I++) {
+ if (TII->isMFMAorWMMA(*(I->getInstr())))
+ Cache->push_back(&*I);
+ }
+ }
+
+ if (Cache->empty())
+ return false;
+
+ auto Reaches = (std::any_of(
+ Cache->begin(), Cache->end(), [&SU, &DAG](SUnit *TargetSU) {
+ return DAG->IsReachable(TargetSU, const_cast<SUnit *>(SU));
+ }));
+
+ return Reaches;
+ }
+ IsPipeExp(const SIInstrInfo *TII, unsigned SGID, bool NeedsCache = false)
+ : InstructionRule(TII, SGID, NeedsCache) {}
+ };
+
+ /// Whether or not the instruction is a transitive predecessor of the same
+ /// MFMA instruction as an instruction in a SchedGroup \p Number steps before
+ class ProduceSameMFMAWithPrevN final : public InstructionRule {
+ private:
+ unsigned Number = 1;
+
+ public:
+ bool apply(const SUnit *SU, const ArrayRef<SUnit *> Collection,
+ SmallVectorImpl<SchedGroup> &SyncPipe) override {
+ SchedGroup *OtherGroup = nullptr;
+ for (auto &PipeSG : SyncPipe) {
+ if ((unsigned)PipeSG.getSGID() == SGID - Number) {
+ OtherGroup = &PipeSG;
+ }
+ }
+
+ if (!OtherGroup)
+ return false;
+ if (!OtherGroup->Collection.size())
+ return true;
+
+ auto DAG = SyncPipe[0].DAG;
+
+ if (Cache->empty()) {
+ auto TII = SyncPipe[0].TII;
+ SmallVector<SUnit *, 8> Worklist;
+
+ auto I = DAG->SUnits.rbegin();
+ auto E = DAG->SUnits.rend();
+ for (; I != E; I++)
+ if (TII->isMFMAorWMMA(*(I->getInstr())))
+ Worklist.push_back(&*I);
+
+ for (auto BaseSU : OtherGroup->Collection) {
+ if (!Cache->empty())
+ break;
+ for (auto CandSU : Worklist) {
+ if (DAG->IsReachable(CandSU, BaseSU)) {
+ Cache->push_back(CandSU);
+ break;
+ }
+ }
+ }
+ }
+ if (Cache->empty())
+ return false;
+
+ return DAG->IsReachable((*Cache)[0], const_cast<SUnit *>(SU));
+ }
+
+ ProduceSameMFMAWithPrevN(unsigned Number, const SIInstrInfo *TII,
+ unsigned SGID, bool NeedsCache = false)
+ : InstructionRule(TII, SGID, NeedsCache), Number(Number) {}
+ };
+
+ /// Whether or not the instruction has less than \p Size immediate successors
+ class LessThanNSuccs final : public InstructionRule {
+ private:
+ unsigned Size = 1;
+
+ public:
+ bool apply(const SUnit *SU, const ArrayRef<SUnit *> Collection,
+ SmallVectorImpl<SchedGroup> &SyncPipe) override {
+ if (!SyncPipe.size())
+ return false;
+
+ return SU->Succs.size() < Size;
+ }
+ LessThanNSuccs(unsigned Size, const SIInstrInfo *TII, unsigned SGID,
+ bool NeedsCache = false)
+ : InstructionRule(TII, SGID, NeedsCache), Size(Size) {}
+ };
+
+ // Whether or not the instruction is an V_CVT instruction.
+ class IsCvt final : public InstructionRule {
+ private:
+ public:
+ bool apply(const SUnit *SU, const ArrayRef<SUnit *> Collection,
+ SmallVectorImpl<SchedGroup> &SyncPipe) override {
+ auto Opc = SU->getInstr()->getOpcode();
+ return Opc == AMDGPU::V_CVT_F16_F32_e32 ||
+ Opc == AMDGPU::V_CVT_F16_F32_e32_gfx10 ||
+ Opc == AMDGPU::V_CVT_I32_F32_e32 ||
+ Opc == AMDGPU::V_CVT_I32_F32_e32_gfx10 ||
+ Opc == AMDGPU::V_CVT_I32_F32_e32_gfx11;
+ }
+ IsCvt(const SIInstrInfo *TII, unsigned SGID, bool NeedsCache = false)
+ : InstructionRule(TII, SGID, NeedsCache) {}
+ };
+
+ // Whether or not the instruction is an V_FMA_F32 instruction.
+ class IsFMA final : public InstructionRule {
+ private:
+ public:
+ bool apply(const SUnit *SU, const ArrayRef<SUnit *> Collection,
+ SmallVectorImpl<SchedGroup> &SyncPipe) override {
+ return SU->getInstr()->getOpcode() == AMDGPU::V_FMA_F32_e64;
+ }
+ IsFMA(unsigned Val, const SIInstrInfo *TII, unsigned SGID,
+ bool NeedsCache = false)
+ : InstructionRule(TII, SGID, NeedsCache) {}
+ };
+
+ /// Whether or not the instruction is an immediate RAW successor
+ /// of the SchedGroup \p Distance steps before.
+ class IsSuccOfPrevNthGroup final : public InstructionRule {
+ private:
+ unsigned Distance = 1;
+
+ public:
+ bool apply(const SUnit *SU, const ArrayRef<SUnit *> Collection,
+ SmallVectorImpl<SchedGroup> &SyncPipe) override {
+ SchedGroup *OtherGroup = nullptr;
+ if (!SyncPipe.size())
+ return false;
+
+ for (auto &PipeSG : SyncPipe) {
+ if ((unsigned)PipeSG.getSGID() == SGID - Distance) {
+ OtherGroup = &PipeSG;
+ }
+ }
+
+ if (!OtherGroup)
+ return false;
+ if (!OtherGroup->Collection.size())
+ return true;
+
+ for (auto &OtherEle : OtherGroup->Collection) {
+ for (auto &Succ : OtherEle->Succs) {
+ if (Succ.getSUnit() == SU && Succ.getKind() == SDep::Data)
+ return true;
+ }
+ }
+
+ return false;
+ }
+ IsSuccOfPrevNthGroup(unsigned Distance, const SIInstrInfo *TII,
+ unsigned SGID, bool NeedsCache = false)
+ : InstructionRule(TII, SGID, NeedsCache), Distance(Distance) {}
+ };
+
+ /// Whether or not the instruction is a transitive successor of any
+ /// instruction in the SchedGroup \p Distance steps before.
+ class IsReachableFromPrevNthGroup final : public InstructionRule {
+ private:
+ unsigned Distance = 1;
+
+ public:
+ bool apply(const SUnit *SU, const ArrayRef<SUnit *> Collection,
+ SmallVectorImpl<SchedGroup> &SyncPipe) override {
+ SchedGroup *OtherGroup = nullptr;
+ if (!SyncPipe.size())
+ return false;
+
+ for (auto &PipeSG : SyncPipe) {
+ if ((unsigned)PipeSG.getSGID() == SGID - Distance) {
+ OtherGroup = &PipeSG;
+ }
+ }
+
+ if (!OtherGroup)
+ return false;
+ if (!OtherGroup->Collection.size())
+ return true;
+
+ auto DAG = SyncPipe[0].DAG;
+
+ for (auto &OtherEle : OtherGroup->Collection)
+ if (DAG->IsReachable(const_cast<SUnit *>(SU), OtherEle))
+ return true;
+
+ return false;
+ }
+ IsReachableFromPrevNthGroup(unsigned Distance, const SIInstrInfo *TII,
+ unsigned SGID, bool NeedsCache = false)
+ : InstructionRule(TII, SGID, NeedsCache), Distance(Distance) {}
+ };
+
+ /// Whether or not the instruction is the \p Number th occurring DS_READ
+ /// instruction
+ class IsNthDSR final : public InstructionRule {
+ private:
+ unsigned Number = 1;
+
+ public:
+ bool apply(const SUnit *SU, const ArrayRef<SUnit *> Collection,
+ SmallVectorImpl<SchedGroup> &SyncPipe) override {
+
+ auto DAG = SyncPipe[0].DAG;
+ auto TII = SyncPipe[0].TII;
+ unsigned Counter = 0;
+ if (Cache->empty()) {
+ for (auto &ParseSU : DAG->SUnits) {
+ auto MI = ParseSU.getInstr();
+ if (TII->isDS(MI->getOpcode()) && MI->mayLoad()) {
+ if (Counter == Number) {
+ Cache->push_back(&ParseSU);
+ break;
+ }
+
+ ++Counter;
+ }
+ }
+ }
+
+ if (Cache->empty())
+ return false;
+
+ return (*Cache)[0]->NodeNum <= SU->NodeNum;
+ }
+ IsNthDSR(unsigned Number, const SIInstrInfo *TII, unsigned SGID,
+ bool NeedsCache = false)
+ : InstructionRule(TII, SGID, NeedsCache), Number(Number) {}
+ };
+
+ // Whether or not the instruction is NOT a transitive predecessor of any TRANS
+ // instruction
+ class IsPipeMFMA final : public InstructionRule {
+ public:
+ bool apply(const SUnit *SU, const ArrayRef<SUnit *> Collection,
+ SmallVectorImpl<SchedGroup> &SyncPipe) override {
+
+ SmallVector<SUnit *, 12> Worklist;
+ auto DAG = SyncPipe[0].DAG;
+ auto TII = SyncPipe[0].TII;
+ if (Cache->empty()) {
+ for (auto &SU : DAG->SUnits)
+ if (TII->isTRANS(SU.getInstr()->getOpcode()))
+ Cache->push_back(&SU);
+ }
+
+ if (Cache->empty())
+ return false;
+
+ return !(std::any_of(
+ Cache->begin(), Cache->end(), [&SU, &DAG](SUnit *BaseSU) {
+ return DAG->IsReachable(BaseSU, const_cast<SUnit *>(SU));
+ }));
+ }
+
+ IsPipeMFMA(const SIInstrInfo *TII, unsigned SGID, bool NeedsCache = false)
+ : InstructionRule(TII, SGID, NeedsCache) {}
+ };
+
+ // Whether the instruction occurs after the first TRANS instruction. This
+ // implies the instruction cannot be a predecessor of the first TRANS
+ // instruction
+ class OccursAfterExp final : public InstructionRule {
+ public:
+ bool apply(const SUnit *SU, const ArrayRef<SUnit *> Collection,
+ SmallVectorImpl<SchedGroup> &SyncPipe) override {
+
+ SmallVector<SUnit *, 12> Worklist;
+ auto DAG = SyncPipe[0].DAG;
+ auto TII = SyncPipe[0].TII;
+ if (Cache->empty()) {
+ for (auto &SU : DAG->SUnits)
+ if (TII->isTRANS(SU.getInstr()->getOpcode())) {
+ Cache->push_back(&SU);
+ break;
+ }
+ }
+
+ if (Cache->empty())
+ return false;
+
+ return SU->NodeNum > (*Cache)[0]->NodeNum;
+ }
+
+ OccursAfterExp(const SIInstrInfo *TII, unsigned SGID,
+ bool NeedsCache = false)
+ : InstructionRule(TII, SGID, NeedsCache) {}
+ };
+
+ // Whether the SU is a not a successor of any element in the previous
+ // SchedGroup
+ class IsNotSuccOfPrevGroup final : public InstructionRule {
+ public:
+ bool apply(const SUnit *SU, const ArrayRef<SUnit *> Collection,
+ SmallVectorImpl<SchedGroup> &SyncPipe) override {
+ SchedGroup *OtherGroup = nullptr;
+ for (auto &PipeSG : SyncPipe) {
+ if ((unsigned)PipeSG.getSGID() == SGID - 1) {
+ OtherGroup = &PipeSG;
+ }
+ }
+
+ if (!OtherGroup)
+ return false;
+ if (!OtherGroup->Collection.size())
+ return true;
+
+ // Does the previous VALU have this DS_Write as a successor
+ return !(std::any_of(OtherGroup->Collection.begin(),
+ OtherGroup->Collection.end(), [&SU](SUnit *Elt) {
+ return std::any_of(Elt->Succs.begin(),
+ Elt->Succs.end(),
+ [&SU](SDep &Succ) {
+ return Succ.getSUnit() == SU;
+ });
+ }));
+ }
+ IsNotSuccOfPrevGroup(const SIInstrInfo *TII, unsigned SGID,
+ bool NeedsCache = false)
+ : InstructionRule(TII, SGID, NeedsCache) {}
+ };
+
+public:
+ void applyIGLPStrategy(
+ DenseMap<int, SUnitsToCandidateSGsMap> &SyncedInstrs,
+ DenseMap<int, SmallVector<SchedGroup, 4>> &SyncedSchedGroups,
+ IGLPPhase Phase) override;
+
+ bool shouldApplyStrategy(ScheduleDAGInstrs *DAG) override { return true; }
+
+ MFMAExpInterleaveOpt(ScheduleDAGInstrs *DAG, const SIInstrInfo *TII)
+ : IGLPStrategy(DAG, TII) {
+ IsBottomUp = 0;
+ }
+};
+
+static unsigned TransPipeCount = 0;
+static unsigned MFMAPipeCount = 0;
+static unsigned MFMAEnablement = 0;
+static unsigned ExpRequirement = 0;
+
+void MFMAExpInterleaveOpt::applyIGLPStrategy(
+ DenseMap<int, SUnitsToCandidateSGsMap> &SyncedInstrs,
+ DenseMap<int, SmallVector<SchedGroup, 4>> &SyncedSchedGroups,
+ IGLPPhase Phase) {
+
+ const GCNSubtarget &ST = DAG->MF.getSubtarget<GCNSubtarget>();
+ const SIInstrInfo *TII = ST.getInstrInfo();
+
+ if (Phase == IGLPPhase::Initial) {
+ SmallVector<SUnit *, 10> ExpPipeCands;
+ SmallVector<SUnit *, 10> MFMAPipeCands;
+ SmallVector<SUnit *, 10> MFMAPipeSUs;
+ SmallVector<SUnit *, 10> PackSUs;
+
+ auto isBitPack = [](unsigned Opc) {
+ return Opc == AMDGPU::V_PACK_B32_F16_e64 ||
+ Opc == AMDGPU::V_PACK_B32_F16_gfx10 ||
+ Opc == AMDGPU::V_PACK_B32_F16_e64_gfx11 ||
+ Opc == AMDGPU::V_PERM_B32_e64 ||
+ Opc == AMDGPU::V_PERM_B32_e64_gfx11;
+ };
+ for (SUnit &SU : DAG->SUnits) {
+ auto Opc = SU.getInstr()->getOpcode();
+ if (TII->isTRANS(Opc)) {
+ // Avoid counting a potential bonus V_EXP which all the MFMA depend on
+ if (SU.Succs.size() >= 7)
+ continue;
+ ExpPipeCands.push_back(&SU);
+ }
+
+ if (TII->isMFMAorWMMA(*SU.getInstr()))
+ MFMAPipeCands.push_back(&SU);
+
+ if (isBitPack(Opc))
+ PackSUs.push_back(&SU);
+ }
+
+ if (!(PackSUs.size() && MFMAPipeCands.size() && ExpPipeCands.size()))
+ return;
+
+ TransPipeCount = 0;
+ MFMAPipeCount = 0;
+ MFMAEnablement = 0;
+ ExpRequirement = 0;
+
+ std::optional<SUnit *> TempMFMA;
+ std::optional<SUnit *> TempExp;
+ // Count the number of EXPs that reach an MFMA
+ for (auto &PredSU : ExpPipeCands) {
+ for (auto &SuccSU : MFMAPipeCands) {
+ if (DAG->IsReachable(SuccSU, PredSU)) {
+ if (!TempExp.has_value()) {
+ TempExp = PredSU;
+ TempMFMA = SuccSU;
+ }
+ MFMAPipeSUs.push_back(SuccSU);
+ ++TransPipeCount;
+ break;
+ }
+ }
+ }
+
+ if (!TempExp.has_value())
+ return;
+
+ // Count the number of MFMAs that are reached by an EXP
+ for (auto &SuccSU : MFMAPipeCands) {
+ if (std::find_if(MFMAPipeSUs.begin(), MFMAPipeSUs.end(),
+ [&SuccSU](SUnit *PotentialMatch) {
+ return PotentialMatch == SuccSU;
+ })) {
+ ++MFMAPipeCount;
+ continue;
+ }
+ for (auto &PredSU : ExpPipeCands) {
+ if (DAG->IsReachable(SuccSU, PredSU)) {
+ ++MFMAPipeCount;
+ break;
+ }
+ }
+ }
+
+ if (!TempMFMA.has_value() || !TempExp.has_value())
+ return;
+
+ // The number of bit pack operations that depend on a single V_EXP
+ unsigned PackSuccCount = std::count_if(
+ PackSUs.begin(), PackSUs.end(), [this, &TempExp](SUnit *VPack) {
+ return DAG->IsReachable(VPack, *TempExp);
+ });
+
+ // The number of bit pack operations an MFMA depends on
+ unsigned PackPredCount =
+ std::count_if((*TempMFMA)->Preds.begin(), (*TempMFMA)->Preds.end(),
+ [&isBitPack](SDep &Pred) {
+ auto Opc = Pred.getSUnit()->getInstr()->getOpcode();
+ return isBitPack(Opc);
+ });
+
+ auto PackPred =
+ std::find_if((*TempMFMA)->Preds.begin(), (*TempMFMA)->Preds.end(),
+ [&isBitPack](SDep &Pred) {
+ auto Opc = Pred.getSUnit()->getInstr()->getOpcode();
+ return isBitPack(Opc);
+ });
+
+ // How many MFMAs depend on a single bit pack operation
+ MFMAEnablement =
+ std::count_if(PackPred->getSUnit()->Succs.begin(),
+ PackPred->getSUnit()->Succs.end(), [&TII](SDep &Succ) {
+ return TII->isMFMAorWMMA(*Succ.getSUnit()->getInstr());
+ });
+
+ // The number of MFMAs that depend on a single V_EXP
+ MFMAEnablement *= PackSuccCount;
+
+ // The number of V_EXPs required to resolve all dependencies for an MFMA
+ ExpRequirement =
+ std::count_if(ExpPipeCands.begin(), ExpPipeCands...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/80370
More information about the llvm-commits
mailing list