[llvm-branch-commits] [llvm] [AMDGPU] Add HWUI pressure heuristics to coexec strategy (PR #184929)
Matt Arsenault via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Fri Mar 6 05:25:42 PST 2026
================
@@ -41,6 +41,370 @@ static SUnit *pickOnlyChoice(SchedBoundary &Zone) {
return OnlyChoice;
}
+/// Map a machine instruction onto the InstructionFlavor bucket used by the
+/// coexec heuristics.
+///
+/// Null and debug instructions are reported as InstructionFlavor::Other. The
+/// explicit opcode lists are consulted first, then the SIInstrInfo predicates
+/// are tried in a fixed order; the first one that matches decides the result.
+/// Anything left over is InstructionFlavor::Other.
+InstructionFlavor llvm::classifyFlavor(const MachineInstr *MI,
+                                       const SIInstrInfo *SII) {
+  if (!MI || MI->isDebugInstr())
+    return InstructionFlavor::Other;
+
+  // Opcodes that are classified by identity rather than by instruction class.
+  switch (MI->getOpcode()) {
+  case AMDGPU::ATOMIC_FENCE:
+  case AMDGPU::S_WAIT_ASYNCCNT:
+  case AMDGPU::S_WAIT_TENSORCNT:
+  case AMDGPU::S_BARRIER_WAIT:
+  case AMDGPU::S_BARRIER_SIGNAL_IMM:
+    return InstructionFlavor::Fence;
+  case AMDGPU::TENSOR_LOAD_TO_LDS_D2:
+  case AMDGPU::TENSOR_LOAD_TO_LDS:
+  case AMDGPU::GLOBAL_LOAD_ASYNC_TO_LDS_B32:
+  case AMDGPU::GLOBAL_LOAD_ASYNC_TO_LDS_B32_SADDR:
+    return InstructionFlavor::DMA;
+  default:
+    break;
+  }
+
+  // Class-based checks. The order is significant: earlier predicates take
+  // precedence over later ones when more than one applies.
+  const MachineInstr &Inst = *MI;
+  if (SII->isMFMAorWMMA(Inst))
+    return InstructionFlavor::WMMA;
+  if (SII->isTRANS(Inst))
+    return InstructionFlavor::TRANS;
+  if (SII->isVALU(Inst))
+    return InstructionFlavor::SingleCycleVALU;
+  if (SII->isDS(Inst))
+    return InstructionFlavor::DS;
+  if (SII->isFLAT(Inst) || SII->isFLATGlobal(Inst) ||
+      SII->isFLATScratch(Inst))
+    return InstructionFlavor::VMEM;
+  if (SII->isSALU(Inst))
+    return InstructionFlavor::SALU;
+
+  return InstructionFlavor::Other;
+}
+
+/// Choose the next SUnit of this hardware unit to steer the scheduler towards.
+///
+/// The first entry of PrioritySUs that is not top-ready is returned when one
+/// exists. Otherwise, if \p LookDeep is set, every tracked SUnit is examined
+/// and the unscheduled, not-top-ready one with the smallest depth is returned
+/// (the earliest such SUnit wins ties). Returns nullptr when nothing
+/// qualifies or when \p LookDeep is false and no priority SUnit matched.
+SUnit *HardwareUnitInfo::getNextTargetSU(bool LookDeep) {
+  for (auto *Candidate : PrioritySUs) {
+    if (!Candidate->isTopReady())
+      return Candidate;
+  }
+
+  if (!LookDeep)
+    return nullptr;
+
+  SUnit *Shallowest = nullptr;
+  unsigned BestDepth = std::numeric_limits<unsigned int>::max();
+  for (auto *Candidate : AllSUs) {
+    // Only pending SUnits that are still blocked are interesting here.
+    if (Candidate->isScheduled || Candidate->isTopReady())
+      continue;
+
+    const unsigned Depth = Candidate->getDepth();
+    if (Depth >= BestDepth)
+      continue;
+
+    BestDepth = Depth;
+    Shallowest = Candidate;
+  }
+  return Shallowest;
+}
+
+/// Start tracking \p SU on this hardware unit and account \p BlockingCycles
+/// towards the unit's total.
+///
+/// PrioritySUs is maintained as the set of tracked SUnits that share the
+/// smallest depth seen so far: a shallower SUnit replaces the set, an equally
+/// deep one joins it, and a deeper one leaves it untouched.
+///
+/// Inserting the same SUnit twice is a programming error. It asserts in
+/// builds with assertions enabled and is ignored otherwise so that the cycle
+/// total is not double counted.
+void HardwareUnitInfo::insert(SUnit *SU, unsigned BlockingCycles) {
+  bool Inserted = AllSUs.insert(SU);
+  assert(Inserted && "SUnit is already tracked by this HardwareUnitInfo");
+  if (!Inserted)
+    return;
+
+  TotalCycles += BlockingCycles;
+
+  if (PrioritySUs.empty()) {
+    PrioritySUs.insert(SU);
+    return;
+  }
+
+  // All members of PrioritySUs share one depth, so the first is representative.
+  unsigned SUDepth = SU->getDepth();
+  unsigned CurrDepth = (*PrioritySUs.begin())->getDepth();
+  if (SUDepth > CurrDepth)
+    return;
+
+  // SU is lower depth and should be prioritized on its own.
+  if (SUDepth < CurrDepth)
+    PrioritySUs.clear();
+
+  PrioritySUs.insert(SU);
+}
+
+/// Record that \p SU has been scheduled: stop tracking it, subtract
+/// \p BlockingCycles from the unit's total and, if that emptied PrioritySUs,
+/// rebuild it from the remaining SUnits of smallest depth.
+void HardwareUnitInfo::schedule(SUnit *SU, unsigned BlockingCycles) {
+  // We may want to ignore some HWUIs (e.g. InstructionFlavor::Other). To do so,
+  // we just clear the HWUI. However, we still have instructions which map to
+  // this HWUI. Don't bother managing the state for these HWUI.
+  if (TotalCycles == 0)
+    return;
+
+  AllSUs.remove(SU);
+  PrioritySUs.remove(SU);
+
+  assert(TotalCycles >= BlockingCycles &&
+         "Scheduling more cycles than were accounted for this HWUI");
+  TotalCycles -= BlockingCycles;
+
+  if (AllSUs.empty() || !PrioritySUs.empty())
+    return;
+
+  // The last priority SUnit was just scheduled; recompute the set of
+  // remaining SUnits that share the minimum depth.
+  for (auto *Remaining : AllSUs) {
+    if (PrioritySUs.empty()) {
+      PrioritySUs.insert(Remaining);
+      continue;
+    }
+
+    unsigned SUDepth = Remaining->getDepth();
+    unsigned CurrDepth = (*PrioritySUs.begin())->getDepth();
+    if (SUDepth > CurrDepth)
+      continue;
+
+    // Remaining is lower depth and should be prioritized on its own.
+    if (SUDepth < CurrDepth)
+      PrioritySUs.clear();
+
+    PrioritySUs.insert(Remaining);
+  }
+}
+
+/// Look up the HardwareUnitInfo whose type equals \p Flavor.
+///
+/// \returns a pointer to the first matching entry of HWUInfo, or nullptr when
+/// no entry has that type.
+HardwareUnitInfo *
+CandidateHeuristics::getHWUIFromFlavor(InstructionFlavor Flavor) {
+  for (auto &Info : HWUInfo)
+    if (Info.getType() == Flavor)
+      return &Info;
+
+  return nullptr;
+}
+
+/// Return the largest ReleaseAtCycle over all write-proc-resource entries of
+/// \p SU's scheduling class.
+///
+/// When there is no scheduling model, or it has no per-instruction data, the
+/// maximum unsigned value is returned as a sentinel (the same value the
+/// implicit conversion of -1 yields).
+unsigned CandidateHeuristics::getHWUICyclesForInst(SUnit *SU) {
+  if (!SchedModel || !SchedModel->hasInstrSchedModel())
+    return std::numeric_limits<unsigned>::max();
+
+  const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
+  unsigned MaxRelease = 0;
+  TargetSchedModel::ProcResIter It = SchedModel->getWriteProcResBegin(SC);
+  const TargetSchedModel::ProcResIter End = SchedModel->getWriteProcResEnd(SC);
+  while (It != End) {
+    MaxRelease = std::max(MaxRelease, static_cast<unsigned>(It->ReleaseAtCycle));
+    ++It;
+  }
+  return MaxRelease;
+}
+
+/// Notify the heuristics that \p SU has been scheduled so the hardware unit
+/// that tracks its flavor can update its bookkeeping.
+void CandidateHeuristics::schedNode(SUnit *SU) {
+  HardwareUnitInfo *HWUI =
+      getHWUIFromFlavor(classifyFlavor(SU->getInstr(), SII));
+
+  // getHWUIFromFlavor returns nullptr when no entry matches the flavor (for
+  // example if HWUInfo has not been populated yet); never dereference that.
+  assert(HWUI && "No HardwareUnitInfo registered for this instruction flavor");
+  if (!HWUI)
+    return;
+
+  HWUI->schedule(SU, getHWUICyclesForInst(SU));
+}
+
+/// Bind the heuristics to \p SchedDAG and rebuild the per-flavor state.
+///
+/// Stores the DAG and scheduling model, derives the SI register and
+/// instruction info, sizes HWUInfo to one slot per InstructionFlavor (each
+/// slot typed by its index and reset), flags the WMMA, MultiCycleVALU and
+/// TRANS slots as producing a coexec window, and finally collects the
+/// per-unit pressure for the region.
+void CandidateHeuristics::initialize(ScheduleDAGMI *SchedDAG,
+                                     const TargetSchedModel *TargetSchedModel,
+                                     const TargetRegisterInfo *TRI) {
+  DAG = SchedDAG;
+  SchedModel = TargetSchedModel;
+
+  SRI = static_cast<const SIRegisterInfo *>(TRI);
+  SII = static_cast<const SIInstrInfo *>(DAG->TII);
+
+  HWUInfo.resize(static_cast<int>(InstructionFlavor::NUM_FLAVORS));
+
+  for (unsigned Idx = 0; Idx < HWUInfo.size(); ++Idx) {
+    auto &Slot = HWUInfo[Idx];
+    Slot.setType(Idx);
+    Slot.reset();
+  }
+
+  // Flavors whose slots are marked as producing a coexec window.
+  const InstructionFlavor CoexecProducers[] = {
+      InstructionFlavor::WMMA,
+      InstructionFlavor::MultiCycleVALU,
+      InstructionFlavor::TRANS,
+  };
+  for (InstructionFlavor Flavor : CoexecProducers)
+    HWUInfo[static_cast<int>(Flavor)].setProducesCoexecWindow(true);
+
+  collectHWUIPressure();
+}
+
+/// Register every SUnit of the DAG with the hardware unit matching its
+/// instruction flavor, together with its cycle count, then emit the region
+/// summary under LLVM_DEBUG.
+///
+/// Does nothing when no per-instruction scheduling model is available.
+void CandidateHeuristics::collectHWUIPressure() {
+  const bool HasModel = SchedModel && SchedModel->hasInstrSchedModel();
+  if (!HasModel)
+    return;
+
+  for (auto &Unit : DAG->SUnits) {
+    const InstructionFlavor Flavor = classifyFlavor(Unit.getInstr(), SII);
+    auto &Slot = HWUInfo[static_cast<int>(Flavor)];
+    Slot.insert(&Unit, getHWUICyclesForInst(&Unit));
+  }
+
+  LLVM_DEBUG(dumpRegionSummary());
+}
+
+/// Print a summary of the current region to dbgs().
+///
+/// The header names the function, the number of the block that contains the
+/// first instruction of the region, and the SUnit count. One line follows for
+/// each hardware unit with a non-zero cycle total, giving its index, flavor
+/// name, total cycles and instruction count.
+void CandidateHeuristics::dumpRegionSummary() {
+  auto &OS = dbgs();
+  const MachineBasicBlock *Block = DAG->begin()->getParent();
+
+  OS << "\n=== Region: " << DAG->MF.getName() << " BB" << Block->getNumber()
+     << " (" << DAG->SUnits.size() << " SUs) ===\n";
+  OS << "\nHWUI Resource Pressure:\n";
+
+  for (auto &Unit : HWUInfo) {
+    // Units without any accounted cycles are omitted from the report.
+    if (Unit.getTotalCycles() != 0) {
+      StringRef FlavorName = getFlavorName(Unit.getType());
+      OS << "  [" << Unit.getIdx() << "] " << FlavorName << ": "
+         << Unit.getTotalCycles() << " cycles, " << Unit.size()
+         << " instrs\n";
+    }
+  }
+  OS << "\n";
+}
+
+void CandidateHeuristics::sortHWUIResources() {
----------------
arsenm wrote:
Define the predicate function separately, then directly use sort + that function
https://github.com/llvm/llvm-project/pull/184929
More information about the llvm-branch-commits
mailing list