[llvm-branch-commits] [llvm] [AMDGPU] Add HWUI pressure heuristics to coexec strategy (PR #184929)

Austin Kerbow via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Mon Mar 9 22:52:30 PDT 2026


================
@@ -41,6 +42,370 @@ static SUnit *pickOnlyChoice(SchedBoundary &Zone) {
   return OnlyChoice;
 }
 
+/// Map \p MI to the InstructionFlavor bucket it belongs to.
+///
+/// Debug instructions are always reported as Other. A fixed list of opcodes is
+/// matched first; after that the SIInstrInfo predicates are tried in a fixed
+/// order and the first one that holds decides the flavor. Anything that
+/// matches nothing is Other.
+InstructionFlavor llvm::AMDGPU::classifyFlavor(const MachineInstr &MI,
+                                               const SIInstrInfo &SII) {
+  if (MI.isDebugInstr())
+    return InstructionFlavor::Other;
+
+  // Opcodes with a dedicated flavor win over the generic predicate checks
+  // further down.
+  switch (MI.getOpcode()) {
+  case AMDGPU::ATOMIC_FENCE:
+  case AMDGPU::S_WAIT_ASYNCCNT:
+  case AMDGPU::S_WAIT_TENSORCNT:
+  case AMDGPU::S_BARRIER_WAIT:
+  case AMDGPU::S_BARRIER_SIGNAL_IMM:
+    return InstructionFlavor::Fence;
+  case AMDGPU::TENSOR_LOAD_TO_LDS_D2:
+  case AMDGPU::TENSOR_LOAD_TO_LDS:
+  case AMDGPU::GLOBAL_LOAD_ASYNC_TO_LDS_B32:
+  case AMDGPU::GLOBAL_LOAD_ASYNC_TO_LDS_B32_SADDR:
+    return InstructionFlavor::DMA;
+  default:
+    break;
+  }
+
+  // NOTE(review): the order below is significant if these predicates overlap
+  // (presumably matrix and TRANS instructions also satisfy isVALU) -- confirm
+  // against SIInstrInfo before reordering.
+  if (SII.isMFMAorWMMA(MI))
+    return InstructionFlavor::WMMA;
+  if (SII.isTRANS(MI))
+    return InstructionFlavor::TRANS;
+  if (SII.isVALU(MI))
+    return InstructionFlavor::SingleCycleVALU;
+  if (SII.isDS(MI))
+    return InstructionFlavor::DS;
+
+  // Every FLAT variant is reported as VMEM.
+  const bool IsFlatFamily =
+      SII.isFLAT(MI) || SII.isFLATGlobal(MI) || SII.isFLATScratch(MI);
+  if (IsFlatFamily)
+    return InstructionFlavor::VMEM;
+
+  return SII.isSALU(MI) ? InstructionFlavor::SALU : InstructionFlavor::Other;
+}
+
+/// Pick the next SUnit on this hardware unit that is not yet top-ready.
+///
+/// The first entry of PrioritySUs that is not top-ready is returned if there
+/// is one. Otherwise, when \p LookDeep is set, AllSUs is searched for the
+/// unscheduled, not-top-ready SUnit with the smallest depth (the earliest such
+/// entry wins ties).
+///
+/// \returns the chosen SUnit, or nullptr when nothing qualifies or when
+/// \p LookDeep is false and no priority entry qualified.
+SUnit *HardwareUnitInfo::getNextTargetSU(bool LookDeep) {
+  for (SUnit *Candidate : PrioritySUs)
+    if (!Candidate->isTopReady())
+      return Candidate;
+
+  if (!LookDeep)
+    return nullptr;
+
+  SUnit *Shallowest = nullptr;
+  unsigned ShallowestDepth = std::numeric_limits<unsigned int>::max();
+  for (SUnit *Candidate : AllSUs) {
+    // Only SUnits that are neither scheduled nor already top-ready qualify.
+    if (Candidate->isScheduled || Candidate->isTopReady())
+      continue;
+
+    // Strict comparison keeps the earliest SUnit among equal depths.
+    const unsigned Depth = Candidate->getDepth();
+    if (Depth < ShallowestDepth) {
+      ShallowestDepth = Depth;
+      Shallowest = Candidate;
+    }
+  }
+  return Shallowest;
+}
+
+/// Start tracking \p SU on this hardware unit and add \p BlockingCycles to the
+/// unit's running cycle total.
+///
+/// \p SU must not already be tracked (asserted). PrioritySUs is updated by
+/// depth: \p SU joins it when its depth equals that of the current priority
+/// entries, replaces them when it is shallower, and is left out when it is
+/// deeper.
+void HardwareUnitInfo::insert(SUnit *SU, unsigned BlockingCycles) {
+  // Inserted is only read by the assert; [[maybe_unused]] keeps builds with
+  // assertions disabled free of an unused-variable warning.
+  [[maybe_unused]] bool Inserted = AllSUs.insert(SU);
+  assert(Inserted && "SUnit is already tracked by this HardwareUnitInfo");
+  TotalCycles += BlockingCycles;
+
+  if (PrioritySUs.empty()) {
+    PrioritySUs.insert(SU);
+    return;
+  }
+
+  unsigned SUDepth = SU->getDepth();
+  unsigned CurrDepth = (*PrioritySUs.begin())->getDepth();
+  if (SUDepth > CurrDepth)
+    return;
+
+  // SU is lower depth and should be prioritized on its own; an equal depth
+  // simply joins the existing priority entries.
+  if (SUDepth < CurrDepth)
+    PrioritySUs.clear();
+  PrioritySUs.insert(SU);
+}
+
+/// Record that \p SU has been scheduled: stop tracking it and subtract
+/// \p BlockingCycles from the unit's cycle total.
+///
+/// If that leaves PrioritySUs empty while other SUnits are still tracked,
+/// PrioritySUs is rebuilt as the tracked SUnits with the smallest depth, in
+/// AllSUs order.
+void HardwareUnitInfo::schedule(SUnit *SU, unsigned BlockingCycles) {
+  // Some HWUIs (e.g. InstructionFlavor::Other) are deliberately ignored by
+  // clearing them, yet instructions still map to them. Such a unit has no
+  // cycles recorded, so there is no state worth maintaining for it.
+  if (TotalCycles == 0)
+    return;
+
+  AllSUs.remove(SU);
+  PrioritySUs.remove(SU);
+  TotalCycles -= BlockingCycles;
+
+  // The priority set only needs rebuilding once it has been drained and there
+  // is something left to pick from.
+  if (AllSUs.empty() || !PrioritySUs.empty())
+    return;
+
+  unsigned BestDepth = 0;
+  for (SUnit *Remaining : AllSUs) {
+    const unsigned Depth = Remaining->getDepth();
+    if (PrioritySUs.empty() || Depth < BestDepth) {
+      // First candidate, or a strictly shallower one: it becomes the sole
+      // priority entry.
+      PrioritySUs.clear();
+      PrioritySUs.insert(Remaining);
+      BestDepth = Depth;
+    } else if (Depth == BestDepth) {
+      PrioritySUs.insert(Remaining);
+    }
+  }
+}
+
+/// Look up the hardware unit entry whose type equals \p Flavor.
+///
+/// \returns a pointer to the first matching element of HWUInfo, or nullptr if
+/// no element has that type.
+HardwareUnitInfo *
+CandidateHeuristics::getHWUIFromFlavor(InstructionFlavor Flavor) {
+  HardwareUnitInfo *Match = nullptr;
+  for (auto &Info : HWUInfo) {
+    if (Info.getType() != Flavor)
+      continue;
+    Match = &Info;
+    break;
+  }
+  return Match;
+}
+
+unsigned CandidateHeuristics::getHWUICyclesForInst(SUnit *SU) {
+  if (SchedModel && SchedModel->hasInstrSchedModel()) {
+    unsigned ReleaseAtCycle = 0;
+    const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
+    for (TargetSchedModel::ProcResIter
+             PI = SchedModel->getWriteProcResBegin(SC),
+             PE = SchedModel->getWriteProcResEnd(SC);
+         PI != PE; ++PI) {
+      ReleaseAtCycle = std::max(ReleaseAtCycle, (unsigned)PI->ReleaseAtCycle);
+    }
+    return ReleaseAtCycle;
+  }
+  return -1;
----------------
kerbowa wrote:

Is returning -1 from a function whose return type is unsigned intentional?

https://github.com/llvm/llvm-project/pull/184929


More information about the llvm-branch-commits mailing list