[llvm-branch-commits] [llvm] [AMDGPU] Add HWUI pressure heuristics to coexec strategy (PR #184929)
Lucas Ramirez via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Wed Mar 11 05:27:09 PDT 2026
================
@@ -19,12 +19,297 @@
namespace llvm {
+namespace AMDGPU {
+
+//===----------------------------------------------------------------------===//
+// Instruction Flavor Classification
+//===----------------------------------------------------------------------===//
+
+/// Coarse category assigned to an instruction. Each enumerator's trailing
+/// comment describes what falls into that category.
+///
+/// NUM_FLAVORS is not a category: it is the number of real enumerators and
+/// must remain the last entry. FlavorGroups::all() iterates the half-open
+/// range [0, NUM_FLAVORS) and HardwareUnitInfo::setType() asserts its argument
+/// is below NUM_FLAVORS, so both rely on the real flavors being numbered
+/// contiguously from zero.
+enum class InstructionFlavor : uint8_t {
+ WMMA, // WMMA/MFMA matrix operations
+ SingleCycleVALU, // Single-cycle VALU (not TRANS32, not multi-cycle CVT)
+ TRANS, // Transcendental ops (v_exp, v_log, etc.)
+ MultiCycleVALU, // VALU instructions with repeat rate > 1
+ VMEM, // FLAT/GLOBAL memory operations
+ DS, // LDS/GDS operations
+ SALU, // Scalar ALU
+ DMA, // Tensor DMA operations
+ Fence, // Fences and waits
+ Other, // Everything else
+ NUM_FLAVORS // Sentinel: count of flavors above, keep last
+};
+
+/// \returns the long display name of flavor \p F.
+///
+/// The NUM_FLAVORS sentinel is rendered as "???". A value that matches no
+/// enumerator is a programming error and reaches llvm_unreachable.
+inline StringRef getFlavorName(InstructionFlavor F) {
+  StringRef Label;
+  // Deliberately no default case: a fully covered switch lets the compiler
+  // flag a newly added flavor that has no name yet.
+  switch (F) {
+  case InstructionFlavor::WMMA:
+    Label = "WMMA";
+    break;
+  case InstructionFlavor::SingleCycleVALU:
+    Label = "VALU(1c)";
+    break;
+  case InstructionFlavor::TRANS:
+    Label = "TRANS";
+    break;
+  case InstructionFlavor::MultiCycleVALU:
+    Label = "VALU(Nc)";
+    break;
+  case InstructionFlavor::VMEM:
+    Label = "VMEM";
+    break;
+  case InstructionFlavor::DS:
+    Label = "DS";
+    break;
+  case InstructionFlavor::SALU:
+    Label = "SALU";
+    break;
+  case InstructionFlavor::DMA:
+    Label = "DMA";
+    break;
+  case InstructionFlavor::Fence:
+    Label = "Fence";
+    break;
+  case InstructionFlavor::Other:
+    Label = "Other";
+    break;
+  case InstructionFlavor::NUM_FLAVORS:
+    Label = "???";
+    break;
+  }
+  // Every case assigns a non-empty label, so an empty one means no case
+  // matched.
+  if (Label.empty())
+    llvm_unreachable("Unknown InstructionFlavor");
+  return Label;
+}
+
+/// \returns the one-character abbreviation of flavor \p F.
+///
+/// The NUM_FLAVORS sentinel is rendered as "?". A value that matches no
+/// enumerator is a programming error and reaches llvm_unreachable.
+inline StringRef getFlavorShortName(InstructionFlavor F) {
+  StringRef Abbrev;
+  // Deliberately no default case: a fully covered switch lets the compiler
+  // flag a newly added flavor that has no abbreviation yet.
+  switch (F) {
+  case InstructionFlavor::WMMA:
+    Abbrev = "W";
+    break;
+  case InstructionFlavor::SingleCycleVALU:
+    Abbrev = "V";
+    break;
+  case InstructionFlavor::TRANS:
+    Abbrev = "T";
+    break;
+  case InstructionFlavor::MultiCycleVALU:
+    Abbrev = "C";
+    break;
+  case InstructionFlavor::VMEM:
+    Abbrev = "M";
+    break;
+  case InstructionFlavor::DS:
+    Abbrev = "D";
+    break;
+  case InstructionFlavor::SALU:
+    Abbrev = "S";
+    break;
+  case InstructionFlavor::DMA:
+    Abbrev = "X";
+    break;
+  case InstructionFlavor::Fence:
+    Abbrev = "F";
+    break;
+  case InstructionFlavor::Other:
+    Abbrev = "O";
+    break;
+  case InstructionFlavor::NUM_FLAVORS:
+    Abbrev = "?";
+    break;
+  }
+  // Every case assigns a non-empty abbreviation, so an empty one means no
+  // case matched.
+  if (Abbrev.empty())
+    llvm_unreachable("Unknown InstructionFlavor");
+  return Abbrev;
+}
+
+/// \returns the InstructionFlavor assigned to \p MI.
+///
+/// Declaration only: the definition is not part of this header, so the exact
+/// classification rules, and how \p SII is consulted, must be read from the
+/// implementation. NOTE(review): presumably the result is never NUM_FLAVORS;
+/// confirm against the definition.
+InstructionFlavor classifyFlavor(const MachineInstr &MI,
+ const SIInstrInfo &SII);
+
+/// An ordered list of instruction flavors.
+using FlavorGroup = SmallVector<InstructionFlavor, 4>;
+
+/// Factory helpers that build frequently needed flavor lists.
+namespace FlavorGroups {
+/// \returns the three VALU flavors: single-cycle, TRANS and multi-cycle, in
+/// that order.
+inline FlavorGroup allVALU() {
+  FlavorGroup Group;
+  Group.push_back(InstructionFlavor::SingleCycleVALU);
+  Group.push_back(InstructionFlavor::TRANS);
+  Group.push_back(InstructionFlavor::MultiCycleVALU);
+  return Group;
+}
+
+/// \returns the three memory flavors: VMEM, DS and DMA, in that order.
+inline FlavorGroup allMem() {
+  FlavorGroup Group;
+  Group.push_back(InstructionFlavor::VMEM);
+  Group.push_back(InstructionFlavor::DS);
+  Group.push_back(InstructionFlavor::DMA);
+  return Group;
+}
+
+/// \returns a group holding only \p F.
+inline FlavorGroup individual(InstructionFlavor F) {
+  FlavorGroup Group;
+  Group.push_back(F);
+  return Group;
+}
+
+/// \returns every flavor in declaration order. The NUM_FLAVORS sentinel
+/// itself is excluded.
+inline FlavorGroup all() {
+  const unsigned Count = static_cast<unsigned>(InstructionFlavor::NUM_FLAVORS);
+  FlavorGroup Group;
+  unsigned Next = 0;
+  while (Next < Count) {
+    Group.push_back(static_cast<InstructionFlavor>(Next));
+    ++Next;
+  }
+  return Group;
+}
+} // namespace FlavorGroups
+
+/// AMDGPU-specific scheduling decision reasons. These provide more granularity
+/// than the generic CandReason enum for debugging purposes.
+///
+/// NUM_REASONS is the enumerator count rather than a reason and must remain
+/// last. Note that getReasonName() prints CritResourceBalance as
+/// "CritResource", not as the enumerator's own spelling.
+enum class AMDGPUSchedReason : uint8_t {
+ None,
+ CritResourceBalance, // tryCriticalResource chose based on resource pressure
+ CritResourceDep, // tryCriticalResourceDependency chose based on enabling
+ NUM_REASONS // Sentinel: count of reasons above, keep last
+};
+
+/// \returns the display name of scheduling reason \p R.
+///
+/// CritResourceBalance is printed as "CritResource" and the NUM_REASONS
+/// sentinel as "???". A value that matches no enumerator is a programming
+/// error and reaches llvm_unreachable.
+inline StringRef getReasonName(AMDGPUSchedReason R) {
+  StringRef Label;
+  // Deliberately no default case: a fully covered switch lets the compiler
+  // flag a newly added reason that has no name yet.
+  switch (R) {
+  case AMDGPUSchedReason::None:
+    Label = "None";
+    break;
+  case AMDGPUSchedReason::CritResourceBalance:
+    Label = "CritResource";
+    break;
+  case AMDGPUSchedReason::CritResourceDep:
+    Label = "CritResourceDep";
+    break;
+  case AMDGPUSchedReason::NUM_REASONS:
+    Label = "???";
+    break;
+  }
+  // Every case assigns a non-empty label, so an empty one means no case
+  // matched.
+  if (Label.empty())
+    llvm_unreachable("Unknown AMDGPUSchedReason");
+  return Label;
+}
+
+} // End namespace AMDGPU
+
+//===----------------------------------------------------------------------===//
+// Hardware Unit Information
+//===----------------------------------------------------------------------===//
+
+/// HardwareUnitInfo is a wrapper class which maps to some real hardware
+/// resource. This is used to model hardware resource pressure per region, and
+/// guide scheduling heuristics.
+class HardwareUnitInfo {
+private:
+ /// PrioritySUs maintains a list of the SUs we want to prioritize scheduling
+ /// for this HardwareUnit. This is used for agreement between
+ /// tryCriticalResourceDependency and tryCriticalResource: we schedule the
+ /// dependencies for a SU on critical resource, then schedule that same SU on
+ /// the critical resource. This agreement results in shorter live ranges and
+ /// more regular HardwareUnit access patterns. SUs are prioritized based on
+ /// depth for top-down scheduling.
+ SmallSetVector<SUnit *, 16> PrioritySUs;
+ /// All the SUs in the region that consume this resource
+ SmallSetVector<SUnit *, 16> AllSUs;
+ /// The total number of busy cycles for this HardwareUnit for a given region.
+ unsigned TotalCycles = 0;
+ // InstructionFlavor mapping
+ AMDGPU::InstructionFlavor Type;
+ // Idx mapping
+ unsigned Idx;
+ // Whether or not instructions on this HardwareUnit may produce a window in
+ // which instructions in other HardwareUnits can coexecute. For example, WMMA
+ // / MFMA instructions may take multiple cycles, which may be overlapped with
+ // instructions on other HardwareUnits
+ bool ProducesCoexecWindow = false;
+
+public:
+ HardwareUnitInfo() {}
+
+ unsigned size() { return AllSUs.size(); }
+
+ unsigned getTotalCycles() { return TotalCycles; }
+
+ void setType(unsigned TheType) {
+ assert(TheType < (unsigned)AMDGPU::InstructionFlavor::NUM_FLAVORS);
+ Type = (AMDGPU::InstructionFlavor)(TheType);
+ }
+
+ AMDGPU::InstructionFlavor getType() const { return Type; }
+
+ unsigned getIdx() const { return Idx; }
+
+ bool producesCoexecWindow() const { return ProducesCoexecWindow; }
+
+ void setProducesCoexecWindow(bool Val) { ProducesCoexecWindow = Val; }
+
+ bool contains(SUnit *SU) { return AllSUs.contains(SU); }
+
+ /// \returns true if there is a difference in priority between \p SU and \p
+ /// Other. If so, \returns the SUnit with higher priority. This
+ /// method looks through the PrioritySUs to determine if one SU is more
+ /// prioritized than the other. If neither is in the PrioritySUs list, then
+ /// neither has priority over the other.
+ SUnit *getHigherPriority(SUnit *SU, SUnit *Other) {
+ for (auto *SUOrder : PrioritySUs) {
+ if (SUOrder == SU) {
+ return SU;
+ }
+ if (SUOrder == Other) {
+ return Other;
+ }
----------------
lucas-rami wrote:
```suggestion
if (SUOrder == SU)
return SU;
if (SUOrder == Other)
return Other;
```
https://github.com/llvm/llvm-project/pull/184929
More information about the llvm-branch-commits
mailing list