[llvm-branch-commits] [llvm] [AMDGPU] Add HWUI pressure heuristics to coexec strategy (PR #184929)

Lucas Ramirez via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Wed Mar 11 05:27:10 PDT 2026


================
@@ -19,12 +19,297 @@
 
 namespace llvm {
 
+namespace AMDGPU {
+
+//===----------------------------------------------------------------------===//
+// Instruction Flavor Classification
+//===----------------------------------------------------------------------===//
+
+enum class InstructionFlavor : uint8_t {
+  WMMA,            // WMMA/MFMA matrix operations
+  SingleCycleVALU, // Single-cycle VALU (not TRANS32, not multi-cycle CVT)
+  TRANS,           // Transcendental ops (v_exp, v_log, etc.)
+  MultiCycleVALU,  // VALU instructions with repeat rate > 1
+  VMEM,            // FLAT/GLOBAL memory operations
+  DS,              // LDS/GDS operations
+  SALU,            // Scalar ALU
+  DMA,             // Tensor DMA operations
+  Fence,           // Fences and waits
+  Other,           // Everything else
+  NUM_FLAVORS      // Count of the flavors above; sentinel, not a real flavor
+};
+
+/// Returns the long, human-readable label for flavor \p F.
+inline StringRef getFlavorName(InstructionFlavor F) {
+  using Flavor = InstructionFlavor;
+  // Every enumerator is listed explicitly and there is no default label.
+  switch (F) {
+  case Flavor::WMMA:
+    return "WMMA";
+  case Flavor::SingleCycleVALU:
+    return "VALU(1c)";
+  case Flavor::TRANS:
+    return "TRANS";
+  case Flavor::MultiCycleVALU:
+    return "VALU(Nc)";
+  case Flavor::VMEM:
+    return "VMEM";
+  case Flavor::DS:
+    return "DS";
+  case Flavor::SALU:
+    return "SALU";
+  case Flavor::DMA:
+    return "DMA";
+  case Flavor::Fence:
+    return "Fence";
+  case Flavor::Other:
+    return "Other";
+  case Flavor::NUM_FLAVORS:
+    // The count sentinel gets a placeholder label.
+    return "???";
+  }
+  // Only reached for a value that matches none of the enumerators.
+  llvm_unreachable("Unknown InstructionFlavor");
+}
+
+/// Returns the one-character abbreviation for flavor \p F.
+inline StringRef getFlavorShortName(InstructionFlavor F) {
+  using Flavor = InstructionFlavor;
+  // Every enumerator is listed explicitly and there is no default label.
+  switch (F) {
+  case Flavor::WMMA:
+    return "W";
+  case Flavor::SingleCycleVALU:
+    return "V";
+  case Flavor::TRANS:
+    return "T";
+  case Flavor::MultiCycleVALU:
+    return "C";
+  case Flavor::VMEM:
+    return "M";
+  case Flavor::DS:
+    return "D";
+  case Flavor::SALU:
+    return "S";
+  case Flavor::DMA:
+    return "X";
+  case Flavor::Fence:
+    return "F";
+  case Flavor::Other:
+    return "O";
+  case Flavor::NUM_FLAVORS:
+    // The count sentinel gets a placeholder abbreviation.
+    return "?";
+  }
+  // Only reached for a value that matches none of the enumerators.
+  llvm_unreachable("Unknown InstructionFlavor");
+}
+
+/// Maps \p MI to one InstructionFlavor, using \p SII. Declaration only: the
+/// definition (and therefore the classification rules) is not part of this
+/// header excerpt. NOTE(review): confirm the rules against the definition.
+InstructionFlavor classifyFlavor(const MachineInstr &MI,
+                                 const SIInstrInfo &SII);
+
+/// A list of instruction flavors; up to four are held in inline storage.
+using FlavorGroup = SmallVector<InstructionFlavor, 4>;
+
+/// Factory helpers that build commonly used FlavorGroup values.
+namespace FlavorGroups {
+/// The three VALU flavors: SingleCycleVALU, TRANS and MultiCycleVALU.
+inline FlavorGroup allVALU() {
+  return {InstructionFlavor::SingleCycleVALU, InstructionFlavor::TRANS,
+          InstructionFlavor::MultiCycleVALU};
+}
+/// The three memory flavors: VMEM, DS and DMA.
+inline FlavorGroup allMem() {
+  return {InstructionFlavor::VMEM, InstructionFlavor::DS,
+          InstructionFlavor::DMA};
+}
+/// A group that contains only \p F.
+inline FlavorGroup individual(InstructionFlavor F) { return {F}; }
+/// Every flavor in enumerator order; the NUM_FLAVORS sentinel is excluded.
+inline FlavorGroup all() {
+  constexpr unsigned NumFlavors =
+      static_cast<unsigned>(InstructionFlavor::NUM_FLAVORS);
+  FlavorGroup G;
+  // Size the buffer once up front so that appending every flavor does not
+  // regrow the vector part-way through the loop.
+  G.reserve(NumFlavors);
+  for (unsigned I = 0; I != NumFlavors; ++I)
+    G.push_back(static_cast<InstructionFlavor>(I));
+  return G;
+}
+} // namespace FlavorGroups
+
+/// AMDGPU-specific scheduling decision reasons. These provide more granularity
+/// than the generic CandReason enum for debugging purposes.
+enum class AMDGPUSchedReason : uint8_t {
+  None,                // No AMDGPU-specific reason
+  CritResourceBalance, // tryCriticalResource chose based on resource pressure
+  CritResourceDep,     // tryCriticalResourceDependency chose based on enabling
+  NUM_REASONS          // Count of the reasons above; sentinel, not a reason
+};
+
+/// Returns the printable label for scheduling reason \p R.
+inline StringRef getReasonName(AMDGPUSchedReason R) {
+  using Reason = AMDGPUSchedReason;
+  // Every enumerator is listed explicitly and there is no default label.
+  switch (R) {
+  case Reason::None:
+    return "None";
+  case Reason::CritResourceBalance:
+    // NOTE(review): the label omits the "Balance" suffix of the enumerator
+    // name; confirm this is intentional.
+    return "CritResource";
+  case Reason::CritResourceDep:
+    return "CritResourceDep";
+  case Reason::NUM_REASONS:
+    // The count sentinel gets a placeholder label.
+    return "???";
+  }
+  // Only reached for a value that matches none of the enumerators.
+  llvm_unreachable("Unknown AMDGPUSchedReason");
+}
+
+} // End namespace AMDGPU
+
+//===----------------------------------------------------------------------===//
+// Hardware Unit Information
+//===----------------------------------------------------------------------===//
+
+/// HardwareUnitInfo is a wrapper class which maps to some real hardware
+/// resource. This is used to model hardware resource pressure per region, and
+/// guide scheduling heuristics.
+class HardwareUnitInfo {
+private:
+  /// PrioritySUs maintains a list of the SUs we want to prioritize scheduling
+  /// for this HardwareUnit. This is used for agreement between
+  /// tryCriticalResourceDependency and tryCriticalResource: we schedule the
+  /// dependencies for a SU on critical resource, then schedule that same SU on
+  /// the critical resource. This agreement results in shorter live ranges and
+  /// more regular HardwareUnit access patterns. SUs are prioritized based on
+  /// depth for top-down scheduling.
+  SmallSetVector<SUnit *, 16> PrioritySUs;
+  /// All the SUs in the region that consume this resource
+  SmallSetVector<SUnit *, 16> AllSUs;
+  /// The total number of busy cycles for this HardwareUnit for a given region.
+  unsigned TotalCycles = 0;
+  // InstructionFlavor mapping
+  AMDGPU::InstructionFlavor Type;
+  // Idx mappuing
----------------
lucas-rami wrote:

```suggestion
  /// Idx mapping.
```

https://github.com/llvm/llvm-project/pull/184929


More information about the llvm-branch-commits mailing list