[llvm-branch-commits] [llvm] [AMDGPU] Add HWUI pressure heuristics to coexec strategy (PR #184929)
Lucas Ramirez via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Wed Mar 11 05:27:10 PDT 2026
================
@@ -19,12 +19,297 @@
namespace llvm {
+namespace AMDGPU {
+
+//===----------------------------------------------------------------------===//
+// Instruction Flavor Classification
+//===----------------------------------------------------------------------===//
+
+enum class InstructionFlavor : uint8_t {
+ WMMA, // WMMA/MFMA matrix operations
+ SingleCycleVALU, // Single-cycle VALU (not TRANS32, not multi-cycle CVT)
+ TRANS, // Transcendental ops (v_exp, v_log, etc.)
+ MultiCycleVALU, // VALU instructions with repeat rate > 1
+ VMEM, // FLAT/GLOBAL memory operations
+ DS, // LDS/GDS operations
+ SALU, // Scalar ALU
+ DMA, // Tensor DMA operations
+ Fence, // Fences and waits
+ Other, // Everything else
+ NUM_FLAVORS
+};
+
+inline StringRef getFlavorName(InstructionFlavor F) {
+ switch (F) {
+ case InstructionFlavor::WMMA:
+ return "WMMA";
+ case InstructionFlavor::SingleCycleVALU:
+ return "VALU(1c)";
+ case InstructionFlavor::TRANS:
+ return "TRANS";
+ case InstructionFlavor::MultiCycleVALU:
+ return "VALU(Nc)";
+ case InstructionFlavor::VMEM:
+ return "VMEM";
+ case InstructionFlavor::DS:
+ return "DS";
+ case InstructionFlavor::SALU:
+ return "SALU";
+ case InstructionFlavor::DMA:
+ return "DMA";
+ case InstructionFlavor::Fence:
+ return "Fence";
+ case InstructionFlavor::Other:
+ return "Other";
+ case InstructionFlavor::NUM_FLAVORS:
+ return "???";
+ }
+ llvm_unreachable("Unknown InstructionFlavor");
+}
+
+inline StringRef getFlavorShortName(InstructionFlavor F) {
+ switch (F) {
+ case InstructionFlavor::WMMA:
+ return "W";
+ case InstructionFlavor::SingleCycleVALU:
+ return "V";
+ case InstructionFlavor::TRANS:
+ return "T";
+ case InstructionFlavor::MultiCycleVALU:
+ return "C";
+ case InstructionFlavor::VMEM:
+ return "M";
+ case InstructionFlavor::DS:
+ return "D";
+ case InstructionFlavor::SALU:
+ return "S";
+ case InstructionFlavor::DMA:
+ return "X";
+ case InstructionFlavor::Fence:
+ return "F";
+ case InstructionFlavor::Other:
+ return "O";
+ case InstructionFlavor::NUM_FLAVORS:
+ return "?";
+ }
+ llvm_unreachable("Unknown InstructionFlavor");
+}
+
+InstructionFlavor classifyFlavor(const MachineInstr &MI,
+ const SIInstrInfo &SII);
+
+using FlavorGroup = SmallVector<InstructionFlavor, 4>;
+
+namespace FlavorGroups {
+inline FlavorGroup allVALU() {
+ return {InstructionFlavor::SingleCycleVALU, InstructionFlavor::TRANS,
+ InstructionFlavor::MultiCycleVALU};
+}
+inline FlavorGroup allMem() {
+ return {InstructionFlavor::VMEM, InstructionFlavor::DS,
+ InstructionFlavor::DMA};
+}
+inline FlavorGroup individual(InstructionFlavor F) { return {F}; }
+inline FlavorGroup all() {
+ FlavorGroup G;
+ for (unsigned I = 0;
+ I < static_cast<unsigned>(InstructionFlavor::NUM_FLAVORS); ++I)
+ G.push_back(static_cast<InstructionFlavor>(I));
+ return G;
+}
+} // namespace FlavorGroups
+
+/// AMDGPU-specific scheduling decision reasons. These provide more granularity
+/// than the generic CandReason enum for debugging purposes.
+enum class AMDGPUSchedReason : uint8_t {
+ None,
+ CritResourceBalance, // tryCriticalResource chose based on resource pressure
+ CritResourceDep, // tryCriticalResourceDependency chose based on enabling
+ NUM_REASONS
+};
+
+inline StringRef getReasonName(AMDGPUSchedReason R) {
+ switch (R) {
+ case AMDGPUSchedReason::None:
+ return "None";
+ case AMDGPUSchedReason::CritResourceBalance:
+ return "CritResource";
+ case AMDGPUSchedReason::CritResourceDep:
+ return "CritResourceDep";
+ case AMDGPUSchedReason::NUM_REASONS:
+ return "???";
+ }
+ llvm_unreachable("Unknown AMDGPUSchedReason");
+}
+
+} // End namespace AMDGPU
+
+//===----------------------------------------------------------------------===//
+// Hardware Unit Information
+//===----------------------------------------------------------------------===//
+
+/// HardwareUnitInfo is a wrapper class which maps to some real hardware
+/// resource. This is used to model hardware resource pressure per region, and
+/// guide scheduling heuristics.
+class HardwareUnitInfo {
+private:
+ /// PrioritySUs maintains a list of the SUs we want to prioritize scheduling
+ /// for this HardwareUnit. This is used for agreement between
+ /// tryCriticalResourceDependency and tryCriticalResource: we schedule the
+ /// dependencies for a SU on critical resource, then schedule that same SU on
+ /// the critical resource. This agreement results in shorter live ranges and
+ /// more regular HardwareUnit access patterns. SUs are prioritized based on
+ /// depth for top-down scheduling.
+ SmallSetVector<SUnit *, 16> PrioritySUs;
+ /// All the SUs in the region that consume this resource
+ SmallSetVector<SUnit *, 16> AllSUs;
+ /// The total number of busy cycles for this HardwareUnit for a given region.
+ unsigned TotalCycles = 0;
+ // InstructionFlavor mapping
+ AMDGPU::InstructionFlavor Type;
+ // Idx mappuing
----------------
lucas-rami wrote:
```suggestion
/// Idx mapping.
```
https://github.com/llvm/llvm-project/pull/184929
More information about the llvm-branch-commits mailing list