[llvm] [AMDGPU] Add initial cost function framework for balanced scheduling (PR #160558)

Wed Sep 24 09:57:47 PDT 2025

github-actions[bot] wrote:




:warning: The C/C++ code formatter, clang-format, found issues in your code. :warning:

<details>
<summary>
You can test this locally with the following command:
</summary>

``````````bash
git-clang-format --diff origin/main HEAD --extensions h,cpp -- llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
``````````

:warning:
The reproduction instructions above might return results for more than one PR
in a stack if you are using a stacked PR workflow. You can limit the results by
changing `origin/main` to the base branch/commit you want to compare against.
:warning:

</details>

<details>
<summary>
View the diff from clang-format here.
</summary>

``````````diff

diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index 874dfc09a..f088e7157 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -23,9 +23,9 @@
 ///
 //===----------------------------------------------------------------------===//
 
+#include "GCNSchedStrategy.h"
 #include "AMDGPUIGroupLP.h"
 #include "GCNRegPressure.h"
-#include "GCNSchedStrategy.h"
 #include "SIMachineFunctionInfo.h"
 #include "Utils/AMDGPUBaseInfo.h"
 #include "llvm/ADT/STLExtras.h"
@@ -98,30 +98,32 @@ static cl::opt<double> SchedCostWeightSpill(
 
 // Shape the occupancy term: reciprocal exponent and low-occupancy penalty.
 static cl::opt<double> SchedCostOccExponent(
-  "amdgpu-sched-cost-occ-exponent", cl::Hidden,
-  cl::desc("Exponent for occupancy diminishing-returns curve (cost ~ 1/W^exp)"),
-  cl::init(1.0));
+    "amdgpu-sched-cost-occ-exponent", cl::Hidden,
+    cl::desc(
+        "Exponent for occupancy diminishing-returns curve (cost ~ 1/W^exp)"),
+    cl::init(1.0));
 static cl::opt<unsigned> SchedCostLowOccFloor(
-  "amdgpu-sched-cost-lowocc-floor", cl::Hidden,
-  cl::desc("Preferred minimum waves; waves below this get extra penalty"),
-  cl::init(2));
+    "amdgpu-sched-cost-lowocc-floor", cl::Hidden,
+    cl::desc("Preferred minimum waves; waves below this get extra penalty"),
+    cl::init(2));
 static cl::opt<double> SchedCostLowOccPenalty(
-  "amdgpu-sched-cost-lowocc-penalty", cl::Hidden,
-  cl::desc("Penalty weight multiplied by (floor - waves) when below floor"),
-  cl::init(0.0));
+    "amdgpu-sched-cost-lowocc-penalty", cl::Hidden,
+    cl::desc("Penalty weight multiplied by (floor - waves) when below floor"),
+    cl::init(0.0));
 
 static cl::opt<bool> UseStageCostDecision(
-  "amdgpu-use-stage-cost-decision", cl::Hidden,
-  cl::desc("Defer cost decisions to end of stage using block-frequency"
-       " weighted totals, instead of per-region immediate reverts"),
-  cl::init(false));
+    "amdgpu-use-stage-cost-decision", cl::Hidden,
+    cl::desc("Defer cost decisions to end of stage using block-frequency"
+             " weighted totals, instead of per-region immediate reverts"),
+    cl::init(false));
 
 // Helper: concave occupancy utility. Map waves -> diminishing cost reduction.
 static inline double occupancyCost(unsigned Waves, double Exp) {
   if (Waves == 0)
     return std::numeric_limits<double>::infinity();
-  // Use reciprocal to get a simple concave utility: higher waves -> smaller cost.
-  // We scale by a constant so typical ranges produce reasonable magnitudes.
+  // Use reciprocal to get a simple concave utility: higher waves -> smaller
+  // cost. We scale by a constant so typical ranges produce reasonable
+  // magnitudes.
   return 1.0 / std::pow(static_cast<double>(Waves), Exp);
 }
 
@@ -1379,22 +1381,27 @@ void GCNSchedStage::finalizeGCNRegion() {
     auto EstimateSpill = [&](const GCNRegPressure &P) -> unsigned {
       unsigned Spill = 0;
       unsigned MaxVGPRs = ST.getMaxNumVGPRs(MF);
-      unsigned MaxArchVGPRs = std::min(MaxVGPRs, ST.getAddressableNumArchVGPRs());
+      unsigned MaxArchVGPRs =
+          std::min(MaxVGPRs, ST.getAddressableNumArchVGPRs());
       unsigned MaxSGPRs = ST.getMaxNumSGPRs(MF);
       unsigned VG = P.getVGPRNum(ST.hasGFX90AInsts());
       unsigned AG = P.getAGPRNum();
       unsigned AV = P.getArchVGPRNum();
       unsigned SG = P.getSGPRNum();
-      if (VG > MaxVGPRs) Spill += VG - MaxVGPRs;
-      if (AV > MaxArchVGPRs) Spill += AV - MaxArchVGPRs;
-      if (AG > MaxArchVGPRs) Spill += AG - MaxArchVGPRs;
-      if (SG > MaxSGPRs) Spill += SG - MaxSGPRs;
+      if (VG > MaxVGPRs)
+        Spill += VG - MaxVGPRs;
+      if (AV > MaxArchVGPRs)
+        Spill += AV - MaxArchVGPRs;
+      if (AG > MaxArchVGPRs)
+        Spill += AG - MaxArchVGPRs;
+      if (SG > MaxSGPRs)
+        Spill += SG - MaxSGPRs;
       return Spill;
     };
 
     unsigned DynamicVGPRBlockSize = DAG.MFI.getDynamicVGPRBlockSize();
-    unsigned TargetOcc = std::min(
-        S.getTargetOccupancy(), ST.getOccupancyWithWorkGroupSizes(MF).second);
+    unsigned TargetOcc = std::min(S.getTargetOccupancy(),
+                                  ST.getOccupancyWithWorkGroupSizes(MF).second);
     unsigned WavesBefore = std::min(
         TargetOcc, PressureBefore.getOccupancy(ST, DynamicVGPRBlockSize));
     unsigned WavesAfter = std::min(
@@ -1403,11 +1410,11 @@ void GCNSchedStage::finalizeGCNRegion() {
     unsigned SpillAfter = EstimateSpill(PressureAfter);
 
     double BlockFreq = 1.0; // TODO: wire MBFI when available
-  AMDGPUSchedCostFunction CF(SchedCostWeightOccupancy,
-                 SchedCostWeightLength, SchedCostWeightSpill,
-                 SchedCostOccExponent, SchedCostLowOccFloor,
-                 SchedCostLowOccPenalty);
-    StageCostBefore += CF.score(WavesBefore, LengthBefore, SpillBefore, BlockFreq);
+    AMDGPUSchedCostFunction CF(SchedCostWeightOccupancy, SchedCostWeightLength,
+                               SchedCostWeightSpill, SchedCostOccExponent,
+                               SchedCostLowOccFloor, SchedCostLowOccPenalty);
+    StageCostBefore +=
+        CF.score(WavesBefore, LengthBefore, SpillBefore, BlockFreq);
     StageCostAfter += CF.score(WavesAfter, LengthAfter, SpillAfter, BlockFreq);
   }
 
@@ -1506,7 +1513,8 @@ void GCNSchedStage::checkScheduling() {
       unsigned Spill = 0;
       // Excess over addressable limits captures risk of spills.
       unsigned MaxVGPRs = ST.getMaxNumVGPRs(MF);
-      unsigned MaxArchVGPRs = std::min(MaxVGPRs, ST.getAddressableNumArchVGPRs());
+      unsigned MaxArchVGPRs =
+          std::min(MaxVGPRs, ST.getAddressableNumArchVGPRs());
       unsigned MaxSGPRs = ST.getMaxNumSGPRs(MF);
       unsigned VG = P.getVGPRNum(ST.hasGFX90AInsts());
       unsigned AG = P.getAGPRNum();
@@ -1528,8 +1536,7 @@ void GCNSchedStage::checkScheduling() {
 
     // Occupancy for before/after.
     unsigned WavesBeforeOcc = std::min(
-        TargetOccupancy,
-        PressureBefore.getOccupancy(ST, DynamicVGPRBlockSize));
+        TargetOccupancy, PressureBefore.getOccupancy(ST, DynamicVGPRBlockSize));
     unsigned WavesAfterOcc = std::min(
         TargetOccupancy, PressureAfter.getOccupancy(ST, DynamicVGPRBlockSize));
 
@@ -1537,12 +1544,13 @@ void GCNSchedStage::checkScheduling() {
     // MachineSchedContext does not expose MBFI here, so default to 1.0.
     double BlockFreq = 1.0;
 
-  AMDGPUSchedCostFunction CF(SchedCostWeightOccupancy,
-                 SchedCostWeightLength, SchedCostWeightSpill,
-                 SchedCostOccExponent, SchedCostLowOccFloor,
-                 SchedCostLowOccPenalty);
-    double CostBefore = CF.score(WavesBeforeOcc, LengthBefore, SpillBefore, BlockFreq);
-    double CostAfter = CF.score(WavesAfterOcc, LengthAfter, SpillAfter, BlockFreq);
+    AMDGPUSchedCostFunction CF(SchedCostWeightOccupancy, SchedCostWeightLength,
+                               SchedCostWeightSpill, SchedCostOccExponent,
+                               SchedCostLowOccFloor, SchedCostLowOccPenalty);
+    double CostBefore =
+        CF.score(WavesBeforeOcc, LengthBefore, SpillBefore, BlockFreq);
+    double CostAfter =
+        CF.score(WavesAfterOcc, LengthAfter, SpillAfter, BlockFreq);
 
     if (CostAfter > CostBefore)
       Revert = true;
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
index 0e7b6d02a..7d19c290a 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
@@ -173,9 +173,9 @@ class AMDGPUSchedCostFunction {
 
 public:
   AMDGPUSchedCostFunction() = default;
-  AMDGPUSchedCostFunction(double OccWeight, double LenWeight, double SpillWeight,
-                          double OccExponent, unsigned LowOccPrefFloor,
-                          double LowOccPenaltyWeight)
+  AMDGPUSchedCostFunction(double OccWeight, double LenWeight,
+                          double SpillWeight, double OccExponent,
+                          unsigned LowOccPrefFloor, double LowOccPenaltyWeight)
       : OccW(OccWeight), LenW(LenWeight), SpillW(SpillWeight),
         OccExp(OccExponent), LowOccFloor(LowOccPrefFloor),
         LowOccPenalty(LowOccPenaltyWeight) {}

``````````

</details>


https://github.com/llvm/llvm-project/pull/160558