[llvm] [AMDGPU] Add initial cost function framework for balanced scheduling (PR #160558)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Sep 24 09:57:47 PDT 2025
github-actions[bot] wrote:
<!--LLVM CODE FORMAT COMMENT: {clang-format}-->
:warning: The C/C++ code formatter, clang-format, found issues in your code. :warning:
<details>
<summary>
You can test this locally with the following command:
</summary>
``````````bash
git-clang-format --diff origin/main HEAD --extensions h,cpp -- llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
``````````
:warning:
The reproduction instructions above might return results for more than one PR
in a stack if you are using a stacked PR workflow. You can limit the results by
changing `origin/main` to the base branch/commit you want to compare against.
:warning:
</details>
<details>
<summary>
View the diff from clang-format here.
</summary>
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index 874dfc09a..f088e7157 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -23,9 +23,9 @@
///
//===----------------------------------------------------------------------===//
+#include "GCNSchedStrategy.h"
#include "AMDGPUIGroupLP.h"
#include "GCNRegPressure.h"
-#include "GCNSchedStrategy.h"
#include "SIMachineFunctionInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/STLExtras.h"
@@ -98,30 +98,32 @@ static cl::opt<double> SchedCostWeightSpill(
// Shape the occupancy term: reciprocal exponent and low-occupancy penalty.
static cl::opt<double> SchedCostOccExponent(
- "amdgpu-sched-cost-occ-exponent", cl::Hidden,
- cl::desc("Exponent for occupancy diminishing-returns curve (cost ~ 1/W^exp)"),
- cl::init(1.0));
+ "amdgpu-sched-cost-occ-exponent", cl::Hidden,
+ cl::desc(
+ "Exponent for occupancy diminishing-returns curve (cost ~ 1/W^exp)"),
+ cl::init(1.0));
static cl::opt<unsigned> SchedCostLowOccFloor(
- "amdgpu-sched-cost-lowocc-floor", cl::Hidden,
- cl::desc("Preferred minimum waves; waves below this get extra penalty"),
- cl::init(2));
+ "amdgpu-sched-cost-lowocc-floor", cl::Hidden,
+ cl::desc("Preferred minimum waves; waves below this get extra penalty"),
+ cl::init(2));
static cl::opt<double> SchedCostLowOccPenalty(
- "amdgpu-sched-cost-lowocc-penalty", cl::Hidden,
- cl::desc("Penalty weight multiplied by (floor - waves) when below floor"),
- cl::init(0.0));
+ "amdgpu-sched-cost-lowocc-penalty", cl::Hidden,
+ cl::desc("Penalty weight multiplied by (floor - waves) when below floor"),
+ cl::init(0.0));
static cl::opt<bool> UseStageCostDecision(
- "amdgpu-use-stage-cost-decision", cl::Hidden,
- cl::desc("Defer cost decisions to end of stage using block-frequency"
- " weighted totals, instead of per-region immediate reverts"),
- cl::init(false));
+ "amdgpu-use-stage-cost-decision", cl::Hidden,
+ cl::desc("Defer cost decisions to end of stage using block-frequency"
+ " weighted totals, instead of per-region immediate reverts"),
+ cl::init(false));
// Helper: concave occupancy utility. Map waves -> diminishing cost reduction.
static inline double occupancyCost(unsigned Waves, double Exp) {
if (Waves == 0)
return std::numeric_limits<double>::infinity();
- // Use reciprocal to get a simple concave utility: higher waves -> smaller cost.
- // We scale by a constant so typical ranges produce reasonable magnitudes.
+ // Use reciprocal to get a simple concave utility: higher waves -> smaller
+ // cost. We scale by a constant so typical ranges produce reasonable
+ // magnitudes.
return 1.0 / std::pow(static_cast<double>(Waves), Exp);
}
@@ -1379,22 +1381,27 @@ void GCNSchedStage::finalizeGCNRegion() {
auto EstimateSpill = [&](const GCNRegPressure &P) -> unsigned {
unsigned Spill = 0;
unsigned MaxVGPRs = ST.getMaxNumVGPRs(MF);
- unsigned MaxArchVGPRs = std::min(MaxVGPRs, ST.getAddressableNumArchVGPRs());
+ unsigned MaxArchVGPRs =
+ std::min(MaxVGPRs, ST.getAddressableNumArchVGPRs());
unsigned MaxSGPRs = ST.getMaxNumSGPRs(MF);
unsigned VG = P.getVGPRNum(ST.hasGFX90AInsts());
unsigned AG = P.getAGPRNum();
unsigned AV = P.getArchVGPRNum();
unsigned SG = P.getSGPRNum();
- if (VG > MaxVGPRs) Spill += VG - MaxVGPRs;
- if (AV > MaxArchVGPRs) Spill += AV - MaxArchVGPRs;
- if (AG > MaxArchVGPRs) Spill += AG - MaxArchVGPRs;
- if (SG > MaxSGPRs) Spill += SG - MaxSGPRs;
+ if (VG > MaxVGPRs)
+ Spill += VG - MaxVGPRs;
+ if (AV > MaxArchVGPRs)
+ Spill += AV - MaxArchVGPRs;
+ if (AG > MaxArchVGPRs)
+ Spill += AG - MaxArchVGPRs;
+ if (SG > MaxSGPRs)
+ Spill += SG - MaxSGPRs;
return Spill;
};
unsigned DynamicVGPRBlockSize = DAG.MFI.getDynamicVGPRBlockSize();
- unsigned TargetOcc = std::min(
- S.getTargetOccupancy(), ST.getOccupancyWithWorkGroupSizes(MF).second);
+ unsigned TargetOcc = std::min(S.getTargetOccupancy(),
+ ST.getOccupancyWithWorkGroupSizes(MF).second);
unsigned WavesBefore = std::min(
TargetOcc, PressureBefore.getOccupancy(ST, DynamicVGPRBlockSize));
unsigned WavesAfter = std::min(
@@ -1403,11 +1410,11 @@ void GCNSchedStage::finalizeGCNRegion() {
unsigned SpillAfter = EstimateSpill(PressureAfter);
double BlockFreq = 1.0; // TODO: wire MBFI when available
- AMDGPUSchedCostFunction CF(SchedCostWeightOccupancy,
- SchedCostWeightLength, SchedCostWeightSpill,
- SchedCostOccExponent, SchedCostLowOccFloor,
- SchedCostLowOccPenalty);
- StageCostBefore += CF.score(WavesBefore, LengthBefore, SpillBefore, BlockFreq);
+ AMDGPUSchedCostFunction CF(SchedCostWeightOccupancy, SchedCostWeightLength,
+ SchedCostWeightSpill, SchedCostOccExponent,
+ SchedCostLowOccFloor, SchedCostLowOccPenalty);
+ StageCostBefore +=
+ CF.score(WavesBefore, LengthBefore, SpillBefore, BlockFreq);
StageCostAfter += CF.score(WavesAfter, LengthAfter, SpillAfter, BlockFreq);
}
@@ -1506,7 +1513,8 @@ void GCNSchedStage::checkScheduling() {
unsigned Spill = 0;
// Excess over addressable limits captures risk of spills.
unsigned MaxVGPRs = ST.getMaxNumVGPRs(MF);
- unsigned MaxArchVGPRs = std::min(MaxVGPRs, ST.getAddressableNumArchVGPRs());
+ unsigned MaxArchVGPRs =
+ std::min(MaxVGPRs, ST.getAddressableNumArchVGPRs());
unsigned MaxSGPRs = ST.getMaxNumSGPRs(MF);
unsigned VG = P.getVGPRNum(ST.hasGFX90AInsts());
unsigned AG = P.getAGPRNum();
@@ -1528,8 +1536,7 @@ void GCNSchedStage::checkScheduling() {
// Occupancy for before/after.
unsigned WavesBeforeOcc = std::min(
- TargetOccupancy,
- PressureBefore.getOccupancy(ST, DynamicVGPRBlockSize));
+ TargetOccupancy, PressureBefore.getOccupancy(ST, DynamicVGPRBlockSize));
unsigned WavesAfterOcc = std::min(
TargetOccupancy, PressureAfter.getOccupancy(ST, DynamicVGPRBlockSize));
@@ -1537,12 +1544,13 @@ void GCNSchedStage::checkScheduling() {
// MachineSchedContext does not expose MBFI here, so default to 1.0.
double BlockFreq = 1.0;
- AMDGPUSchedCostFunction CF(SchedCostWeightOccupancy,
- SchedCostWeightLength, SchedCostWeightSpill,
- SchedCostOccExponent, SchedCostLowOccFloor,
- SchedCostLowOccPenalty);
- double CostBefore = CF.score(WavesBeforeOcc, LengthBefore, SpillBefore, BlockFreq);
- double CostAfter = CF.score(WavesAfterOcc, LengthAfter, SpillAfter, BlockFreq);
+ AMDGPUSchedCostFunction CF(SchedCostWeightOccupancy, SchedCostWeightLength,
+ SchedCostWeightSpill, SchedCostOccExponent,
+ SchedCostLowOccFloor, SchedCostLowOccPenalty);
+ double CostBefore =
+ CF.score(WavesBeforeOcc, LengthBefore, SpillBefore, BlockFreq);
+ double CostAfter =
+ CF.score(WavesAfterOcc, LengthAfter, SpillAfter, BlockFreq);
if (CostAfter > CostBefore)
Revert = true;
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
index 0e7b6d02a..7d19c290a 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
@@ -173,9 +173,9 @@ class AMDGPUSchedCostFunction {
public:
AMDGPUSchedCostFunction() = default;
- AMDGPUSchedCostFunction(double OccWeight, double LenWeight, double SpillWeight,
- double OccExponent, unsigned LowOccPrefFloor,
- double LowOccPenaltyWeight)
+ AMDGPUSchedCostFunction(double OccWeight, double LenWeight,
+ double SpillWeight, double OccExponent,
+ unsigned LowOccPrefFloor, double LowOccPenaltyWeight)
: OccW(OccWeight), LenW(LenWeight), SpillW(SpillWeight),
OccExp(OccExponent), LowOccFloor(LowOccPrefFloor),
LowOccPenalty(LowOccPenaltyWeight) {}
``````````
</details>
https://github.com/llvm/llvm-project/pull/160558
More information about the llvm-commits mailing list