[llvm] [NFCI][AMDGPU] Try to use PressureDiff to Calculate RegPressure. (PR #94221)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Jun 3 06:48:04 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Pierre van Houtryve (Pierre-vh)
<details>
<summary>Changes</summary>
PressureDiff is reliable most of the time, and it's pretty much free compared to RPTracker. We can use it whenever there are no subregister definitions or physregs involved. No subregs because PDiff doesn't take into account lane liveness, and no physregs because it seems to get PhysReg liveness completely wrong. Sometimes it adds a diff, sometimes it doesn't - I didn't look at that one for long so maybe there is something we can eventually do to make it better.
This allows us to save a ton of calls to RPTracker and LIS too. On a huge IR module (100+MB), it went from about 20M calls to RPTracker down to 3.4M, with the rest being PressureDiffs.
I added an expensive check to verify correctness of PressureDiff too.
---
Full diff: https://github.com/llvm/llvm-project/pull/94221.diff
2 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp (+82-21)
- (modified) llvm/lib/Target/AMDGPU/GCNSchedStrategy.h (+5-5)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index 94d93390d0916..5d6e8ff8ef34d 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -116,31 +116,86 @@ void GCNSchedStrategy::initialize(ScheduleDAGMI *DAG) {
<< ", SGPRExcessLimit = " << SGPRExcessLimit << "\n\n");
}
+static bool canUsePressureDiffs(SUnit *SU) {
+ if (SU->isInstr()) {
+ // Cannot use pressure diffs for subregister defs or with physregs, it's
+ // imprecise in both cases.
+ for (const auto &Op : SU->getInstr()->operands()) {
+ if (!Op.isReg() || Op.isImplicit())
+ continue;
+ if (Op.getReg().isPhysical() ||
+ (Op.isDef() && Op.getSubReg() != AMDGPU::NoSubRegister))
+ return false;
+ }
+ return true;
+ }
+
+ return false;
+}
+
+static void getRegisterPressures(bool AtTop,
+ const RegPressureTracker &RPTracker, SUnit *SU,
+ std::vector<unsigned> &Pressure,
+ std::vector<unsigned> &MaxPressure) {
+ // getDownwardPressure() and getUpwardPressure() make temporary changes to
+ // the tracker, so we need to pass those function a non-const copy.
+ RegPressureTracker &TempTracker = const_cast<RegPressureTracker &>(RPTracker);
+ if (AtTop)
+ TempTracker.getDownwardPressure(SU->getInstr(), Pressure, MaxPressure);
+ else
+ TempTracker.getUpwardPressure(SU->getInstr(), Pressure, MaxPressure);
+}
+
void GCNSchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU,
bool AtTop,
const RegPressureTracker &RPTracker,
const SIRegisterInfo *SRI,
unsigned SGPRPressure,
- unsigned VGPRPressure) {
+ unsigned VGPRPressure, bool IsBottomUp) {
Cand.SU = SU;
Cand.AtTop = AtTop;
if (!DAG->isTrackingPressure())
return;
- // getDownwardPressure() and getUpwardPressure() make temporary changes to
- // the tracker, so we need to pass those function a non-const copy.
- RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);
-
Pressure.clear();
MaxPressure.clear();
- if (AtTop)
- TempTracker.getDownwardPressure(SU->getInstr(), Pressure, MaxPressure);
- else {
- // FIXME: I think for bottom up scheduling, the register pressure is cached
- // and can be retrieved by DAG->getPressureDif(SU).
- TempTracker.getUpwardPressure(SU->getInstr(), Pressure, MaxPressure);
+ if (AtTop || !canUsePressureDiffs(SU)) {
+ getRegisterPressures(AtTop, RPTracker, SU, Pressure, MaxPressure);
+ } else {
+ // Reserve 4 slots.
+ Pressure.resize(4, 0);
+ Pressure[AMDGPU::RegisterPressureSets::SReg_32] = SGPRPressure;
+ Pressure[AMDGPU::RegisterPressureSets::VGPR_32] = VGPRPressure;
+
+ for (const auto &Diff : DAG->getPressureDiff(SU)) {
+ if (!Diff.isValid())
+ continue;
+ // PressureDiffs is always bottom-up so if we're working top-down we need
+ // to invert its sign.
+ Pressure[Diff.getPSet()] +=
+ (IsBottomUp ? Diff.getUnitInc() : -Diff.getUnitInc());
+ }
+
+#ifdef EXPENSIVE_CHECKS
+ std::vector<unsigned> CheckPressure, CheckMaxPressure;
+ getRegisterPressures(AtTop, RPTracker, SU, CheckPressure, CheckMaxPressure);
+ if (Pressure[AMDGPU::RegisterPressureSets::SReg_32] !=
+ CheckPressure[AMDGPU::RegisterPressureSets::SReg_32] ||
+ Pressure[AMDGPU::RegisterPressureSets::VGPR_32] !=
+ CheckPressure[AMDGPU::RegisterPressureSets::VGPR_32]) {
+ errs() << "Register Pressure is innaccurate when calculated through "
+ "PressureDiff\n";
+ errs() << "SGPR got " << Pressure[AMDGPU::RegisterPressureSets::SReg_32]
+ << ", expected "
+ << CheckPressure[AMDGPU::RegisterPressureSets::SReg_32] << "\n";
+ errs() << "VGPR got " << Pressure[AMDGPU::RegisterPressureSets::VGPR_32]
+ << ", expected "
+ << CheckPressure[AMDGPU::RegisterPressureSets::VGPR_32] << "\n";
+ report_fatal_error("innaccurate register pressure calculation");
+ }
+#endif
}
unsigned NewSGPRPressure = Pressure[AMDGPU::RegisterPressureSets::SReg_32];
@@ -158,7 +213,6 @@ void GCNSchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU,
bool ShouldTrackVGPRs = VGPRPressure + MaxVGPRPressureInc >= VGPRExcessLimit;
bool ShouldTrackSGPRs = !ShouldTrackVGPRs && SGPRPressure >= SGPRExcessLimit;
-
// FIXME: We have to enter REG-EXCESS before we reach the actual threshold
// to increase the likelihood we don't go over the limits. We should improve
// the analysis to look through dependencies to find the path with the least
@@ -207,7 +261,8 @@ void GCNSchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU,
void GCNSchedStrategy::pickNodeFromQueue(SchedBoundary &Zone,
const CandPolicy &ZonePolicy,
const RegPressureTracker &RPTracker,
- SchedCandidate &Cand) {
+ SchedCandidate &Cand,
+ bool IsBottomUp) {
const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo*>(TRI);
ArrayRef<unsigned> Pressure = RPTracker.getRegSetPressureAtPos();
unsigned SGPRPressure = 0;
@@ -220,8 +275,8 @@ void GCNSchedStrategy::pickNodeFromQueue(SchedBoundary &Zone,
for (SUnit *SU : Q) {
SchedCandidate TryCand(ZonePolicy);
- initCandidate(TryCand, SU, Zone.isTop(), RPTracker, SRI,
- SGPRPressure, VGPRPressure);
+ initCandidate(TryCand, SU, Zone.isTop(), RPTracker, SRI, SGPRPressure,
+ VGPRPressure, IsBottomUp);
// Pass SchedBoundary only when comparing nodes from the same boundary.
SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr;
tryCandidate(Cand, TryCand, ZoneArg);
@@ -262,7 +317,8 @@ SUnit *GCNSchedStrategy::pickNodeBidirectional(bool &IsTopNode) {
if (!BotCand.isValid() || BotCand.SU->isScheduled ||
BotCand.Policy != BotPolicy) {
BotCand.reset(CandPolicy());
- pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand);
+ pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand,
+ /*IsBottomUp*/ true);
assert(BotCand.Reason != NoCand && "failed to find the first candidate");
} else {
LLVM_DEBUG(traceCandidate(BotCand));
@@ -270,7 +326,8 @@ SUnit *GCNSchedStrategy::pickNodeBidirectional(bool &IsTopNode) {
if (VerifyScheduling) {
SchedCandidate TCand;
TCand.reset(CandPolicy());
- pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), TCand);
+ pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), TCand,
+ /*IsBottomUp*/ true);
assert(TCand.SU == BotCand.SU &&
"Last pick result should correspond to re-picking right now");
}
@@ -282,7 +339,8 @@ SUnit *GCNSchedStrategy::pickNodeBidirectional(bool &IsTopNode) {
if (!TopCand.isValid() || TopCand.SU->isScheduled ||
TopCand.Policy != TopPolicy) {
TopCand.reset(CandPolicy());
- pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand);
+ pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand,
+ /*IsBottomUp*/ false);
assert(TopCand.Reason != NoCand && "failed to find the first candidate");
} else {
LLVM_DEBUG(traceCandidate(TopCand));
@@ -290,7 +348,8 @@ SUnit *GCNSchedStrategy::pickNodeBidirectional(bool &IsTopNode) {
if (VerifyScheduling) {
SchedCandidate TCand;
TCand.reset(CandPolicy());
- pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TCand);
+ pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TCand,
+ /*IsBottomUp*/ false);
assert(TCand.SU == TopCand.SU &&
"Last pick result should correspond to re-picking right now");
}
@@ -327,7 +386,8 @@ SUnit *GCNSchedStrategy::pickNode(bool &IsTopNode) {
if (!SU) {
CandPolicy NoPolicy;
TopCand.reset(NoPolicy);
- pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand);
+ pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand,
+ /*IsBottomUp*/ false);
assert(TopCand.Reason != NoCand && "failed to find a candidate");
SU = TopCand.SU;
}
@@ -337,7 +397,8 @@ SUnit *GCNSchedStrategy::pickNode(bool &IsTopNode) {
if (!SU) {
CandPolicy NoPolicy;
BotCand.reset(NoPolicy);
- pickNodeFromQueue(Bot, NoPolicy, DAG->getBotRPTracker(), BotCand);
+ pickNodeFromQueue(Bot, NoPolicy, DAG->getBotRPTracker(), BotCand,
+ /*IsBottomUp*/ true);
assert(BotCand.Reason != NoCand && "failed to find a candidate");
SU = BotCand.SU;
}
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
index 2084aae4128ff..f0aea2bc4ab86 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
@@ -45,12 +45,12 @@ class GCNSchedStrategy : public GenericScheduler {
void pickNodeFromQueue(SchedBoundary &Zone, const CandPolicy &ZonePolicy,
const RegPressureTracker &RPTracker,
- SchedCandidate &Cand);
+ SchedCandidate &Cand, bool IsBottomUp);
- void initCandidate(SchedCandidate &Cand, SUnit *SU,
- bool AtTop, const RegPressureTracker &RPTracker,
- const SIRegisterInfo *SRI,
- unsigned SGPRPressure, unsigned VGPRPressure);
+ void initCandidate(SchedCandidate &Cand, SUnit *SU, bool AtTop,
+ const RegPressureTracker &RPTracker,
+ const SIRegisterInfo *SRI, unsigned SGPRPressure,
+ unsigned VGPRPressure, bool IsBottomUp);
std::vector<unsigned> Pressure;
``````````
</details>
https://github.com/llvm/llvm-project/pull/94221
More information about the llvm-commits
mailing list