[llvm] [NFCI][AMDGPU] Try to use PressureDiff to Calculate RegPressure. (PR #94221)
Pierre van Houtryve via llvm-commits
llvm-commits at lists.llvm.org
Wed Jun 12 23:41:20 PDT 2024
https://github.com/Pierre-vh updated https://github.com/llvm/llvm-project/pull/94221
>From ecb727c73e1e15f3b03696510846b70ed006cff6 Mon Sep 17 00:00:00 2001
From: pvanhout <pierre.vanhoutryve at amd.com>
Date: Mon, 3 Jun 2024 14:43:49 +0200
Subject: [PATCH 1/3] [NFCI][AMDGPU] Try to use PressureDiff to Calculate
RegPressure.
PressureDiff is reliable most of the time, and it's pretty much free compared to RPTracker.
We can use it whenever there is no subregister definitions, or physregs invovled. No subregs because PDiff doesn't take into account lane liveness, and no Physreg because it seems to get PhysReg liveness completely wrong. Sometimes it adds a diff, sometimes itt doesn't - I didn't look at that one for long so maybe there is something we can eventually do to make it better.
This allows us to save a ton of calls to RPTracker and LIS too. On a huge IR module (100+MB), it went from about 20M calls to RPTracker down to 3.4, with the rest being PressureDiffs.
I added an expensive check to verify correctness of PressureDiff too.
---
llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp | 103 ++++++++++++++++----
llvm/lib/Target/AMDGPU/GCNSchedStrategy.h | 10 +-
2 files changed, 87 insertions(+), 26 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index 94d93390d0916..5d6e8ff8ef34d 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -116,31 +116,86 @@ void GCNSchedStrategy::initialize(ScheduleDAGMI *DAG) {
<< ", SGPRExcessLimit = " << SGPRExcessLimit << "\n\n");
}
+static bool canUsePressureDiffs(SUnit *SU) {
+ if (SU->isInstr()) {
+ // Cannot use pressure diffs for subregister defs or with physregs, it's
+ // imprecise in both cases.
+ for (const auto &Op : SU->getInstr()->operands()) {
+ if (!Op.isReg() || Op.isImplicit())
+ continue;
+ if (Op.getReg().isPhysical() ||
+ (Op.isDef() && Op.getSubReg() != AMDGPU::NoSubRegister))
+ return false;
+ }
+ return true;
+ }
+
+ return false;
+}
+
+static void getRegisterPressures(bool AtTop,
+ const RegPressureTracker &RPTracker, SUnit *SU,
+ std::vector<unsigned> &Pressure,
+ std::vector<unsigned> &MaxPressure) {
+ // getDownwardPressure() and getUpwardPressure() make temporary changes to
+ // the tracker, so we need to pass those function a non-const copy.
+ RegPressureTracker &TempTracker = const_cast<RegPressureTracker &>(RPTracker);
+ if (AtTop)
+ TempTracker.getDownwardPressure(SU->getInstr(), Pressure, MaxPressure);
+ else
+ TempTracker.getUpwardPressure(SU->getInstr(), Pressure, MaxPressure);
+}
+
void GCNSchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU,
bool AtTop,
const RegPressureTracker &RPTracker,
const SIRegisterInfo *SRI,
unsigned SGPRPressure,
- unsigned VGPRPressure) {
+ unsigned VGPRPressure, bool IsBottomUp) {
Cand.SU = SU;
Cand.AtTop = AtTop;
if (!DAG->isTrackingPressure())
return;
- // getDownwardPressure() and getUpwardPressure() make temporary changes to
- // the tracker, so we need to pass those function a non-const copy.
- RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);
-
Pressure.clear();
MaxPressure.clear();
- if (AtTop)
- TempTracker.getDownwardPressure(SU->getInstr(), Pressure, MaxPressure);
- else {
- // FIXME: I think for bottom up scheduling, the register pressure is cached
- // and can be retrieved by DAG->getPressureDif(SU).
- TempTracker.getUpwardPressure(SU->getInstr(), Pressure, MaxPressure);
+ if (AtTop || !canUsePressureDiffs(SU)) {
+ getRegisterPressures(AtTop, RPTracker, SU, Pressure, MaxPressure);
+ } else {
+ // Reserve 4 slots.
+ Pressure.resize(4, 0);
+ Pressure[AMDGPU::RegisterPressureSets::SReg_32] = SGPRPressure;
+ Pressure[AMDGPU::RegisterPressureSets::VGPR_32] = VGPRPressure;
+
+ for (const auto &Diff : DAG->getPressureDiff(SU)) {
+ if (!Diff.isValid())
+ continue;
+ // PressureDiffs is always bottom-up so if we're working top-down we need
+ // to invert its sign.
+ Pressure[Diff.getPSet()] +=
+ (IsBottomUp ? Diff.getUnitInc() : -Diff.getUnitInc());
+ }
+
+#ifdef EXPENSIVE_CHECKS
+ std::vector<unsigned> CheckPressure, CheckMaxPressure;
+ getRegisterPressures(AtTop, RPTracker, SU, CheckPressure, CheckMaxPressure);
+ if (Pressure[AMDGPU::RegisterPressureSets::SReg_32] !=
+ CheckPressure[AMDGPU::RegisterPressureSets::SReg_32] ||
+ Pressure[AMDGPU::RegisterPressureSets::VGPR_32] !=
+ CheckPressure[AMDGPU::RegisterPressureSets::VGPR_32]) {
+ errs() << "Register Pressure is innaccurate when calculated through "
+ "PressureDiff\n";
+ errs() << "SGPR got " << Pressure[AMDGPU::RegisterPressureSets::SReg_32]
+ << ", expected "
+ << CheckPressure[AMDGPU::RegisterPressureSets::SReg_32] << "\n";
+ errs() << "VGPR got " << Pressure[AMDGPU::RegisterPressureSets::VGPR_32]
+ << ", expected "
+ << CheckPressure[AMDGPU::RegisterPressureSets::VGPR_32] << "\n";
+ report_fatal_error("innaccurate register pressure calculation");
+ }
+#endif
}
unsigned NewSGPRPressure = Pressure[AMDGPU::RegisterPressureSets::SReg_32];
@@ -158,7 +213,6 @@ void GCNSchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU,
bool ShouldTrackVGPRs = VGPRPressure + MaxVGPRPressureInc >= VGPRExcessLimit;
bool ShouldTrackSGPRs = !ShouldTrackVGPRs && SGPRPressure >= SGPRExcessLimit;
-
// FIXME: We have to enter REG-EXCESS before we reach the actual threshold
// to increase the likelihood we don't go over the limits. We should improve
// the analysis to look through dependencies to find the path with the least
@@ -207,7 +261,8 @@ void GCNSchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU,
void GCNSchedStrategy::pickNodeFromQueue(SchedBoundary &Zone,
const CandPolicy &ZonePolicy,
const RegPressureTracker &RPTracker,
- SchedCandidate &Cand) {
+ SchedCandidate &Cand,
+ bool IsBottomUp) {
const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo*>(TRI);
ArrayRef<unsigned> Pressure = RPTracker.getRegSetPressureAtPos();
unsigned SGPRPressure = 0;
@@ -220,8 +275,8 @@ void GCNSchedStrategy::pickNodeFromQueue(SchedBoundary &Zone,
for (SUnit *SU : Q) {
SchedCandidate TryCand(ZonePolicy);
- initCandidate(TryCand, SU, Zone.isTop(), RPTracker, SRI,
- SGPRPressure, VGPRPressure);
+ initCandidate(TryCand, SU, Zone.isTop(), RPTracker, SRI, SGPRPressure,
+ VGPRPressure, IsBottomUp);
// Pass SchedBoundary only when comparing nodes from the same boundary.
SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr;
tryCandidate(Cand, TryCand, ZoneArg);
@@ -262,7 +317,8 @@ SUnit *GCNSchedStrategy::pickNodeBidirectional(bool &IsTopNode) {
if (!BotCand.isValid() || BotCand.SU->isScheduled ||
BotCand.Policy != BotPolicy) {
BotCand.reset(CandPolicy());
- pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand);
+ pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand,
+ /*IsBottomUp*/ true);
assert(BotCand.Reason != NoCand && "failed to find the first candidate");
} else {
LLVM_DEBUG(traceCandidate(BotCand));
@@ -270,7 +326,8 @@ SUnit *GCNSchedStrategy::pickNodeBidirectional(bool &IsTopNode) {
if (VerifyScheduling) {
SchedCandidate TCand;
TCand.reset(CandPolicy());
- pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), TCand);
+ pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), TCand,
+ /*IsBottomUp*/ true);
assert(TCand.SU == BotCand.SU &&
"Last pick result should correspond to re-picking right now");
}
@@ -282,7 +339,8 @@ SUnit *GCNSchedStrategy::pickNodeBidirectional(bool &IsTopNode) {
if (!TopCand.isValid() || TopCand.SU->isScheduled ||
TopCand.Policy != TopPolicy) {
TopCand.reset(CandPolicy());
- pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand);
+ pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand,
+ /*IsBottomUp*/ false);
assert(TopCand.Reason != NoCand && "failed to find the first candidate");
} else {
LLVM_DEBUG(traceCandidate(TopCand));
@@ -290,7 +348,8 @@ SUnit *GCNSchedStrategy::pickNodeBidirectional(bool &IsTopNode) {
if (VerifyScheduling) {
SchedCandidate TCand;
TCand.reset(CandPolicy());
- pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TCand);
+ pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TCand,
+ /*IsBottomUp*/ false);
assert(TCand.SU == TopCand.SU &&
"Last pick result should correspond to re-picking right now");
}
@@ -327,7 +386,8 @@ SUnit *GCNSchedStrategy::pickNode(bool &IsTopNode) {
if (!SU) {
CandPolicy NoPolicy;
TopCand.reset(NoPolicy);
- pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand);
+ pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand,
+ /*IsBottomUp*/ false);
assert(TopCand.Reason != NoCand && "failed to find a candidate");
SU = TopCand.SU;
}
@@ -337,7 +397,8 @@ SUnit *GCNSchedStrategy::pickNode(bool &IsTopNode) {
if (!SU) {
CandPolicy NoPolicy;
BotCand.reset(NoPolicy);
- pickNodeFromQueue(Bot, NoPolicy, DAG->getBotRPTracker(), BotCand);
+ pickNodeFromQueue(Bot, NoPolicy, DAG->getBotRPTracker(), BotCand,
+ /*IsBottomUp*/ true);
assert(BotCand.Reason != NoCand && "failed to find a candidate");
SU = BotCand.SU;
}
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
index 2084aae4128ff..f0aea2bc4ab86 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
@@ -45,12 +45,12 @@ class GCNSchedStrategy : public GenericScheduler {
void pickNodeFromQueue(SchedBoundary &Zone, const CandPolicy &ZonePolicy,
const RegPressureTracker &RPTracker,
- SchedCandidate &Cand);
+ SchedCandidate &Cand, bool IsBottomUp);
- void initCandidate(SchedCandidate &Cand, SUnit *SU,
- bool AtTop, const RegPressureTracker &RPTracker,
- const SIRegisterInfo *SRI,
- unsigned SGPRPressure, unsigned VGPRPressure);
+ void initCandidate(SchedCandidate &Cand, SUnit *SU, bool AtTop,
+ const RegPressureTracker &RPTracker,
+ const SIRegisterInfo *SRI, unsigned SGPRPressure,
+ unsigned VGPRPressure, bool IsBottomUp);
std::vector<unsigned> Pressure;
>From 902b8fa000b8d884b13557c8bb6cbf6ec5248c6c Mon Sep 17 00:00:00 2001
From: pvanhout <pierre.vanhoutryve at amd.com>
Date: Tue, 11 Jun 2024 10:02:45 +0200
Subject: [PATCH 2/3] Comments
---
llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp | 45 ++++++++++-----------
1 file changed, 22 insertions(+), 23 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index 5d6e8ff8ef34d..a7fc273317c97 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -116,21 +116,20 @@ void GCNSchedStrategy::initialize(ScheduleDAGMI *DAG) {
<< ", SGPRExcessLimit = " << SGPRExcessLimit << "\n\n");
}
-static bool canUsePressureDiffs(SUnit *SU) {
- if (SU->isInstr()) {
- // Cannot use pressure diffs for subregister defs or with physregs, it's
- // imprecise in both cases.
- for (const auto &Op : SU->getInstr()->operands()) {
- if (!Op.isReg() || Op.isImplicit())
- continue;
- if (Op.getReg().isPhysical() ||
- (Op.isDef() && Op.getSubReg() != AMDGPU::NoSubRegister))
- return false;
- }
- return true;
- }
+static bool canUsePressureDiffs(const SUnit &SU) {
+ if (!SU.isInstr())
+ return false;
- return false;
+ // Cannot use pressure diffs for subregister defs or with physregs, it's
+ // imprecise in both cases.
+ for (const auto &Op : SU.getInstr()->operands()) {
+ if (!Op.isReg() || Op.isImplicit())
+ continue;
+ if (Op.getReg().isPhysical() ||
+ (Op.isDef() && Op.getSubReg() != AMDGPU::NoSubRegister))
+ return false;
+ }
+ return true;
}
static void getRegisterPressures(bool AtTop,
@@ -161,7 +160,7 @@ void GCNSchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU,
Pressure.clear();
MaxPressure.clear();
- if (AtTop || !canUsePressureDiffs(SU)) {
+ if (AtTop || !canUsePressureDiffs(*SU)) {
getRegisterPressures(AtTop, RPTracker, SU, Pressure, MaxPressure);
} else {
// Reserve 4 slots.
@@ -185,7 +184,7 @@ void GCNSchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU,
CheckPressure[AMDGPU::RegisterPressureSets::SReg_32] ||
Pressure[AMDGPU::RegisterPressureSets::VGPR_32] !=
CheckPressure[AMDGPU::RegisterPressureSets::VGPR_32]) {
- errs() << "Register Pressure is innaccurate when calculated through "
+ errs() << "Register Pressure is inaccurate when calculated through "
"PressureDiff\n";
errs() << "SGPR got " << Pressure[AMDGPU::RegisterPressureSets::SReg_32]
<< ", expected "
@@ -193,7 +192,7 @@ void GCNSchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU,
errs() << "VGPR got " << Pressure[AMDGPU::RegisterPressureSets::VGPR_32]
<< ", expected "
<< CheckPressure[AMDGPU::RegisterPressureSets::VGPR_32] << "\n";
- report_fatal_error("innaccurate register pressure calculation");
+ report_fatal_error("inaccurate register pressure calculation");
}
#endif
}
@@ -318,7 +317,7 @@ SUnit *GCNSchedStrategy::pickNodeBidirectional(bool &IsTopNode) {
BotCand.Policy != BotPolicy) {
BotCand.reset(CandPolicy());
pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand,
- /*IsBottomUp*/ true);
+ /*IsBottomUp=*/true);
assert(BotCand.Reason != NoCand && "failed to find the first candidate");
} else {
LLVM_DEBUG(traceCandidate(BotCand));
@@ -327,7 +326,7 @@ SUnit *GCNSchedStrategy::pickNodeBidirectional(bool &IsTopNode) {
SchedCandidate TCand;
TCand.reset(CandPolicy());
pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), TCand,
- /*IsBottomUp*/ true);
+ /*IsBottomUp=*/true);
assert(TCand.SU == BotCand.SU &&
"Last pick result should correspond to re-picking right now");
}
@@ -340,7 +339,7 @@ SUnit *GCNSchedStrategy::pickNodeBidirectional(bool &IsTopNode) {
TopCand.Policy != TopPolicy) {
TopCand.reset(CandPolicy());
pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand,
- /*IsBottomUp*/ false);
+ /*IsBottomUp=*/false);
assert(TopCand.Reason != NoCand && "failed to find the first candidate");
} else {
LLVM_DEBUG(traceCandidate(TopCand));
@@ -349,7 +348,7 @@ SUnit *GCNSchedStrategy::pickNodeBidirectional(bool &IsTopNode) {
SchedCandidate TCand;
TCand.reset(CandPolicy());
pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TCand,
- /*IsBottomUp*/ false);
+ /*IsBottomUp=*/false);
assert(TCand.SU == TopCand.SU &&
"Last pick result should correspond to re-picking right now");
}
@@ -387,7 +386,7 @@ SUnit *GCNSchedStrategy::pickNode(bool &IsTopNode) {
CandPolicy NoPolicy;
TopCand.reset(NoPolicy);
pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand,
- /*IsBottomUp*/ false);
+ /*IsBottomUp=*/false);
assert(TopCand.Reason != NoCand && "failed to find a candidate");
SU = TopCand.SU;
}
@@ -398,7 +397,7 @@ SUnit *GCNSchedStrategy::pickNode(bool &IsTopNode) {
CandPolicy NoPolicy;
BotCand.reset(NoPolicy);
pickNodeFromQueue(Bot, NoPolicy, DAG->getBotRPTracker(), BotCand,
- /*IsBottomUp*/ true);
+ /*IsBottomUp=*/true);
assert(BotCand.Reason != NoCand && "failed to find a candidate");
SU = BotCand.SU;
}
>From 9408ccf830c573315f17ebbcd29fecc67cbb1a22 Mon Sep 17 00:00:00 2001
From: pvanhout <pierre.vanhoutryve at amd.com>
Date: Thu, 13 Jun 2024 08:40:59 +0200
Subject: [PATCH 3/3] Comments
---
llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp | 27 +++++++++++++++++++++
1 file changed, 27 insertions(+)
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index a7fc273317c97..8d28d009c53d7 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -116,6 +116,22 @@ void GCNSchedStrategy::initialize(ScheduleDAGMI *DAG) {
<< ", SGPRExcessLimit = " << SGPRExcessLimit << "\n\n");
}
+/// Checks whether \p SU can use the cached DAG pressure diffs to compute the
+/// current register pressure.
+///
+/// This works for the common case, but it has a few exceptions that have been
+/// observed through trial and error:
+/// - Explicit physical register operands
+/// - Subregister definitions
+///
+/// In both of those cases, PressureDiff doesn't represent the actual pressure,
+/// and querying LiveIntervals through the RegPressureTracker is needed to get
+/// an accurate value.
+///
+/// We should eventually only use PressureDiff for maximum performance, but this
+/// already allows 80% of SUs to take the fast path without changing scheduling
+/// at all. Further changes would either change scheduling, or require a lot
+/// more logic to recover an accurate pressure estimate from the PressureDiffs.
static bool canUsePressureDiffs(const SUnit &SU) {
if (!SU.isInstr())
return false;
@@ -160,6 +176,17 @@ void GCNSchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU,
Pressure.clear();
MaxPressure.clear();
+ // We try to use the cached PressureDiffs in the ScheduleDAG whenever
+ // possible over querying the RegPressureTracker.
+ //
+ // RegPressureTracker will make a lot of LIS queries which are very
+ // expensive, it is considered a slow function in this context.
+ //
+ // PressureDiffs are precomputed and cached, and getPressureDiff is just a
+ // trivial lookup into an array. It is pretty much free.
+ //
+ // In EXPENSIVE_CHECKS, we always query RPTracker to verify the results of
+ // PressureDiffs.
if (AtTop || !canUsePressureDiffs(*SU)) {
getRegisterPressures(AtTop, RPTracker, SU, Pressure, MaxPressure);
} else {
More information about the llvm-commits
mailing list