[llvm] 799c50f - [AMDGPU] Avoid second rescheduling for some regions
Stanislav Mekhanoshin via llvm-commits
llvm-commits at lists.llvm.org
Fri Feb 26 12:55:16 PST 2021
Author: Stanislav Mekhanoshin
Date: 2021-02-26T12:29:37-08:00
New Revision: 799c50fe935b610d7e3c092255779f1d5e2f39d8
URL: https://github.com/llvm/llvm-project/commit/799c50fe935b610d7e3c092255779f1d5e2f39d8
DIFF: https://github.com/llvm/llvm-project/commit/799c50fe935b610d7e3c092255779f1d5e2f39d8.diff
LOG: [AMDGPU] Avoid second rescheduling for some regions
If a region was not constrained by high register pressure
and was not rescheduled without clustering, we can skip
rescheduling it in the ClusteredLowOccupancyReschedule stage.
This improves scheduling speed by 25% on some kernels.
Differential Revision: https://reviews.llvm.org/D97506
Added:
Modified:
llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index 9b6ec896a6ce..0212b8e17641 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -21,7 +21,7 @@ using namespace llvm;
GCNMaxOccupancySchedStrategy::GCNMaxOccupancySchedStrategy(
const MachineSchedContext *C) :
GenericScheduler(C), TargetOccupancy(0), HasClusteredNodes(false),
- MF(nullptr) { }
+ HasExcessPressure(false), MF(nullptr) { }
void GCNMaxOccupancySchedStrategy::initialize(ScheduleDAGMI *DAG) {
GenericScheduler::initialize(DAG);
@@ -104,11 +104,13 @@ void GCNMaxOccupancySchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU
// marked as RegExcess in tryCandidate() when they are compared with
// instructions that increase the register pressure.
if (ShouldTrackVGPRs && NewVGPRPressure >= VGPRExcessLimit) {
+ HasExcessPressure = true;
Cand.RPDelta.Excess = PressureChange(AMDGPU::RegisterPressureSets::VGPR_32);
Cand.RPDelta.Excess.setUnitInc(NewVGPRPressure - VGPRExcessLimit);
}
if (ShouldTrackSGPRs && NewSGPRPressure >= SGPRExcessLimit) {
+ HasExcessPressure = true;
Cand.RPDelta.Excess = PressureChange(AMDGPU::RegisterPressureSets::SReg_32);
Cand.RPDelta.Excess.setUnitInc(NewSGPRPressure - SGPRExcessLimit);
}
@@ -122,6 +124,7 @@ void GCNMaxOccupancySchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU
int VGPRDelta = NewVGPRPressure - VGPRCriticalLimit;
if (SGPRDelta >= 0 || VGPRDelta >= 0) {
+ HasExcessPressure = true;
if (SGPRDelta > VGPRDelta) {
Cand.RPDelta.CriticalMax =
PressureChange(AMDGPU::RegisterPressureSets::SReg_32);
@@ -331,12 +334,17 @@ void GCNScheduleDAGMILive::schedule() {
}
GCNMaxOccupancySchedStrategy &S = (GCNMaxOccupancySchedStrategy&)*SchedImpl;
- // Set HasClusteredNodes to true for late stages where we are not interested
- // in it anymore. That way pickNode() will not scan SDep's when not needed.
- S.HasClusteredNodes = Stage >= UnclusteredReschedule;
+ // Set HasClusteredNodes to true for late stages where we have already
+ // collected it. That way pickNode() will not scan SDep's when not needed.
+ S.HasClusteredNodes = Stage > InitialSchedule;
+ S.HasExcessPressure = false;
ScheduleDAGMILive::schedule();
Regions[RegionIdx] = std::make_pair(RegionBegin, RegionEnd);
RescheduleRegions[RegionIdx] = false;
+ if (Stage == InitialSchedule && S.HasClusteredNodes)
+ RegionsWithClusters[RegionIdx] = true;
+ if (S.HasExcessPressure)
+ RegionsWithHighRP[RegionIdx] = true;
if (!LIS)
return;
@@ -381,8 +389,10 @@ void GCNScheduleDAGMILive::schedule() {
unsigned MaxSGPRs = ST.getMaxNumSGPRs(MF);
if (PressureAfter.getVGPRNum(false) > MaxVGPRs ||
PressureAfter.getAGPRNum() > MaxVGPRs ||
- PressureAfter.getSGPRNum() > MaxSGPRs)
+ PressureAfter.getSGPRNum() > MaxSGPRs) {
RescheduleRegions[RegionIdx] = true;
+ RegionsWithHighRP[RegionIdx] = true;
+ }
if (WavesAfter >= MinOccupancy) {
if (Stage == UnclusteredReschedule &&
@@ -392,7 +402,8 @@ void GCNScheduleDAGMILive::schedule() {
PressureAfter.less(ST, PressureBefore) ||
!RescheduleRegions[RegionIdx]) {
Pressure[RegionIdx] = PressureAfter;
- if (!S.HasClusteredNodes && (Stage + 1) == UnclusteredReschedule)
+ if (!RegionsWithClusters[RegionIdx] &&
+ (Stage + 1) == UnclusteredReschedule)
RescheduleRegions[RegionIdx] = false;
return;
} else {
@@ -401,7 +412,7 @@ void GCNScheduleDAGMILive::schedule() {
}
LLVM_DEBUG(dbgs() << "Attempting to revert scheduling.\n");
- RescheduleRegions[RegionIdx] = S.HasClusteredNodes ||
+ RescheduleRegions[RegionIdx] = RegionsWithClusters[RegionIdx] ||
(Stage + 1) != UnclusteredReschedule;
RegionEnd = RegionBegin;
for (MachineInstr *MI : Unsched) {
@@ -535,7 +546,11 @@ void GCNScheduleDAGMILive::finalizeSchedule() {
LiveIns.resize(Regions.size());
Pressure.resize(Regions.size());
RescheduleRegions.resize(Regions.size());
+ RegionsWithClusters.resize(Regions.size());
+ RegionsWithHighRP.resize(Regions.size());
RescheduleRegions.set();
+ RegionsWithClusters.reset();
+ RegionsWithHighRP.reset();
if (!Regions.empty())
BBLiveInMap = getBBLiveInMap();
@@ -580,7 +595,10 @@ void GCNScheduleDAGMILive::finalizeSchedule() {
SavedMutations.swap(Mutations);
for (auto Region : Regions) {
- if (Stage == UnclusteredReschedule && !RescheduleRegions[RegionIdx]) {
+ if ((Stage == UnclusteredReschedule && !RescheduleRegions[RegionIdx]) ||
+ (Stage == ClusteredLowOccupancyReschedule &&
+ !RegionsWithClusters[RegionIdx] && !RegionsWithHighRP[RegionIdx])) {
+
++RegionIdx;
continue;
}
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
index c96f73efedeb..15eba3f5eac0 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
@@ -54,6 +54,10 @@ class GCNMaxOccupancySchedStrategy final : public GenericScheduler {
// before a region scheduling to know if the region had such clusters.
bool HasClusteredNodes;
+ // schedule() has seen an excess register pressure and had to track
+ // register pressure for actual scheduling heuristics.
+ bool HasExcessPressure;
+
MachineFunction *MF;
public:
@@ -100,6 +104,12 @@ class GCNScheduleDAGMILive final : public ScheduleDAGMILive {
// or we generally desire to reschedule it.
BitVector RescheduleRegions;
+ // Record regions which use clustered loads/stores.
+ BitVector RegionsWithClusters;
+
+ // Record regions with high register pressure.
+ BitVector RegionsWithHighRP;
+
// Region live-in cache.
SmallVector<GCNRPTracker::LiveRegSet, 32> LiveIns;
More information about the llvm-commits mailing list