[llvm] [AMDGPU] Optionally Use GCNRPTrackers during scheduling (PR #93090)
Jeffrey Byrnes via llvm-commits
llvm-commits at lists.llvm.org
Mon Sep 23 10:09:19 PDT 2024
https://github.com/jrbyrnes updated https://github.com/llvm/llvm-project/pull/93090
>From 47ac3d5e4c30c62c63067a54d6e6ad95ead1a558 Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Tue, 21 May 2024 12:55:07 -0700
Subject: [PATCH 01/20] [AMDGPU] NFC: Add BBLiveOutMap & LiveOut Cache
Change-Id: I63cfd44e635cc4bee0e6780ca43b692c46e940b7
---
llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp | 58 ++++++++++++++++++---
llvm/lib/Target/AMDGPU/GCNSchedStrategy.h | 42 ++++++++++++++-
2 files changed, 91 insertions(+), 9 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index 217279211531b4..f1f28ed30c5e7e 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -58,6 +58,11 @@ static cl::opt<bool>
"Wave Limited (amdgpu-limit-wave-threshold)."),
cl::init(false));
+static cl::opt<bool> GCNTrackers(
+ "amdgpu-use-amdgpu-trackers", cl::Hidden,
+ cl::desc("Use the AMDGPU specific RPTrackers during scheduling"),
+ cl::init(false));
+
const unsigned ScheduleMetrics::ScaleFactor = 100;
GCNSchedStrategy::GCNSchedStrategy(const MachineSchedContext *C)
@@ -571,7 +576,8 @@ GCNScheduleDAGMILive::GCNScheduleDAGMILive(
MachineSchedContext *C, std::unique_ptr<MachineSchedStrategy> S)
: ScheduleDAGMILive(C, std::move(S)), ST(MF.getSubtarget<GCNSubtarget>()),
MFI(*MF.getInfo<SIMachineFunctionInfo>()),
- StartingOccupancy(MFI.getOccupancy()), MinOccupancy(StartingOccupancy) {
+ StartingOccupancy(MFI.getOccupancy()), MinOccupancy(StartingOccupancy),
+ RegionLiveOuts(this, /*IsLiveOut=*/true) {
LLVM_DEBUG(dbgs() << "Starting occupancy is " << StartingOccupancy << ".\n");
if (RelaxedOcc) {
@@ -613,6 +619,14 @@ GCNScheduleDAGMILive::getRealRegPressure(unsigned RegionIdx) const {
return RPTracker.moveMaxPressure();
}
+static MachineInstr *getLastMIForRegion(MachineBasicBlock::iterator RegionBegin,
+ MachineBasicBlock::iterator RegionEnd) {
+ auto REnd = RegionEnd == RegionBegin->getParent()->end()
+ ? std::prev(RegionEnd)
+ : RegionEnd;
+ return &*skipDebugInstructionsBackward(REnd, RegionBegin);
+}
+
void GCNScheduleDAGMILive::computeBlockPressure(unsigned RegionIdx,
const MachineBasicBlock *MBB) {
GCNDownwardRPTracker RPTracker(*LIS);
@@ -687,20 +701,45 @@ void GCNScheduleDAGMILive::computeBlockPressure(unsigned RegionIdx,
}
DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet>
-GCNScheduleDAGMILive::getBBLiveInMap() const {
+GCNScheduleDAGMILive::getRegionLiveInMap() const {
assert(!Regions.empty());
- std::vector<MachineInstr *> BBStarters;
- BBStarters.reserve(Regions.size());
+ std::vector<MachineInstr *> RegionFirstMIs;
+ RegionFirstMIs.reserve(Regions.size());
auto I = Regions.rbegin(), E = Regions.rend();
auto *BB = I->first->getParent();
do {
auto *MI = &*skipDebugInstructionsForward(I->first, I->second);
- BBStarters.push_back(MI);
+ RegionFirstMIs.push_back(MI);
do {
++I;
} while (I != E && I->first->getParent() == BB);
} while (I != E);
- return getLiveRegMap(BBStarters, false /*After*/, *LIS);
+ return getLiveRegMap(RegionFirstMIs, /*After=*/false, *LIS);
+}
+
+DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet>
+GCNScheduleDAGMILive::getRegionLiveOutMap() const {
+ assert(!Regions.empty());
+ std::vector<MachineInstr *> RegionLastMIs;
+ RegionLastMIs.reserve(Regions.size());
+ for (auto &[RegionBegin, RegionEnd] : reverse(Regions))
+ RegionLastMIs.push_back(getLastMIForRegion(RegionBegin, RegionEnd));
+
+ return getLiveRegMap(RegionLastMIs, /*After=*/true, *LIS);
+}
+
+void RegionPressureMap::buildLiveRegMap() {
+ IdxToInstruction.clear();
+
+ BBLiveRegMap =
+ IsLiveOut ? DAG->getRegionLiveOutMap() : DAG->getRegionLiveInMap();
+ for (unsigned I = 0; I < DAG->Regions.size(); I++) {
+ MachineInstr *RegionKey =
+ IsLiveOut
+ ? getLastMIForRegion(DAG->Regions[I].first, DAG->Regions[I].second)
+ : &*DAG->Regions[I].first;
+ IdxToInstruction[I] = RegionKey;
+ }
}
void GCNScheduleDAGMILive::finalizeSchedule() {
@@ -726,8 +765,11 @@ void GCNScheduleDAGMILive::finalizeSchedule() {
void GCNScheduleDAGMILive::runSchedStages() {
LLVM_DEBUG(dbgs() << "All regions recorded, starting actual scheduling.\n");
- if (!Regions.empty())
- BBLiveInMap = getBBLiveInMap();
+ if (!Regions.empty()) {
+ BBLiveInMap = getRegionLiveInMap();
+ if (GCNTrackers)
+ RegionLiveOuts.buildLiveRegMap();
+ }
GCNSchedStrategy &S = static_cast<GCNSchedStrategy &>(*SchedImpl);
while (S.advanceStage()) {
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
index f0aea2bc4ab865..c402fb1ef373c9 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
@@ -163,6 +163,32 @@ inline raw_ostream &operator<<(raw_ostream &OS, const ScheduleMetrics &Sm) {
return OS;
}
+class GCNScheduleDAGMILive;
+class RegionPressureMap {
+ GCNScheduleDAGMILive *DAG;
+ // The live in/out pressure as indexed by the first or last MI in the region
+ // before scheduling.
+ DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet> BBLiveRegMap;
+ // The mapping of RegionIdx to key instruction
+ DenseMap<unsigned, MachineInstr *> IdxToInstruction;
+ // Whether we are calculating LiveOuts or LiveIns
+ bool IsLiveOut;
+
+public:
+ RegionPressureMap() {}
+ RegionPressureMap(GCNScheduleDAGMILive *GCNDAG, bool LiveOut)
+ : DAG(GCNDAG), IsLiveOut(LiveOut) {}
+ // Build the Instr->LiveReg and RegionIdx->Instr maps
+ void buildLiveRegMap();
+
+ // Retrieve the LiveReg for a given RegionIdx
+ GCNRPTracker::LiveRegSet &getLiveRegsForRegionIdx(unsigned RegionIdx) {
+ assert(IdxToInstruction.find(RegionIdx) != IdxToInstruction.end());
+ MachineInstr *Key = IdxToInstruction[RegionIdx];
+ return BBLiveRegMap[Key];
+ }
+};
+
class GCNScheduleDAGMILive final : public ScheduleDAGMILive {
friend class GCNSchedStage;
friend class OccInitialScheduleStage;
@@ -170,6 +196,7 @@ class GCNScheduleDAGMILive final : public ScheduleDAGMILive {
friend class ClusteredLowOccStage;
friend class PreRARematStage;
friend class ILPInitialScheduleStage;
+ friend class RegionPressureMap;
const GCNSubtarget &ST;
@@ -211,9 +238,22 @@ class GCNScheduleDAGMILive final : public ScheduleDAGMILive {
// Temporary basic block live-in cache.
DenseMap<const MachineBasicBlock *, GCNRPTracker::LiveRegSet> MBBLiveIns;
+ // The map of the initial first region instruction to region live in registers
DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet> BBLiveInMap;
- DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet> getBBLiveInMap() const;
+ // Calculate the map of the initial first region instruction to region live in
+ // registers
+ DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet> getRegionLiveInMap() const;
+
+ // Calculate the map of the initial last region instruction to region live out
+ // registers
+ DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet>
+ getRegionLiveOutMap() const;
+
+ // The live out registers per region. These are internally stored as a map of
+ // the initial last region instruction to region live out registers, but can
+ // be retrieved with the regionIdx by calls to getLiveRegsForRegionIdx.
+ RegionPressureMap RegionLiveOuts;
// Return current region pressure.
GCNRegPressure getRealRegPressure(unsigned RegionIdx) const;
>From fdc457d964c88e0e41078f939d1fc6b67a62af33 Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Tue, 21 May 2024 13:34:59 -0700
Subject: [PATCH 02/20] [AMDGPU] NFC: Provide RPTracker interface for external
iterators
Change-Id: I79b54722e6e858961486248d94766c3f3c161160
---
llvm/lib/Target/AMDGPU/GCNRegPressure.cpp | 284 ++++++++++++++++++++--
llvm/lib/Target/AMDGPU/GCNRegPressure.h | 95 ++++++--
2 files changed, 330 insertions(+), 49 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
index c83af729f501fe..c6bffc64401368 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
@@ -288,6 +288,72 @@ collectVirtualRegUses(SmallVectorImpl<RegisterMaskPair> &RegMaskPairs,
}
}
+static LaneBitmask getRegLanes(ArrayRef<RegisterMaskPair> RegUnits,
+ Register RegUnit) {
+ auto I = llvm::find_if(RegUnits, [RegUnit](const RegisterMaskPair Other) {
+ return Other.RegUnit == RegUnit;
+ });
+ if (I == RegUnits.end())
+ return LaneBitmask::getNone();
+ return I->LaneMask;
+}
+
+static LaneBitmask
+getLanesWithProperty(const LiveIntervals &LIS, const MachineRegisterInfo &MRI,
+ bool TrackLaneMasks, Register RegUnit, SlotIndex Pos,
+ LaneBitmask SafeDefault,
+ bool (*Property)(const LiveRange &LR, SlotIndex Pos)) {
+ if (RegUnit.isVirtual()) {
+ const LiveInterval &LI = LIS.getInterval(RegUnit);
+ LaneBitmask Result;
+ if (TrackLaneMasks && LI.hasSubRanges()) {
+ for (const LiveInterval::SubRange &SR : LI.subranges()) {
+ if (Property(SR, Pos))
+ Result |= SR.LaneMask;
+ }
+ } else if (Property(LI, Pos)) {
+ Result = TrackLaneMasks ? MRI.getMaxLaneMaskForVReg(RegUnit)
+ : LaneBitmask::getAll();
+ }
+
+ return Result;
+ } else {
+ const LiveRange *LR = LIS.getCachedRegUnit(RegUnit);
+ // Be prepared for missing liveranges: We usually do not compute liveranges
+ // for physical registers on targets with many registers (GPUs).
+ if (LR == nullptr)
+ return SafeDefault;
+ return Property(*LR, Pos) ? LaneBitmask::getAll() : LaneBitmask::getNone();
+ }
+}
+
+/// Helper to find a vreg use between two indices [PriorUseIdx, NextUseIdx).
+/// The query starts with a lane bitmask which gets lanes/bits removed for every
+/// use we find.
+static LaneBitmask findUseBetween(unsigned Reg, LaneBitmask LastUseMask,
+ SlotIndex PriorUseIdx, SlotIndex NextUseIdx,
+ const MachineRegisterInfo &MRI,
+ const LiveIntervals *LIS,
+ bool Upward = false) {
+ const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
+ for (const MachineOperand &MO : MRI.use_nodbg_operands(Reg)) {
+ if (MO.isUndef())
+ continue;
+ const MachineInstr *MI = MO.getParent();
+ SlotIndex InstSlot = LIS->getInstructionIndex(*MI).getRegSlot();
+ bool InRange = Upward ? (InstSlot > PriorUseIdx && InstSlot <= NextUseIdx)
+ : (InstSlot >= PriorUseIdx && InstSlot < NextUseIdx);
+ if (InRange) {
+ unsigned SubRegIdx = MO.getSubReg();
+ LaneBitmask UseMask = TRI.getSubRegIndexLaneMask(SubRegIdx);
+ LastUseMask &= ~UseMask;
+ if (LastUseMask.none())
+ return LaneBitmask::getNone();
+ }
+ }
+ return LastUseMask;
+}
+
///////////////////////////////////////////////////////////////////////////////
// GCNRPTracker
@@ -343,17 +409,47 @@ void GCNRPTracker::reset(const MachineInstr &MI,
MaxPressure = CurPressure = getRegPressure(*MRI, LiveRegs);
}
-////////////////////////////////////////////////////////////////////////////////
-// GCNUpwardRPTracker
-
-void GCNUpwardRPTracker::reset(const MachineRegisterInfo &MRI_,
- const LiveRegSet &LiveRegs_) {
+void GCNRPTracker::reset(const MachineRegisterInfo &MRI_,
+ const LiveRegSet &LiveRegs_) {
MRI = &MRI_;
LiveRegs = LiveRegs_;
LastTrackedMI = nullptr;
MaxPressure = CurPressure = getRegPressure(MRI_, LiveRegs_);
}
+void GCNRPTracker::bumpDeadDefs(ArrayRef<RegisterMaskPair> DeadDefs) {
+ for (const RegisterMaskPair &P : DeadDefs) {
+ Register Reg = P.RegUnit;
+ if (!Reg.isVirtual())
+ continue;
+ LaneBitmask LiveMask = LiveRegs[Reg];
+ LaneBitmask BumpedMask = LiveMask | P.LaneMask;
+ CurPressure.inc(Reg, LiveMask, BumpedMask, *MRI);
+ }
+ MaxPressure = max(MaxPressure, CurPressure);
+ for (const RegisterMaskPair &P : DeadDefs) {
+ Register Reg = P.RegUnit;
+ if (!Reg.isVirtual())
+ continue;
+ LaneBitmask LiveMask = LiveRegs[Reg];
+ LaneBitmask BumpedMask = LiveMask | P.LaneMask;
+ CurPressure.inc(Reg, BumpedMask, LiveMask, *MRI);
+ }
+}
+
+LaneBitmask GCNRPTracker::getLastUsedLanes(Register RegUnit,
+ SlotIndex Pos) const {
+ return getLanesWithProperty(
+ LIS, *MRI, true, RegUnit, Pos.getBaseIndex(), LaneBitmask::getNone(),
+ [](const LiveRange &LR, SlotIndex Pos) {
+ const LiveRange::Segment *S = LR.getSegmentContaining(Pos);
+ return S != nullptr && S->end == Pos.getRegSlot();
+ });
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// GCNUpwardRPTracker
+
void GCNUpwardRPTracker::recede(const MachineInstr &MI) {
assert(MRI && "call reset first");
@@ -414,6 +510,63 @@ void GCNUpwardRPTracker::recede(const MachineInstr &MI) {
assert(CurPressure == getRegPressure(*MRI, LiveRegs));
}
+void GCNUpwardRPTracker::bumpUpwardPressure(const MachineInstr *MI) {
+ assert(!MI->isDebugOrPseudoInstr() && "Expect a nondebug instruction.");
+
+ SlotIndex SlotIdx = LIS.getInstructionIndex(*MI).getRegSlot();
+
+ // Account for register pressure similar to RegPressureTracker::recede().
+ RegisterOperands RegOpers;
+ const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo();
+ RegOpers.collect(*MI, *TRI, *MRI, true, /*IgnoreDead=*/true);
+ assert(RegOpers.DeadDefs.empty());
+ RegOpers.adjustLaneLiveness(LIS, *MRI, SlotIdx);
+ RegOpers.detectDeadDefs(*MI, LIS);
+
+ // Boost max pressure for all dead defs together: bumpDeadDefs temporarily
+ // raises CurPressure, folds it into MaxPressure, then restores CurPressure.
+ bumpDeadDefs(RegOpers.DeadDefs);
+
+ // Kill liveness at live defs.
+ for (const RegisterMaskPair &P : RegOpers.Defs) {
+ Register Reg = P.RegUnit;
+ if (!Reg.isVirtual())
+ continue;
+ LaneBitmask LiveAfter = LiveRegs[Reg];
+ LaneBitmask UseLanes = getRegLanes(RegOpers.Uses, Reg);
+ LaneBitmask DefLanes = P.LaneMask;
+ LaneBitmask LiveBefore = (LiveAfter & ~DefLanes) | UseLanes;
+
+ // There may be parts of the register that were dead before the
+ // instruction, but became live afterwards. Similarly, some parts
+ // may have been killed in this instruction.
+ CurPressure.inc(Reg, LiveAfter, LiveAfter & LiveBefore, *MRI);
+ CurPressure.inc(Reg, LiveAfter, ~LiveAfter & LiveBefore, *MRI);
+ MaxPressure = max(MaxPressure, CurPressure);
+ }
+ // Generate liveness for uses.
+ for (const RegisterMaskPair &P : RegOpers.Uses) {
+ Register Reg = P.RegUnit;
+ if (!Reg.isVirtual())
+ continue;
+ // If this register was also in a def operand, we've handled it
+ // with defs.
+ if (getRegLanes(RegOpers.Defs, Reg).any())
+ continue;
+ LaneBitmask LiveAfter = LiveRegs[Reg];
+ SlotIndex CurrIdx =
+ LastTrackedMI ? LIS.getInstructionIndex(*LastTrackedMI).getRegSlot()
+ : LIS.getMBBEndIdx(MI->getParent());
+ ;
+ LaneBitmask LastUseMask =
+ findUseBetween(Reg, P.LaneMask, SlotIdx, CurrIdx, *MRI, &LIS, true);
+ LastUseMask &= ~LiveAfter;
+ LaneBitmask LiveBefore = (LiveAfter | LastUseMask);
+ CurPressure.inc(Reg, LiveAfter, LiveBefore, *MRI);
+ }
+ MaxPressure = max(MaxPressure, CurPressure);
+}
+
////////////////////////////////////////////////////////////////////////////////
// GCNDownwardRPTracker
@@ -430,28 +583,44 @@ bool GCNDownwardRPTracker::reset(const MachineInstr &MI,
return true;
}
-bool GCNDownwardRPTracker::advanceBeforeNext() {
+bool GCNDownwardRPTracker::advanceBeforeNext(MachineInstr *MI,
+ bool UseInternalIterator,
+ LiveIntervals *TheLIS) {
assert(MRI && "call reset first");
- if (!LastTrackedMI)
- return NextMI == MBBEnd;
-
- assert(NextMI == MBBEnd || !NextMI->isDebugInstr());
+ SlotIndex SI;
+ LiveIntervals *CurrLIS;
+ MachineInstr *CurrMI;
+ if (UseInternalIterator) {
+ if (!LastTrackedMI)
+ return NextMI == MBBEnd;
+
+ assert(NextMI == MBBEnd || !NextMI->isDebugInstr());
+ CurrLIS = const_cast<LiveIntervals *>(&LIS);
+ CurrMI = const_cast<MachineInstr *>(LastTrackedMI);
+
+ SI = NextMI == MBBEnd
+ ? CurrLIS->getInstructionIndex(*LastTrackedMI).getDeadSlot()
+ : CurrLIS->getInstructionIndex(*NextMI).getBaseIndex();
+ } else { //! UseInternalIterator
+ CurrLIS = TheLIS;
+ SI = CurrLIS->getInstructionIndex(*MI).getBaseIndex();
+ CurrMI = MI;
+ }
- SlotIndex SI = NextMI == MBBEnd
- ? LIS.getInstructionIndex(*LastTrackedMI).getDeadSlot()
- : LIS.getInstructionIndex(*NextMI).getBaseIndex();
assert(SI.isValid());
// Remove dead registers or mask bits.
SmallSet<Register, 8> SeenRegs;
- for (auto &MO : LastTrackedMI->operands()) {
+ for (auto &MO : CurrMI->operands()) {
if (!MO.isReg() || !MO.getReg().isVirtual())
continue;
if (MO.isUse() && !MO.readsReg())
continue;
+ if (!UseInternalIterator && MO.isDef())
+ continue;
if (!SeenRegs.insert(MO.getReg()).second)
continue;
- const LiveInterval &LI = LIS.getInterval(MO.getReg());
+ const LiveInterval &LI = CurrLIS->getInterval(MO.getReg());
if (LI.hasSubRanges()) {
auto It = LiveRegs.end();
for (const auto &S : LI.subranges()) {
@@ -481,15 +650,22 @@ bool GCNDownwardRPTracker::advanceBeforeNext() {
LastTrackedMI = nullptr;
- return NextMI == MBBEnd;
+ return UseInternalIterator && (NextMI == MBBEnd);
}
-void GCNDownwardRPTracker::advanceToNext() {
- LastTrackedMI = &*NextMI++;
- NextMI = skipDebugInstructionsForward(NextMI, MBBEnd);
+void GCNDownwardRPTracker::advanceToNext(MachineInstr *MI,
+ bool UseInternalIterator) {
+ if (UseInternalIterator) {
+ LastTrackedMI = &*NextMI++;
+ NextMI = skipDebugInstructionsForward(NextMI, MBBEnd);
+ } else {
+ LastTrackedMI = MI;
+ }
+
+ MachineInstr *CurrMI = const_cast<MachineInstr *>(LastTrackedMI);
// Add new registers or mask bits.
- for (const auto &MO : LastTrackedMI->all_defs()) {
+ for (const auto &MO : CurrMI->all_defs()) {
Register Reg = MO.getReg();
if (!Reg.isVirtual())
continue;
@@ -502,11 +678,12 @@ void GCNDownwardRPTracker::advanceToNext() {
MaxPressure = max(MaxPressure, CurPressure);
}
-bool GCNDownwardRPTracker::advance() {
- if (NextMI == MBBEnd)
+bool GCNDownwardRPTracker::advance(MachineInstr *MI, bool UseInternalIterator,
+ LiveIntervals *TheLIS) {
+ if (UseInternalIterator && NextMI == MBBEnd)
return false;
- advanceBeforeNext();
- advanceToNext();
+ advanceBeforeNext(MI, UseInternalIterator, TheLIS);
+ advanceToNext(MI, UseInternalIterator);
return true;
}
@@ -548,6 +725,65 @@ Printable llvm::reportMismatch(const GCNRPTracker::LiveRegSet &LISLR,
});
}
+void GCNDownwardRPTracker::bumpDownwardPressure(const MachineInstr *MI) {
+ assert(!MI->isDebugOrPseudoInstr() && "Expect a nondebug instruction.");
+
+ SlotIndex SlotIdx;
+ SlotIdx = LIS.getInstructionIndex(*MI).getRegSlot();
+
+ // Account for register pressure similar to RegPressureTracker::recede().
+ RegisterOperands RegOpers;
+ const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo();
+ RegOpers.collect(*MI, *TRI, *MRI, true, /*IgnoreDead=*/false);
+ RegOpers.adjustLaneLiveness(LIS, *MRI, SlotIdx);
+
+ for (const RegisterMaskPair &Use : RegOpers.Uses) {
+ Register Reg = Use.RegUnit;
+ if (!Reg.isVirtual())
+ continue;
+ LaneBitmask LastUseMask = getLastUsedLanes(Reg, SlotIdx);
+ if (LastUseMask.none())
+ continue;
+ // The LastUseMask is queried from the liveness information of instruction
+ // which may be further down the schedule. Some lanes may actually not be
+ // last uses for the current position.
+ // FIXME: allow the caller to pass in the list of vreg uses that remain
+ // to be bottom-scheduled to avoid searching uses at each query.
+ SlotIndex CurrIdx;
+ const MachineBasicBlock *MBB = MI->getParent();
+ MachineBasicBlock::const_iterator IdxPos = skipDebugInstructionsForward(
+ LastTrackedMI ? LastTrackedMI : MBB->begin(), MBB->end());
+ if (IdxPos == MBB->end()) {
+ CurrIdx = LIS.getMBBEndIdx(MBB);
+ } else {
+ CurrIdx = LIS.getInstructionIndex(*IdxPos).getRegSlot();
+ }
+
+ LastUseMask =
+ findUseBetween(Reg, LastUseMask, CurrIdx, SlotIdx, *MRI, &LIS);
+ if (LastUseMask.none())
+ continue;
+
+ LaneBitmask LiveMask = LiveRegs[Reg];
+ LaneBitmask NewMask = LiveMask & ~LastUseMask;
+ CurPressure.inc(Reg, LiveMask, NewMask, *MRI);
+ }
+
+ // Generate liveness for defs.
+ for (const RegisterMaskPair &Def : RegOpers.Defs) {
+ Register Reg = Def.RegUnit;
+ if (!Reg.isVirtual())
+ continue;
+ LaneBitmask LiveMask = LiveRegs[Reg];
+ LaneBitmask NewMask = LiveMask | Def.LaneMask;
+ CurPressure.inc(Reg, LiveMask, NewMask, *MRI);
+ }
+ MaxPressure = max(MaxPressure, CurPressure);
+
+ // Boost pressure for all dead defs together.
+ bumpDeadDefs(RegOpers.DeadDefs);
+}
+
bool GCNUpwardRPTracker::isValid() const {
const auto &SI = LIS.getInstructionIndex(*LastTrackedMI).getBaseIndex();
const auto LISLR = llvm::getLiveRegs(SI, LIS, *MRI);
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.h b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
index 54dc1972d27619..a79e412ce33449 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.h
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
@@ -19,6 +19,7 @@
#include "GCNSubtarget.h"
#include "llvm/CodeGen/LiveIntervals.h"
+#include "llvm/CodeGen/RegisterPressure.h"
#include <algorithm>
namespace llvm {
@@ -149,6 +150,9 @@ inline GCNRegPressure operator-(const GCNRegPressure &P1,
return Diff;
}
+///////////////////////////////////////////////////////////////////////////////
+// GCNRPTracker
+
class GCNRPTracker {
public:
using LiveRegSet = DenseMap<unsigned, LaneBitmask>;
@@ -165,7 +169,14 @@ class GCNRPTracker {
void reset(const MachineInstr &MI, const LiveRegSet *LiveRegsCopy,
bool After);
+ void bumpDeadDefs(ArrayRef<RegisterMaskPair> DeadDefs);
+
+ LaneBitmask getLastUsedLanes(Register RegUnit, SlotIndex Pos) const;
+
public:
+ // reset tracker and set live register set to the specified value.
+ void reset(const MachineRegisterInfo &MRI_, const LiveRegSet &LiveRegs_);
+
// live regs for the current state
const decltype(LiveRegs) &getLiveRegs() const { return LiveRegs; }
const MachineInstr *getLastTrackedMI() const { return LastTrackedMI; }
@@ -182,34 +193,40 @@ class GCNRPTracker {
GCNRPTracker::LiveRegSet getLiveRegs(SlotIndex SI, const LiveIntervals &LIS,
const MachineRegisterInfo &MRI);
+////////////////////////////////////////////////////////////////////////////////
+// GCNUpwardRPTracker
+
class GCNUpwardRPTracker : public GCNRPTracker {
public:
GCNUpwardRPTracker(const LiveIntervals &LIS_) : GCNRPTracker(LIS_) {}
- // reset tracker and set live register set to the specified value.
- void reset(const MachineRegisterInfo &MRI_, const LiveRegSet &LiveRegs_);
+ using GCNRPTracker::reset;
- // reset tracker at the specified slot index.
+ /// reset tracker at the specified slot index \p SI.
void reset(const MachineRegisterInfo &MRI, SlotIndex SI) {
- reset(MRI, llvm::getLiveRegs(SI, LIS, MRI));
+ GCNRPTracker::reset(MRI, llvm::getLiveRegs(SI, LIS, MRI));
}
- // reset tracker to the end of the MBB.
+ /// reset tracker to the end of the \p MBB.
void reset(const MachineBasicBlock &MBB) {
reset(MBB.getParent()->getRegInfo(),
LIS.getSlotIndexes()->getMBBEndIdx(&MBB));
}
- // reset tracker to the point just after MI (in program order).
+ /// reset tracker to the point just after \p MI (in program order).
void reset(const MachineInstr &MI) {
reset(MI.getMF()->getRegInfo(), LIS.getInstructionIndex(MI).getDeadSlot());
}
- // move to the state just before the MI (in program order).
+ /// Move to the state of RP just before the \p MI . If \p UseInternalIterator
+ /// is set, also update the internal iterators. Setting \p UseInternalIterator
+ /// to false allows for an externally managed iterator / program order.
void recede(const MachineInstr &MI);
- // checks whether the tracker's state after receding MI corresponds
- // to reported by LIS.
+ void bumpUpwardPressure(const MachineInstr *MI);
+
+ /// \returns whether the tracker's state after receding MI corresponds
+ /// to the state reported by LIS.
bool isValid() const;
const GCNRegPressure &getMaxPressure() const { return MaxPressure; }
@@ -223,6 +240,9 @@ class GCNUpwardRPTracker : public GCNRPTracker {
}
};
+////////////////////////////////////////////////////////////////////////////////
+// GCNDownwardRPTracker
+
class GCNDownwardRPTracker : public GCNRPTracker {
// Last position of reset or advanceBeforeNext
MachineBasicBlock::const_iterator NextMI;
@@ -232,37 +252,62 @@ class GCNDownwardRPTracker : public GCNRPTracker {
public:
GCNDownwardRPTracker(const LiveIntervals &LIS_) : GCNRPTracker(LIS_) {}
+ using GCNRPTracker::reset;
+
MachineBasicBlock::const_iterator getNext() const { return NextMI; }
- // Return MaxPressure and clear it.
+ /// \returns MaxPressure and clears it.
GCNRegPressure moveMaxPressure() {
auto Res = MaxPressure;
MaxPressure.clear();
return Res;
}
- // Reset tracker to the point before the MI
- // filling live regs upon this point using LIS.
- // Returns false if block is empty except debug values.
+ /// Reset tracker to the point before the \p MI
+ /// filling \p LiveRegs upon this point using LIS.
+ /// \returns false if block is empty except debug values.
bool reset(const MachineInstr &MI, const LiveRegSet *LiveRegs = nullptr);
- // Move to the state right before the next MI or after the end of MBB.
- // Returns false if reached end of the block.
- bool advanceBeforeNext();
-
- // Move to the state at the MI, advanceBeforeNext has to be called first.
- void advanceToNext();
-
- // Move to the state at the next MI. Returns false if reached end of block.
- bool advance();
-
- // Advance instructions until before End.
+ /// Move to the state right before the next MI or after the end of MBB.
+ /// \returns true only if the internal iterator has reached the block end.
+ /// If \p UseInternalIterator is true, then internal iterators are used and
+ /// set to process in program order. If \p UseInternalIterator is false, then
+ /// it is assumed that the tracker is using an externally managed iterator,
+ /// and advance* calls will not update the state of the iterator. In such
+ /// cases, the tracker will move to the state right before the provided \p MI
+ /// and use the provided \p TheLIS for RP calculations.
+ bool advanceBeforeNext(MachineInstr *MI = nullptr,
+ bool UseInternalIterator = true,
+ LiveIntervals *TheLIS = nullptr);
+
+ /// Move to the state at the MI, advanceBeforeNext has to be called first.
+ /// If \p UseInternalIterator is true, then internal iterators are used and
+ /// set to process in program order. If \p UseInternalIterator is false, then
+ /// it is assumed that the tracker is using an externally managed iterator,
+ /// and advance* calls will not update the state of the iterator. In such
+ /// cases, the tracker will move to the state at the provided \p MI .
+ void advanceToNext(MachineInstr *MI = nullptr,
+ bool UseInternalIterator = true);
+
+ /// Move to the state at the next MI. \returns false if reached end of
+ /// block. If \p UseInternalIterator is true, then internal iterators are used
+ /// and set to process in program order. If \p UseInternalIterator is false,
+ /// then it is assumed that the tracker is using an externally managed
+ /// iterator, and advance* calls will not update the state of the iterator. In
+ /// such cases, the tracker will move to the state right before the provided
+ /// \p MI and use the provided \p TheLIS for RP calculations.
+ bool advance(MachineInstr *MI = nullptr, bool UseInternalIterator = true,
+ LiveIntervals *TheLIS = nullptr);
+
+ /// Advance instructions until before \p End.
bool advance(MachineBasicBlock::const_iterator End);
- // Reset to Begin and advance to End.
+ /// Reset to \p Begin and advance to \p End.
bool advance(MachineBasicBlock::const_iterator Begin,
MachineBasicBlock::const_iterator End,
const LiveRegSet *LiveRegsCopy = nullptr);
+
+ void bumpDownwardPressure(const MachineInstr *MI);
};
LaneBitmask getLiveLaneMask(unsigned Reg,
>From eb3bf2f6cfc801df54288efad7f187afa6894a35 Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Tue, 21 May 2024 18:04:25 -0700
Subject: [PATCH 03/20] [AMDGPU] Optionally Use AMDGPU RPTrackers during
scheduling
Change-Id: I6ae56149c1eb49ea85362267174cc6274c416330
---
.../Target/AMDGPU/GCNIterativeScheduler.cpp | 2 +-
llvm/lib/Target/AMDGPU/GCNRegPressure.h | 1 -
llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp | 90 ++++++++++++++++---
llvm/lib/Target/AMDGPU/GCNSchedStrategy.h | 19 +++-
4 files changed, 96 insertions(+), 16 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp b/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
index 061b0515031b1b..79656f5b2b9f48 100644
--- a/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
@@ -480,7 +480,7 @@ void GCNIterativeScheduler::scheduleLegacyMaxOccupancy(
LLVM_DEBUG(dbgs() << "Scheduling using default scheduler, "
"target occupancy = "
<< TgtOcc << '\n');
- GCNMaxOccupancySchedStrategy LStrgy(Context);
+ GCNMaxOccupancySchedStrategy LStrgy(Context, /*IsLegacyScheduler*/ true);
unsigned FinalOccupancy = std::min(Occ, MFI->getOccupancy());
for (int I = 0; I < NumPasses; ++I) {
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.h b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
index a79e412ce33449..f78e4d7da0a1dd 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.h
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
@@ -176,7 +176,6 @@ class GCNRPTracker {
public:
// reset tracker and set live register set to the specified value.
void reset(const MachineRegisterInfo &MRI_, const LiveRegSet &LiveRegs_);
-
// live regs for the current state
const decltype(LiveRegs) &getLiveRegs() const { return LiveRegs; }
const MachineInstr *getLastTrackedMI() const { return LastTrackedMI; }
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index f1f28ed30c5e7e..4533b61baa72fc 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -67,6 +67,7 @@ const unsigned ScheduleMetrics::ScaleFactor = 100;
GCNSchedStrategy::GCNSchedStrategy(const MachineSchedContext *C)
: GenericScheduler(C), TargetOccupancy(0), MF(nullptr),
+ TheTracker(*C->LIS), TheUpwardTracker(*C->LIS),
HasHighPressure(false) {}
void GCNSchedStrategy::initialize(ScheduleDAGMI *DAG) {
@@ -156,14 +157,37 @@ static bool canUsePressureDiffs(const SUnit &SU) {
static void getRegisterPressures(bool AtTop,
const RegPressureTracker &RPTracker, SUnit *SU,
std::vector<unsigned> &Pressure,
- std::vector<unsigned> &MaxPressure) {
+ std::vector<unsigned> &MaxPressure,
+ GCNDownwardRPTracker &TheTracker,
+ GCNUpwardRPTracker &TheUpwardTracker,
+ ScheduleDAGMI *DAG) {
// getDownwardPressure() and getUpwardPressure() make temporary changes to
// the tracker, so we need to pass those function a non-const copy.
RegPressureTracker &TempTracker = const_cast<RegPressureTracker &>(RPTracker);
- if (AtTop)
- TempTracker.getDownwardPressure(SU->getInstr(), Pressure, MaxPressure);
- else
- TempTracker.getUpwardPressure(SU->getInstr(), Pressure, MaxPressure);
+ if (!GCNTrackers) {
+ if (AtTop)
+ TempTracker.getDownwardPressure(SU->getInstr(), Pressure, MaxPressure);
+ else
+ TempTracker.getUpwardPressure(SU->getInstr(), Pressure, MaxPressure);
+ } else {
+ if (AtTop) {
+ GCNDownwardRPTracker TempTopTracker(TheTracker);
+ auto MI = SU->getInstr();
+ TempTopTracker.advance(MI, true, DAG->getLIS());
+
+ Pressure[AMDGPU::RegisterPressureSets::SReg_32] = TempTopTracker.getPressure().getSGPRNum();
+ Pressure[AMDGPU::RegisterPressureSets::VGPR_32] = TempTopTracker.getPressure().getVGPRNum(false);
+ }
+
+ else {
+ GCNUpwardRPTracker TempBotTracker(TheUpwardTracker);
+ auto MI = SU->getInstr();
+ TempBotTracker.recede(*MI, true);
+
+ Pressure[AMDGPU::RegisterPressureSets::SReg_32] = TempBotTracker.getPressure().getSGPRNum();
+ Pressure[AMDGPU::RegisterPressureSets::VGPR_32] = TempBotTracker.getPressure().getVGPRNum(false);
+ }
+ }
}
void GCNSchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU,
@@ -192,8 +216,8 @@ void GCNSchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU,
//
// In EXPENSIVE_CHECKS, we always query RPTracker to verify the results of
// PressureDiffs.
- if (AtTop || !canUsePressureDiffs(*SU)) {
- getRegisterPressures(AtTop, RPTracker, SU, Pressure, MaxPressure);
+ if (AtTop || !canUsePressureDiffs(*SU) || GCNTrackers) {
+ getRegisterPressures(AtTop, RPTracker, SU, Pressure, MaxPressure, TheTracker, TheUpwardTracker, DAG);
} else {
// Reserve 4 slots.
Pressure.resize(4, 0);
@@ -211,7 +235,11 @@ void GCNSchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU,
#ifdef EXPENSIVE_CHECKS
std::vector<unsigned> CheckPressure, CheckMaxPressure;
+<<<<<<< HEAD
getRegisterPressures(AtTop, RPTracker, SU, CheckPressure, CheckMaxPressure);
+=======
+ getRegisterPressures(AtTop, RPTracker, SU, CheckPressure, CheckMaxPressure,TheTracker,TheUpwardTracker, DAG);
+>>>>>>> 3fc6929b4a78... [AMDGPU] Optionally Use AMDGPU RPTrackers during scheduling
if (Pressure[AMDGPU::RegisterPressureSets::SReg_32] !=
CheckPressure[AMDGPU::RegisterPressureSets::SReg_32] ||
Pressure[AMDGPU::RegisterPressureSets::VGPR_32] !=
@@ -299,8 +327,16 @@ void GCNSchedStrategy::pickNodeFromQueue(SchedBoundary &Zone,
unsigned SGPRPressure = 0;
unsigned VGPRPressure = 0;
if (DAG->isTrackingPressure()) {
- SGPRPressure = Pressure[AMDGPU::RegisterPressureSets::SReg_32];
- VGPRPressure = Pressure[AMDGPU::RegisterPressureSets::VGPR_32];
+ SGPRPressure =
+ GCNTrackers
+ ? (Zone.isTop() ? TheTracker.getPressure().getSGPRNum()
+ : TheUpwardTracker.getPressure().getSGPRNum())
+ : Pressure[AMDGPU::RegisterPressureSets::SReg_32];
+ VGPRPressure =
+ GCNTrackers
+ ? (Zone.isTop() ? TheTracker.getPressure().getVGPRNum(false)
+ : TheUpwardTracker.getPressure().getVGPRNum(false))
+ : Pressure[AMDGPU::RegisterPressureSets::VGPR_32];
}
ReadyQueue &Q = Zone.Available;
for (SUnit *SU : Q) {
@@ -449,6 +485,16 @@ SUnit *GCNSchedStrategy::pickNode(bool &IsTopNode) {
return SU;
}
+void GCNSchedStrategy::schedNode(SUnit *SU, bool IsTopNode) {
+ if (GCNTrackers) {
+ MachineInstr *MI = SU->getInstr();
+ IsTopNode ? (void)TheTracker.advance(MI, true, DAG->getLIS())
+ : TheUpwardTracker.recede(*MI, true);
+ }
+
+ return GenericScheduler::schedNode(SU, IsTopNode);
+}
+
GCNSchedStageID GCNSchedStrategy::getCurrentStage() {
assert(CurrentStage && CurrentStage != SchedStages.end());
return *CurrentStage;
@@ -475,12 +521,13 @@ GCNSchedStageID GCNSchedStrategy::getNextStage() const {
}
GCNMaxOccupancySchedStrategy::GCNMaxOccupancySchedStrategy(
- const MachineSchedContext *C)
+ const MachineSchedContext *C, bool IsLegacyScheduler)
: GCNSchedStrategy(C) {
SchedStages.push_back(GCNSchedStageID::OccInitialSchedule);
SchedStages.push_back(GCNSchedStageID::UnclusteredHighRPReschedule);
SchedStages.push_back(GCNSchedStageID::ClusteredLowOccupancyReschedule);
SchedStages.push_back(GCNSchedStageID::PreRARematerialize);
+ GCNTrackers = GCNTrackers & !IsLegacyScheduler;
}
GCNMaxILPSchedStrategy::GCNMaxILPSchedStrategy(const MachineSchedContext *C)
@@ -787,6 +834,20 @@ void GCNScheduleDAGMILive::runSchedStages() {
continue;
}
+ if (GCNTrackers) {
+ GCNDownwardRPTracker *TheTracker = S.getTracker();
+ GCNUpwardRPTracker *TheUpwardTracker = S.getUpwardTracker();
+ GCNRPTracker::LiveRegSet *RegionLiveIns = &LiveIns[Stage->getRegionIdx()];
+
+ reinterpret_cast<GCNRPTracker *>(TheTracker)->reset(
+ Regions[Stage->getRegionIdx()].first->getMF()->getRegInfo(),
+ *RegionLiveIns);
+ reinterpret_cast<GCNRPTracker *>(TheUpwardTracker)->reset(
+ Regions[Stage->getRegionIdx()].first->getMF()->getRegInfo(),
+ RegionLiveOuts.getLiveRegsForRegionIdx(Stage->getRegionIdx()));
+
+ }
+
ScheduleDAGMILive::schedule();
Stage->finalizeGCNRegion();
}
@@ -1057,6 +1118,7 @@ void GCNSchedStage::finalizeGCNRegion() {
void GCNSchedStage::checkScheduling() {
// Check the results of scheduling.
PressureAfter = DAG.getRealRegPressure(RegionIdx);
+
LLVM_DEBUG(dbgs() << "Pressure after scheduling: " << print(PressureAfter));
LLVM_DEBUG(dbgs() << "Region: " << RegionIdx << ".\n");
@@ -1608,9 +1670,6 @@ bool PreRARematStage::sinkTriviallyRematInsts(const GCNSubtarget &ST,
MachineInstr *MI = Entry.first;
MachineInstr *OldMI = Entry.second;
- // Remove OldMI from BBLiveInMap since we are sinking it from its MBB.
- DAG.BBLiveInMap.erase(OldMI);
-
// Remove OldMI and update LIS
Register Reg = MI->getOperand(0).getReg();
LIS->RemoveMachineInstrFromMaps(*OldMI);
@@ -1628,6 +1687,11 @@ bool PreRARematStage::sinkTriviallyRematInsts(const GCNSubtarget &ST,
DAG.Regions = NewRegions;
DAG.RescheduleRegions = NewRescheduleRegions;
+ DAG.BBLiveInMap = DAG.getBBLiveInMap();
+
+ if (GCNTrackers)
+ DAG.RegionLiveOuts.buildLiveRegMap();
+
SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
MFI.increaseOccupancy(MF, ++DAG.MinOccupancy);
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
index c402fb1ef373c9..8088339fbd26c2 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
@@ -70,6 +70,12 @@ class GCNSchedStrategy : public GenericScheduler {
// Pointer to the current SchedStageID.
SmallVectorImpl<GCNSchedStageID>::iterator CurrentStage = nullptr;
+ // GCN RP Tracker for top-down scheduling
+ mutable GCNDownwardRPTracker TheTracker;
+
+ // GCN RP Tracker for botttom-up scheduling
+ mutable GCNUpwardRPTracker TheUpwardTracker;
+
public:
// schedule() have seen register pressure over the critical limits and had to
// track register pressure for actual scheduling heuristics.
@@ -102,6 +108,8 @@ class GCNSchedStrategy : public GenericScheduler {
SUnit *pickNode(bool &IsTopNode) override;
+ void schedNode(SUnit *SU, bool IsTopNode) override;
+
void initialize(ScheduleDAGMI *DAG) override;
unsigned getTargetOccupancy() { return TargetOccupancy; }
@@ -116,13 +124,19 @@ class GCNSchedStrategy : public GenericScheduler {
bool hasNextStage() const;
GCNSchedStageID getNextStage() const;
+
+ GCNDownwardRPTracker *getTracker() { return &TheTracker; }
+
+ GCNUpwardRPTracker *getUpwardTracker() { return &TheUpwardTracker; }
+
};
/// The goal of this scheduling strategy is to maximize kernel occupancy (i.e.
/// maximum number of waves per simd).
class GCNMaxOccupancySchedStrategy final : public GCNSchedStrategy {
public:
- GCNMaxOccupancySchedStrategy(const MachineSchedContext *C);
+ GCNMaxOccupancySchedStrategy(const MachineSchedContext *C,
+ bool IsLegacyScheduler = false);
};
/// The goal of this scheduling strategy is to maximize ILP for a single wave
@@ -350,6 +364,9 @@ class GCNSchedStage {
bool isRegionWithExcessRP() const {
return DAG.RegionsWithExcessRP[RegionIdx];
}
+
+ // The region number this stage is currently working on
+ unsigned getRegionIdx() { return RegionIdx; }
// Returns true if the new schedule may result in more spilling.
bool mayCauseSpilling(unsigned WavesAfter);
>From 02e918d6d93203b1e4d07ec8da00f7d9b55ccf95 Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Fri, 14 Jun 2024 14:46:28 -0700
Subject: [PATCH 04/20] Formatting
Change-Id: I1cb0a88e94f4156da6118fcd3724556939351c6d
---
llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp | 46 +++++++++++----------
llvm/lib/Target/AMDGPU/GCNSchedStrategy.h | 3 +-
2 files changed, 25 insertions(+), 24 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index 4533b61baa72fc..23eb4afd166355 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -66,9 +66,8 @@ static cl::opt<bool> GCNTrackers(
const unsigned ScheduleMetrics::ScaleFactor = 100;
GCNSchedStrategy::GCNSchedStrategy(const MachineSchedContext *C)
- : GenericScheduler(C), TargetOccupancy(0), MF(nullptr),
- TheTracker(*C->LIS), TheUpwardTracker(*C->LIS),
- HasHighPressure(false) {}
+ : GenericScheduler(C), TargetOccupancy(0), MF(nullptr), TheTracker(*C->LIS),
+ TheUpwardTracker(*C->LIS), HasHighPressure(false) {}
void GCNSchedStrategy::initialize(ScheduleDAGMI *DAG) {
GenericScheduler::initialize(DAG);
@@ -175,8 +174,10 @@ static void getRegisterPressures(bool AtTop,
auto MI = SU->getInstr();
TempTopTracker.advance(MI, true, DAG->getLIS());
- Pressure[AMDGPU::RegisterPressureSets::SReg_32] = TempTopTracker.getPressure().getSGPRNum();
- Pressure[AMDGPU::RegisterPressureSets::VGPR_32] = TempTopTracker.getPressure().getVGPRNum(false);
+ Pressure[AMDGPU::RegisterPressureSets::SReg_32] =
+ TempTopTracker.getPressure().getSGPRNum();
+ Pressure[AMDGPU::RegisterPressureSets::VGPR_32] =
+ TempTopTracker.getPressure().getVGPRNum(false);
}
else {
@@ -184,8 +185,10 @@ static void getRegisterPressures(bool AtTop,
auto MI = SU->getInstr();
TempBotTracker.recede(*MI, true);
- Pressure[AMDGPU::RegisterPressureSets::SReg_32] = TempBotTracker.getPressure().getSGPRNum();
- Pressure[AMDGPU::RegisterPressureSets::VGPR_32] = TempBotTracker.getPressure().getVGPRNum(false);
+ Pressure[AMDGPU::RegisterPressureSets::SReg_32] =
+ TempBotTracker.getPressure().getSGPRNum();
+ Pressure[AMDGPU::RegisterPressureSets::VGPR_32] =
+ TempBotTracker.getPressure().getVGPRNum(false);
}
}
}
@@ -217,7 +220,8 @@ void GCNSchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU,
// In EXPENSIVE_CHECKS, we always query RPTracker to verify the results of
// PressureDiffs.
if (AtTop || !canUsePressureDiffs(*SU) || GCNTrackers) {
- getRegisterPressures(AtTop, RPTracker, SU, Pressure, MaxPressure, TheTracker, TheUpwardTracker, DAG);
+ getRegisterPressures(AtTop, RPTracker, SU, Pressure, MaxPressure,
+ TheTracker, TheUpwardTracker, DAG);
} else {
// Reserve 4 slots.
Pressure.resize(4, 0);
@@ -235,11 +239,8 @@ void GCNSchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU,
#ifdef EXPENSIVE_CHECKS
std::vector<unsigned> CheckPressure, CheckMaxPressure;
-<<<<<<< HEAD
- getRegisterPressures(AtTop, RPTracker, SU, CheckPressure, CheckMaxPressure);
-=======
- getRegisterPressures(AtTop, RPTracker, SU, CheckPressure, CheckMaxPressure,TheTracker,TheUpwardTracker, DAG);
->>>>>>> 3fc6929b4a78... [AMDGPU] Optionally Use AMDGPU RPTrackers during scheduling
+ getRegisterPressures(AtTop, RPTracker, SU, CheckPressure, CheckMaxPressure,
+ TheTracker, TheUpwardTracker, DAG);
if (Pressure[AMDGPU::RegisterPressureSets::SReg_32] !=
CheckPressure[AMDGPU::RegisterPressureSets::SReg_32] ||
Pressure[AMDGPU::RegisterPressureSets::VGPR_32] !=
@@ -837,15 +838,16 @@ void GCNScheduleDAGMILive::runSchedStages() {
if (GCNTrackers) {
GCNDownwardRPTracker *TheTracker = S.getTracker();
GCNUpwardRPTracker *TheUpwardTracker = S.getUpwardTracker();
- GCNRPTracker::LiveRegSet *RegionLiveIns = &LiveIns[Stage->getRegionIdx()];
-
- reinterpret_cast<GCNRPTracker *>(TheTracker)->reset(
- Regions[Stage->getRegionIdx()].first->getMF()->getRegInfo(),
- *RegionLiveIns);
- reinterpret_cast<GCNRPTracker *>(TheUpwardTracker)->reset(
- Regions[Stage->getRegionIdx()].first->getMF()->getRegInfo(),
- RegionLiveOuts.getLiveRegsForRegionIdx(Stage->getRegionIdx()));
-
+ GCNRPTracker::LiveRegSet *RegionLiveIns =
+ &LiveIns[Stage->getRegionIdx()];
+
+ reinterpret_cast<GCNRPTracker *>(TheTracker)
+ ->reset(Regions[Stage->getRegionIdx()].first->getMF()->getRegInfo(),
+ *RegionLiveIns);
+ reinterpret_cast<GCNRPTracker *>(TheUpwardTracker)
+ ->reset(
+ Regions[Stage->getRegionIdx()].first->getMF()->getRegInfo(),
+ RegionLiveOuts.getLiveRegsForRegionIdx(Stage->getRegionIdx()));
}
ScheduleDAGMILive::schedule();
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
index 8088339fbd26c2..e8c89b2f1baf27 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
@@ -128,7 +128,6 @@ class GCNSchedStrategy : public GenericScheduler {
GCNDownwardRPTracker *getTracker() { return &TheTracker; }
GCNUpwardRPTracker *getUpwardTracker() { return &TheUpwardTracker; }
-
};
/// The goal of this scheduling strategy is to maximize kernel occupancy (i.e.
@@ -364,7 +363,7 @@ class GCNSchedStage {
bool isRegionWithExcessRP() const {
return DAG.RegionsWithExcessRP[RegionIdx];
}
-
+
// The region number this stage is currently working on
unsigned getRegionIdx() { return RegionIdx; }
>From 8c94313e827273dec2dc49816fde9a2344d3fc20 Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Mon, 27 May 2024 10:43:43 -0700
Subject: [PATCH 05/20] Actually use the iterative trackers
Change-Id: I198925f5ed91b0a49ac265e19fdbe2208139f09a
---
llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index 23eb4afd166355..ead06c2dd6395f 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -172,7 +172,7 @@ static void getRegisterPressures(bool AtTop,
if (AtTop) {
GCNDownwardRPTracker TempTopTracker(TheTracker);
auto MI = SU->getInstr();
- TempTopTracker.advance(MI, true, DAG->getLIS());
+ TempTopTracker.advance(MI, false, DAG->getLIS());
Pressure[AMDGPU::RegisterPressureSets::SReg_32] =
TempTopTracker.getPressure().getSGPRNum();
@@ -183,7 +183,7 @@ static void getRegisterPressures(bool AtTop,
else {
GCNUpwardRPTracker TempBotTracker(TheUpwardTracker);
auto MI = SU->getInstr();
- TempBotTracker.recede(*MI, true);
+ TempBotTracker.recede(*MI, false);
Pressure[AMDGPU::RegisterPressureSets::SReg_32] =
TempBotTracker.getPressure().getSGPRNum();
@@ -489,8 +489,8 @@ SUnit *GCNSchedStrategy::pickNode(bool &IsTopNode) {
void GCNSchedStrategy::schedNode(SUnit *SU, bool IsTopNode) {
if (GCNTrackers) {
MachineInstr *MI = SU->getInstr();
- IsTopNode ? (void)TheTracker.advance(MI, true, DAG->getLIS())
- : TheUpwardTracker.recede(*MI, true);
+ IsTopNode ? (void)TheTracker.advance(MI, false, DAG->getLIS())
+ : TheUpwardTracker.recede(*MI, false);
}
return GenericScheduler::schedNode(SU, IsTopNode);
>From 08560336e957daa7558740764d0a9df68cbca51d Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Tue, 28 May 2024 13:24:09 -0700
Subject: [PATCH 06/20] Review Comments
Change-Id: Ifa69110bf0a239ea14d25c0bad03215d1b018656
---
.../Target/AMDGPU/GCNIterativeScheduler.cpp | 2 +-
llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp | 51 +++++++++----------
llvm/lib/Target/AMDGPU/GCNSchedStrategy.h | 8 +--
3 files changed, 30 insertions(+), 31 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp b/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
index 79656f5b2b9f48..1929ee6b89f4e4 100644
--- a/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
@@ -480,7 +480,7 @@ void GCNIterativeScheduler::scheduleLegacyMaxOccupancy(
LLVM_DEBUG(dbgs() << "Scheduling using default scheduler, "
"target occupancy = "
<< TgtOcc << '\n');
- GCNMaxOccupancySchedStrategy LStrgy(Context, /*IsLegacyScheduler*/ true);
+ GCNMaxOccupancySchedStrategy LStrgy(Context, /*IsLegacyScheduler=*/ true);
unsigned FinalOccupancy = std::min(Occ, MFI->getOccupancy());
for (int I = 0; I < NumPasses; ++I) {
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index ead06c2dd6395f..0d649682976057 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -66,8 +66,8 @@ static cl::opt<bool> GCNTrackers(
const unsigned ScheduleMetrics::ScaleFactor = 100;
GCNSchedStrategy::GCNSchedStrategy(const MachineSchedContext *C)
- : GenericScheduler(C), TargetOccupancy(0), MF(nullptr), TheTracker(*C->LIS),
- TheUpwardTracker(*C->LIS), HasHighPressure(false) {}
+ : GenericScheduler(C), TargetOccupancy(0), MF(nullptr), DownwardTracker(*C->LIS),
+ UpwardTracker(*C->LIS), HasHighPressure(false) {}
void GCNSchedStrategy::initialize(ScheduleDAGMI *DAG) {
GenericScheduler::initialize(DAG);
@@ -157,8 +157,8 @@ static void getRegisterPressures(bool AtTop,
const RegPressureTracker &RPTracker, SUnit *SU,
std::vector<unsigned> &Pressure,
std::vector<unsigned> &MaxPressure,
- GCNDownwardRPTracker &TheTracker,
- GCNUpwardRPTracker &TheUpwardTracker,
+ GCNDownwardRPTracker &DownwardTracker,
+ GCNUpwardRPTracker &UpwardTracker,
ScheduleDAGMI *DAG) {
// getDownwardPressure() and getUpwardPressure() make temporary changes to
// the tracker, so we need to pass those function a non-const copy.
@@ -170,7 +170,7 @@ static void getRegisterPressures(bool AtTop,
TempTracker.getUpwardPressure(SU->getInstr(), Pressure, MaxPressure);
} else {
if (AtTop) {
- GCNDownwardRPTracker TempTopTracker(TheTracker);
+ GCNDownwardRPTracker TempTopTracker(DownwardTracker);
auto MI = SU->getInstr();
TempTopTracker.advance(MI, false, DAG->getLIS());
@@ -181,7 +181,7 @@ static void getRegisterPressures(bool AtTop,
}
else {
- GCNUpwardRPTracker TempBotTracker(TheUpwardTracker);
+ GCNUpwardRPTracker TempBotTracker(UpwardTracker);
auto MI = SU->getInstr();
TempBotTracker.recede(*MI, false);
@@ -221,7 +221,7 @@ void GCNSchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU,
// PressureDiffs.
if (AtTop || !canUsePressureDiffs(*SU) || GCNTrackers) {
getRegisterPressures(AtTop, RPTracker, SU, Pressure, MaxPressure,
- TheTracker, TheUpwardTracker, DAG);
+ DownwardTracker, UpwardTracker, DAG);
} else {
// Reserve 4 slots.
Pressure.resize(4, 0);
@@ -240,7 +240,7 @@ void GCNSchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU,
#ifdef EXPENSIVE_CHECKS
std::vector<unsigned> CheckPressure, CheckMaxPressure;
getRegisterPressures(AtTop, RPTracker, SU, CheckPressure, CheckMaxPressure,
- TheTracker, TheUpwardTracker, DAG);
+ DownwardTracker, UpwardTracker, DAG);
if (Pressure[AMDGPU::RegisterPressureSets::SReg_32] !=
CheckPressure[AMDGPU::RegisterPressureSets::SReg_32] ||
Pressure[AMDGPU::RegisterPressureSets::VGPR_32] !=
@@ -330,13 +330,13 @@ void GCNSchedStrategy::pickNodeFromQueue(SchedBoundary &Zone,
if (DAG->isTrackingPressure()) {
SGPRPressure =
GCNTrackers
- ? (Zone.isTop() ? TheTracker.getPressure().getSGPRNum()
- : TheUpwardTracker.getPressure().getSGPRNum())
+ ? (Zone.isTop() ? DownwardTracker.getPressure().getSGPRNum()
+ : UpwardTracker.getPressure().getSGPRNum())
: Pressure[AMDGPU::RegisterPressureSets::SReg_32];
VGPRPressure =
GCNTrackers
- ? (Zone.isTop() ? TheTracker.getPressure().getVGPRNum(false)
- : TheUpwardTracker.getPressure().getVGPRNum(false))
+ ? (Zone.isTop() ? DownwardTracker.getPressure().getVGPRNum(false)
+ : UpwardTracker.getPressure().getVGPRNum(false))
: Pressure[AMDGPU::RegisterPressureSets::VGPR_32];
}
ReadyQueue &Q = Zone.Available;
@@ -489,8 +489,8 @@ SUnit *GCNSchedStrategy::pickNode(bool &IsTopNode) {
void GCNSchedStrategy::schedNode(SUnit *SU, bool IsTopNode) {
if (GCNTrackers) {
MachineInstr *MI = SU->getInstr();
- IsTopNode ? (void)TheTracker.advance(MI, false, DAG->getLIS())
- : TheUpwardTracker.recede(*MI, false);
+ IsTopNode ? (void)DownwardTracker.advance(MI, false, DAG->getLIS())
+ : UpwardTracker.recede(*MI, false);
}
return GenericScheduler::schedNode(SU, IsTopNode);
@@ -836,18 +836,17 @@ void GCNScheduleDAGMILive::runSchedStages() {
}
if (GCNTrackers) {
- GCNDownwardRPTracker *TheTracker = S.getTracker();
- GCNUpwardRPTracker *TheUpwardTracker = S.getUpwardTracker();
- GCNRPTracker::LiveRegSet *RegionLiveIns =
- &LiveIns[Stage->getRegionIdx()];
-
- reinterpret_cast<GCNRPTracker *>(TheTracker)
- ->reset(Regions[Stage->getRegionIdx()].first->getMF()->getRegInfo(),
- *RegionLiveIns);
- reinterpret_cast<GCNRPTracker *>(TheUpwardTracker)
- ->reset(
- Regions[Stage->getRegionIdx()].first->getMF()->getRegInfo(),
- RegionLiveOuts.getLiveRegsForRegionIdx(Stage->getRegionIdx()));
+ GCNDownwardRPTracker *DownwardTracker = S.getDownwardTracker();
+ GCNUpwardRPTracker *UpwardTracker = S.getUpwardTracker();
+ GCNRPTracker::LiveRegSet *RegionLiveIns = &LiveIns[Stage->getRegionIdx()];
+
+ reinterpret_cast<GCNRPTracker *>(DownwardTracker)->reset(
+ Regions[Stage->getRegionIdx()].first->getMF()->getRegInfo(),
+ *RegionLiveIns);
+ reinterpret_cast<GCNRPTracker *>(UpwardTracker)->reset(
+ Regions[Stage->getRegionIdx()].first->getMF()->getRegInfo(),
+ RegionLiveOuts.getLiveRegsForRegionIdx(Stage->getRegionIdx()));
+
}
ScheduleDAGMILive::schedule();
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
index e8c89b2f1baf27..91b4c0c63d2bb3 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
@@ -71,10 +71,10 @@ class GCNSchedStrategy : public GenericScheduler {
SmallVectorImpl<GCNSchedStageID>::iterator CurrentStage = nullptr;
// GCN RP Tracker for top-down scheduling
- mutable GCNDownwardRPTracker TheTracker;
+ mutable GCNDownwardRPTracker DownwardTracker;
// GCN RP Tracker for botttom-up scheduling
- mutable GCNUpwardRPTracker TheUpwardTracker;
+ mutable GCNUpwardRPTracker UpwardTracker;
public:
// schedule() have seen register pressure over the critical limits and had to
@@ -125,9 +125,9 @@ class GCNSchedStrategy : public GenericScheduler {
GCNSchedStageID getNextStage() const;
- GCNDownwardRPTracker *getTracker() { return &TheTracker; }
+ GCNDownwardRPTracker *getDownwardTracker() { return &DownwardTracker; }
- GCNUpwardRPTracker *getUpwardTracker() { return &TheUpwardTracker; }
+ GCNUpwardRPTracker *getUpwardTracker() { return &UpwardTracker; }
};
/// The goal of this scheduling strategy is to maximize kernel occupancy (i.e.
>From 1d2c412ecda697efdd27d5f2fde4cbe982d5123f Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Tue, 28 May 2024 13:29:41 -0700
Subject: [PATCH 07/20] Use DAG.MRI
Change-Id: I9f0275a0cede9e77dfd29262124f2a856f436c8c
---
llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp | 12 +++++-------
1 file changed, 5 insertions(+), 7 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index 0d649682976057..e1bbb7ccd1312c 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -840,13 +840,11 @@ void GCNScheduleDAGMILive::runSchedStages() {
GCNUpwardRPTracker *UpwardTracker = S.getUpwardTracker();
GCNRPTracker::LiveRegSet *RegionLiveIns = &LiveIns[Stage->getRegionIdx()];
- reinterpret_cast<GCNRPTracker *>(DownwardTracker)->reset(
- Regions[Stage->getRegionIdx()].first->getMF()->getRegInfo(),
- *RegionLiveIns);
- reinterpret_cast<GCNRPTracker *>(UpwardTracker)->reset(
- Regions[Stage->getRegionIdx()].first->getMF()->getRegInfo(),
- RegionLiveOuts.getLiveRegsForRegionIdx(Stage->getRegionIdx()));
-
+ reinterpret_cast<GCNRPTracker *>(DownwardTracker)
+ ->reset(MRI, *RegionLiveIns);
+ reinterpret_cast<GCNRPTracker *>(UpwardTracker)
+ ->reset(MRI, RegionLiveOuts.getLiveRegsForRegionIdx(
+ Stage->getRegionIdx()));
}
ScheduleDAGMILive::schedule();
>From 9be299795e80d44c096dc3663fd8d01d47abe555 Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Tue, 28 May 2024 13:52:29 -0700
Subject: [PATCH 08/20] Formatting
Change-Id: I74c19a2cf20d2325178933f81e0e8716d7c62f17
---
llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp | 2 +-
llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp | 15 ++++++++-------
2 files changed, 9 insertions(+), 8 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp b/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
index 1929ee6b89f4e4..085eb8e37e3cd2 100644
--- a/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
@@ -480,7 +480,7 @@ void GCNIterativeScheduler::scheduleLegacyMaxOccupancy(
LLVM_DEBUG(dbgs() << "Scheduling using default scheduler, "
"target occupancy = "
<< TgtOcc << '\n');
- GCNMaxOccupancySchedStrategy LStrgy(Context, /*IsLegacyScheduler=*/ true);
+ GCNMaxOccupancySchedStrategy LStrgy(Context, /*IsLegacyScheduler=*/true);
unsigned FinalOccupancy = std::min(Occ, MFI->getOccupancy());
for (int I = 0; I < NumPasses; ++I) {
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index e1bbb7ccd1312c..cc03b69a614704 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -66,8 +66,9 @@ static cl::opt<bool> GCNTrackers(
const unsigned ScheduleMetrics::ScaleFactor = 100;
GCNSchedStrategy::GCNSchedStrategy(const MachineSchedContext *C)
- : GenericScheduler(C), TargetOccupancy(0), MF(nullptr), DownwardTracker(*C->LIS),
- UpwardTracker(*C->LIS), HasHighPressure(false) {}
+ : GenericScheduler(C), TargetOccupancy(0), MF(nullptr),
+ DownwardTracker(*C->LIS), UpwardTracker(*C->LIS), HasHighPressure(false) {
+}
void GCNSchedStrategy::initialize(ScheduleDAGMI *DAG) {
GenericScheduler::initialize(DAG);
@@ -329,10 +330,9 @@ void GCNSchedStrategy::pickNodeFromQueue(SchedBoundary &Zone,
unsigned VGPRPressure = 0;
if (DAG->isTrackingPressure()) {
SGPRPressure =
- GCNTrackers
- ? (Zone.isTop() ? DownwardTracker.getPressure().getSGPRNum()
- : UpwardTracker.getPressure().getSGPRNum())
- : Pressure[AMDGPU::RegisterPressureSets::SReg_32];
+ GCNTrackers ? (Zone.isTop() ? DownwardTracker.getPressure().getSGPRNum()
+ : UpwardTracker.getPressure().getSGPRNum())
+ : Pressure[AMDGPU::RegisterPressureSets::SReg_32];
VGPRPressure =
GCNTrackers
? (Zone.isTop() ? DownwardTracker.getPressure().getVGPRNum(false)
@@ -838,7 +838,8 @@ void GCNScheduleDAGMILive::runSchedStages() {
if (GCNTrackers) {
GCNDownwardRPTracker *DownwardTracker = S.getDownwardTracker();
GCNUpwardRPTracker *UpwardTracker = S.getUpwardTracker();
- GCNRPTracker::LiveRegSet *RegionLiveIns = &LiveIns[Stage->getRegionIdx()];
+ GCNRPTracker::LiveRegSet *RegionLiveIns =
+ &LiveIns[Stage->getRegionIdx()];
reinterpret_cast<GCNRPTracker *>(DownwardTracker)
->reset(MRI, *RegionLiveIns);
>From f9b5af5f20796652e6f2f88186e64d59698ec299 Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Fri, 14 Jun 2024 15:03:02 -0700
Subject: [PATCH 09/20] Review comments
Change-Id: I09f9ca74c07b516daed0e93a85937df8b9aa922b
---
llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp | 19 ++++++++++---------
1 file changed, 10 insertions(+), 9 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index cc03b69a614704..b9b701ba538fb2 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -329,15 +329,16 @@ void GCNSchedStrategy::pickNodeFromQueue(SchedBoundary &Zone,
unsigned SGPRPressure = 0;
unsigned VGPRPressure = 0;
if (DAG->isTrackingPressure()) {
- SGPRPressure =
- GCNTrackers ? (Zone.isTop() ? DownwardTracker.getPressure().getSGPRNum()
- : UpwardTracker.getPressure().getSGPRNum())
- : Pressure[AMDGPU::RegisterPressureSets::SReg_32];
- VGPRPressure =
- GCNTrackers
- ? (Zone.isTop() ? DownwardTracker.getPressure().getVGPRNum(false)
- : UpwardTracker.getPressure().getVGPRNum(false))
- : Pressure[AMDGPU::RegisterPressureSets::VGPR_32];
+ if (!GCNTrackers) {
+ SGPRPressure = Pressure[AMDGPU::RegisterPressureSets::SReg_32];
+ VGPRPressure = Pressure[AMDGPU::RegisterPressureSets::VGPR_32];
+ } else {
+ GCNRPTracker *T = &UpwardTracker;
+ if (Zone.isTop())
+ T = &DownwardTracker;
+ SGPRPressure = T->getPressure().getSGPRNum();
+ VGPRPressure = T->getPressure().getVGPRNum(false);
+ }
}
ReadyQueue &Q = Zone.Available;
for (SUnit *SU : Q) {
>From c4d8ebb57604387bc3019c4650b70e0a00683491 Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Fri, 14 Jun 2024 16:14:57 -0700
Subject: [PATCH 10/20] Allocate Pressure vector
Change-Id: I5effce973fa2d945076e89b4453a844f0fc85fc9
---
llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp | 1 +
1 file changed, 1 insertion(+)
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index b9b701ba538fb2..48d0d6e8d66676 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -170,6 +170,7 @@ static void getRegisterPressures(bool AtTop,
else
TempTracker.getUpwardPressure(SU->getInstr(), Pressure, MaxPressure);
} else {
+ Pressure.resize(4, 0);
if (AtTop) {
GCNDownwardRPTracker TempTopTracker(DownwardTracker);
auto MI = SU->getInstr();
>From fcce7ffe7f438a386f2e2153f397b0bcd1ebf5b6 Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Tue, 18 Jun 2024 11:39:48 -0700
Subject: [PATCH 11/20] Remove flag from upward RPTracker
Change-Id: I6217c03f56d34f584e5b23cf7c4462842bc7173b
---
llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index 48d0d6e8d66676..27003cdd67d598 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -185,7 +185,7 @@ static void getRegisterPressures(bool AtTop,
else {
GCNUpwardRPTracker TempBotTracker(UpwardTracker);
auto MI = SU->getInstr();
- TempBotTracker.recede(*MI, false);
+ TempBotTracker.recede(*MI);
Pressure[AMDGPU::RegisterPressureSets::SReg_32] =
TempBotTracker.getPressure().getSGPRNum();
@@ -492,7 +492,7 @@ void GCNSchedStrategy::schedNode(SUnit *SU, bool IsTopNode) {
if (GCNTrackers) {
MachineInstr *MI = SU->getInstr();
IsTopNode ? (void)DownwardTracker.advance(MI, false, DAG->getLIS())
- : UpwardTracker.recede(*MI, false);
+ : UpwardTracker.recede(*MI);
}
return GenericScheduler::schedNode(SU, IsTopNode);
>From 0e70bac9e055553da6940dcad5b658f1927ff36c Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Wed, 19 Jun 2024 11:45:32 -0700
Subject: [PATCH 12/20] Review comments
Change-Id: Ibeaba6cab034636472b20c36adfadabbbc2c19ef
---
llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp | 53 ++++++++++-----------
1 file changed, 25 insertions(+), 28 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index 27003cdd67d598..8976ae111037f3 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -165,33 +165,30 @@ static void getRegisterPressures(bool AtTop,
// the tracker, so we need to pass those function a non-const copy.
RegPressureTracker &TempTracker = const_cast<RegPressureTracker &>(RPTracker);
if (!GCNTrackers) {
- if (AtTop)
- TempTracker.getDownwardPressure(SU->getInstr(), Pressure, MaxPressure);
- else
- TempTracker.getUpwardPressure(SU->getInstr(), Pressure, MaxPressure);
- } else {
- Pressure.resize(4, 0);
- if (AtTop) {
- GCNDownwardRPTracker TempTopTracker(DownwardTracker);
- auto MI = SU->getInstr();
- TempTopTracker.advance(MI, false, DAG->getLIS());
-
- Pressure[AMDGPU::RegisterPressureSets::SReg_32] =
- TempTopTracker.getPressure().getSGPRNum();
- Pressure[AMDGPU::RegisterPressureSets::VGPR_32] =
- TempTopTracker.getPressure().getVGPRNum(false);
- }
+ AtTop
+ ? TempTracker.getDownwardPressure(SU->getInstr(), Pressure, MaxPressure)
+ : TempTracker.getUpwardPressure(SU->getInstr(), Pressure, MaxPressure);
- else {
- GCNUpwardRPTracker TempBotTracker(UpwardTracker);
- auto MI = SU->getInstr();
- TempBotTracker.recede(*MI);
+ return;
+ }
- Pressure[AMDGPU::RegisterPressureSets::SReg_32] =
- TempBotTracker.getPressure().getSGPRNum();
- Pressure[AMDGPU::RegisterPressureSets::VGPR_32] =
- TempBotTracker.getPressure().getVGPRNum(false);
- }
+ // GCNTrackers
+ Pressure.resize(4, 0);
+ MachineInstr *MI = SU->getInstr();
+ if (AtTop) {
+ GCNDownwardRPTracker TempDownwardTracker(DownwardTracker);
+ TempDownwardTracker.advance(MI, false, DAG->getLIS());
+ Pressure[AMDGPU::RegisterPressureSets::SReg_32] =
+ TempDownwardTracker.getPressure().getSGPRNum();
+ Pressure[AMDGPU::RegisterPressureSets::VGPR_32] =
+ TempDownwardTracker.getPressure().getVGPRNum(false);
+ } else {
+ GCNUpwardRPTracker TempUpwardTracker(UpwardTracker);
+ TempUpwardTracker.recede(*MI);
+ Pressure[AMDGPU::RegisterPressureSets::SReg_32] =
+ TempUpwardTracker.getPressure().getSGPRNum();
+ Pressure[AMDGPU::RegisterPressureSets::VGPR_32] =
+ TempUpwardTracker.getPressure().getVGPRNum(false);
}
}
@@ -334,9 +331,9 @@ void GCNSchedStrategy::pickNodeFromQueue(SchedBoundary &Zone,
SGPRPressure = Pressure[AMDGPU::RegisterPressureSets::SReg_32];
VGPRPressure = Pressure[AMDGPU::RegisterPressureSets::VGPR_32];
} else {
- GCNRPTracker *T = &UpwardTracker;
- if (Zone.isTop())
- T = &DownwardTracker;
+ GCNRPTracker *T = Zone.isTop()
+ ? static_cast<GCNRPTracker *>(&UpwardTracker)
+ : static_cast<GCNRPTracker *>(&DownwardTracker);
SGPRPressure = T->getPressure().getSGPRNum();
VGPRPressure = T->getPressure().getVGPRNum(false);
}
>From 415e531e98a99b052edf9f788a674fa35ab8cbd9 Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Thu, 20 Jun 2024 08:49:26 -0700
Subject: [PATCH 13/20] Don't modify existing PreRARematStage LiveIn handling
Change-Id: I96c99f12c59ef0eea86f7fbf134913ecc47dd6f2
---
llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index 8976ae111037f3..8c2b7ffb3f202e 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -1669,6 +1669,9 @@ bool PreRARematStage::sinkTriviallyRematInsts(const GCNSubtarget &ST,
MachineInstr *MI = Entry.first;
MachineInstr *OldMI = Entry.second;
+ // Remove OldMI from BBLiveInMap since we are sinking it from its MBB.
+ DAG.BBLiveInMap.erase(OldMI);
+
// Remove OldMI and update LIS
Register Reg = MI->getOperand(0).getReg();
LIS->RemoveMachineInstrFromMaps(*OldMI);
@@ -1686,8 +1689,6 @@ bool PreRARematStage::sinkTriviallyRematInsts(const GCNSubtarget &ST,
DAG.Regions = NewRegions;
DAG.RescheduleRegions = NewRescheduleRegions;
- DAG.BBLiveInMap = DAG.getBBLiveInMap();
-
if (GCNTrackers)
DAG.RegionLiveOuts.buildLiveRegMap();
>From 5ce0ab561d7f0ea1b1a1f450101b9bf7f53724ce Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Mon, 12 Aug 2024 13:55:44 -0700
Subject: [PATCH 14/20] Use GCNTracker RP speculation
Change-Id: I3e893ca2ffcf1032fe157b537c9563565215b123
---
llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index 8c2b7ffb3f202e..8c50f5b35d122b 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -177,18 +177,18 @@ static void getRegisterPressures(bool AtTop,
MachineInstr *MI = SU->getInstr();
if (AtTop) {
GCNDownwardRPTracker TempDownwardTracker(DownwardTracker);
- TempDownwardTracker.advance(MI, false, DAG->getLIS());
+ TempDownwardTracker.bumpDownwardPressure(MI);
Pressure[AMDGPU::RegisterPressureSets::SReg_32] =
TempDownwardTracker.getPressure().getSGPRNum();
Pressure[AMDGPU::RegisterPressureSets::VGPR_32] =
- TempDownwardTracker.getPressure().getVGPRNum(false);
+ TempDownwardTracker.getPressure().getArchVGPRNum();
} else {
GCNUpwardRPTracker TempUpwardTracker(UpwardTracker);
- TempUpwardTracker.recede(*MI);
+ TempUpwardTracker.bumpUpwardPressure(MI);
Pressure[AMDGPU::RegisterPressureSets::SReg_32] =
TempUpwardTracker.getPressure().getSGPRNum();
Pressure[AMDGPU::RegisterPressureSets::VGPR_32] =
- TempUpwardTracker.getPressure().getVGPRNum(false);
+ TempUpwardTracker.getPressure().getArchVGPRNum();
}
}
>From 11203f3a7ba22d13d80bbb0dc12d324df2982542 Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Tue, 20 Aug 2024 12:29:33 -0700
Subject: [PATCH 15/20] Port changes from pull/93088
Change-Id: I2de464b32d3c6ed9a77cbbc669d735dde63c2e47
---
llvm/lib/Target/AMDGPU/GCNRegPressure.cpp | 45 +++++++++++++----------
1 file changed, 25 insertions(+), 20 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
index c6bffc64401368..450406f036b5c6 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
@@ -298,11 +298,11 @@ static LaneBitmask getRegLanes(ArrayRef<RegisterMaskPair> RegUnits,
return I->LaneMask;
}
-static LaneBitmask
-getLanesWithProperty(const LiveIntervals &LIS, const MachineRegisterInfo &MRI,
- bool TrackLaneMasks, Register RegUnit, SlotIndex Pos,
- LaneBitmask SafeDefault,
- bool (*Property)(const LiveRange &LR, SlotIndex Pos)) {
+static LaneBitmask getLanesWithProperty(
+ const LiveIntervals &LIS, const MachineRegisterInfo &MRI,
+ bool TrackLaneMasks, Register RegUnit, SlotIndex Pos,
+ LaneBitmask SafeDefault,
+ function_ref<bool(const LiveRange &LR, SlotIndex Pos)> Property) {
if (RegUnit.isVirtual()) {
const LiveInterval &LI = LIS.getInterval(RegUnit);
LaneBitmask Result;
@@ -317,14 +317,14 @@ getLanesWithProperty(const LiveIntervals &LIS, const MachineRegisterInfo &MRI,
}
return Result;
- } else {
- const LiveRange *LR = LIS.getCachedRegUnit(RegUnit);
- // Be prepared for missing liveranges: We usually do not compute liveranges
- // for physical registers on targets with many registers (GPUs).
- if (LR == nullptr)
- return SafeDefault;
- return Property(*LR, Pos) ? LaneBitmask::getAll() : LaneBitmask::getNone();
}
+
+ const LiveRange *LR = LIS.getCachedRegUnit(RegUnit);
+ // Be prepared for missing liveranges: We usually do not compute liveranges
+ // for physical registers on targets with many registers (GPUs).
+ if (LR == nullptr)
+ return SafeDefault;
+ return Property(*LR, Pos) ? LaneBitmask::getAll() : LaneBitmask::getNone();
}
/// Helper to find a vreg use between two indices [PriorUseIdx, NextUseIdx).
@@ -333,19 +333,21 @@ getLanesWithProperty(const LiveIntervals &LIS, const MachineRegisterInfo &MRI,
static LaneBitmask findUseBetween(unsigned Reg, LaneBitmask LastUseMask,
SlotIndex PriorUseIdx, SlotIndex NextUseIdx,
const MachineRegisterInfo &MRI,
+ const SIRegisterInfo *TRI,
const LiveIntervals *LIS,
bool Upward = false) {
- const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
- for (const MachineOperand &MO : MRI.use_nodbg_operands(Reg)) {
+ for (const MachineOperand &MO : MRI.reg_nodbg_operands(Reg)) {
if (MO.isUndef())
continue;
+ if (!MO.readsReg())
+ continue;
const MachineInstr *MI = MO.getParent();
SlotIndex InstSlot = LIS->getInstructionIndex(*MI).getRegSlot();
bool InRange = Upward ? (InstSlot > PriorUseIdx && InstSlot <= NextUseIdx)
: (InstSlot >= PriorUseIdx && InstSlot < NextUseIdx);
if (InRange) {
unsigned SubRegIdx = MO.getSubReg();
- LaneBitmask UseMask = TRI.getSubRegIndexLaneMask(SubRegIdx);
+ LaneBitmask UseMask = TRI->getSubRegIndexLaneMask(SubRegIdx);
LastUseMask &= ~UseMask;
if (LastUseMask.none())
return LaneBitmask::getNone();
@@ -517,7 +519,9 @@ void GCNUpwardRPTracker::bumpUpwardPressure(const MachineInstr *MI) {
// Account for register pressure similar to RegPressureTracker::recede().
RegisterOperands RegOpers;
- const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo();
+
+ const SIRegisterInfo *TRI =
+ MI->getMF()->getSubtarget<GCNSubtarget>().getRegisterInfo();
RegOpers.collect(*MI, *TRI, *MRI, true, /*IgnoreDead=*/true);
assert(RegOpers.DeadDefs.empty());
RegOpers.adjustLaneLiveness(LIS, *MRI, SlotIdx);
@@ -558,8 +562,8 @@ void GCNUpwardRPTracker::bumpUpwardPressure(const MachineInstr *MI) {
LastTrackedMI ? LIS.getInstructionIndex(*LastTrackedMI).getRegSlot()
: LIS.getMBBEndIdx(MI->getParent());
;
- LaneBitmask LastUseMask =
- findUseBetween(Reg, P.LaneMask, SlotIdx, CurrIdx, *MRI, &LIS, true);
+ LaneBitmask LastUseMask = findUseBetween(Reg, P.LaneMask, SlotIdx, CurrIdx,
+ *MRI, TRI, &LIS, true);
LastUseMask &= ~LiveAfter;
LaneBitmask LiveBefore = (LiveAfter | LastUseMask);
CurPressure.inc(Reg, LiveAfter, LiveBefore, *MRI);
@@ -733,7 +737,8 @@ void GCNDownwardRPTracker::bumpDownwardPressure(const MachineInstr *MI) {
// Account for register pressure similar to RegPressureTracker::recede().
RegisterOperands RegOpers;
- const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo();
+ const SIRegisterInfo *TRI =
+ MI->getMF()->getSubtarget<GCNSubtarget>().getRegisterInfo();
RegOpers.collect(*MI, *TRI, *MRI, true, /*IgnoreDead=*/false);
RegOpers.adjustLaneLiveness(LIS, *MRI, SlotIdx);
@@ -760,7 +765,7 @@ void GCNDownwardRPTracker::bumpDownwardPressure(const MachineInstr *MI) {
}
LastUseMask =
- findUseBetween(Reg, LastUseMask, CurrIdx, SlotIdx, *MRI, &LIS);
+ findUseBetween(Reg, LastUseMask, CurrIdx, SlotIdx, *MRI, TRI, &LIS);
if (LastUseMask.none())
continue;
>From 97db186f9413591f70e93075b18c08702a750d81 Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Wed, 21 Aug 2024 15:16:05 -0700
Subject: [PATCH 16/20] Port changes from pull/93088
---
llvm/lib/Target/AMDGPU/GCNRegPressure.cpp | 16 +++++-----------
llvm/lib/Target/AMDGPU/GCNRegPressure.h | 4 ++--
2 files changed, 7 insertions(+), 13 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
index 450406f036b5c6..e2db40eda07b05 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
@@ -320,8 +320,6 @@ static LaneBitmask getLanesWithProperty(
}
const LiveRange *LR = LIS.getCachedRegUnit(RegUnit);
- // Be prepared for missing liveranges: We usually do not compute liveranges
- // for physical registers on targets with many registers (GPUs).
if (LR == nullptr)
return SafeDefault;
return Property(*LR, Pos) ? LaneBitmask::getAll() : LaneBitmask::getNone();
@@ -336,11 +334,9 @@ static LaneBitmask findUseBetween(unsigned Reg, LaneBitmask LastUseMask,
const SIRegisterInfo *TRI,
const LiveIntervals *LIS,
bool Upward = false) {
- for (const MachineOperand &MO : MRI.reg_nodbg_operands(Reg)) {
+ for (const MachineOperand &MO : MRI.use_nodbg_operands(Reg)) {
if (MO.isUndef())
continue;
- if (!MO.readsReg())
- continue;
const MachineInstr *MI = MO.getParent();
SlotIndex InstSlot = LIS->getInstructionIndex(*MI).getRegSlot();
bool InRange = Upward ? (InstSlot > PriorUseIdx && InstSlot <= NextUseIdx)
@@ -512,7 +508,8 @@ void GCNUpwardRPTracker::recede(const MachineInstr &MI) {
assert(CurPressure == getRegPressure(*MRI, LiveRegs));
}
-void GCNUpwardRPTracker::bumpUpwardPressure(const MachineInstr *MI) {
+void GCNUpwardRPTracker::bumpUpwardPressure(const MachineInstr *MI,
+ const SIRegisterInfo *TRI) {
assert(!MI->isDebugOrPseudoInstr() && "Expect a nondebug instruction.");
SlotIndex SlotIdx = LIS.getInstructionIndex(*MI).getRegSlot();
@@ -520,8 +517,6 @@ void GCNUpwardRPTracker::bumpUpwardPressure(const MachineInstr *MI) {
// Account for register pressure similar to RegPressureTracker::recede().
RegisterOperands RegOpers;
- const SIRegisterInfo *TRI =
- MI->getMF()->getSubtarget<GCNSubtarget>().getRegisterInfo();
RegOpers.collect(*MI, *TRI, *MRI, true, /*IgnoreDead=*/true);
assert(RegOpers.DeadDefs.empty());
RegOpers.adjustLaneLiveness(LIS, *MRI, SlotIdx);
@@ -729,7 +724,8 @@ Printable llvm::reportMismatch(const GCNRPTracker::LiveRegSet &LISLR,
});
}
-void GCNDownwardRPTracker::bumpDownwardPressure(const MachineInstr *MI) {
+void GCNDownwardRPTracker::bumpDownwardPressure(const MachineInstr *MI,
+ const SIRegisterInfo *TRI) {
assert(!MI->isDebugOrPseudoInstr() && "Expect a nondebug instruction.");
SlotIndex SlotIdx;
@@ -737,8 +733,6 @@ void GCNDownwardRPTracker::bumpDownwardPressure(const MachineInstr *MI) {
// Account for register pressure similar to RegPressureTracker::recede().
RegisterOperands RegOpers;
- const SIRegisterInfo *TRI =
- MI->getMF()->getSubtarget<GCNSubtarget>().getRegisterInfo();
RegOpers.collect(*MI, *TRI, *MRI, true, /*IgnoreDead=*/false);
RegOpers.adjustLaneLiveness(LIS, *MRI, SlotIdx);
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.h b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
index f78e4d7da0a1dd..5f9434f91efc64 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.h
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
@@ -222,7 +222,7 @@ class GCNUpwardRPTracker : public GCNRPTracker {
/// to false allows for an externally managed iterator / program order.
void recede(const MachineInstr &MI);
- void bumpUpwardPressure(const MachineInstr *MI);
+ void bumpUpwardPressure(const MachineInstr *MI, const SIRegisterInfo *TRI);
/// \p returns whether the tracker's state after receding MI corresponds
/// to reported by LIS.
@@ -306,7 +306,7 @@ class GCNDownwardRPTracker : public GCNRPTracker {
MachineBasicBlock::const_iterator End,
const LiveRegSet *LiveRegsCopy = nullptr);
- void bumpDownwardPressure(const MachineInstr *MI);
+ void bumpDownwardPressure(const MachineInstr *MI, const SIRegisterInfo *TRI);
};
LaneBitmask getLiveLaneMask(unsigned Reg,
>From 6927ee8f9f453d9f5191acf6d519171388a625e3 Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Wed, 21 Aug 2024 15:34:33 -0700
Subject: [PATCH 17/20] Feed SIRegisterInfo to Trackers + Propagate unused AGPR
speculative pressure + Use correct previous VGPR pressure
---
llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp | 26 +++++++++++----------
1 file changed, 14 insertions(+), 12 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index 8c50f5b35d122b..40d17f8e0395ef 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -154,13 +154,11 @@ static bool canUsePressureDiffs(const SUnit &SU) {
return true;
}
-static void getRegisterPressures(bool AtTop,
- const RegPressureTracker &RPTracker, SUnit *SU,
- std::vector<unsigned> &Pressure,
- std::vector<unsigned> &MaxPressure,
- GCNDownwardRPTracker &DownwardTracker,
- GCNUpwardRPTracker &UpwardTracker,
- ScheduleDAGMI *DAG) {
+static void getRegisterPressures(
+ bool AtTop, const RegPressureTracker &RPTracker, SUnit *SU,
+ std::vector<unsigned> &Pressure, std::vector<unsigned> &MaxPressure,
+ GCNDownwardRPTracker &DownwardTracker, GCNUpwardRPTracker &UpwardTracker,
+ ScheduleDAGMI *DAG, const SIRegisterInfo *SRI) {
// getDownwardPressure() and getUpwardPressure() make temporary changes to
// the tracker, so we need to pass those function a non-const copy.
RegPressureTracker &TempTracker = const_cast<RegPressureTracker &>(RPTracker);
@@ -177,18 +175,22 @@ static void getRegisterPressures(bool AtTop,
MachineInstr *MI = SU->getInstr();
if (AtTop) {
GCNDownwardRPTracker TempDownwardTracker(DownwardTracker);
- TempDownwardTracker.bumpDownwardPressure(MI);
+ TempDownwardTracker.bumpDownwardPressure(MI, SRI);
Pressure[AMDGPU::RegisterPressureSets::SReg_32] =
TempDownwardTracker.getPressure().getSGPRNum();
Pressure[AMDGPU::RegisterPressureSets::VGPR_32] =
TempDownwardTracker.getPressure().getArchVGPRNum();
+ Pressure[AMDGPU::RegisterPressureSets::AGPR_32] =
+ TempDownwardTracker.getPressure().getAGPRNum();
} else {
GCNUpwardRPTracker TempUpwardTracker(UpwardTracker);
- TempUpwardTracker.bumpUpwardPressure(MI);
+ TempUpwardTracker.bumpUpwardPressure(MI, SRI);
Pressure[AMDGPU::RegisterPressureSets::SReg_32] =
TempUpwardTracker.getPressure().getSGPRNum();
Pressure[AMDGPU::RegisterPressureSets::VGPR_32] =
TempUpwardTracker.getPressure().getArchVGPRNum();
+ Pressure[AMDGPU::RegisterPressureSets::AGPR_32] =
+ TempDownwardTracker.getPressure().getAGPRNum();
}
}
@@ -220,7 +222,7 @@ void GCNSchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU,
// PressureDiffs.
if (AtTop || !canUsePressureDiffs(*SU) || GCNTrackers) {
getRegisterPressures(AtTop, RPTracker, SU, Pressure, MaxPressure,
- DownwardTracker, UpwardTracker, DAG);
+ DownwardTracker, UpwardTracker, DAG, SRI);
} else {
// Reserve 4 slots.
Pressure.resize(4, 0);
@@ -239,7 +241,7 @@ void GCNSchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU,
#ifdef EXPENSIVE_CHECKS
std::vector<unsigned> CheckPressure, CheckMaxPressure;
getRegisterPressures(AtTop, RPTracker, SU, CheckPressure, CheckMaxPressure,
- TheTracker, UpwardTracker, DAG);
+ TheTracker, UpwardTracker, DAG, SRI);
if (Pressure[AMDGPU::RegisterPressureSets::SReg_32] !=
CheckPressure[AMDGPU::RegisterPressureSets::SReg_32] ||
Pressure[AMDGPU::RegisterPressureSets::VGPR_32] !=
@@ -335,7 +337,7 @@ void GCNSchedStrategy::pickNodeFromQueue(SchedBoundary &Zone,
? static_cast<GCNRPTracker *>(&UpwardTracker)
: static_cast<GCNRPTracker *>(&DownwardTracker);
SGPRPressure = T->getPressure().getSGPRNum();
- VGPRPressure = T->getPressure().getVGPRNum(false);
+ VGPRPressure = T->getPressure().getArchVGPRNum();
}
}
ReadyQueue &Q = Zone.Available;
>From 3f216f5f52fcc96fc5763688c235a81bbeb94f35 Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Thu, 5 Sep 2024 08:24:43 -0700
Subject: [PATCH 18/20] Review comments
Change-Id: I286c9ed1ae91a68da881c6fa27f5f391102d0a9c
---
llvm/lib/Target/AMDGPU/GCNRegPressure.cpp | 68 +++++++++++++--------
llvm/lib/Target/AMDGPU/GCNRegPressure.h | 11 ++++
llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp | 2 +-
3 files changed, 54 insertions(+), 27 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
index e2db40eda07b05..eae33c63cba3b3 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
@@ -288,6 +288,7 @@ collectVirtualRegUses(SmallVectorImpl<RegisterMaskPair> &RegMaskPairs,
}
}
+/// Mostly copy/paste from CodeGen/RegisterPressure.cpp
static LaneBitmask getRegLanes(ArrayRef<RegisterMaskPair> RegUnits,
Register RegUnit) {
auto I = llvm::find_if(RegUnits, [RegUnit](const RegisterMaskPair Other) {
@@ -298,6 +299,7 @@ static LaneBitmask getRegLanes(ArrayRef<RegisterMaskPair> RegUnits,
return I->LaneMask;
}
+/// Mostly copy/paste from CodeGen/RegisterPressure.cpp
static LaneBitmask getLanesWithProperty(
const LiveIntervals &LIS, const MachineRegisterInfo &MRI,
bool TrackLaneMasks, Register RegUnit, SlotIndex Pos,
@@ -325,6 +327,7 @@ static LaneBitmask getLanesWithProperty(
return Property(*LR, Pos) ? LaneBitmask::getAll() : LaneBitmask::getNone();
}
+/// Mostly copy/paste from CodeGen/RegisterPressure.cpp
/// Helper to find a vreg use between two indices [PriorUseIdx, NextUseIdx).
/// The query starts with a lane bitmask which gets lanes/bits removed for every
/// use we find.
@@ -352,6 +355,35 @@ static LaneBitmask findUseBetween(unsigned Reg, LaneBitmask LastUseMask,
return LastUseMask;
}
+/// Mostly copy/paste from CodeGen/RegisterPressure.cpp
+static LaneBitmask getLiveLanesAt(const LiveIntervals &LIS,
+ const MachineRegisterInfo &MRI,
+ bool TrackLaneMasks, Register RegUnit,
+ SlotIndex Pos) {
+ return getLanesWithProperty(
+ LIS, MRI, TrackLaneMasks, RegUnit, Pos, LaneBitmask::getAll(),
+ [](const LiveRange &LR, SlotIndex Pos) { return LR.liveAt(Pos); });
+}
+
+// Copy/paste from RegisterPressure.cpp (RegisterOperands::adjustLaneLiveness)
+static void adjustDefLaneLiveness(SmallVectorImpl<RegisterMaskPair> &Defs,
+ SlotIndex &Pos, const LiveIntervals &LIS,
+ const MachineRegisterInfo &MRI) {
+ for (auto *I = Defs.begin(); I != Defs.end();) {
+ LaneBitmask LiveAfter =
+ getLiveLanesAt(LIS, MRI, true, I->RegUnit, Pos.getDeadSlot());
+ // If the def is all that is live after the instruction, then in case
+ // of a subregister def we need a read-undef flag.
+ LaneBitmask ActualDef = I->LaneMask & LiveAfter;
+ if (ActualDef.none()) {
+ I = Defs.erase(I);
+ } else {
+ I->LaneMask = ActualDef;
+ ++I;
+ }
+ }
+}
+
///////////////////////////////////////////////////////////////////////////////
// GCNRPTracker
@@ -416,6 +448,7 @@ void GCNRPTracker::reset(const MachineRegisterInfo &MRI_,
}
void GCNRPTracker::bumpDeadDefs(ArrayRef<RegisterMaskPair> DeadDefs) {
+ GCNRegPressure TempPressure = CurPressure;
for (const RegisterMaskPair &P : DeadDefs) {
Register Reg = P.RegUnit;
if (!Reg.isVirtual())
@@ -425,16 +458,9 @@ void GCNRPTracker::bumpDeadDefs(ArrayRef<RegisterMaskPair> DeadDefs) {
CurPressure.inc(Reg, LiveMask, BumpedMask, *MRI);
}
MaxPressure = max(MaxPressure, CurPressure);
- for (const RegisterMaskPair &P : DeadDefs) {
- Register Reg = P.RegUnit;
- if (!Reg.isVirtual())
- continue;
- LaneBitmask LiveMask = LiveRegs[Reg];
- LaneBitmask BumpedMask = LiveMask | P.LaneMask;
- CurPressure.inc(Reg, BumpedMask, LiveMask, *MRI);
- }
+ CurPressure = TempPressure;
}
-
+/// Mostly copy/paste from CodeGen/RegisterPressure.cpp
LaneBitmask GCNRPTracker::getLastUsedLanes(Register RegUnit,
SlotIndex Pos) const {
return getLanesWithProperty(
@@ -519,7 +545,7 @@ void GCNUpwardRPTracker::bumpUpwardPressure(const MachineInstr *MI,
RegOpers.collect(*MI, *TRI, *MRI, true, /*IgnoreDead=*/true);
assert(RegOpers.DeadDefs.empty());
- RegOpers.adjustLaneLiveness(LIS, *MRI, SlotIdx);
+ adjustDefLaneLiveness(RegOpers.Defs, SlotIdx, LIS, *MRI);
RegOpers.detectDeadDefs(*MI, LIS);
// Boost max pressure for all dead defs together.
@@ -536,11 +562,7 @@ void GCNUpwardRPTracker::bumpUpwardPressure(const MachineInstr *MI,
LaneBitmask DefLanes = P.LaneMask;
LaneBitmask LiveBefore = (LiveAfter & ~DefLanes) | UseLanes;
- // There may be parts of the register that were dead before the
- // instruction, but became live afterwards. Similarly, some parts
- // may have been killed in this instruction.
CurPressure.inc(Reg, LiveAfter, LiveAfter & LiveBefore, *MRI);
- CurPressure.inc(Reg, LiveAfter, ~LiveAfter & LiveBefore, *MRI);
MaxPressure = max(MaxPressure, CurPressure);
}
// Generate liveness for uses.
@@ -548,19 +570,8 @@ void GCNUpwardRPTracker::bumpUpwardPressure(const MachineInstr *MI,
Register Reg = P.RegUnit;
if (!Reg.isVirtual())
continue;
- // If this register was also in a def operand, we've handled it
- // with defs.
- if (getRegLanes(RegOpers.Defs, Reg).any())
- continue;
LaneBitmask LiveAfter = LiveRegs[Reg];
- SlotIndex CurrIdx =
- LastTrackedMI ? LIS.getInstructionIndex(*LastTrackedMI).getRegSlot()
- : LIS.getMBBEndIdx(MI->getParent());
- ;
- LaneBitmask LastUseMask = findUseBetween(Reg, P.LaneMask, SlotIdx, CurrIdx,
- *MRI, TRI, &LIS, true);
- LastUseMask &= ~LiveAfter;
- LaneBitmask LiveBefore = (LiveAfter | LastUseMask);
+ LaneBitmask LiveBefore = LiveAfter | P.LaneMask;
CurPressure.inc(Reg, LiveAfter, LiveBefore, *MRI);
}
MaxPressure = max(MaxPressure, CurPressure);
@@ -681,8 +692,13 @@ bool GCNDownwardRPTracker::advance(MachineInstr *MI, bool UseInternalIterator,
LiveIntervals *TheLIS) {
if (UseInternalIterator && NextMI == MBBEnd)
return false;
+
advanceBeforeNext(MI, UseInternalIterator, TheLIS);
advanceToNext(MI, UseInternalIterator);
+ if (!UseInternalIterator) {
+ // We must remove any dead def lanes from the current RP.
+ advanceBeforeNext(MI, true, TheLIS);
+ }
return true;
}
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.h b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
index 5f9434f91efc64..463da472bb69ff 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.h
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
@@ -169,6 +169,7 @@ class GCNRPTracker {
void reset(const MachineInstr &MI, const LiveRegSet *LiveRegsCopy,
bool After);
+ /// Mostly copy/paste from CodeGen/RegisterPressure.cpp
void bumpDeadDefs(ArrayRef<RegisterMaskPair> DeadDefs);
LaneBitmask getLastUsedLanes(Register RegUnit, SlotIndex Pos) const;
@@ -222,6 +223,11 @@ class GCNUpwardRPTracker : public GCNRPTracker {
/// to false allows for an externally managed iterator / program order.
void recede(const MachineInstr &MI);
+ /// Mostly copy/paste from CodeGen/RegisterPressure.cpp.
+ /// Calculate the impact \p MI will have on CurPressure and MaxPressure. This
+ /// does not rely on the implicit program ordering in the LiveIntervals to
+ /// support RP Speculation. It leaves the state of pressure inconsistent with
+ /// the current position.
void bumpUpwardPressure(const MachineInstr *MI, const SIRegisterInfo *TRI);
/// \p returns whether the tracker's state after receding MI corresponds
@@ -306,6 +312,11 @@ class GCNDownwardRPTracker : public GCNRPTracker {
MachineBasicBlock::const_iterator End,
const LiveRegSet *LiveRegsCopy = nullptr);
+ /// Mostly copy/paste from CodeGen/RegisterPressure.cpp.
+ /// Calculate the impact \p MI will have on CurPressure and MaxPressure. This
+ /// does not rely on the implicit program ordering in the LiveIntervals to
+ /// support RP Speculation. It leaves the state of pressure inconsistent with
+ /// the current position.
void bumpDownwardPressure(const MachineInstr *MI, const SIRegisterInfo *TRI);
};
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index 40d17f8e0395ef..427ee592789608 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -190,7 +190,7 @@ static void getRegisterPressures(
Pressure[AMDGPU::RegisterPressureSets::VGPR_32] =
TempUpwardTracker.getPressure().getArchVGPRNum();
Pressure[AMDGPU::RegisterPressureSets::AGPR_32] =
- TempDownwardTracker.getPressure().getAGPRNum();
+ TempUpwardTracker.getPressure().getAGPRNum();
}
}
>From da1084c1f065c91f85651459177785e588fb3f92 Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Wed, 18 Sep 2024 12:59:36 -0700
Subject: [PATCH 19/20] Avoid const_cast
Change-Id: Ib7b21b2ab4cc44abc61fb8ad8880fb78f831619a
---
llvm/lib/Target/AMDGPU/GCNRegPressure.cpp | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
index eae33c63cba3b3..46bb3365a32337 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
@@ -598,15 +598,15 @@ bool GCNDownwardRPTracker::advanceBeforeNext(MachineInstr *MI,
LiveIntervals *TheLIS) {
assert(MRI && "call reset first");
SlotIndex SI;
- LiveIntervals *CurrLIS;
- MachineInstr *CurrMI;
+ const LiveIntervals *CurrLIS;
+ const MachineInstr *CurrMI;
if (UseInternalIterator) {
if (!LastTrackedMI)
return NextMI == MBBEnd;
assert(NextMI == MBBEnd || !NextMI->isDebugInstr());
- CurrLIS = const_cast<LiveIntervals *>(&LIS);
- CurrMI = const_cast<MachineInstr *>(LastTrackedMI);
+ CurrLIS = &LIS;
+ CurrMI = LastTrackedMI;
SI = NextMI == MBBEnd
? CurrLIS->getInstructionIndex(*LastTrackedMI).getDeadSlot()
@@ -672,7 +672,7 @@ void GCNDownwardRPTracker::advanceToNext(MachineInstr *MI,
LastTrackedMI = MI;
}
- MachineInstr *CurrMI = const_cast<MachineInstr *>(LastTrackedMI);
+ const MachineInstr *CurrMI = LastTrackedMI;
// Add new registers or mask bits.
for (const auto &MO : CurrMI->all_defs()) {
>From 80e066cd1da82d1d55b74d2b3cab62d8934d9947 Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Mon, 23 Sep 2024 10:08:21 -0700
Subject: [PATCH 20/20] Fix shouldTrackVGPRs calculation
Change-Id: I3d0aae74f20927722cd6844b1d586ae7accab86e
---
llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index 427ee592789608..7bcb8c5b42a1c7 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -333,7 +333,7 @@ void GCNSchedStrategy::pickNodeFromQueue(SchedBoundary &Zone,
SGPRPressure = Pressure[AMDGPU::RegisterPressureSets::SReg_32];
VGPRPressure = Pressure[AMDGPU::RegisterPressureSets::VGPR_32];
} else {
- GCNRPTracker *T = Zone.isTop()
+ GCNRPTracker *T = IsBottomUp
? static_cast<GCNRPTracker *>(&UpwardTracker)
: static_cast<GCNRPTracker *>(&DownwardTracker);
SGPRPressure = T->getPressure().getSGPRNum();
More information about the llvm-commits
mailing list