[llvm] [NFCI][AMDGPU] Try to use PressureDiff to Calculate RegPressure. (PR #94221)

Thu Jun 13 01:08:56 PDT 2024

================
@@ -116,31 +116,112 @@ void GCNSchedStrategy::initialize(ScheduleDAGMI *DAG) {
                     << ", SGPRExcessLimit = " << SGPRExcessLimit << "\n\n");
 }
 
+/// Checks whether \p SU can use the cached DAG pressure diffs to compute the
+/// current register pressure.
+///
+/// This works for the common case, but it has a few exceptions that have been
+/// observed through trial and error:
+///   - Explicit physical register operands
+///   - Subregister definitions
+///
+/// In both of those cases, PressureDiff doesn't represent the actual pressure,
+/// and querying LiveIntervals through the RegPressureTracker is needed to get
+/// an accurate value.
+///
+/// We should eventually only use PressureDiff for maximum performance, but this
+/// already allows 80% of SUs to take the fast path without changing scheduling
+/// at all. Further changes would either change scheduling, or require a lot
+/// more logic to recover an accurate pressure estimate from the PressureDiffs.
+///
+/// \returns true if \p SU is an instruction whose explicit register operands
+/// contain no physical register and no subregister definition; false
+/// otherwise.
+static bool canUsePressureDiffs(const SUnit &SU) {
+  // SUs for which isInstr() is false never take the fast path.
+  if (!SU.isInstr())
+    return false;
+
+  // Cannot use pressure diffs for subregister defs or with physregs, it's
+  // imprecise in both cases.
+  for (const auto &Op : SU.getInstr()->operands()) {
+    // Only explicit register operands are inspected; non-register and
+    // implicit operands are skipped.
+    if (!Op.isReg() || Op.isImplicit())
+      continue;
+    if (Op.getReg().isPhysical() ||
+        (Op.isDef() && Op.getSubReg() != AMDGPU::NoSubRegister))
+      return false;
+  }
+  return true;
+}
+
+/// Queries \p RPTracker for the register pressure associated with \p SU's
+/// instruction, writing the results into \p Pressure and \p MaxPressure.
+///
+/// Calls getDownwardPressure() when \p AtTop is true and getUpwardPressure()
+/// otherwise.
+static void getRegisterPressures(bool AtTop,
+                                 const RegPressureTracker &RPTracker, SUnit *SU,
+                                 std::vector<unsigned> &Pressure,
+                                 std::vector<unsigned> &MaxPressure) {
+  // getDownwardPressure() and getUpwardPressure() make temporary changes to
+  // the tracker, so we cast away constness and pass those functions a
+  // non-const reference to the same tracker (no copy is made).
+  RegPressureTracker &TempTracker = const_cast<RegPressureTracker &>(RPTracker);
+  if (AtTop)
+    TempTracker.getDownwardPressure(SU->getInstr(), Pressure, MaxPressure);
+  else
+    TempTracker.getUpwardPressure(SU->getInstr(), Pressure, MaxPressure);
+}
+
 void GCNSchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU,
                                      bool AtTop,
                                      const RegPressureTracker &RPTracker,
                                      const SIRegisterInfo *SRI,
                                      unsigned SGPRPressure,
-                                     unsigned VGPRPressure) {
+                                     unsigned VGPRPressure, bool IsBottomUp) {
   Cand.SU = SU;
   Cand.AtTop = AtTop;
 
   if (!DAG->isTrackingPressure())
     return;
 
-  // getDownwardPressure() and getUpwardPressure() make temporary changes to
-  // the tracker, so we need to pass those function a non-const copy.
-  RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);
-
   Pressure.clear();
   MaxPressure.clear();
 
-  if (AtTop)
-    TempTracker.getDownwardPressure(SU->getInstr(), Pressure, MaxPressure);
-  else {
-    // FIXME: I think for bottom up scheduling, the register pressure is cached
-    // and can be retrieved by DAG->getPressureDif(SU).
-    TempTracker.getUpwardPressure(SU->getInstr(), Pressure, MaxPressure);
+  // We try to use the cached PressureDiffs in the ScheduleDAG whenever
+  // possible over querying the RegPressureTracker.
+  //
+  // RegPressureTracker will make a lot of LIS queries which are very
+  // expensive, it is considered a slow function in this context.
+  //
+  // PressureDiffs are precomputed and cached, and getPressureDiff is just a
+  // trivial lookup into an array. It is pretty much free.
+  //
+  // In EXPENSIVE_CHECKS, we always query RPTracker to verify the results of
+  // PressureDiffs.
+  if (AtTop || !canUsePressureDiffs(*SU)) {
+    getRegisterPressures(AtTop, RPTracker, SU, Pressure, MaxPressure);
+  } else {
+    // Reserve 4 slots.
+    Pressure.resize(4, 0);
+    Pressure[AMDGPU::RegisterPressureSets::SReg_32] = SGPRPressure;
+    Pressure[AMDGPU::RegisterPressureSets::VGPR_32] = VGPRPressure;
+
+    for (const auto &Diff : DAG->getPressureDiff(SU)) {
+      if (!Diff.isValid())
+        continue;
+      // PressureDiffs is always bottom-up so if we're working top-down we need
+      // to invert its sign.
+      Pressure[Diff.getPSet()] +=
+          (IsBottomUp ? Diff.getUnitInc() : -Diff.getUnitInc());
+    }
+
+#ifdef EXPENSIVE_CHECKS
+    std::vector<unsigned> CheckPressure, CheckMaxPressure;
+    getRegisterPressures(AtTop, RPTracker, SU, CheckPressure, CheckMaxPressure);
+    if (Pressure[AMDGPU::RegisterPressureSets::SReg_32] !=
+            CheckPressure[AMDGPU::RegisterPressureSets::SReg_32] ||
+        Pressure[AMDGPU::RegisterPressureSets::VGPR_32] !=
+            CheckPressure[AMDGPU::RegisterPressureSets::VGPR_32]) {
+      errs() << "Register Pressure is inaccurate when calculated through "
+                "PressureDiff\n";
+      errs() << "SGPR got " << Pressure[AMDGPU::RegisterPressureSets::SReg_32]
+             << ", expected "
+             << CheckPressure[AMDGPU::RegisterPressureSets::SReg_32] << "\n";
+      errs() << "VGPR got " << Pressure[AMDGPU::RegisterPressureSets::VGPR_32]
+             << ", expected "
+             << CheckPressure[AMDGPU::RegisterPressureSets::VGPR_32] << "\n";
----------------
arsenm wrote:

This can be one big chained errs() statement.

https://github.com/llvm/llvm-project/pull/94221