[llvm] [AMDGPU] GCNHazardRecognizer: refactor getWaitStatesSince (NFCI) (PR #108347)

Carl Ritson via llvm-commits llvm-commits at lists.llvm.org
Sat Sep 14 23:37:55 PDT 2024


================
@@ -495,51 +496,104 @@ hasHazard(StateT State,
   return false;
 }
 
-// Returns a minimum wait states since \p I walking all predecessors.
-// Only scans until \p IsExpired does not return true.
-// Can only be run in a hazard recognizer mode.
-static int getWaitStatesSince(
+// Update \p WaitStates while iterating from \p I to hazard in \p MBB.
+static HazardFnResult countWaitStatesSince(
     GCNHazardRecognizer::IsHazardFn IsHazard, const MachineBasicBlock *MBB,
-    MachineBasicBlock::const_reverse_instr_iterator I, int WaitStates,
-    IsExpiredFn IsExpired, DenseSet<const MachineBasicBlock *> &Visited,
-    GetNumWaitStatesFn GetNumWaitStates = SIInstrInfo::getNumWaitStates) {
+    MachineBasicBlock::const_reverse_instr_iterator I, int &WaitStates,
+    IsExpiredFn IsExpired, GetNumWaitStatesFn GetNumWaitStates) {
   for (auto E = MBB->instr_rend(); I != E; ++I) {
     // Don't add WaitStates for parent BUNDLE instructions.
     if (I->isBundle())
       continue;
 
     if (IsHazard(*I))
-      return WaitStates;
+      return HazardFound;
 
     if (I->isInlineAsm())
       continue;
 
     WaitStates += GetNumWaitStates(*I);
 
     if (IsExpired(*I, WaitStates))
-      return std::numeric_limits<int>::max();
+      return HazardExpired;
+  }
+
+  return NoHazardFound;
+}
+
+// Implements predecessor search for getWaitStatesSince.
+static int getWaitStatesSinceImpl(
+    GCNHazardRecognizer::IsHazardFn IsHazard,
+    const MachineBasicBlock *InitialMBB, int InitialWaitStates,
+    IsExpiredFn IsExpired,
+    GetNumWaitStatesFn GetNumWaitStates = SIInstrInfo::getNumWaitStates) {
+  DenseMap<const MachineBasicBlock *, int> Visited;
+
+  // Build worklist of predecessors.
+  // Note: use queue so search is breadth first, which reduces search space
+  // when a hazard is found.
+  std::queue<const MachineBasicBlock *> Worklist;
+  for (MachineBasicBlock *Pred : InitialMBB->predecessors()) {
+    Visited[Pred] = InitialWaitStates;
+    Worklist.push(Pred);
   }
 
+  // Find minimum wait states to hazard or determine that all paths expire.
   int MinWaitStates = std::numeric_limits<int>::max();
-  for (MachineBasicBlock *Pred : MBB->predecessors()) {
-    if (!Visited.insert(Pred).second)
-      continue;
+  while (!Worklist.empty()) {
+    const MachineBasicBlock *MBB = Worklist.front();
+    int WaitStates = Visited[MBB];
+    Worklist.pop();
 
-    int W = getWaitStatesSince(IsHazard, Pred, Pred->instr_rbegin(), WaitStates,
-                               IsExpired, Visited, GetNumWaitStates);
+    // No reason to search blocks when wait states exceed established minimum.
+    if (WaitStates >= MinWaitStates)
+      continue;
 
-    MinWaitStates = std::min(MinWaitStates, W);
+    // Search for hazard
+    auto Search = countWaitStatesSince(IsHazard, MBB, MBB->instr_rbegin(),
+                                       WaitStates, IsExpired, GetNumWaitStates);
+    if (Search == HazardFound) {
+      // Update minimum.
+      MinWaitStates = std::min(MinWaitStates, WaitStates);
+    } else if (Search == NoHazardFound && WaitStates < MinWaitStates) {
+      // Search predecessors.
+      for (MachineBasicBlock *Pred : MBB->predecessors()) {
+        if (!Visited.contains(Pred) || WaitStates < Visited[Pred]) {
+          // Store lowest wait states required to visit this block.
+          Visited[Pred] = WaitStates;
+          Worklist.push(Pred);
----------------
perlfu wrote:

I'm aware of the duplication possibility, but every test I have done suggests the general overhead of adding a set or similar to avoid duplicate visits adds unnecessary cost.
I agree an anti-patterns that could exist that represent a worst case, but in practice I don't think this is an issue.

https://github.com/llvm/llvm-project/pull/108347


More information about the llvm-commits mailing list