[llvm] [AMDGPU] Refine GCNHazardRecognizer hasHazard() (PR #138841)

Carl Ritson via llvm-commits llvm-commits at lists.llvm.org
Tue Sep 9 01:32:57 PDT 2025


https://github.com/perlfu updated https://github.com/llvm/llvm-project/pull/138841

>From 0230c8e9d31c6df2ab63632545fd0a1e1a4c52dc Mon Sep 17 00:00:00 2001
From: Carl Ritson <carl.ritson at amd.com>
Date: Tue, 6 May 2025 16:06:15 +0900
Subject: [PATCH 1/4] [AMDGPU] Refine GCNHazardRecognizer hasHazard()

Remove recursion to avoid stack overflow on large CFGs.
Avoid worklist for hazard search within single MachineBasicBlock.
Ensure predecessors are visited for all state combinations.
---
 .../lib/Target/AMDGPU/GCNHazardRecognizer.cpp | 79 +++++++++++--------
 1 file changed, 48 insertions(+), 31 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index a3b64aee297b2..d36208a667276 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -441,42 +441,55 @@ using IsExpiredFn = function_ref<bool(const MachineInstr &, int WaitStates)>;
 using GetNumWaitStatesFn = function_ref<unsigned int(const MachineInstr &)>;
 
 // Search for a hazard in a block and its predecessors.
+// StateT must implement getHashValue().
 template <typename StateT>
 static bool
-hasHazard(StateT State,
+hasHazard(StateT InitialState,
           function_ref<HazardFnResult(StateT &, const MachineInstr &)> IsHazard,
           function_ref<void(StateT &, const MachineInstr &)> UpdateState,
-          const MachineBasicBlock *MBB,
-          MachineBasicBlock::const_reverse_instr_iterator I,
-          DenseSet<const MachineBasicBlock *> &Visited) {
-  for (auto E = MBB->instr_rend(); I != E; ++I) {
-    // No need to look at parent BUNDLE instructions.
-    if (I->isBundle())
-      continue;
+          const MachineBasicBlock *InitialMBB,
+          MachineBasicBlock::const_reverse_instr_iterator InitialI) {
+  SmallVector<std::pair<const MachineBasicBlock *, StateT>> Worklist;
+  DenseSet<std::pair<const MachineBasicBlock *, unsigned>> Visited;
+  const MachineBasicBlock *MBB = InitialMBB;
+  StateT State = InitialState;
+  auto I = InitialI;
+
+  for (;;) {
+    bool Expired = false;
+    for (auto E = MBB->instr_rend(); I != E; ++I) {
+      // No need to look at parent BUNDLE instructions.
+      if (I->isBundle())
+        continue;
 
-    switch (IsHazard(State, *I)) {
-    case HazardFound:
-      return true;
-    case HazardExpired:
-      return false;
-    default:
-      // Continue search
-      break;
-    }
+      auto Result = IsHazard(State, *I);
+      if (Result == HazardFound)
+        return true;
+      if (Result == HazardExpired) {
+        Expired = true;
+        break;
+      }
 
-    if (I->isInlineAsm() || I->isMetaInstruction())
-      continue;
+      if (I->isInlineAsm() || I->isMetaInstruction())
+        continue;
 
-    UpdateState(State, *I);
-  }
+      UpdateState(State, *I);
+    }
 
-  for (MachineBasicBlock *Pred : MBB->predecessors()) {
-    if (!Visited.insert(Pred).second)
-      continue;
+    if (!Expired) {
+      unsigned StateHash = State.getHashValue();
+      for (MachineBasicBlock *Pred : MBB->predecessors()) {
+        if (!Visited.insert(std::pair(Pred, StateHash)).second)
+          continue;
+        Worklist.emplace_back(Pred, State);
+      }
+    }
 
-    if (hasHazard(State, IsHazard, UpdateState, Pred, Pred->instr_rbegin(),
-                  Visited))
-      return true;
+    if (Worklist.empty())
+      break;
+
+    std::tie(MBB, State) = Worklist.pop_back_val();
+    I = MBB->instr_rbegin();
   }
 
   return false;
@@ -1641,6 +1654,10 @@ bool GCNHazardRecognizer::fixVALUPartialForwardingHazard(MachineInstr *MI) {
     SmallDenseMap<Register, int, 4> DefPos;
     int ExecPos = std::numeric_limits<int>::max();
     int VALUs = 0;
+
+    unsigned getHashValue() const {
+      return hash_combine(ExecPos, VALUs, hash_combine_range(DefPos));
+    }
   };
 
   StateType State;
@@ -1735,9 +1752,8 @@ bool GCNHazardRecognizer::fixVALUPartialForwardingHazard(MachineInstr *MI) {
       State.VALUs += 1;
   };
 
-  DenseSet<const MachineBasicBlock *> Visited;
   if (!hasHazard<StateType>(State, IsHazardFn, UpdateStateFn, MI->getParent(),
-                            std::next(MI->getReverseIterator()), Visited))
+                            std::next(MI->getReverseIterator())))
     return false;
 
   BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
@@ -1778,6 +1794,8 @@ bool GCNHazardRecognizer::fixVALUTransUseHazard(MachineInstr *MI) {
   struct StateType {
     int VALUs = 0;
     int TRANS = 0;
+
+    unsigned getHashValue() const { return hash_combine(VALUs, TRANS); }
   };
 
   StateType State;
@@ -1813,9 +1831,8 @@ bool GCNHazardRecognizer::fixVALUTransUseHazard(MachineInstr *MI) {
       State.TRANS += 1;
   };
 
-  DenseSet<const MachineBasicBlock *> Visited;
   if (!hasHazard<StateType>(State, IsHazardFn, UpdateStateFn, MI->getParent(),
-                            std::next(MI->getReverseIterator()), Visited))
+                            std::next(MI->getReverseIterator())))
     return false;
 
   // Hazard is observed - insert a wait on va_dst counter to ensure hazard is

>From 2a18fbe9b7a79924b2bbed2f6e80e3920bf95805 Mon Sep 17 00:00:00 2001
From: Carl Ritson <carl.ritson at amd.com>
Date: Sun, 18 May 2025 14:45:25 +0900
Subject: [PATCH 2/4] - Rework to use unified store of states - Handle hashing
 collisions

---
 .../lib/Target/AMDGPU/GCNHazardRecognizer.cpp | 62 ++++++++++++++++---
 1 file changed, 55 insertions(+), 7 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index d36208a667276..5180f5a112d72 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -441,7 +441,7 @@ using IsExpiredFn = function_ref<bool(const MachineInstr &, int WaitStates)>;
 using GetNumWaitStatesFn = function_ref<unsigned int(const MachineInstr &)>;
 
 // Search for a hazard in a block and its predecessors.
-// StateT must implement getHashValue().
+// StateT must implement getHashValue() and isEqual().
 template <typename StateT>
 static bool
 hasHazard(StateT InitialState,
@@ -449,8 +449,47 @@ hasHazard(StateT InitialState,
           function_ref<void(StateT &, const MachineInstr &)> UpdateState,
           const MachineBasicBlock *InitialMBB,
           MachineBasicBlock::const_reverse_instr_iterator InitialI) {
-  SmallVector<std::pair<const MachineBasicBlock *, StateT>> Worklist;
-  DenseSet<std::pair<const MachineBasicBlock *, unsigned>> Visited;
+  SmallVector<std::pair<const MachineBasicBlock *, unsigned>> Worklist;
+  SmallDenseSet<std::pair<const MachineBasicBlock *, unsigned>> Visited;
+  SmallVector<std::pair<unsigned, unsigned>, 1> Collisions;
+  SmallDenseMap<unsigned, unsigned> StateHash2Idx;
+  SmallVector<StateT> States;
+
+  // States contains a vector of unique state structures.
+  // StateT is hashed via getHashValue() and StateHash2Idx maps each hash
+  // to an index in the States vector.
+  // In the unlikely event of a hash collision the Collision vector provides
+  // additional hash to index associations which must be retrieved by a linear
+  // scan.
+
+  // Retrieve unique constant index for a StateT structure in the States vector.
+  auto ResolveStateIdx = [&](const StateT State) {
+    unsigned StateHash = State.getHashValue();
+    unsigned StateIdx;
+    if (!StateHash2Idx.contains(StateHash)) {
+      StateIdx = States.size();
+      States.push_back(State);
+      StateHash2Idx[StateHash] = StateIdx;
+    } else {
+      StateIdx = StateHash2Idx[StateHash];
+      if (LLVM_UNLIKELY(!StateT::isEqual(State, States[StateIdx]))) {
+        // Hash collision
+        auto *Collision = llvm::find_if(Collisions, [&](auto &C) {
+          return C.first == StateHash &&
+                 StateT::isEqual(State, States[C.second]);
+        });
+        if (Collision) {
+          StateIdx = Collision->second;
+        } else {
+          StateIdx = States.size();
+          States.push_back(State);
+          Collisions.emplace_back(StateHash, StateIdx);
+        }
+      }
+    }
+    return StateIdx;
+  };
+
   const MachineBasicBlock *MBB = InitialMBB;
   StateT State = InitialState;
   auto I = InitialI;
@@ -477,18 +516,20 @@ hasHazard(StateT InitialState,
     }
 
     if (!Expired) {
-      unsigned StateHash = State.getHashValue();
+      unsigned StateIdx = ResolveStateIdx(State);
       for (MachineBasicBlock *Pred : MBB->predecessors()) {
-        if (!Visited.insert(std::pair(Pred, StateHash)).second)
+        if (!Visited.insert(std::pair(Pred, StateIdx)).second)
           continue;
-        Worklist.emplace_back(Pred, State);
+        Worklist.emplace_back(Pred, StateIdx);
       }
     }
 
     if (Worklist.empty())
       break;
 
-    std::tie(MBB, State) = Worklist.pop_back_val();
+    unsigned StateIdx;
+    std::tie(MBB, StateIdx) = Worklist.pop_back_val();
+    State = States[StateIdx];
     I = MBB->instr_rbegin();
   }
 
@@ -1658,6 +1699,10 @@ bool GCNHazardRecognizer::fixVALUPartialForwardingHazard(MachineInstr *MI) {
     unsigned getHashValue() const {
       return hash_combine(ExecPos, VALUs, hash_combine_range(DefPos));
     }
+    static bool isEqual(const StateType &LHS, const StateType &RHS) {
+      return LHS.DefPos == RHS.DefPos && LHS.ExecPos == RHS.ExecPos &&
+             LHS.VALUs == RHS.VALUs;
+    }
   };
 
   StateType State;
@@ -1796,6 +1841,9 @@ bool GCNHazardRecognizer::fixVALUTransUseHazard(MachineInstr *MI) {
     int TRANS = 0;
 
     unsigned getHashValue() const { return hash_combine(VALUs, TRANS); }
+    static bool isEqual(const StateType &LHS, const StateType &RHS) {
+      return LHS.VALUs == RHS.VALUs && LHS.TRANS == RHS.TRANS;
+    }
   };
 
   StateType State;

>From 9532454595ecbbdb70585e16edcaca27e9c1f613 Mon Sep 17 00:00:00 2001
From: Carl Ritson <carl.ritson at amd.com>
Date: Mon, 19 May 2025 10:36:08 +0900
Subject: [PATCH 3/4] - Fix use of llvm::find_if

---
 llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index 5180f5a112d72..912059ce87adb 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -474,11 +474,11 @@ hasHazard(StateT InitialState,
       StateIdx = StateHash2Idx[StateHash];
       if (LLVM_UNLIKELY(!StateT::isEqual(State, States[StateIdx]))) {
         // Hash collision
-        auto *Collision = llvm::find_if(Collisions, [&](auto &C) {
+        auto Collision = llvm::find_if(Collisions, [&](auto &C) {
           return C.first == StateHash &&
                  StateT::isEqual(State, States[C.second]);
         });
-        if (Collision) {
+        if (Collision != Collisions.end()) {
           StateIdx = Collision->second;
         } else {
           StateIdx = States.size();

>From 61a3e4893a640e9cbf83c894a151534c23c36158 Mon Sep 17 00:00:00 2001
From: Carl Ritson <carl.ritson at amd.com>
Date: Tue, 9 Sep 2025 15:29:07 +0900
Subject: [PATCH 4/4] - Rewrite most of custom implementation to DenseMap with
 traits

---
 .../lib/Target/AMDGPU/GCNHazardRecognizer.cpp | 129 ++++++++++--------
 1 file changed, 73 insertions(+), 56 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index 912059ce87adb..44e2f7a206805 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -441,59 +441,63 @@ using IsExpiredFn = function_ref<bool(const MachineInstr &, int WaitStates)>;
 using GetNumWaitStatesFn = function_ref<unsigned int(const MachineInstr &)>;
 
 // Search for a hazard in a block and its predecessors.
-// StateT must implement getHashValue() and isEqual().
-template <typename StateT>
+template <typename StateT, typename StateTraitsT>
 static bool
 hasHazard(StateT InitialState,
           function_ref<HazardFnResult(StateT &, const MachineInstr &)> IsHazard,
           function_ref<void(StateT &, const MachineInstr &)> UpdateState,
           const MachineBasicBlock *InitialMBB,
           MachineBasicBlock::const_reverse_instr_iterator InitialI) {
-  SmallVector<std::pair<const MachineBasicBlock *, unsigned>> Worklist;
-  SmallDenseSet<std::pair<const MachineBasicBlock *, unsigned>> Visited;
-  SmallVector<std::pair<unsigned, unsigned>, 1> Collisions;
-  SmallDenseMap<unsigned, unsigned> StateHash2Idx;
-  SmallVector<StateT> States;
-
-  // States contains a vector of unique state structures.
-  // StateT is hashed via getHashValue() and StateHash2Idx maps each hash
-  // to an index in the States vector.
-  // In the unlikely event of a hash collision the Collision vector provides
-  // additional hash to index associations which must be retrieved by a linear
-  // scan.
-
-  // Retrieve unique constant index for a StateT structure in the States vector.
-  auto ResolveStateIdx = [&](const StateT State) {
-    unsigned StateHash = State.getHashValue();
-    unsigned StateIdx;
-    if (!StateHash2Idx.contains(StateHash)) {
-      StateIdx = States.size();
-      States.push_back(State);
-      StateHash2Idx[StateHash] = StateIdx;
-    } else {
-      StateIdx = StateHash2Idx[StateHash];
-      if (LLVM_UNLIKELY(!StateT::isEqual(State, States[StateIdx]))) {
-        // Hash collision
-        auto Collision = llvm::find_if(Collisions, [&](auto &C) {
-          return C.first == StateHash &&
-                 StateT::isEqual(State, States[C.second]);
-        });
-        if (Collision != Collisions.end()) {
-          StateIdx = Collision->second;
-        } else {
-          StateIdx = States.size();
-          States.push_back(State);
-          Collisions.emplace_back(StateHash, StateIdx);
-        }
-      }
+  struct StateMapKey {
+    SmallVectorImpl<StateT> *States;
+    unsigned Idx;
+    static bool isEqual(const StateMapKey &LHS, const StateMapKey &RHS) {
+      return LHS.States == RHS.States && LHS.Idx == RHS.Idx;
     }
-    return StateIdx;
   };
+  struct StateMapKeyTraits : DenseMapInfo<StateMapKey> {
+    static inline StateMapKey getEmptyKey() {
+      return {static_cast<SmallVectorImpl<StateT> *>(
+                  DenseMapInfo<void *>::getEmptyKey()),
+              DenseMapInfo<unsigned>::getEmptyKey()};
+    }
+    static inline StateMapKey getTombstoneKey() {
+      return {static_cast<SmallVectorImpl<StateT> *>(
+                  DenseMapInfo<void *>::getTombstoneKey()),
+              DenseMapInfo<unsigned>::getTombstoneKey()};
+    }
+    static unsigned getHashValue(const StateMapKey &Key) {
+      return StateTraitsT::getHashValue((*Key.States)[Key.Idx]);
+    }
+    static unsigned getHashValue(const StateT &State) {
+      return StateTraitsT::getHashValue(State);
+    }
+    static bool isEqual(const StateMapKey &LHS, const StateMapKey &RHS) {
+      const auto EKey = getEmptyKey();
+      const auto TKey = getTombstoneKey();
+      if (StateMapKey::isEqual(LHS, EKey) || StateMapKey::isEqual(RHS, EKey) ||
+          StateMapKey::isEqual(LHS, TKey) || StateMapKey::isEqual(RHS, TKey))
+        return StateMapKey::isEqual(LHS, RHS);
+      return StateTraitsT::isEqual((*LHS.States)[LHS.Idx],
+                                   (*RHS.States)[RHS.Idx]);
+    }
+    static bool isEqual(const StateT &LHS, const StateMapKey &RHS) {
+      if (StateMapKey::isEqual(RHS, getEmptyKey()) ||
+          StateMapKey::isEqual(RHS, getTombstoneKey()))
+        return false;
+      return StateTraitsT::isEqual(LHS, (*RHS.States)[RHS.Idx]);
+    }
+  };
+
+  SmallDenseMap<StateMapKey, unsigned, 8, StateMapKeyTraits> StateMap;
+  SmallVector<StateT, 8> States;
 
   const MachineBasicBlock *MBB = InitialMBB;
   StateT State = InitialState;
   auto I = InitialI;
 
+  SmallSetVector<std::pair<const MachineBasicBlock *, unsigned>, 16> Worklist;
+  unsigned WorkIdx = 0;
   for (;;) {
     bool Expired = false;
     for (auto E = MBB->instr_rend(); I != E; ++I) {
@@ -516,19 +520,25 @@ hasHazard(StateT InitialState,
     }
 
     if (!Expired) {
-      unsigned StateIdx = ResolveStateIdx(State);
-      for (MachineBasicBlock *Pred : MBB->predecessors()) {
-        if (!Visited.insert(std::pair(Pred, StateIdx)).second)
-          continue;
-        Worklist.emplace_back(Pred, StateIdx);
+      auto StateIt = StateMap.find_as(State);
+      unsigned StateIdx;
+      if (StateIt != StateMap.end()) {
+        StateIdx = StateIt->getSecond();
+      } else {
+        StateIdx = States.size();
+        States.emplace_back(State);
+        StateMapKey Key = {&States, StateIdx};
+        StateMap.insert(std::pair(Key, StateIdx));
       }
+      for (MachineBasicBlock *Pred : MBB->predecessors())
+        Worklist.insert(std::pair(Pred, StateIdx));
     }
 
-    if (Worklist.empty())
+    if (WorkIdx == Worklist.size())
       break;
 
     unsigned StateIdx;
-    std::tie(MBB, StateIdx) = Worklist.pop_back_val();
+    std::tie(MBB, StateIdx) = Worklist[WorkIdx++];
     State = States[StateIdx];
     I = MBB->instr_rbegin();
   }
@@ -1695,9 +1705,11 @@ bool GCNHazardRecognizer::fixVALUPartialForwardingHazard(MachineInstr *MI) {
     SmallDenseMap<Register, int, 4> DefPos;
     int ExecPos = std::numeric_limits<int>::max();
     int VALUs = 0;
-
-    unsigned getHashValue() const {
-      return hash_combine(ExecPos, VALUs, hash_combine_range(DefPos));
+  };
+  struct StateTraits {
+    static unsigned getHashValue(const StateType &State) {
+      return hash_combine(State.ExecPos, State.VALUs,
+                          hash_combine_range(State.DefPos));
     }
     static bool isEqual(const StateType &LHS, const StateType &RHS) {
       return LHS.DefPos == RHS.DefPos && LHS.ExecPos == RHS.ExecPos &&
@@ -1797,8 +1809,9 @@ bool GCNHazardRecognizer::fixVALUPartialForwardingHazard(MachineInstr *MI) {
       State.VALUs += 1;
   };
 
-  if (!hasHazard<StateType>(State, IsHazardFn, UpdateStateFn, MI->getParent(),
-                            std::next(MI->getReverseIterator())))
+  if (!hasHazard<StateType, StateTraits>(State, IsHazardFn, UpdateStateFn,
+                                         MI->getParent(),
+                                         std::next(MI->getReverseIterator())))
     return false;
 
   BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
@@ -1839,8 +1852,11 @@ bool GCNHazardRecognizer::fixVALUTransUseHazard(MachineInstr *MI) {
   struct StateType {
     int VALUs = 0;
     int TRANS = 0;
-
-    unsigned getHashValue() const { return hash_combine(VALUs, TRANS); }
+  };
+  struct StateTraits {
+    static unsigned getHashValue(const StateType &State) {
+      return hash_combine(State.VALUs, State.TRANS);
+    }
     static bool isEqual(const StateType &LHS, const StateType &RHS) {
       return LHS.VALUs == RHS.VALUs && LHS.TRANS == RHS.TRANS;
     }
@@ -1879,8 +1895,9 @@ bool GCNHazardRecognizer::fixVALUTransUseHazard(MachineInstr *MI) {
       State.TRANS += 1;
   };
 
-  if (!hasHazard<StateType>(State, IsHazardFn, UpdateStateFn, MI->getParent(),
-                            std::next(MI->getReverseIterator())))
+  if (!hasHazard<StateType, StateTraits>(State, IsHazardFn, UpdateStateFn,
+                                         MI->getParent(),
+                                         std::next(MI->getReverseIterator())))
     return false;
 
   // Hazard is observed - insert a wait on va_dst counter to ensure hazard is



More information about the llvm-commits mailing list