[llvm] [AMDGPU] Refine GCNHazardRecognizer hasHazard() (PR #138841)
Carl Ritson via llvm-commits
llvm-commits at lists.llvm.org
Tue Sep 9 01:32:57 PDT 2025
https://github.com/perlfu updated https://github.com/llvm/llvm-project/pull/138841
>From 0230c8e9d31c6df2ab63632545fd0a1e1a4c52dc Mon Sep 17 00:00:00 2001
From: Carl Ritson <carl.ritson at amd.com>
Date: Tue, 6 May 2025 16:06:15 +0900
Subject: [PATCH 1/4] [AMDGPU] Refine GCNHazardRecognizer hasHazard()
Remove recursion to avoid stack overflow on large CFGs.
Avoid worklist for hazard search within single MachineBasicBlock.
Ensure predecessors are visited for all state combinations.
---
.../lib/Target/AMDGPU/GCNHazardRecognizer.cpp | 79 +++++++++++--------
1 file changed, 48 insertions(+), 31 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index a3b64aee297b2..d36208a667276 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -441,42 +441,55 @@ using IsExpiredFn = function_ref<bool(const MachineInstr &, int WaitStates)>;
using GetNumWaitStatesFn = function_ref<unsigned int(const MachineInstr &)>;
// Search for a hazard in a block and its predecessors.
+// StateT must implement getHashValue().
template <typename StateT>
static bool
-hasHazard(StateT State,
+hasHazard(StateT InitialState,
function_ref<HazardFnResult(StateT &, const MachineInstr &)> IsHazard,
function_ref<void(StateT &, const MachineInstr &)> UpdateState,
- const MachineBasicBlock *MBB,
- MachineBasicBlock::const_reverse_instr_iterator I,
- DenseSet<const MachineBasicBlock *> &Visited) {
- for (auto E = MBB->instr_rend(); I != E; ++I) {
- // No need to look at parent BUNDLE instructions.
- if (I->isBundle())
- continue;
+ const MachineBasicBlock *InitialMBB,
+ MachineBasicBlock::const_reverse_instr_iterator InitialI) {
+ SmallVector<std::pair<const MachineBasicBlock *, StateT>> Worklist;
+ DenseSet<std::pair<const MachineBasicBlock *, unsigned>> Visited;
+ const MachineBasicBlock *MBB = InitialMBB;
+ StateT State = InitialState;
+ auto I = InitialI;
+
+ for (;;) {
+ bool Expired = false;
+ for (auto E = MBB->instr_rend(); I != E; ++I) {
+ // No need to look at parent BUNDLE instructions.
+ if (I->isBundle())
+ continue;
- switch (IsHazard(State, *I)) {
- case HazardFound:
- return true;
- case HazardExpired:
- return false;
- default:
- // Continue search
- break;
- }
+ auto Result = IsHazard(State, *I);
+ if (Result == HazardFound)
+ return true;
+ if (Result == HazardExpired) {
+ Expired = true;
+ break;
+ }
- if (I->isInlineAsm() || I->isMetaInstruction())
- continue;
+ if (I->isInlineAsm() || I->isMetaInstruction())
+ continue;
- UpdateState(State, *I);
- }
+ UpdateState(State, *I);
+ }
- for (MachineBasicBlock *Pred : MBB->predecessors()) {
- if (!Visited.insert(Pred).second)
- continue;
+ if (!Expired) {
+ unsigned StateHash = State.getHashValue();
+ for (MachineBasicBlock *Pred : MBB->predecessors()) {
+ if (!Visited.insert(std::pair(Pred, StateHash)).second)
+ continue;
+ Worklist.emplace_back(Pred, State);
+ }
+ }
- if (hasHazard(State, IsHazard, UpdateState, Pred, Pred->instr_rbegin(),
- Visited))
- return true;
+ if (Worklist.empty())
+ break;
+
+ std::tie(MBB, State) = Worklist.pop_back_val();
+ I = MBB->instr_rbegin();
}
return false;
@@ -1641,6 +1654,10 @@ bool GCNHazardRecognizer::fixVALUPartialForwardingHazard(MachineInstr *MI) {
SmallDenseMap<Register, int, 4> DefPos;
int ExecPos = std::numeric_limits<int>::max();
int VALUs = 0;
+
+ unsigned getHashValue() const {
+ return hash_combine(ExecPos, VALUs, hash_combine_range(DefPos));
+ }
};
StateType State;
@@ -1735,9 +1752,8 @@ bool GCNHazardRecognizer::fixVALUPartialForwardingHazard(MachineInstr *MI) {
State.VALUs += 1;
};
- DenseSet<const MachineBasicBlock *> Visited;
if (!hasHazard<StateType>(State, IsHazardFn, UpdateStateFn, MI->getParent(),
- std::next(MI->getReverseIterator()), Visited))
+ std::next(MI->getReverseIterator())))
return false;
BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
@@ -1778,6 +1794,8 @@ bool GCNHazardRecognizer::fixVALUTransUseHazard(MachineInstr *MI) {
struct StateType {
int VALUs = 0;
int TRANS = 0;
+
+ unsigned getHashValue() const { return hash_combine(VALUs, TRANS); }
};
StateType State;
@@ -1813,9 +1831,8 @@ bool GCNHazardRecognizer::fixVALUTransUseHazard(MachineInstr *MI) {
State.TRANS += 1;
};
- DenseSet<const MachineBasicBlock *> Visited;
if (!hasHazard<StateType>(State, IsHazardFn, UpdateStateFn, MI->getParent(),
- std::next(MI->getReverseIterator()), Visited))
+ std::next(MI->getReverseIterator())))
return false;
// Hazard is observed - insert a wait on va_dst counter to ensure hazard is
>From 2a18fbe9b7a79924b2bbed2f6e80e3920bf95805 Mon Sep 17 00:00:00 2001
From: Carl Ritson <carl.ritson at amd.com>
Date: Sun, 18 May 2025 14:45:25 +0900
Subject: [PATCH 2/4] - Rework to use unified store of states - Handle hashing
collisions
---
.../lib/Target/AMDGPU/GCNHazardRecognizer.cpp | 62 ++++++++++++++++---
1 file changed, 55 insertions(+), 7 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index d36208a667276..5180f5a112d72 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -441,7 +441,7 @@ using IsExpiredFn = function_ref<bool(const MachineInstr &, int WaitStates)>;
using GetNumWaitStatesFn = function_ref<unsigned int(const MachineInstr &)>;
// Search for a hazard in a block and its predecessors.
-// StateT must implement getHashValue().
+// StateT must implement getHashValue() and isEqual().
template <typename StateT>
static bool
hasHazard(StateT InitialState,
@@ -449,8 +449,47 @@ hasHazard(StateT InitialState,
function_ref<void(StateT &, const MachineInstr &)> UpdateState,
const MachineBasicBlock *InitialMBB,
MachineBasicBlock::const_reverse_instr_iterator InitialI) {
- SmallVector<std::pair<const MachineBasicBlock *, StateT>> Worklist;
- DenseSet<std::pair<const MachineBasicBlock *, unsigned>> Visited;
+ SmallVector<std::pair<const MachineBasicBlock *, unsigned>> Worklist;
+ SmallDenseSet<std::pair<const MachineBasicBlock *, unsigned>> Visited;
+ SmallVector<std::pair<unsigned, unsigned>, 1> Collisions;
+ SmallDenseMap<unsigned, unsigned> StateHash2Idx;
+ SmallVector<StateT> States;
+
+ // States contains a vector of unique state structures.
+ // StateT is hashed via getHashValue() and StateHash2Idx maps each hash
+ // to an index in the States vector.
+ // In the unlikely event of a hash collision the Collision vector provides
+ // additional hash to index associations which must be retrieved by a linear
+ // scan.
+
+ // Retrieve unique constant index for a StateT structure in the States vector.
+ auto ResolveStateIdx = [&](const StateT State) {
+ unsigned StateHash = State.getHashValue();
+ unsigned StateIdx;
+ if (!StateHash2Idx.contains(StateHash)) {
+ StateIdx = States.size();
+ States.push_back(State);
+ StateHash2Idx[StateHash] = StateIdx;
+ } else {
+ StateIdx = StateHash2Idx[StateHash];
+ if (LLVM_UNLIKELY(!StateT::isEqual(State, States[StateIdx]))) {
+ // Hash collision
+ auto *Collision = llvm::find_if(Collisions, [&](auto &C) {
+ return C.first == StateHash &&
+ StateT::isEqual(State, States[C.second]);
+ });
+ if (Collision) {
+ StateIdx = Collision->second;
+ } else {
+ StateIdx = States.size();
+ States.push_back(State);
+ Collisions.emplace_back(StateHash, StateIdx);
+ }
+ }
+ }
+ return StateIdx;
+ };
+
const MachineBasicBlock *MBB = InitialMBB;
StateT State = InitialState;
auto I = InitialI;
@@ -477,18 +516,20 @@ hasHazard(StateT InitialState,
}
if (!Expired) {
- unsigned StateHash = State.getHashValue();
+ unsigned StateIdx = ResolveStateIdx(State);
for (MachineBasicBlock *Pred : MBB->predecessors()) {
- if (!Visited.insert(std::pair(Pred, StateHash)).second)
+ if (!Visited.insert(std::pair(Pred, StateIdx)).second)
continue;
- Worklist.emplace_back(Pred, State);
+ Worklist.emplace_back(Pred, StateIdx);
}
}
if (Worklist.empty())
break;
- std::tie(MBB, State) = Worklist.pop_back_val();
+ unsigned StateIdx;
+ std::tie(MBB, StateIdx) = Worklist.pop_back_val();
+ State = States[StateIdx];
I = MBB->instr_rbegin();
}
@@ -1658,6 +1699,10 @@ bool GCNHazardRecognizer::fixVALUPartialForwardingHazard(MachineInstr *MI) {
unsigned getHashValue() const {
return hash_combine(ExecPos, VALUs, hash_combine_range(DefPos));
}
+ static bool isEqual(const StateType &LHS, const StateType &RHS) {
+ return LHS.DefPos == RHS.DefPos && LHS.ExecPos == RHS.ExecPos &&
+ LHS.VALUs == RHS.VALUs;
+ }
};
StateType State;
@@ -1796,6 +1841,9 @@ bool GCNHazardRecognizer::fixVALUTransUseHazard(MachineInstr *MI) {
int TRANS = 0;
unsigned getHashValue() const { return hash_combine(VALUs, TRANS); }
+ static bool isEqual(const StateType &LHS, const StateType &RHS) {
+ return LHS.VALUs == RHS.VALUs && LHS.TRANS == RHS.TRANS;
+ }
};
StateType State;
>From 9532454595ecbbdb70585e16edcaca27e9c1f613 Mon Sep 17 00:00:00 2001
From: Carl Ritson <carl.ritson at amd.com>
Date: Mon, 19 May 2025 10:36:08 +0900
Subject: [PATCH 3/4] - Fix use of llvm::find_if
---
llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index 5180f5a112d72..912059ce87adb 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -474,11 +474,11 @@ hasHazard(StateT InitialState,
StateIdx = StateHash2Idx[StateHash];
if (LLVM_UNLIKELY(!StateT::isEqual(State, States[StateIdx]))) {
// Hash collision
- auto *Collision = llvm::find_if(Collisions, [&](auto &C) {
+ auto Collision = llvm::find_if(Collisions, [&](auto &C) {
return C.first == StateHash &&
StateT::isEqual(State, States[C.second]);
});
- if (Collision) {
+ if (Collision != Collisions.end()) {
StateIdx = Collision->second;
} else {
StateIdx = States.size();
>From 61a3e4893a640e9cbf83c894a151534c23c36158 Mon Sep 17 00:00:00 2001
From: Carl Ritson <carl.ritson at amd.com>
Date: Tue, 9 Sep 2025 15:29:07 +0900
Subject: [PATCH 4/4] - Rewrite most of custom implementation to DenseMap with
traits
---
.../lib/Target/AMDGPU/GCNHazardRecognizer.cpp | 129 ++++++++++--------
1 file changed, 73 insertions(+), 56 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index 912059ce87adb..44e2f7a206805 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -441,59 +441,63 @@ using IsExpiredFn = function_ref<bool(const MachineInstr &, int WaitStates)>;
using GetNumWaitStatesFn = function_ref<unsigned int(const MachineInstr &)>;
// Search for a hazard in a block and its predecessors.
-// StateT must implement getHashValue() and isEqual().
-template <typename StateT>
+template <typename StateT, typename StateTraitsT>
static bool
hasHazard(StateT InitialState,
function_ref<HazardFnResult(StateT &, const MachineInstr &)> IsHazard,
function_ref<void(StateT &, const MachineInstr &)> UpdateState,
const MachineBasicBlock *InitialMBB,
MachineBasicBlock::const_reverse_instr_iterator InitialI) {
- SmallVector<std::pair<const MachineBasicBlock *, unsigned>> Worklist;
- SmallDenseSet<std::pair<const MachineBasicBlock *, unsigned>> Visited;
- SmallVector<std::pair<unsigned, unsigned>, 1> Collisions;
- SmallDenseMap<unsigned, unsigned> StateHash2Idx;
- SmallVector<StateT> States;
-
- // States contains a vector of unique state structures.
- // StateT is hashed via getHashValue() and StateHash2Idx maps each hash
- // to an index in the States vector.
- // In the unlikely event of a hash collision the Collision vector provides
- // additional hash to index associations which must be retrieved by a linear
- // scan.
-
- // Retrieve unique constant index for a StateT structure in the States vector.
- auto ResolveStateIdx = [&](const StateT State) {
- unsigned StateHash = State.getHashValue();
- unsigned StateIdx;
- if (!StateHash2Idx.contains(StateHash)) {
- StateIdx = States.size();
- States.push_back(State);
- StateHash2Idx[StateHash] = StateIdx;
- } else {
- StateIdx = StateHash2Idx[StateHash];
- if (LLVM_UNLIKELY(!StateT::isEqual(State, States[StateIdx]))) {
- // Hash collision
- auto Collision = llvm::find_if(Collisions, [&](auto &C) {
- return C.first == StateHash &&
- StateT::isEqual(State, States[C.second]);
- });
- if (Collision != Collisions.end()) {
- StateIdx = Collision->second;
- } else {
- StateIdx = States.size();
- States.push_back(State);
- Collisions.emplace_back(StateHash, StateIdx);
- }
- }
+ struct StateMapKey {
+ SmallVectorImpl<StateT> *States;
+ unsigned Idx;
+ static bool isEqual(const StateMapKey &LHS, const StateMapKey &RHS) {
+ return LHS.States == RHS.States && LHS.Idx == RHS.Idx;
}
- return StateIdx;
};
+ struct StateMapKeyTraits : DenseMapInfo<StateMapKey> {
+ static inline StateMapKey getEmptyKey() {
+ return {static_cast<SmallVectorImpl<StateT> *>(
+ DenseMapInfo<void *>::getEmptyKey()),
+ DenseMapInfo<unsigned>::getEmptyKey()};
+ }
+ static inline StateMapKey getTombstoneKey() {
+ return {static_cast<SmallVectorImpl<StateT> *>(
+ DenseMapInfo<void *>::getTombstoneKey()),
+ DenseMapInfo<unsigned>::getTombstoneKey()};
+ }
+ static unsigned getHashValue(const StateMapKey &Key) {
+ return StateTraitsT::getHashValue((*Key.States)[Key.Idx]);
+ }
+ static unsigned getHashValue(const StateT &State) {
+ return StateTraitsT::getHashValue(State);
+ }
+ static bool isEqual(const StateMapKey &LHS, const StateMapKey &RHS) {
+ const auto EKey = getEmptyKey();
+ const auto TKey = getTombstoneKey();
+ if (StateMapKey::isEqual(LHS, EKey) || StateMapKey::isEqual(RHS, EKey) ||
+ StateMapKey::isEqual(LHS, TKey) || StateMapKey::isEqual(RHS, TKey))
+ return StateMapKey::isEqual(LHS, RHS);
+ return StateTraitsT::isEqual((*LHS.States)[LHS.Idx],
+ (*RHS.States)[RHS.Idx]);
+ }
+ static bool isEqual(const StateT &LHS, const StateMapKey &RHS) {
+ if (StateMapKey::isEqual(RHS, getEmptyKey()) ||
+ StateMapKey::isEqual(RHS, getTombstoneKey()))
+ return false;
+ return StateTraitsT::isEqual(LHS, (*RHS.States)[RHS.Idx]);
+ }
+ };
+
+ SmallDenseMap<StateMapKey, unsigned, 8, StateMapKeyTraits> StateMap;
+ SmallVector<StateT, 8> States;
const MachineBasicBlock *MBB = InitialMBB;
StateT State = InitialState;
auto I = InitialI;
+ SmallSetVector<std::pair<const MachineBasicBlock *, unsigned>, 16> Worklist;
+ unsigned WorkIdx = 0;
for (;;) {
bool Expired = false;
for (auto E = MBB->instr_rend(); I != E; ++I) {
@@ -516,19 +520,25 @@ hasHazard(StateT InitialState,
}
if (!Expired) {
- unsigned StateIdx = ResolveStateIdx(State);
- for (MachineBasicBlock *Pred : MBB->predecessors()) {
- if (!Visited.insert(std::pair(Pred, StateIdx)).second)
- continue;
- Worklist.emplace_back(Pred, StateIdx);
+ auto StateIt = StateMap.find_as(State);
+ unsigned StateIdx;
+ if (StateIt != StateMap.end()) {
+ StateIdx = StateIt->getSecond();
+ } else {
+ StateIdx = States.size();
+ States.emplace_back(State);
+ StateMapKey Key = {&States, StateIdx};
+ StateMap.insert(std::pair(Key, StateIdx));
}
+ for (MachineBasicBlock *Pred : MBB->predecessors())
+ Worklist.insert(std::pair(Pred, StateIdx));
}
- if (Worklist.empty())
+ if (WorkIdx == Worklist.size())
break;
unsigned StateIdx;
- std::tie(MBB, StateIdx) = Worklist.pop_back_val();
+ std::tie(MBB, StateIdx) = Worklist[WorkIdx++];
State = States[StateIdx];
I = MBB->instr_rbegin();
}
@@ -1695,9 +1705,11 @@ bool GCNHazardRecognizer::fixVALUPartialForwardingHazard(MachineInstr *MI) {
SmallDenseMap<Register, int, 4> DefPos;
int ExecPos = std::numeric_limits<int>::max();
int VALUs = 0;
-
- unsigned getHashValue() const {
- return hash_combine(ExecPos, VALUs, hash_combine_range(DefPos));
+ };
+ struct StateTraits {
+ static unsigned getHashValue(const StateType &State) {
+ return hash_combine(State.ExecPos, State.VALUs,
+ hash_combine_range(State.DefPos));
}
static bool isEqual(const StateType &LHS, const StateType &RHS) {
return LHS.DefPos == RHS.DefPos && LHS.ExecPos == RHS.ExecPos &&
@@ -1797,8 +1809,9 @@ bool GCNHazardRecognizer::fixVALUPartialForwardingHazard(MachineInstr *MI) {
State.VALUs += 1;
};
- if (!hasHazard<StateType>(State, IsHazardFn, UpdateStateFn, MI->getParent(),
- std::next(MI->getReverseIterator())))
+ if (!hasHazard<StateType, StateTraits>(State, IsHazardFn, UpdateStateFn,
+ MI->getParent(),
+ std::next(MI->getReverseIterator())))
return false;
BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
@@ -1839,8 +1852,11 @@ bool GCNHazardRecognizer::fixVALUTransUseHazard(MachineInstr *MI) {
struct StateType {
int VALUs = 0;
int TRANS = 0;
-
- unsigned getHashValue() const { return hash_combine(VALUs, TRANS); }
+ };
+ struct StateTraits {
+ static unsigned getHashValue(const StateType &State) {
+ return hash_combine(State.VALUs, State.TRANS);
+ }
static bool isEqual(const StateType &LHS, const StateType &RHS) {
return LHS.VALUs == RHS.VALUs && LHS.TRANS == RHS.TRANS;
}
@@ -1879,8 +1895,9 @@ bool GCNHazardRecognizer::fixVALUTransUseHazard(MachineInstr *MI) {
State.TRANS += 1;
};
- if (!hasHazard<StateType>(State, IsHazardFn, UpdateStateFn, MI->getParent(),
- std::next(MI->getReverseIterator())))
+ if (!hasHazard<StateType, StateTraits>(State, IsHazardFn, UpdateStateFn,
+ MI->getParent(),
+ std::next(MI->getReverseIterator())))
return false;
// Hazard is observed - insert a wait on va_dst counter to ensure hazard is
More information about the llvm-commits
mailing list