[llvm] [AMDGPU] Refine GCNHazardRecognizer hasHazard() (PR #138841)
Carl Ritson via llvm-commits
llvm-commits at lists.llvm.org
Sun May 18 18:36:52 PDT 2025
https://github.com/perlfu updated https://github.com/llvm/llvm-project/pull/138841
>From bd372850b3c56db63478989ea3eea150582d72fc Mon Sep 17 00:00:00 2001
From: Carl Ritson <carl.ritson at amd.com>
Date: Tue, 6 May 2025 16:06:15 +0900
Subject: [PATCH 1/3] [AMDGPU] Refine GCNHazardRecognizer hasHazard()
Remove recursion to avoid stack overflow on large CFGs.
Avoid worklist for hazard search within single MachineBasicBlock.
Ensure predecessors are visited for all state combinations.
---
.../lib/Target/AMDGPU/GCNHazardRecognizer.cpp | 79 +++++++++++--------
1 file changed, 48 insertions(+), 31 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index aaefe27b1324f..644fbb77a495a 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -436,42 +436,55 @@ using IsExpiredFn = function_ref<bool(const MachineInstr &, int WaitStates)>;
using GetNumWaitStatesFn = function_ref<unsigned int(const MachineInstr &)>;
// Search for a hazard in a block and its predecessors.
+// StateT must implement getHashValue().
template <typename StateT>
static bool
-hasHazard(StateT State,
+hasHazard(StateT InitialState,
function_ref<HazardFnResult(StateT &, const MachineInstr &)> IsHazard,
function_ref<void(StateT &, const MachineInstr &)> UpdateState,
- const MachineBasicBlock *MBB,
- MachineBasicBlock::const_reverse_instr_iterator I,
- DenseSet<const MachineBasicBlock *> &Visited) {
- for (auto E = MBB->instr_rend(); I != E; ++I) {
- // No need to look at parent BUNDLE instructions.
- if (I->isBundle())
- continue;
+ const MachineBasicBlock *InitialMBB,
+ MachineBasicBlock::const_reverse_instr_iterator InitialI) {
+ SmallVector<std::pair<const MachineBasicBlock *, StateT>> Worklist;
+ DenseSet<std::pair<const MachineBasicBlock *, unsigned>> Visited;
+ const MachineBasicBlock *MBB = InitialMBB;
+ StateT State = InitialState;
+ auto I = InitialI;
+
+ for (;;) {
+ bool Expired = false;
+ for (auto E = MBB->instr_rend(); I != E; ++I) {
+ // No need to look at parent BUNDLE instructions.
+ if (I->isBundle())
+ continue;
- switch (IsHazard(State, *I)) {
- case HazardFound:
- return true;
- case HazardExpired:
- return false;
- default:
- // Continue search
- break;
- }
+ auto Result = IsHazard(State, *I);
+ if (Result == HazardFound)
+ return true;
+ if (Result == HazardExpired) {
+ Expired = true;
+ break;
+ }
- if (I->isInlineAsm() || I->isMetaInstruction())
- continue;
+ if (I->isInlineAsm() || I->isMetaInstruction())
+ continue;
- UpdateState(State, *I);
- }
+ UpdateState(State, *I);
+ }
- for (MachineBasicBlock *Pred : MBB->predecessors()) {
- if (!Visited.insert(Pred).second)
- continue;
+ if (!Expired) {
+ unsigned StateHash = State.getHashValue();
+ for (MachineBasicBlock *Pred : MBB->predecessors()) {
+ if (!Visited.insert(std::pair(Pred, StateHash)).second)
+ continue;
+ Worklist.emplace_back(Pred, State);
+ }
+ }
- if (hasHazard(State, IsHazard, UpdateState, Pred, Pred->instr_rbegin(),
- Visited))
- return true;
+ if (Worklist.empty())
+ break;
+
+ std::tie(MBB, State) = Worklist.pop_back_val();
+ I = MBB->instr_rbegin();
}
return false;
@@ -1624,6 +1637,10 @@ bool GCNHazardRecognizer::fixVALUPartialForwardingHazard(MachineInstr *MI) {
SmallDenseMap<Register, int, 4> DefPos;
int ExecPos = std::numeric_limits<int>::max();
int VALUs = 0;
+
+ unsigned getHashValue() const {
+ return hash_combine(ExecPos, VALUs, hash_combine_range(DefPos));
+ }
};
StateType State;
@@ -1718,9 +1735,8 @@ bool GCNHazardRecognizer::fixVALUPartialForwardingHazard(MachineInstr *MI) {
State.VALUs += 1;
};
- DenseSet<const MachineBasicBlock *> Visited;
if (!hasHazard<StateType>(State, IsHazardFn, UpdateStateFn, MI->getParent(),
- std::next(MI->getReverseIterator()), Visited))
+ std::next(MI->getReverseIterator())))
return false;
BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
@@ -1761,6 +1777,8 @@ bool GCNHazardRecognizer::fixVALUTransUseHazard(MachineInstr *MI) {
struct StateType {
int VALUs = 0;
int TRANS = 0;
+
+ unsigned getHashValue() const { return hash_combine(VALUs, TRANS); }
};
StateType State;
@@ -1796,9 +1814,8 @@ bool GCNHazardRecognizer::fixVALUTransUseHazard(MachineInstr *MI) {
State.TRANS += 1;
};
- DenseSet<const MachineBasicBlock *> Visited;
if (!hasHazard<StateType>(State, IsHazardFn, UpdateStateFn, MI->getParent(),
- std::next(MI->getReverseIterator()), Visited))
+ std::next(MI->getReverseIterator())))
return false;
// Hazard is observed - insert a wait on va_dst counter to ensure hazard is
>From a4750fa915798e8bc6f61144e2c5beda81d208d5 Mon Sep 17 00:00:00 2001
From: Carl Ritson <carl.ritson at amd.com>
Date: Sun, 18 May 2025 14:45:25 +0900
Subject: [PATCH 2/3] - Rework to use unified store of states - Handle hashing
collisions
---
.../lib/Target/AMDGPU/GCNHazardRecognizer.cpp | 62 ++++++++++++++++---
1 file changed, 55 insertions(+), 7 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index 644fbb77a495a..96de56b5717a9 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -436,7 +436,7 @@ using IsExpiredFn = function_ref<bool(const MachineInstr &, int WaitStates)>;
using GetNumWaitStatesFn = function_ref<unsigned int(const MachineInstr &)>;
// Search for a hazard in a block and its predecessors.
-// StateT must implement getHashValue().
+// StateT must implement getHashValue() and isEqual().
template <typename StateT>
static bool
hasHazard(StateT InitialState,
@@ -444,8 +444,47 @@ hasHazard(StateT InitialState,
function_ref<void(StateT &, const MachineInstr &)> UpdateState,
const MachineBasicBlock *InitialMBB,
MachineBasicBlock::const_reverse_instr_iterator InitialI) {
- SmallVector<std::pair<const MachineBasicBlock *, StateT>> Worklist;
- DenseSet<std::pair<const MachineBasicBlock *, unsigned>> Visited;
+ SmallVector<std::pair<const MachineBasicBlock *, unsigned>> Worklist;
+ SmallDenseSet<std::pair<const MachineBasicBlock *, unsigned>> Visited;
+ SmallVector<std::pair<unsigned, unsigned>, 1> Collisions;
+ SmallDenseMap<unsigned, unsigned> StateHash2Idx;
+ SmallVector<StateT> States;
+
+ // States contains a vector of unique state structures.
+ // StateT is hashed via getHashValue() and StateHash2Idx maps each hash
+ // to an index in the States vector.
+ // In the unlikely event of a hash collision the Collisions vector provides
+ // additional hash to index associations which must be retrieved by a linear
+ // scan.
+
+ // Retrieve unique constant index for a StateT structure in the States vector.
+ auto ResolveStateIdx = [&](const StateT State) {
+ unsigned StateHash = State.getHashValue();
+ unsigned StateIdx;
+ if (!StateHash2Idx.contains(StateHash)) {
+ StateIdx = States.size();
+ States.push_back(State);
+ StateHash2Idx[StateHash] = StateIdx;
+ } else {
+ StateIdx = StateHash2Idx[StateHash];
+ if (LLVM_UNLIKELY(!StateT::isEqual(State, States[StateIdx]))) {
+ // Hash collision
+ auto *Collision = llvm::find_if(Collisions, [&](auto &C) {
+ return C.first == StateHash &&
+ StateT::isEqual(State, States[C.second]);
+ });
+ if (Collision) {
+ StateIdx = Collision->second;
+ } else {
+ StateIdx = States.size();
+ States.push_back(State);
+ Collisions.emplace_back(StateHash, StateIdx);
+ }
+ }
+ }
+ return StateIdx;
+ };
+
const MachineBasicBlock *MBB = InitialMBB;
StateT State = InitialState;
auto I = InitialI;
@@ -472,18 +511,20 @@ hasHazard(StateT InitialState,
}
if (!Expired) {
- unsigned StateHash = State.getHashValue();
+ unsigned StateIdx = ResolveStateIdx(State);
for (MachineBasicBlock *Pred : MBB->predecessors()) {
- if (!Visited.insert(std::pair(Pred, StateHash)).second)
+ if (!Visited.insert(std::pair(Pred, StateIdx)).second)
continue;
- Worklist.emplace_back(Pred, State);
+ Worklist.emplace_back(Pred, StateIdx);
}
}
if (Worklist.empty())
break;
- std::tie(MBB, State) = Worklist.pop_back_val();
+ unsigned StateIdx;
+ std::tie(MBB, StateIdx) = Worklist.pop_back_val();
+ State = States[StateIdx];
I = MBB->instr_rbegin();
}
@@ -1641,6 +1682,10 @@ bool GCNHazardRecognizer::fixVALUPartialForwardingHazard(MachineInstr *MI) {
unsigned getHashValue() const {
return hash_combine(ExecPos, VALUs, hash_combine_range(DefPos));
}
+ static bool isEqual(const StateType &LHS, const StateType &RHS) {
+ return LHS.DefPos == RHS.DefPos && LHS.ExecPos == RHS.ExecPos &&
+ LHS.VALUs == RHS.VALUs;
+ }
};
StateType State;
@@ -1779,6 +1824,9 @@ bool GCNHazardRecognizer::fixVALUTransUseHazard(MachineInstr *MI) {
int TRANS = 0;
unsigned getHashValue() const { return hash_combine(VALUs, TRANS); }
+ static bool isEqual(const StateType &LHS, const StateType &RHS) {
+ return LHS.VALUs == RHS.VALUs && LHS.TRANS == RHS.TRANS;
+ }
};
StateType State;
>From c8c36ab93c081a7bced562373d6700b6f9a68a77 Mon Sep 17 00:00:00 2001
From: Carl Ritson <carl.ritson at amd.com>
Date: Mon, 19 May 2025 10:36:08 +0900
Subject: [PATCH 3/3] - Fix use of llvm::find_if
---
llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index 96de56b5717a9..f80fe64cccd60 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -469,11 +469,11 @@ hasHazard(StateT InitialState,
StateIdx = StateHash2Idx[StateHash];
if (LLVM_UNLIKELY(!StateT::isEqual(State, States[StateIdx]))) {
// Hash collision
- auto *Collision = llvm::find_if(Collisions, [&](auto &C) {
+ auto Collision = llvm::find_if(Collisions, [&](auto &C) {
return C.first == StateHash &&
StateT::isEqual(State, States[C.second]);
});
- if (Collision) {
+ if (Collision != Collisions.end()) {
StateIdx = Collision->second;
} else {
StateIdx = States.size();
More information about the llvm-commits
mailing list