[llvm] [AMDGPU] Refactor hazard recognizer for VALU-pipeline hazards. NFCI. (PR #168801)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Nov 19 17:07:04 PST 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Stanislav Mekhanoshin (rampitec)
<details>
<summary>Changes</summary>
This is in preparation of handling these in scheduler. I do not expect
any changes to the produced code here, it is just an infrastructure.
Our current problem with the VALU pipeline hazards is that we only
insert V_NOP instructions in the hazard recognizer mode, but ignore
it during scheduling. This patch is meant to create a mechanism to
actually account for that during scheduling.
---
Full diff: https://github.com/llvm/llvm-project/pull/168801.diff
2 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp (+43-38)
- (modified) llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h (+10-3)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index 29d22f27a2d8e..d07909251dcfb 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -437,9 +437,6 @@ void GCNHazardRecognizer::RecedeCycle() {
enum HazardFnResult { HazardFound, HazardExpired, NoHazardFound };
-using IsExpiredFn = function_ref<bool(const MachineInstr &, int WaitStates)>;
-using GetNumWaitStatesFn = function_ref<unsigned int(const MachineInstr &)>;
-
// Search for a hazard in a block and its predecessors.
template <typename StateT>
static bool
@@ -546,11 +543,14 @@ hasHazard(StateT InitialState,
// Returns a minimum wait states since \p I walking all predecessors.
// Only scans until \p IsExpired does not return true.
// Can only be run in a hazard recognizer mode.
-static int getWaitStatesSince(
- GCNHazardRecognizer::IsHazardFn IsHazard, const MachineBasicBlock *MBB,
- MachineBasicBlock::const_reverse_instr_iterator I, int WaitStates,
- IsExpiredFn IsExpired, DenseSet<const MachineBasicBlock *> &Visited,
- GetNumWaitStatesFn GetNumWaitStates = SIInstrInfo::getNumWaitStates) {
+static int
+getWaitStatesSince(GCNHazardRecognizer::IsHazardFn IsHazard,
+ const MachineBasicBlock *MBB,
+ MachineBasicBlock::const_reverse_instr_iterator I,
+ int WaitStates, GCNHazardRecognizer::IsExpiredFn IsExpired,
+ DenseSet<const MachineBasicBlock *> &Visited,
+ GCNHazardRecognizer::GetNumWaitStatesFn GetNumWaitStates =
+ SIInstrInfo::getNumWaitStates) {
for (auto E = MBB->instr_rend(); I != E; ++I) {
// Don't add WaitStates for parent BUNDLE instructions.
if (I->isBundle())
@@ -582,20 +582,26 @@ static int getWaitStatesSince(
return MinWaitStates;
}
-static int getWaitStatesSince(GCNHazardRecognizer::IsHazardFn IsHazard,
- const MachineInstr *MI, IsExpiredFn IsExpired) {
+static int
+getWaitStatesSince(GCNHazardRecognizer::IsHazardFn IsHazard,
+ const MachineInstr *MI,
+ GCNHazardRecognizer::IsExpiredFn IsExpired,
+ GCNHazardRecognizer::GetNumWaitStatesFn GetNumWaitStates =
+ SIInstrInfo::getNumWaitStates) {
DenseSet<const MachineBasicBlock *> Visited;
return getWaitStatesSince(IsHazard, MI->getParent(),
std::next(MI->getReverseIterator()), 0, IsExpired,
- Visited, SIInstrInfo::getNumWaitStates);
+ Visited, GetNumWaitStates);
}
-int GCNHazardRecognizer::getWaitStatesSince(IsHazardFn IsHazard, int Limit) {
+int GCNHazardRecognizer::getWaitStatesSince(
+ IsHazardFn IsHazard, int Limit, GetNumWaitStatesFn GetNumWaitStates) {
if (IsHazardRecognizerMode) {
auto IsExpiredFn = [Limit](const MachineInstr &, int WaitStates) {
return WaitStates >= Limit;
};
- return ::getWaitStatesSince(IsHazard, CurrCycleInstr, IsExpiredFn);
+ return ::getWaitStatesSince(IsHazard, CurrCycleInstr, IsExpiredFn,
+ GetNumWaitStates);
}
int WaitStates = 0;
@@ -607,7 +613,7 @@ int GCNHazardRecognizer::getWaitStatesSince(IsHazardFn IsHazard, int Limit) {
if (MI->isInlineAsm())
continue;
}
- ++WaitStates;
+ WaitStates += MI ? GetNumWaitStates(*MI) : 1;
if (WaitStates >= Limit)
break;
@@ -1243,6 +1249,20 @@ int GCNHazardRecognizer::checkReadM0Hazards(MachineInstr *MI) {
getWaitStatesSinceDef(AMDGPU::M0, IsHazardFn, ReadM0WaitStates);
}
+// emit V_NOP instructions. \p WaitStatesNeeded is the number of V_NOPs we need
+// to insert, negative means not needed.
+bool GCNHazardRecognizer::emitVNops(MachineInstr *MI, int WaitStatesNeeded) {
+ if (WaitStatesNeeded <= 0)
+ return false;
+
+ const SIInstrInfo *TII = ST.getInstrInfo();
+ for (int I = 0; I < WaitStatesNeeded; ++I)
+ BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
+ TII->get(AMDGPU::V_NOP_e32));
+
+ return true;
+}
+
void GCNHazardRecognizer::fixHazards(MachineInstr *MI) {
fixVMEMtoScalarWriteHazards(MI);
fixVcmpxPermlaneHazards(MI);
@@ -1257,7 +1277,7 @@ void GCNHazardRecognizer::fixHazards(MachineInstr *MI) {
fixVALUTransUseHazard(MI);
fixVALUTransCoexecutionHazards(MI);
fixWMMAHazards(MI); // fall-through if co-execution is enabled.
- fixWMMACoexecutionHazards(MI);
+ emitVNops(MI, checkWMMACoexecutionHazards(MI));
fixShift64HighRegBug(MI);
fixVALUMaskWriteHazard(MI);
fixRequiredExportPriority(MI);
@@ -2045,13 +2065,13 @@ static bool IsWMMAHazardInstInCategory(const MachineInstr &MI,
return false;
}
-bool GCNHazardRecognizer::fixWMMACoexecutionHazards(MachineInstr *MI) {
+int GCNHazardRecognizer::checkWMMACoexecutionHazards(MachineInstr *MI) {
if (!AMDGPU::isGFX1250(ST))
- return false;
+ return 0;
const SIInstrInfo *TII = ST.getInstrInfo();
if (!TII->isXDLWMMA(*MI) && !isCoexecutableVALUInst(*MI))
- return false;
+ return 0;
const SIRegisterInfo *TRI = ST.getRegisterInfo();
@@ -2129,9 +2149,6 @@ bool GCNHazardRecognizer::fixWMMACoexecutionHazards(MachineInstr *MI) {
};
int Limit = 0;
- auto IsExpiredFn = [&Limit](const MachineInstr &, int WaitStates) {
- return WaitStates >= Limit;
- };
auto GetWaitStatesFn = [](const MachineInstr &I) {
return SIInstrInfo::isVALU(I) ? 1 : 0;
@@ -2141,38 +2158,26 @@ bool GCNHazardRecognizer::fixWMMACoexecutionHazards(MachineInstr *MI) {
if (TII->isXDLWMMA(*MI)) {
for (Category = 0; WaitStatesNeeded < 0 && Category < 4; Category++) {
Limit = WMMAWaitStates[Category]; // for IsExpiredFn.
- DenseSet<const MachineBasicBlock *> Visited;
- // '::getWaitStatesSince' returns the number of VALUs in between if hazard
+ // 'getWaitStatesSince' returns the number of VALUs in between if hazard
// exists, and INT_MAX if there is no hazard. As a result, a negative
// WaitStatesNeeded here means no hazard, and we will continue to search
// for other categories.
WaitStatesNeeded =
- Limit - ::getWaitStatesSince(IsWMMAHazardFn, MI->getParent(),
- std::next(MI->getReverseIterator()), 0,
- IsExpiredFn, Visited, GetWaitStatesFn);
+ Limit - getWaitStatesSince(IsWMMAHazardFn, Limit, GetWaitStatesFn);
}
} else { // Must be a co-executable VALU.
for (Category = 0; WaitStatesNeeded < 0 && Category < 4; Category++) {
Limit = VALUWaitStates[Category]; // for IsExpiredFn.
- DenseSet<const MachineBasicBlock *> Visited;
- // '::getWaitStatesSince' returns the number of VALUs in between if hazard
+ // 'getWaitStatesSince' returns the number of VALUs in between if hazard
// exists, and INT_MAX if there is no hazard. As a result, a negative
// WaitStatesNeeded here means no hazard, and we will continue to search
// for other categories.
WaitStatesNeeded =
- Limit - ::getWaitStatesSince(IsVALUHazardFn, MI->getParent(),
- std::next(MI->getReverseIterator()), 0,
- IsExpiredFn, Visited, GetWaitStatesFn);
+ Limit - getWaitStatesSince(IsVALUHazardFn, Limit, GetWaitStatesFn);
}
}
- // WaitStatesNeeded now is the number of V_NOPs we need to insert, negative
- // means not needed.
- for (int i = 0; i < WaitStatesNeeded; i++)
- BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
- TII->get(AMDGPU::V_NOP_e32));
-
- return true;
+ return WaitStatesNeeded;
}
bool GCNHazardRecognizer::fixShift64HighRegBug(MachineInstr *MI) {
diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
index 67beffadc0913..e5df71ca3788c 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
@@ -13,6 +13,7 @@
#ifndef LLVM_LIB_TARGET_AMDGPUHAZARDRECOGNIZERS_H
#define LLVM_LIB_TARGET_AMDGPUHAZARDRECOGNIZERS_H
+#include "SIInstrInfo.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
@@ -25,13 +26,14 @@ class MachineFunction;
class MachineInstr;
class MachineOperand;
class MachineRegisterInfo;
-class SIInstrInfo;
class SIRegisterInfo;
class GCNSubtarget;
class GCNHazardRecognizer final : public ScheduleHazardRecognizer {
public:
typedef function_ref<bool(const MachineInstr &)> IsHazardFn;
+ typedef function_ref<bool(const MachineInstr &, int WaitStates)> IsExpiredFn;
+ typedef function_ref<unsigned int(const MachineInstr &)> GetNumWaitStatesFn;
private:
// Distinguish if we are called from scheduler or hazard recognizer
@@ -74,7 +76,9 @@ class GCNHazardRecognizer final : public ScheduleHazardRecognizer {
// used on a newly inserted instruction before returning from PreEmitNoops.
void runOnInstruction(MachineInstr *MI);
- int getWaitStatesSince(IsHazardFn IsHazard, int Limit);
+ int getWaitStatesSince(
+ IsHazardFn IsHazard, int Limit,
+ GetNumWaitStatesFn GetNumWaitStates = SIInstrInfo::getNumWaitStates);
int getWaitStatesSinceDef(unsigned Reg, IsHazardFn IsHazardDef, int Limit);
int getWaitStatesSinceSetReg(IsHazardFn IsHazard, int Limit);
@@ -94,6 +98,9 @@ class GCNHazardRecognizer final : public ScheduleHazardRecognizer {
int checkReadM0Hazards(MachineInstr *SMovRel);
int checkNSAtoVMEMHazard(MachineInstr *MI);
int checkFPAtomicToDenormModeHazard(MachineInstr *MI);
+ // emit V_NOP instructions. \p WaitStatesNeeded is the number of V_NOPs we
+ // need to insert, negative means not needed.
+ bool emitVNops(MachineInstr *MI, int WaitStatesNeeded);
void fixHazards(MachineInstr *MI);
bool fixVcmpxPermlaneHazards(MachineInstr *MI);
bool fixVMEMtoScalarWriteHazards(MachineInstr *MI);
@@ -106,7 +113,7 @@ class GCNHazardRecognizer final : public ScheduleHazardRecognizer {
bool fixVALUTransUseHazard(MachineInstr *MI);
bool fixVALUTransCoexecutionHazards(MachineInstr *MI);
bool fixWMMAHazards(MachineInstr *MI);
- bool fixWMMACoexecutionHazards(MachineInstr *MI);
+ int checkWMMACoexecutionHazards(MachineInstr *MI);
bool fixShift64HighRegBug(MachineInstr *MI);
bool fixVALUMaskWriteHazard(MachineInstr *MI);
bool fixRequiredExportPriority(MachineInstr *MI);
``````````
</details>
https://github.com/llvm/llvm-project/pull/168801
More information about the llvm-commits
mailing list