[llvm] [AMDGPU] Use encodeFieldVaVdst in hazard recognizer. NFCI. (PR #153881)
Stanislav Mekhanoshin via llvm-commits
llvm-commits at lists.llvm.org
Fri Aug 15 17:19:36 PDT 2025
https://github.com/rampitec updated https://github.com/llvm/llvm-project/pull/153881
>From d9f7d86ddeb3e4e6c24363ca24dd718362039667 Mon Sep 17 00:00:00 2001
From: Stanislav Mekhanoshin <Stanislav.Mekhanoshin at amd.com>
Date: Fri, 15 Aug 2025 14:03:15 -0700
Subject: [PATCH 1/3] [AMDGPU] w/a for s_setreg_b32 gfx1250 hazard with MODE
register
---
.../lib/Target/AMDGPU/GCNHazardRecognizer.cpp | 12 +++++
llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h | 1 +
llvm/lib/Target/AMDGPU/GCNSubtarget.h | 4 ++
llvm/test/CodeGen/AMDGPU/hazards-gfx1250.mir | 54 +++++++++++++++++++
4 files changed, 71 insertions(+)
diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index dd7c1914d3440..c1cca063aac6f 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -1206,6 +1206,8 @@ void GCNHazardRecognizer::fixHazards(MachineInstr *MI) {
fixDsAtomicAsyncBarrierArriveB64(MI);
if (ST.hasScratchBaseForwardingHazard())
fixScratchBaseForwardingHazard(MI);
+ if (ST.setRegModeNeedsVNOPs())
+ fixSetRegMode(MI);
}
static bool isVCmpXWritesExec(const SIInstrInfo &TII, const SIRegisterInfo &TRI,
@@ -3546,3 +3548,13 @@ bool GCNHazardRecognizer::fixScratchBaseForwardingHazard(MachineInstr *MI) {
AMDGPU::DepCtr::encodeFieldSaSdst(0), 0));
return true;
}
+
+bool GCNHazardRecognizer::fixSetRegMode(MachineInstr *MI) {
+ if (!isSSetReg(MI->getOpcode()) ||
+ MI->getOperand(1).getImm() != AMDGPU::Hwreg::ID_MODE)
+ return false;
+
+ BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), TII.get(AMDGPU::V_NOP_e32));
+ BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), TII.get(AMDGPU::V_NOP_e32));
+ return true;
+}
diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
index e0982b46424b9..67beffadc0913 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
@@ -113,6 +113,7 @@ class GCNHazardRecognizer final : public ScheduleHazardRecognizer {
bool fixGetRegWaitIdle(MachineInstr *MI);
bool fixDsAtomicAsyncBarrierArriveB64(MachineInstr *MI);
bool fixScratchBaseForwardingHazard(MachineInstr *MI);
+ bool fixSetRegMode(MachineInstr *MI);
int checkMAIHazards(MachineInstr *MI);
int checkMAIHazards908(MachineInstr *MI);
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index 404a476a3076a..2a8385df3f934 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -1345,6 +1345,10 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
bool hasVALUReadSGPRHazard() const { return GFX12Insts && !GFX1250Insts; }
+ bool setRegModeNeedsVNOPs() const {
+ return GFX1250Insts && getGeneration() == GFX12;
+ }
+
/// Return if operations acting on VGPR tuples require even alignment.
bool needsAlignedVGPRs() const { return GFX90AInsts || GFX1250Insts; }
diff --git a/llvm/test/CodeGen/AMDGPU/hazards-gfx1250.mir b/llvm/test/CodeGen/AMDGPU/hazards-gfx1250.mir
index f4596b0832d97..170478539d8a9 100644
--- a/llvm/test/CodeGen/AMDGPU/hazards-gfx1250.mir
+++ b/llvm/test/CodeGen/AMDGPU/hazards-gfx1250.mir
@@ -493,3 +493,57 @@ body: |
liveins: $vgpr0
$vgpr0 = V_ADD_U32_e32 $src_flat_scratch_base_lo, $vgpr0, implicit $exec
...
+
+---
+name: s_setreg_b32_hwreg_mode
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $sgpr0
+ ; GCN-LABEL: name: s_setreg_b32_hwreg_mode
+ ; GCN: liveins: $sgpr0
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: V_NOP_e32 implicit $exec
+ ; GCN-NEXT: V_NOP_e32 implicit $exec
+ ; GCN-NEXT: S_SETREG_B32 $sgpr0, 1, implicit-def $mode, implicit $mode
+ S_SETREG_B32 $sgpr0, 1, implicit-def $mode, implicit $mode
+...
+
+---
+name: s_setreg_b32_mode
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $sgpr0
+ ; GCN-LABEL: name: s_setreg_b32_mode
+ ; GCN: liveins: $sgpr0
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: V_NOP_e32 implicit $exec
+ ; GCN-NEXT: V_NOP_e32 implicit $exec
+ ; GCN-NEXT: S_SETREG_B32_mode $sgpr0, 1, implicit-def $mode, implicit $mode
+ S_SETREG_B32_mode $sgpr0, 1, implicit-def $mode, implicit $mode
+...
+
+---
+name: s_setreg_imm32_b32_hwreg_mode
+tracksRegLiveness: true
+body: |
+ bb.0:
+ ; GCN-LABEL: name: s_setreg_imm32_b32_hwreg_mode
+ ; GCN: V_NOP_e32 implicit $exec
+ ; GCN-NEXT: V_NOP_e32 implicit $exec
+ ; GCN-NEXT: S_SETREG_IMM32_B32 1, 1, implicit-def $mode, implicit $mode
+ S_SETREG_IMM32_B32 1, 1, implicit-def $mode, implicit $mode
+...
+
+---
+name: s_setreg_imm32_b32_mode
+tracksRegLiveness: true
+body: |
+ bb.0:
+ ; GCN-LABEL: name: s_setreg_imm32_b32_mode
+ ; GCN: V_NOP_e32 implicit $exec
+ ; GCN-NEXT: V_NOP_e32 implicit $exec
+ ; GCN-NEXT: S_SETREG_IMM32_B32_mode 1, 1, implicit-def $mode, implicit $mode
+ S_SETREG_IMM32_B32_mode 1, 1, implicit-def $mode, implicit $mode
+...
>From df0ab0abe2132a729ec1ad18f20faa9b804f0a6f Mon Sep 17 00:00:00 2001
From: Stanislav Mekhanoshin <Stanislav.Mekhanoshin at amd.com>
Date: Fri, 15 Aug 2025 14:10:55 -0700
Subject: [PATCH 2/3] [AMDGPU] Update GCNHazardRecognizer's understanding of
gfx12 waitcount instructions
This simply updates the pass's cognizance of these instructions, and for the
most part the hazards where they might be encountered do not exist for gfx12.
Nonetheless, encountering them has to be checked for as doing so would indicate
a compiler error.
Co-authored-by: Stephen Thomas <Stephen.Thomas at amd.com>
---
.../lib/Target/AMDGPU/GCNHazardRecognizer.cpp | 19 +++++++++++++++++++
1 file changed, 19 insertions(+)
diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index c1cca063aac6f..fa3ca27a5f47c 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -1357,6 +1357,16 @@ bool GCNHazardRecognizer::fixSMEMtoVectorWriteHazards(MachineInstr *MI) {
// DsCnt corresponds to LGKMCnt here.
return (Decoded.DsCnt == 0);
}
+ case AMDGPU::S_WAIT_STORECNT:
+ case AMDGPU::S_WAIT_STORECNT_DSCNT:
+ case AMDGPU::S_WAIT_LOADCNT:
+ case AMDGPU::S_WAIT_LOADCNT_DSCNT:
+ case AMDGPU::S_WAIT_SAMPLECNT:
+ case AMDGPU::S_WAIT_BVHCNT:
+ case AMDGPU::S_WAIT_DSCNT:
+ case AMDGPU::S_WAIT_EXPCNT:
+ case AMDGPU::S_WAIT_KMCNT:
+ llvm_unreachable("unexpected wait count instruction");
default:
// SOPP instructions cannot mitigate the hazard.
if (TII->isSOPP(MI))
@@ -2254,6 +2264,15 @@ int GCNHazardRecognizer::checkFPAtomicToDenormModeHazard(MachineInstr *MI) {
case AMDGPU::S_WAITCNT_EXPCNT:
case AMDGPU::S_WAITCNT_LGKMCNT:
case AMDGPU::S_WAIT_IDLE:
+ case AMDGPU::S_WAIT_LOADCNT:
+ case AMDGPU::S_WAIT_LOADCNT_DSCNT:
+ case AMDGPU::S_WAIT_SAMPLECNT:
+ case AMDGPU::S_WAIT_BVHCNT:
+ case AMDGPU::S_WAIT_STORECNT:
+ case AMDGPU::S_WAIT_STORECNT_DSCNT:
+ case AMDGPU::S_WAIT_EXPCNT:
+ case AMDGPU::S_WAIT_DSCNT:
+ case AMDGPU::S_WAIT_KMCNT:
return true;
default:
break;
>From 4ee7e8bf3a0cd4036b601a12bbb5bba61deda993 Mon Sep 17 00:00:00 2001
From: Stanislav Mekhanoshin <Stanislav.Mekhanoshin at amd.com>
Date: Fri, 15 Aug 2025 14:18:56 -0700
Subject: [PATCH 3/3] [AMDGPU] Use encodeFieldVaVdst in hazard recognizer.
NFCI.
Co-authored-by: Stephen Thomas <Stephen.Thomas at amd.com>
---
llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index fa3ca27a5f47c..49a681efc79c7 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -1749,7 +1749,7 @@ bool GCNHazardRecognizer::fixVALUPartialForwardingHazard(MachineInstr *MI) {
BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
TII.get(AMDGPU::S_WAITCNT_DEPCTR))
- .addImm(0x0fff);
+ .addImm(AMDGPU::DepCtr::encodeFieldVaVdst(0));
return true;
}
@@ -1799,7 +1799,7 @@ bool GCNHazardRecognizer::fixVALUTransUseHazard(MachineInstr *MI) {
if (SIInstrInfo::isVMEM(I) || SIInstrInfo::isDS(I) ||
SIInstrInfo::isEXP(I) ||
(I.getOpcode() == AMDGPU::S_WAITCNT_DEPCTR &&
- I.getOperand(0).getImm() == 0x0fff))
+ AMDGPU::DepCtr::decodeFieldVaVdst(I.getOperand(0).getImm()) == 0))
return HazardExpired;
// Track registers writes
More information about the llvm-commits
mailing list