[llvm] AMDGPU: Handle the co-execution hazards for TRANS for gfx1250 (PR #149024)
Changpeng Fang via llvm-commits
llvm-commits at lists.llvm.org
Tue Jul 15 23:40:15 PDT 2025
https://github.com/changpeng created https://github.com/llvm/llvm-project/pull/149024
For co-execution of TRANS ops, the requirement is that one independent
op or V_NOP must follow the TRANS op (since TRANS takes 2 cycles) before
its sources can be overwritten or its output can be used.
From 463f2007081ce3d6b87bcc683cc7ab563f86b44c Mon Sep 17 00:00:00 2001
From: Changpeng Fang <changpeng.fang at amd.com>
Date: Tue, 15 Jul 2025 23:35:59 -0700
Subject: [PATCH] AMDGPU: Handle the co-execution hazards for TRANS for gfx1250
For co-execution of TRANS ops, the requirement is that one independent
op or V_NOP must follow the TRANS op (since TRANS takes 2 cycles) before
its sources can be overwritten or its output can be used.
---
.../lib/Target/AMDGPU/GCNHazardRecognizer.cpp | 46 ++++++
llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h | 1 +
.../AMDGPU/trans-coexecution-hazard.mir | 132 ++++++++++++++++++
3 files changed, 179 insertions(+)
create mode 100644 llvm/test/CodeGen/AMDGPU/trans-coexecution-hazard.mir
diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index 0976fccf78d86..bbed828b4fed3 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -1189,6 +1189,7 @@ void GCNHazardRecognizer::fixHazards(MachineInstr *MI) {
}
fixVALUPartialForwardingHazard(MI);
fixVALUTransUseHazard(MI);
+ fixVALUTransCoexecutionHazards(MI);
fixWMMAHazards(MI);
fixShift64HighRegBug(MI);
fixVALUMaskWriteHazard(MI);
@@ -1809,6 +1810,51 @@ bool GCNHazardRecognizer::fixVALUTransUseHazard(MachineInstr *MI) {
return true;
}
+bool GCNHazardRecognizer::fixVALUTransCoexecutionHazards(MachineInstr *MI) {
+ if (!AMDGPU::isGFX1250(ST) || // Coexecution disabled.
+ !SIInstrInfo::isVALU(*MI) || SIInstrInfo::isTRANS(*MI))
+ return false;
+ // MI is a non-TRANS VALU on gfx1250; check it against an unseparated TRANS op.
+ const SIInstrInfo *TII = ST.getInstrInfo();
+ const SIRegisterInfo *TRI = ST.getRegisterInfo();
+ // True when I is a TRANS op that MI depends on (RAW) or clobbers (WAR).
+ auto IsTransHazardFn = [MI, TII, TRI](const MachineInstr &I) {
+ if (!SIInstrInfo::isTRANS(I))
+ return false;
+ // NOTE(review): assumes every TRANS op has a vdst operand (unchecked).
+ // RAW: Trans(I) writes, VALU(MI) reads.
+ Register TransDef = TII->getNamedOperand(I, AMDGPU::OpName::vdst)->getReg();
+ for (const MachineOperand &ValuUse : MI->explicit_uses()) {
+ if (ValuUse.isReg() && TRI->regsOverlap(TransDef, ValuUse.getReg()))
+ return true;
+ }
+ // No register vdst on MI means there is nothing to test for the WAR case.
+ auto *ValuDst = TII->getNamedOperand(*MI, AMDGPU::OpName::vdst);
+ if (!ValuDst || !ValuDst->isReg())
+ return false;
+
+ // WAR: Trans(I) reads, VALU(MI) writes.
+ Register ValuDef = ValuDst->getReg();
+ for (const MachineOperand &TransUse : I.explicit_uses()) {
+ if (TransUse.isReg() && TRI->regsOverlap(ValuDef, TransUse.getReg()))
+ return true;
+ }
+
+ return false;
+ };
+ // Any VALU ends the search: one VALU after the TRANS op covers the hazard.
+ auto IsExpiredFn = [](const MachineInstr &I, int) {
+ return SIInstrInfo::isVALU(I);
+ };
+ // NOTE(review): presumably int max means the search expired with no hazard.
+ const int HasVALU = std::numeric_limits<int>::max();
+ if (::getWaitStatesSince(IsTransHazardFn, MI, IsExpiredFn) == HasVALU)
+ return false;
+ // Hazard found: separate MI from the TRANS op with a V_NOP placed before MI.
+ BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), TII->get(AMDGPU::V_NOP_e32));
+ return true;
+}
+
bool GCNHazardRecognizer::fixWMMAHazards(MachineInstr *MI) {
if (!SIInstrInfo::isWMMA(*MI) && !SIInstrInfo::isSWMMAC(*MI))
return false;
diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
index bbc55851bf967..ef6ddd874f58a 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
@@ -104,6 +104,7 @@ class GCNHazardRecognizer final : public ScheduleHazardRecognizer {
bool fixLdsDirectVMEMHazard(MachineInstr *MI);
bool fixVALUPartialForwardingHazard(MachineInstr *MI);
bool fixVALUTransUseHazard(MachineInstr *MI);
+ bool fixVALUTransCoexecutionHazards(MachineInstr *MI);
bool fixWMMAHazards(MachineInstr *MI);
bool fixShift64HighRegBug(MachineInstr *MI);
bool fixVALUMaskWriteHazard(MachineInstr *MI);
diff --git a/llvm/test/CodeGen/AMDGPU/trans-coexecution-hazard.mir b/llvm/test/CodeGen/AMDGPU/trans-coexecution-hazard.mir
new file mode 100644
index 0000000000000..fa27d689dd8dd
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/trans-coexecution-hazard.mir
@@ -0,0 +1,132 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1250 -run-pass post-RA-hazard-rec -o - %s | FileCheck -check-prefixes=GCN,GFX1250 %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -run-pass post-RA-hazard-rec -o - %s | FileCheck -check-prefixes=GCN,GFX1200 %s
+
+---
+name: trans_writes_valu_reads_hazard
+body: |
+ bb.0:
+ ; GFX1250-LABEL: name: trans_writes_valu_reads_hazard
+ ; GFX1250: $vgpr1 = V_SQRT_F32_e32 $vgpr0, implicit $mode, implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: $vgpr3 = V_ADD_F32_e32 $vgpr1, $vgpr2, implicit $mode, implicit $exec
+ ;
+ ; GFX1200-LABEL: name: trans_writes_valu_reads_hazard
+ ; GFX1200: $vgpr1 = V_SQRT_F32_e32 $vgpr0, implicit $mode, implicit $exec
+ ; GFX1200-NEXT: $vgpr3 = V_ADD_F32_e32 $vgpr1, $vgpr2, implicit $mode, implicit $exec
+ $vgpr1 = V_SQRT_F32_e32 $vgpr0, implicit $mode, implicit $exec
+ $vgpr3 = V_ADD_F32_e32 $vgpr1, $vgpr2, implicit $mode, implicit $exec
+...
+
+---
+name: trans_writes_valu_valu_reads_hazard_covered
+body: |
+ bb.0:
+ ; GCN-LABEL: name: trans_writes_valu_valu_reads_hazard_covered
+ ; GCN: $vgpr1 = V_SQRT_F32_e32 $vgpr0, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr2 = V_ADD_F32_e32 $vgpr2, $vgpr3, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr4 = V_ADD_F32_e32 $vgpr1, $vgpr2, implicit $mode, implicit $exec
+ $vgpr1 = V_SQRT_F32_e32 $vgpr0, implicit $mode, implicit $exec
+ $vgpr2 = V_ADD_F32_e32 $vgpr2, $vgpr3, implicit $mode, implicit $exec
+ $vgpr4 = V_ADD_F32_e32 $vgpr1, $vgpr2, implicit $mode, implicit $exec
+...
+
+---
+name: trans_writes_salu_valu_reads_hazard
+body: |
+ bb.0:
+ ; GFX1250-LABEL: name: trans_writes_salu_valu_reads_hazard
+ ; GFX1250: $vgpr1 = V_SQRT_F32_e32 $vgpr0, implicit $mode, implicit $exec
+ ; GFX1250-NEXT: $sgpr2 = S_ADD_U32 $sgpr0, $sgpr1, implicit-def $scc
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: $vgpr4 = V_ADD_F32_e32 $vgpr1, $vgpr2, implicit $mode, implicit $exec
+ ;
+ ; GFX1200-LABEL: name: trans_writes_salu_valu_reads_hazard
+ ; GFX1200: $vgpr1 = V_SQRT_F32_e32 $vgpr0, implicit $mode, implicit $exec
+ ; GFX1200-NEXT: $sgpr2 = S_ADD_U32 $sgpr0, $sgpr1, implicit-def $scc
+ ; GFX1200-NEXT: $vgpr4 = V_ADD_F32_e32 $vgpr1, $vgpr2, implicit $mode, implicit $exec
+ $vgpr1 = V_SQRT_F32_e32 $vgpr0, implicit $mode, implicit $exec
+ $sgpr2 = S_ADD_U32 $sgpr0, $sgpr1, implicit-def $scc
+ $vgpr4 = V_ADD_F32_e32 $vgpr1, $vgpr2, implicit $mode, implicit $exec
+...
+
+---
+name: trans_no_hazard
+body: |
+ bb.0:
+ ; GCN-LABEL: name: trans_no_hazard
+ ; GCN: $vgpr1 = V_SQRT_F32_e32 $vgpr0, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr3 = V_ADD_F32_e32 $vgpr0, $vgpr2, implicit $mode, implicit $exec
+ $vgpr1 = V_SQRT_F32_e32 $vgpr0, implicit $mode, implicit $exec
+ $vgpr3 = V_ADD_F32_e32 $vgpr0, $vgpr2, implicit $mode, implicit $exec
+...
+
+---
+name: trans_reads_valu_writes_hazard
+body: |
+ bb.0:
+ ; GFX1250-LABEL: name: trans_reads_valu_writes_hazard
+ ; GFX1250: $vgpr1 = V_COS_F32_e32 $vgpr0, implicit $mode, implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: $vgpr0 = V_ADD_F32_e32 $vgpr2, $vgpr3, implicit $mode, implicit $exec
+ ;
+ ; GFX1200-LABEL: name: trans_reads_valu_writes_hazard
+ ; GFX1200: $vgpr1 = V_COS_F32_e32 $vgpr0, implicit $mode, implicit $exec
+ ; GFX1200-NEXT: $vgpr0 = V_ADD_F32_e32 $vgpr2, $vgpr3, implicit $mode, implicit $exec
+ $vgpr1 = V_COS_F32_e32 $vgpr0, implicit $mode, implicit $exec
+ $vgpr0 = V_ADD_F32_e32 $vgpr2, $vgpr3, implicit $mode, implicit $exec
+...
+
+---
+name: trans_reads_valu_valu_writes_hazard_covered
+body: |
+ bb.0:
+ ; GCN-LABEL: name: trans_reads_valu_valu_writes_hazard_covered
+ ; GCN: $vgpr1 = V_COS_F32_e32 $vgpr0, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr2 = V_ADD_F32_e32 $vgpr2, $vgpr3, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr0 = V_ADD_F32_e32 $vgpr4, $vgpr2, implicit $mode, implicit $exec
+ $vgpr1 = V_COS_F32_e32 $vgpr0, implicit $mode, implicit $exec
+ $vgpr2 = V_ADD_F32_e32 $vgpr2, $vgpr3, implicit $mode, implicit $exec
+ $vgpr0 = V_ADD_F32_e32 $vgpr4, $vgpr2, implicit $mode, implicit $exec
+...
+
+---
+name: trans_reads__salu_valu_writes_hazard
+body: |
+ bb.0:
+ ; GFX1250-LABEL: name: trans_reads__salu_valu_writes_hazard
+ ; GFX1250: $vgpr1 = V_COS_F32_e32 $vgpr0, implicit $mode, implicit $exec
+ ; GFX1250-NEXT: $sgpr2 = S_ADD_U32 $sgpr0, $sgpr1, implicit-def $scc
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: $vgpr0 = V_ADD_F32_e32 $vgpr4, $vgpr2, implicit $mode, implicit $exec
+ ;
+ ; GFX1200-LABEL: name: trans_reads__salu_valu_writes_hazard
+ ; GFX1200: $vgpr1 = V_COS_F32_e32 $vgpr0, implicit $mode, implicit $exec
+ ; GFX1200-NEXT: $sgpr2 = S_ADD_U32 $sgpr0, $sgpr1, implicit-def $scc
+ ; GFX1200-NEXT: $vgpr0 = V_ADD_F32_e32 $vgpr4, $vgpr2, implicit $mode, implicit $exec
+ $vgpr1 = V_COS_F32_e32 $vgpr0, implicit $mode, implicit $exec
+ $sgpr2 = S_ADD_U32 $sgpr0, $sgpr1, implicit-def $scc
+ $vgpr0 = V_ADD_F32_e32 $vgpr4, $vgpr2, implicit $mode, implicit $exec
+...
+
+---
+name: trans_writes_trans_reads_no_hazard
+body: |
+ bb.0:
+ ; GCN-LABEL: name: trans_writes_trans_reads_no_hazard
+ ; GCN: $vgpr1 = V_COS_F32_e32 $vgpr0, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr2 = V_SQRT_F32_e32 $vgpr1, implicit $mode, implicit $exec
+ $vgpr1 = V_COS_F32_e32 $vgpr0, implicit $mode, implicit $exec
+ $vgpr2 = V_SQRT_F32_e32 $vgpr1, implicit $mode, implicit $exec
+...
+
+---
+name: trans_reads_trans_writes_no_hazard
+body: |
+ bb.0:
+ ; GCN-LABEL: name: trans_reads_trans_writes_no_hazard
+ ; GCN: $vgpr1 = V_COS_F32_e32 $vgpr0, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr0 = V_SQRT_F32_e32 $vgpr2, implicit $mode, implicit $exec
+ $vgpr1 = V_COS_F32_e32 $vgpr0, implicit $mode, implicit $exec
+ $vgpr0 = V_SQRT_F32_e32 $vgpr2, implicit $mode, implicit $exec
+...
More information about the llvm-commits
mailing list