[llvm] AMDGPU: Handle the co-execution hazards for TRANS for gfx1250 (PR #149024)

via llvm-commits llvm-commits at lists.llvm.org
Tue Jul 15 23:40:47 PDT 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-amdgpu

Author: Changpeng Fang (changpeng)

<details>
<summary>Changes</summary>

  For the co-execution of TRANS ops, the requirement is that one independent
op or a V_NOP must follow the TRANS op (since TRANS takes 2 cycles) before
its sources can be overwritten or its output can be used.

---
Full diff: https://github.com/llvm/llvm-project/pull/149024.diff


3 Files Affected:

- (modified) llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp (+46) 
- (modified) llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h (+1) 
- (added) llvm/test/CodeGen/AMDGPU/trans-coexecution-hazard.mir (+132) 


``````````diff
diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index 0976fccf78d86..bbed828b4fed3 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -1189,6 +1189,7 @@ void GCNHazardRecognizer::fixHazards(MachineInstr *MI) {
   }
   fixVALUPartialForwardingHazard(MI);
   fixVALUTransUseHazard(MI);
+  fixVALUTransCoexecutionHazards(MI);
   fixWMMAHazards(MI);
   fixShift64HighRegBug(MI);
   fixVALUMaskWriteHazard(MI);
@@ -1809,6 +1810,51 @@ bool GCNHazardRecognizer::fixVALUTransUseHazard(MachineInstr *MI) {
   return true;
 }
 
+bool GCNHazardRecognizer::fixVALUTransCoexecutionHazards(MachineInstr *MI) {
+  if (!AMDGPU::isGFX1250(ST) || // Coexecution disabled.
+      !SIInstrInfo::isVALU(*MI) || SIInstrInfo::isTRANS(*MI))
+    return false;
+
+  const SIInstrInfo *TII = ST.getInstrInfo();
+  const SIRegisterInfo *TRI = ST.getRegisterInfo();
+
+  auto IsTransHazardFn = [MI, TII, TRI](const MachineInstr &I) {
+    if (!SIInstrInfo::isTRANS(I))
+      return false;
+
+    // RAW: Trans(I) writes, VALU(MI) reads.
+    Register TransDef = TII->getNamedOperand(I, AMDGPU::OpName::vdst)->getReg();
+    for (const MachineOperand &ValuUse : MI->explicit_uses()) {
+      if (ValuUse.isReg() && TRI->regsOverlap(TransDef, ValuUse.getReg()))
+        return true;
+    }
+
+    auto *ValuDst = TII->getNamedOperand(*MI, AMDGPU::OpName::vdst);
+    if (!ValuDst || !ValuDst->isReg())
+      return false;
+
+    // WAR: Trans(I) reads, VALU(MI) writes.
+    Register ValuDef = ValuDst->getReg();
+    for (const MachineOperand &TransUse : I.explicit_uses()) {
+      if (TransUse.isReg() && TRI->regsOverlap(ValuDef, TransUse.getReg()))
+        return true;
+    }
+
+    return false;
+  };
+
+  auto IsExpiredFn = [](const MachineInstr &I, int) {
+    return SIInstrInfo::isVALU(I);
+  };
+
+  const int HasVALU = std::numeric_limits<int>::max();
+  if (::getWaitStatesSince(IsTransHazardFn, MI, IsExpiredFn) == HasVALU)
+    return false;
+
+  BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), TII->get(AMDGPU::V_NOP_e32));
+  return true;
+}
+
 bool GCNHazardRecognizer::fixWMMAHazards(MachineInstr *MI) {
   if (!SIInstrInfo::isWMMA(*MI) && !SIInstrInfo::isSWMMAC(*MI))
     return false;
diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
index bbc55851bf967..ef6ddd874f58a 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
@@ -104,6 +104,7 @@ class GCNHazardRecognizer final : public ScheduleHazardRecognizer {
   bool fixLdsDirectVMEMHazard(MachineInstr *MI);
   bool fixVALUPartialForwardingHazard(MachineInstr *MI);
   bool fixVALUTransUseHazard(MachineInstr *MI);
+  bool fixVALUTransCoexecutionHazards(MachineInstr *MI);
   bool fixWMMAHazards(MachineInstr *MI);
   bool fixShift64HighRegBug(MachineInstr *MI);
   bool fixVALUMaskWriteHazard(MachineInstr *MI);
diff --git a/llvm/test/CodeGen/AMDGPU/trans-coexecution-hazard.mir b/llvm/test/CodeGen/AMDGPU/trans-coexecution-hazard.mir
new file mode 100644
index 0000000000000..fa27d689dd8dd
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/trans-coexecution-hazard.mir
@@ -0,0 +1,132 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1250 -run-pass post-RA-hazard-rec -o - %s | FileCheck -check-prefixes=GCN,GFX1250 %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -run-pass post-RA-hazard-rec -o - %s | FileCheck -check-prefixes=GCN,GFX1200 %s
+
+---
+name:            trans_writes_valu_reads_hazard
+body:            |
+  bb.0:
+    ; GFX1250-LABEL: name: trans_writes_valu_reads_hazard
+    ; GFX1250: $vgpr1 = V_SQRT_F32_e32 $vgpr0, implicit $mode, implicit $exec
+    ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+    ; GFX1250-NEXT: $vgpr3 = V_ADD_F32_e32 $vgpr1, $vgpr2, implicit $mode, implicit $exec
+    ;
+    ; GFX1200-LABEL: name: trans_writes_valu_reads_hazard
+    ; GFX1200: $vgpr1 = V_SQRT_F32_e32 $vgpr0, implicit $mode, implicit $exec
+    ; GFX1200-NEXT: $vgpr3 = V_ADD_F32_e32 $vgpr1, $vgpr2, implicit $mode, implicit $exec
+    $vgpr1 = V_SQRT_F32_e32 $vgpr0, implicit $mode, implicit $exec
+    $vgpr3 = V_ADD_F32_e32 $vgpr1, $vgpr2, implicit $mode, implicit $exec
+...
+
+---
+name:            trans_writes_valu_valu_reads_hazard_covered
+body:            |
+  bb.0:
+    ; GCN-LABEL: name: trans_writes_valu_valu_reads_hazard_covered
+    ; GCN: $vgpr1 = V_SQRT_F32_e32 $vgpr0, implicit $mode, implicit $exec
+    ; GCN-NEXT: $vgpr2 = V_ADD_F32_e32 $vgpr2, $vgpr3, implicit $mode, implicit $exec
+    ; GCN-NEXT: $vgpr4 = V_ADD_F32_e32 $vgpr1, $vgpr2, implicit $mode, implicit $exec
+    $vgpr1 = V_SQRT_F32_e32 $vgpr0, implicit $mode, implicit $exec
+    $vgpr2 = V_ADD_F32_e32 $vgpr2, $vgpr3, implicit $mode, implicit $exec
+    $vgpr4 = V_ADD_F32_e32 $vgpr1, $vgpr2, implicit $mode, implicit $exec
+...
+
+---
+name:            trans_writes_salu_valu_reads_hazard
+body:            |
+  bb.0:
+    ; GFX1250-LABEL: name: trans_writes_salu_valu_reads_hazard
+    ; GFX1250: $vgpr1 = V_SQRT_F32_e32 $vgpr0, implicit $mode, implicit $exec
+    ; GFX1250-NEXT: $sgpr2 = S_ADD_U32 $sgpr0, $sgpr1, implicit-def $scc
+    ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+    ; GFX1250-NEXT: $vgpr4 = V_ADD_F32_e32 $vgpr1, $vgpr2, implicit $mode, implicit $exec
+    ;
+    ; GFX1200-LABEL: name: trans_writes_salu_valu_reads_hazard
+    ; GFX1200: $vgpr1 = V_SQRT_F32_e32 $vgpr0, implicit $mode, implicit $exec
+    ; GFX1200-NEXT: $sgpr2 = S_ADD_U32 $sgpr0, $sgpr1, implicit-def $scc
+    ; GFX1200-NEXT: $vgpr4 = V_ADD_F32_e32 $vgpr1, $vgpr2, implicit $mode, implicit $exec
+    $vgpr1 = V_SQRT_F32_e32 $vgpr0, implicit $mode, implicit $exec
+    $sgpr2 = S_ADD_U32 $sgpr0, $sgpr1, implicit-def $scc
+    $vgpr4 = V_ADD_F32_e32 $vgpr1, $vgpr2, implicit $mode, implicit $exec
+...
+
+---
+name:            trans_no_hazard
+body:            |
+  bb.0:
+    ; GCN-LABEL: name: trans_no_hazard
+    ; GCN: $vgpr1 = V_SQRT_F32_e32 $vgpr0, implicit $mode, implicit $exec
+    ; GCN-NEXT: $vgpr3 = V_ADD_F32_e32 $vgpr0, $vgpr2, implicit $mode, implicit $exec
+    $vgpr1 = V_SQRT_F32_e32 $vgpr0, implicit $mode, implicit $exec
+    $vgpr3 = V_ADD_F32_e32 $vgpr0, $vgpr2, implicit $mode, implicit $exec
+...
+
+---
+name:            trans_reads_valu_writes_hazard
+body:            |
+  bb.0:
+    ; GFX1250-LABEL: name: trans_reads_valu_writes_hazard
+    ; GFX1250: $vgpr1 = V_COS_F32_e32 $vgpr0, implicit $mode, implicit $exec
+    ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+    ; GFX1250-NEXT: $vgpr0 = V_ADD_F32_e32 $vgpr2, $vgpr3, implicit $mode, implicit $exec
+    ;
+    ; GFX1200-LABEL: name: trans_reads_valu_writes_hazard
+    ; GFX1200: $vgpr1 = V_COS_F32_e32 $vgpr0, implicit $mode, implicit $exec
+    ; GFX1200-NEXT: $vgpr0 = V_ADD_F32_e32 $vgpr2, $vgpr3, implicit $mode, implicit $exec
+    $vgpr1 = V_COS_F32_e32 $vgpr0, implicit $mode, implicit $exec
+    $vgpr0 = V_ADD_F32_e32 $vgpr2, $vgpr3, implicit $mode, implicit $exec
+...
+
+---
+name:            trans_reads_valu_valu_writes_hazard_covered
+body:            |
+  bb.0:
+    ; GCN-LABEL: name: trans_reads_valu_valu_writes_hazard_covered
+    ; GCN: $vgpr1 = V_COS_F32_e32 $vgpr0, implicit $mode, implicit $exec
+    ; GCN-NEXT: $vgpr2 = V_ADD_F32_e32 $vgpr2, $vgpr3, implicit $mode, implicit $exec
+    ; GCN-NEXT: $vgpr0 = V_ADD_F32_e32 $vgpr4, $vgpr2, implicit $mode, implicit $exec
+    $vgpr1 = V_COS_F32_e32 $vgpr0, implicit $mode, implicit $exec
+    $vgpr2 = V_ADD_F32_e32 $vgpr2, $vgpr3, implicit $mode, implicit $exec
+    $vgpr0 = V_ADD_F32_e32 $vgpr4, $vgpr2, implicit $mode, implicit $exec
+...
+
+---
+name:            trans_reads__salu_valu_writes_hazard
+body:            |
+  bb.0:
+    ; GFX1250-LABEL: name: trans_reads__salu_valu_writes_hazard
+    ; GFX1250: $vgpr1 = V_COS_F32_e32 $vgpr0, implicit $mode, implicit $exec
+    ; GFX1250-NEXT: $sgpr2 = S_ADD_U32 $sgpr0, $sgpr1, implicit-def $scc
+    ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+    ; GFX1250-NEXT: $vgpr0 = V_ADD_F32_e32 $vgpr4, $vgpr2, implicit $mode, implicit $exec
+    ;
+    ; GFX1200-LABEL: name: trans_reads__salu_valu_writes_hazard
+    ; GFX1200: $vgpr1 = V_COS_F32_e32 $vgpr0, implicit $mode, implicit $exec
+    ; GFX1200-NEXT: $sgpr2 = S_ADD_U32 $sgpr0, $sgpr1, implicit-def $scc
+    ; GFX1200-NEXT: $vgpr0 = V_ADD_F32_e32 $vgpr4, $vgpr2, implicit $mode, implicit $exec
+    $vgpr1 = V_COS_F32_e32 $vgpr0, implicit $mode, implicit $exec
+    $sgpr2 = S_ADD_U32 $sgpr0, $sgpr1, implicit-def $scc
+    $vgpr0 = V_ADD_F32_e32 $vgpr4, $vgpr2, implicit $mode, implicit $exec
+...
+
+---
+name:            trans_writes_trans_reads_no_hazard
+body:            |
+  bb.0:
+    ; GCN-LABEL: name: trans_writes_trans_reads_no_hazard
+    ; GCN: $vgpr1 = V_COS_F32_e32 $vgpr0, implicit $mode, implicit $exec
+    ; GCN-NEXT: $vgpr2 = V_SQRT_F32_e32 $vgpr1, implicit $mode, implicit $exec
+    $vgpr1 = V_COS_F32_e32 $vgpr0, implicit $mode, implicit $exec
+    $vgpr2 = V_SQRT_F32_e32 $vgpr1, implicit $mode, implicit $exec
+...
+
+---
+name:            trans_reads_trans_writes_no_hazard
+body:            |
+  bb.0:
+    ; GCN-LABEL: name: trans_reads_trans_writes_no_hazard
+    ; GCN: $vgpr1 = V_COS_F32_e32 $vgpr0, implicit $mode, implicit $exec
+    ; GCN-NEXT: $vgpr0 = V_SQRT_F32_e32 $vgpr2, implicit $mode, implicit $exec
+    $vgpr1 = V_COS_F32_e32 $vgpr0, implicit $mode, implicit $exec
+    $vgpr0 = V_SQRT_F32_e32 $vgpr2, implicit $mode, implicit $exec
+...

``````````

</details>


https://github.com/llvm/llvm-project/pull/149024


More information about the llvm-commits mailing list