[llvm] [AMDGPU] Mitigate DS_ATOMIC_ASYNC_BARRIER_ARRIVE_B64 bug (PR #153872)

Stanislav Mekhanoshin via llvm-commits llvm-commits at lists.llvm.org
Fri Aug 15 13:39:45 PDT 2025


https://github.com/rampitec created https://github.com/llvm/llvm-project/pull/153872

DS_ATOMIC_ASYNC_BARRIER_ARRIVE_B64 shall not be claused (we already do
not clause DS instructions) and needs waits before and after.

>From 158e84e98505d8193a63bc1339e4131c24e36eff Mon Sep 17 00:00:00 2001
From: Stanislav Mekhanoshin <Stanislav.Mekhanoshin at amd.com>
Date: Fri, 15 Aug 2025 13:38:21 -0700
Subject: [PATCH] [AMDGPU] Mitigate DS_ATOMIC_ASYNC_BARRIER_ARRIVE_B64 bug

DS_ATOMIC_ASYNC_BARRIER_ARRIVE_B64 shall not be claused (we already do
not clause DS instructions) and needs waits before and after.
---
 llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp | 17 +++++++++++++++++
 llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h   |  1 +
 llvm/lib/Target/AMDGPU/GCNSubtarget.h          |  6 ++++++
 llvm/test/CodeGen/AMDGPU/hazards-gfx1250.mir   | 17 +++++++++++++++++
 4 files changed, 41 insertions(+)
 create mode 100644 llvm/test/CodeGen/AMDGPU/hazards-gfx1250.mir

diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index 1f291ce5c5342..5e297c7540c48 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -1202,6 +1202,8 @@ void GCNHazardRecognizer::fixHazards(MachineInstr *MI) {
   fixRequiredExportPriority(MI);
   if (ST.requiresWaitIdleBeforeGetReg())
     fixGetRegWaitIdle(MI);
+  if (ST.hasDsAtomicAsyncBarrierArriveB64PipeBug())
+    fixDsAtomicAsyncBarrierArriveB64(MI);
 }
 
 static bool isVCmpXWritesExec(const SIInstrInfo &TII, const SIRegisterInfo &TRI,
@@ -3451,3 +3453,18 @@ bool GCNHazardRecognizer::fixGetRegWaitIdle(MachineInstr *MI) {
       .addImm(0);
   return true;
 }
+
+bool GCNHazardRecognizer::fixDsAtomicAsyncBarrierArriveB64(MachineInstr *MI) {
+  if (MI->getOpcode() != AMDGPU::DS_ATOMIC_ASYNC_BARRIER_ARRIVE_B64)
+    return false;
+
+  const SIInstrInfo *TII = ST.getInstrInfo();
+  BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
+          TII->get(AMDGPU::S_WAITCNT_DEPCTR))
+      .addImm(0xFFE3);
+  BuildMI(*MI->getParent(), std::next(MI->getIterator()), MI->getDebugLoc(),
+          TII->get(AMDGPU::S_WAITCNT_DEPCTR))
+      .addImm(0xFFE3);
+
+  return true;
+}
diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
index a078f50219c3c..890d5cbd154d6 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
@@ -111,6 +111,7 @@ class GCNHazardRecognizer final : public ScheduleHazardRecognizer {
   bool fixVALUMaskWriteHazard(MachineInstr *MI);
   bool fixRequiredExportPriority(MachineInstr *MI);
   bool fixGetRegWaitIdle(MachineInstr *MI);
+  bool fixDsAtomicAsyncBarrierArriveB64(MachineInstr *MI);
 
   int checkMAIHazards(MachineInstr *MI);
   int checkMAIHazards908(MachineInstr *MI);
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index 92de024cc6fcc..436f5c0801fad 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -1815,6 +1815,12 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
     // to the same register.
     return false;
   }
+
+  // DS_ATOMIC_ASYNC_BARRIER_ARRIVE_B64 shall not be claused with anything
+  // and must be surrounded by S_WAIT_ALU(0xFFE3).
+  bool hasDsAtomicAsyncBarrierArriveB64PipeBug() const {
+    return getGeneration() == GFX12;
+  }
 };
 
 class GCNUserSGPRUsageInfo {
diff --git a/llvm/test/CodeGen/AMDGPU/hazards-gfx1250.mir b/llvm/test/CodeGen/AMDGPU/hazards-gfx1250.mir
new file mode 100644
index 0000000000000..f1dbabf1e1a83
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/hazards-gfx1250.mir
@@ -0,0 +1,17 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1250 -run-pass=post-RA-hazard-rec %s -o - | FileCheck -check-prefix=GCN %s
+
+---
+name: ds_atomic_async_barrier_arrive_b64
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+    ; GCN-LABEL: name: ds_atomic_async_barrier_arrive_b64
+    ; GCN: liveins: $vgpr0, $vgpr1
+    ; GCN-NEXT: {{  $}}
+    ; GCN-NEXT: S_WAITCNT_DEPCTR 65507
+    ; GCN-NEXT: DS_ATOMIC_ASYNC_BARRIER_ARRIVE_B64 $vgpr1, 0, 0, implicit-def $asynccnt, implicit $asynccnt, implicit $exec
+    ; GCN-NEXT: S_WAITCNT_DEPCTR 65507
+    DS_ATOMIC_ASYNC_BARRIER_ARRIVE_B64 $vgpr1, 0, 0, implicit-def $asynccnt, implicit $asynccnt, implicit $exec
+...



More information about the llvm-commits mailing list