[llvm] [AMDGPU] Mitigate DS_ATOMIC_ASYNC_BARRIER_ARRIVE_B64 bug (PR #153872)
Stanislav Mekhanoshin via llvm-commits
llvm-commits at lists.llvm.org
Fri Aug 15 13:39:45 PDT 2025
https://github.com/rampitec created https://github.com/llvm/llvm-project/pull/153872
DS_ATOMIC_ASYNC_BARRIER_ARRIVE_B64 shall not be claused (we already do
not clause DS instructions) and needs waits before and after.
>From 158e84e98505d8193a63bc1339e4131c24e36eff Mon Sep 17 00:00:00 2001
From: Stanislav Mekhanoshin <Stanislav.Mekhanoshin at amd.com>
Date: Fri, 15 Aug 2025 13:38:21 -0700
Subject: [PATCH] [AMDGPU] Mitigate DS_ATOMIC_ASYNC_BARRIER_ARRIVE_B64 bug
DS_ATOMIC_ASYNC_BARRIER_ARRIVE_B64 shall not be claused (we already do
not clause DS instructions) and needs waits before and after.
---
llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp | 17 +++++++++++++++++
llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h | 1 +
llvm/lib/Target/AMDGPU/GCNSubtarget.h | 6 ++++++
llvm/test/CodeGen/AMDGPU/hazards-gfx1250.mir | 17 +++++++++++++++++
4 files changed, 41 insertions(+)
create mode 100644 llvm/test/CodeGen/AMDGPU/hazards-gfx1250.mir
diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index 1f291ce5c5342..5e297c7540c48 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -1202,6 +1202,8 @@ void GCNHazardRecognizer::fixHazards(MachineInstr *MI) {
fixRequiredExportPriority(MI);
if (ST.requiresWaitIdleBeforeGetReg())
fixGetRegWaitIdle(MI);
+ if (ST.hasDsAtomicAsyncBarrierArriveB64PipeBug())
+ fixDsAtomicAsyncBarrierArriveB64(MI);
}
static bool isVCmpXWritesExec(const SIInstrInfo &TII, const SIRegisterInfo &TRI,
@@ -3451,3 +3453,18 @@ bool GCNHazardRecognizer::fixGetRegWaitIdle(MachineInstr *MI) {
.addImm(0);
return true;
}
+
+bool GCNHazardRecognizer::fixDsAtomicAsyncBarrierArriveB64(MachineInstr *MI) {
+ if (MI->getOpcode() != AMDGPU::DS_ATOMIC_ASYNC_BARRIER_ARRIVE_B64)
+ return false; // Any other opcode: nothing is inserted.
+
+ const SIInstrInfo *TII = ST.getInstrInfo();
+ BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
+ TII->get(AMDGPU::S_WAITCNT_DEPCTR))
+ .addImm(0xFFE3); // Wait emitted immediately before MI.
+ BuildMI(*MI->getParent(), std::next(MI->getIterator()), MI->getDebugLoc(),
+ TII->get(AMDGPU::S_WAITCNT_DEPCTR))
+ .addImm(0xFFE3); // Wait emitted immediately after MI.
+
+ return true; // Both waits were added around MI.
+}
diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
index a078f50219c3c..890d5cbd154d6 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
@@ -111,6 +111,7 @@ class GCNHazardRecognizer final : public ScheduleHazardRecognizer {
bool fixVALUMaskWriteHazard(MachineInstr *MI);
bool fixRequiredExportPriority(MachineInstr *MI);
bool fixGetRegWaitIdle(MachineInstr *MI);
+ bool fixDsAtomicAsyncBarrierArriveB64(MachineInstr *MI);
int checkMAIHazards(MachineInstr *MI);
int checkMAIHazards908(MachineInstr *MI);
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index 92de024cc6fcc..436f5c0801fad 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -1815,6 +1815,12 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
// to the same register.
return false;
}
+
+ // DS_ATOMIC_ASYNC_BARRIER_ARRIVE_B64 shall not be claused with anything
+ // and must be surrounded by S_WAIT_ALU(0xFFE3).
+ bool hasDsAtomicAsyncBarrierArriveB64PipeBug() const {
+ return getGeneration() == GFX12;
+ }
};
class GCNUserSGPRUsageInfo {
diff --git a/llvm/test/CodeGen/AMDGPU/hazards-gfx1250.mir b/llvm/test/CodeGen/AMDGPU/hazards-gfx1250.mir
new file mode 100644
index 0000000000000..f1dbabf1e1a83
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/hazards-gfx1250.mir
@@ -0,0 +1,17 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1250 -run-pass=post-RA-hazard-rec %s -o - | FileCheck -check-prefix=GCN %s
+
+---
+name: ds_atomic_async_barrier_arrive_b64
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+ ; GCN-LABEL: name: ds_atomic_async_barrier_arrive_b64
+ ; GCN: liveins: $vgpr0, $vgpr1
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: S_WAITCNT_DEPCTR 65507
+ ; GCN-NEXT: DS_ATOMIC_ASYNC_BARRIER_ARRIVE_B64 $vgpr1, 0, 0, implicit-def $asynccnt, implicit $asynccnt, implicit $exec
+ ; GCN-NEXT: S_WAITCNT_DEPCTR 65507
+ DS_ATOMIC_ASYNC_BARRIER_ARRIVE_B64 $vgpr1, 0, 0, implicit-def $asynccnt, implicit $asynccnt, implicit $exec
+...
More information about the llvm-commits
mailing list