[llvm] [AMDGPU] Insert waitcnt for non-global fence release in GFX12 (PR #159282)
Fabian Ritter via llvm-commits
llvm-commits at lists.llvm.org
Wed Sep 17 01:21:41 PDT 2025
https://github.com/ritter-x2a created https://github.com/llvm/llvm-project/pull/159282
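A release fence could be followed by a barrier, so it should wait for
the relevant memory accesses to complete, even if its MMRA metadata
(!"amdgpu-synchronize-as") limits it to the local (LDS) address space.
So far, on GFX12 that wait was skipped for release fences whose address
spaces did not include the global address space.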
Fixes SWDEV-554932.
From 62973051f62549bb023fa9b48bc5ddf6191d3db6 Mon Sep 17 00:00:00 2001
From: Fabian Ritter <fabian.ritter at amd.com>
Date: Wed, 17 Sep 2025 03:55:18 -0400
Subject: [PATCH] [AMDGPU] Insert waitcnt for non-global fence release in GFX12
A fence release could be followed by a barrier, so it should wait for
the relevant memory accesses to complete, even if it is mmra-limited to
LDS. So far, that would be skipped for non-global fence releases.
Fixes SWDEV-554932.
---
llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp | 3 +-
.../AMDGPU/memory-legalizer-barriers-mmra.ll | 122 ++++++++++++++++++
.../memory-legalizer-fence-mmra-local.ll | 51 ++++++++
3 files changed, 174 insertions(+), 2 deletions(-)
create mode 100644 llvm/test/CodeGen/AMDGPU/memory-legalizer-barriers-mmra.ll
diff --git a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
index c501ebba0c7ed..beeddc58be193 100644
--- a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
@@ -2522,8 +2522,7 @@ bool SIGfx12CacheControl::insertRelease(MachineBasicBlock::iterator &MI,
// sequentially consistent, and no other thread can access scratch
// memory.
- // Other address spaces do not have a cache.
- if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) == SIAtomicAddrSpace::NONE)
+ if (AddrSpace == SIAtomicAddrSpace::SCRATCH)
return false;
if (Pos == Position::AFTER)
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-barriers-mmra.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-barriers-mmra.ll
new file mode 100644
index 0000000000000..1e6dc4e06ef4d
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-barriers-mmra.ll
@@ -0,0 +1,122 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 < %s | FileCheck --check-prefixes=GFX10-WGP %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck --check-prefixes=GFX11-WGP %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GFX12-WGP %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GFX1250 %s
+
+
+define float @test_barrier_workgroup_local_mmra(ptr addrspace(3) noundef %x, ptr addrspace(3) noundef %y, float %val) {
+; GFX10-WGP-LABEL: test_barrier_workgroup_local_mmra:
+; GFX10-WGP: ; %bb.0:
+; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-WGP-NEXT: ds_write_b32 v0, v2
+; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-WGP-NEXT: s_barrier
+; GFX10-WGP-NEXT: ds_read_b32 v0, v1
+; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-WGP-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-WGP-LABEL: test_barrier_workgroup_local_mmra:
+; GFX11-WGP: ; %bb.0:
+; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-WGP-NEXT: ds_store_b32 v0, v2
+; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-WGP-NEXT: s_barrier
+; GFX11-WGP-NEXT: ds_load_b32 v0, v1
+; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-WGP-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-WGP-LABEL: test_barrier_workgroup_local_mmra:
+; GFX12-WGP: ; %bb.0:
+; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-WGP-NEXT: s_wait_expcnt 0x0
+; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
+; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
+; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
+; GFX12-WGP-NEXT: ds_store_b32 v0, v2
+; GFX12-WGP-NEXT: s_wait_dscnt 0x0
+; GFX12-WGP-NEXT: s_barrier_signal -1
+; GFX12-WGP-NEXT: s_barrier_wait -1
+; GFX12-WGP-NEXT: ds_load_b32 v0, v1
+; GFX12-WGP-NEXT: s_wait_dscnt 0x0
+; GFX12-WGP-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-LABEL: test_barrier_workgroup_local_mmra:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v2
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_barrier_signal -1
+; GFX1250-NEXT: s_barrier_wait -1
+; GFX1250-NEXT: ds_load_b32 v0, v1
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+ store float %val, ptr addrspace(3) %x
+ fence syncscope("workgroup") release, !mmra !0
+ tail call void @llvm.amdgcn.s.barrier()
+ fence syncscope("workgroup") acquire, !mmra !0
+ %ret = load float, ptr addrspace(3) %y
+ ret float %ret
+}
+
+define float @test_barrier_workgroup_global_mmra(ptr addrspace(1) noundef %x, ptr addrspace(1) noundef %y, float %val) {
+; GFX10-WGP-LABEL: test_barrier_workgroup_global_mmra:
+; GFX10-WGP: ; %bb.0:
+; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-WGP-NEXT: global_store_dword v[0:1], v4, off
+; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-WGP-NEXT: s_barrier
+; GFX10-WGP-NEXT: buffer_gl0_inv
+; GFX10-WGP-NEXT: global_load_dword v0, v[2:3], off
+; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
+; GFX10-WGP-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-WGP-LABEL: test_barrier_workgroup_global_mmra:
+; GFX11-WGP: ; %bb.0:
+; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-WGP-NEXT: global_store_b32 v[0:1], v4, off
+; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-WGP-NEXT: s_barrier
+; GFX11-WGP-NEXT: buffer_gl0_inv
+; GFX11-WGP-NEXT: global_load_b32 v0, v[2:3], off
+; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
+; GFX11-WGP-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-WGP-LABEL: test_barrier_workgroup_global_mmra:
+; GFX12-WGP: ; %bb.0:
+; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-WGP-NEXT: s_wait_expcnt 0x0
+; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
+; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
+; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
+; GFX12-WGP-NEXT: global_store_b32 v[0:1], v4, off
+; GFX12-WGP-NEXT: s_wait_storecnt 0x0
+; GFX12-WGP-NEXT: s_barrier_signal -1
+; GFX12-WGP-NEXT: s_barrier_wait -1
+; GFX12-WGP-NEXT: global_inv scope:SCOPE_SE
+; GFX12-WGP-NEXT: global_load_b32 v0, v[2:3], off
+; GFX12-WGP-NEXT: s_wait_loadcnt 0x0
+; GFX12-WGP-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-LABEL: test_barrier_workgroup_global_mmra:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: global_store_b32 v[0:1], v4, off
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_barrier_signal -1
+; GFX1250-NEXT: s_barrier_wait -1
+; GFX1250-NEXT: global_load_b32 v0, v[2:3], off
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+ store float %val, ptr addrspace(1) %x
+ fence syncscope("workgroup") release, !mmra !1
+ tail call void @llvm.amdgcn.s.barrier()
+ fence syncscope("workgroup") acquire, !mmra !1
+ %ret = load float, ptr addrspace(1) %y
+ ret float %ret
+}
+
+!0 = !{!"amdgpu-synchronize-as", !"local"}
+!1 = !{!"amdgpu-synchronize-as", !"global"}
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence-mmra-local.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence-mmra-local.ll
index cc42428e1aa06..682c1f6519959 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence-mmra-local.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence-mmra-local.ll
@@ -143,14 +143,17 @@ define amdgpu_kernel void @workgroup_release_fence() {
;
; GFX12-WGP-LABEL: workgroup_release_fence:
; GFX12-WGP: ; %bb.0: ; %entry
+; GFX12-WGP-NEXT: s_wait_dscnt 0x0
; GFX12-WGP-NEXT: s_endpgm
;
; GFX12-CU-LABEL: workgroup_release_fence:
; GFX12-CU: ; %bb.0: ; %entry
+; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-LABEL: workgroup_release_fence:
; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("workgroup") release, !mmra !{!"amdgpu-synchronize-as", !"local"}
@@ -213,14 +216,17 @@ define amdgpu_kernel void @workgroup_acq_rel_fence() {
;
; GFX12-WGP-LABEL: workgroup_acq_rel_fence:
; GFX12-WGP: ; %bb.0: ; %entry
+; GFX12-WGP-NEXT: s_wait_dscnt 0x0
; GFX12-WGP-NEXT: s_endpgm
;
; GFX12-CU-LABEL: workgroup_acq_rel_fence:
; GFX12-CU: ; %bb.0: ; %entry
+; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-LABEL: workgroup_acq_rel_fence:
; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("workgroup") acq_rel, !mmra !{!"amdgpu-synchronize-as", !"local"}
@@ -283,14 +289,17 @@ define amdgpu_kernel void @workgroup_seq_cst_fence() {
;
; GFX12-WGP-LABEL: workgroup_seq_cst_fence:
; GFX12-WGP: ; %bb.0: ; %entry
+; GFX12-WGP-NEXT: s_wait_dscnt 0x0
; GFX12-WGP-NEXT: s_endpgm
;
; GFX12-CU-LABEL: workgroup_seq_cst_fence:
; GFX12-CU: ; %bb.0: ; %entry
+; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-LABEL: workgroup_seq_cst_fence:
; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("workgroup") seq_cst, !mmra !{!"amdgpu-synchronize-as", !"local"}
@@ -670,14 +679,18 @@ define amdgpu_kernel void @agent_release_fence() {
;
; GFX12-WGP-LABEL: agent_release_fence:
; GFX12-WGP: ; %bb.0: ; %entry
+; GFX12-WGP-NEXT: s_wait_dscnt 0x0
; GFX12-WGP-NEXT: s_endpgm
;
; GFX12-CU-LABEL: agent_release_fence:
; GFX12-CU: ; %bb.0: ; %entry
+; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-LABEL: agent_release_fence:
; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("agent") release, !mmra !{!"amdgpu-synchronize-as", !"local"}
@@ -740,14 +753,18 @@ define amdgpu_kernel void @agent_acq_rel_fence() {
;
; GFX12-WGP-LABEL: agent_acq_rel_fence:
; GFX12-WGP: ; %bb.0: ; %entry
+; GFX12-WGP-NEXT: s_wait_dscnt 0x0
; GFX12-WGP-NEXT: s_endpgm
;
; GFX12-CU-LABEL: agent_acq_rel_fence:
; GFX12-CU: ; %bb.0: ; %entry
+; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-LABEL: agent_acq_rel_fence:
; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("agent") acq_rel, !mmra !{!"amdgpu-synchronize-as", !"local"}
@@ -810,14 +827,18 @@ define amdgpu_kernel void @agent_seq_cst_fence() {
;
; GFX12-WGP-LABEL: agent_seq_cst_fence:
; GFX12-WGP: ; %bb.0: ; %entry
+; GFX12-WGP-NEXT: s_wait_dscnt 0x0
; GFX12-WGP-NEXT: s_endpgm
;
; GFX12-CU-LABEL: agent_seq_cst_fence:
; GFX12-CU: ; %bb.0: ; %entry
+; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-LABEL: agent_seq_cst_fence:
; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("agent") seq_cst, !mmra !{!"amdgpu-synchronize-as", !"local"}
@@ -940,6 +961,7 @@ define amdgpu_kernel void @agent_one_as_release_fence() {
;
; GFX1250-LABEL: agent_one_as_release_fence:
; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("agent-one-as") release, !mmra !{!"amdgpu-synchronize-as", !"local"}
@@ -1001,6 +1023,7 @@ define amdgpu_kernel void @agent_one_as_acq_rel_fence() {
;
; GFX1250-LABEL: agent_one_as_acq_rel_fence:
; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("agent-one-as") acq_rel, !mmra !{!"amdgpu-synchronize-as", !"local"}
@@ -1062,6 +1085,7 @@ define amdgpu_kernel void @agent_one_as_seq_cst_fence() {
;
; GFX1250-LABEL: agent_one_as_seq_cst_fence:
; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("agent-one-as") seq_cst, !mmra !{!"amdgpu-synchronize-as", !"local"}
@@ -1197,14 +1221,20 @@ define amdgpu_kernel void @system_release_fence() {
;
; GFX12-WGP-LABEL: system_release_fence:
; GFX12-WGP: ; %bb.0: ; %entry
+; GFX12-WGP-NEXT: global_wb scope:SCOPE_SYS
+; GFX12-WGP-NEXT: s_wait_dscnt 0x0
; GFX12-WGP-NEXT: s_endpgm
;
; GFX12-CU-LABEL: system_release_fence:
; GFX12-CU: ; %bb.0: ; %entry
+; GFX12-CU-NEXT: global_wb scope:SCOPE_SYS
+; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-LABEL: system_release_fence:
; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_endpgm
entry:
fence release, !mmra !{!"amdgpu-synchronize-as", !"local"}
@@ -1267,14 +1297,20 @@ define amdgpu_kernel void @system_acq_rel_fence() {
;
; GFX12-WGP-LABEL: system_acq_rel_fence:
; GFX12-WGP: ; %bb.0: ; %entry
+; GFX12-WGP-NEXT: global_wb scope:SCOPE_SYS
+; GFX12-WGP-NEXT: s_wait_dscnt 0x0
; GFX12-WGP-NEXT: s_endpgm
;
; GFX12-CU-LABEL: system_acq_rel_fence:
; GFX12-CU: ; %bb.0: ; %entry
+; GFX12-CU-NEXT: global_wb scope:SCOPE_SYS
+; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-LABEL: system_acq_rel_fence:
; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_endpgm
entry:
fence acq_rel, !mmra !{!"amdgpu-synchronize-as", !"local"}
@@ -1337,14 +1373,20 @@ define amdgpu_kernel void @system_seq_cst_fence() {
;
; GFX12-WGP-LABEL: system_seq_cst_fence:
; GFX12-WGP: ; %bb.0: ; %entry
+; GFX12-WGP-NEXT: global_wb scope:SCOPE_SYS
+; GFX12-WGP-NEXT: s_wait_dscnt 0x0
; GFX12-WGP-NEXT: s_endpgm
;
; GFX12-CU-LABEL: system_seq_cst_fence:
; GFX12-CU: ; %bb.0: ; %entry
+; GFX12-CU-NEXT: global_wb scope:SCOPE_SYS
+; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-LABEL: system_seq_cst_fence:
; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_endpgm
entry:
fence seq_cst, !mmra !{!"amdgpu-synchronize-as", !"local"}
@@ -1459,14 +1501,17 @@ define amdgpu_kernel void @system_one_as_release_fence() {
;
; GFX12-WGP-LABEL: system_one_as_release_fence:
; GFX12-WGP: ; %bb.0: ; %entry
+; GFX12-WGP-NEXT: global_wb scope:SCOPE_SYS
; GFX12-WGP-NEXT: s_endpgm
;
; GFX12-CU-LABEL: system_one_as_release_fence:
; GFX12-CU: ; %bb.0: ; %entry
+; GFX12-CU-NEXT: global_wb scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-LABEL: system_one_as_release_fence:
; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("one-as") release, !mmra !{!"amdgpu-synchronize-as", !"local"}
@@ -1520,14 +1565,17 @@ define amdgpu_kernel void @system_one_as_acq_rel_fence() {
;
; GFX12-WGP-LABEL: system_one_as_acq_rel_fence:
; GFX12-WGP: ; %bb.0: ; %entry
+; GFX12-WGP-NEXT: global_wb scope:SCOPE_SYS
; GFX12-WGP-NEXT: s_endpgm
;
; GFX12-CU-LABEL: system_one_as_acq_rel_fence:
; GFX12-CU: ; %bb.0: ; %entry
+; GFX12-CU-NEXT: global_wb scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-LABEL: system_one_as_acq_rel_fence:
; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("one-as") acq_rel, !mmra !{!"amdgpu-synchronize-as", !"local"}
@@ -1581,14 +1629,17 @@ define amdgpu_kernel void @system_one_as_seq_cst_fence() {
;
; GFX12-WGP-LABEL: system_one_as_seq_cst_fence:
; GFX12-WGP: ; %bb.0: ; %entry
+; GFX12-WGP-NEXT: global_wb scope:SCOPE_SYS
; GFX12-WGP-NEXT: s_endpgm
;
; GFX12-CU-LABEL: system_one_as_seq_cst_fence:
; GFX12-CU: ; %bb.0: ; %entry
+; GFX12-CU-NEXT: global_wb scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-LABEL: system_one_as_seq_cst_fence:
; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("one-as") seq_cst, !mmra !{!"amdgpu-synchronize-as", !"local"}
More information about the llvm-commits
mailing list