[llvm] 87b6f85 - AMDGPU: Add syncscopes to some atomic tests
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Tue Aug 8 11:38:27 PDT 2023
Author: Matt Arsenault
Date: 2023-08-08T14:38:06-04:00
New Revision: 87b6f85c2be4dea52b01066b8f5cd55f97864773
URL: https://github.com/llvm/llvm-project/commit/87b6f85c2be4dea52b01066b8f5cd55f97864773
DIFF: https://github.com/llvm/llvm-project/commit/87b6f85c2be4dea52b01066b8f5cd55f97864773.diff
LOG: AMDGPU: Add syncscopes to some atomic tests
These tests were not exercising what was intended: the cases we can
select directly to the instructions.
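The pattern repeated throughout the diff is the same: atomicrmw operations
that previously used the default (system) synchronization scope gain an
explicit syncscope("agent"), and new *_system test variants are added so
the default scope stays covered. A minimal before/after sketch of the IR
(the %p pointer name and %r0/%r1 result names are illustrative, not from
the diff):

  ; Before: default (system) scope.
  %r0 = atomicrmw udec_wrap ptr addrspace(1) %p, i32 42 seq_cst, align 4

  ; After: agent scope, the case intended to select directly to the
  ; flat/global atomic instructions.
  %r1 = atomicrmw udec_wrap ptr addrspace(1) %p, i32 42 syncscope("agent") seq_cst, align 4

The retained *_system tests (e.g. global_atomic_dec_ret_i32_offset_system
below) continue to exercise the default-scope lowering.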
Added:
Modified:
llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_udec_wrap.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_uinc_wrap.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/mubuf-global.ll
llvm/test/CodeGen/AMDGPU/acc-ldst.ll
llvm/test/CodeGen/AMDGPU/atomic_optimizations_global_pointer.ll
llvm/test/CodeGen/AMDGPU/dag-divergence-atomic.ll
llvm/test/CodeGen/AMDGPU/flat_atomics.ll
llvm/test/CodeGen/AMDGPU/flat_atomics_i64.ll
llvm/test/CodeGen/AMDGPU/global-saddr-atomics.ll
llvm/test/CodeGen/AMDGPU/global_atomics.ll
llvm/test/CodeGen/AMDGPU/global_atomics_i64.ll
llvm/test/CodeGen/AMDGPU/mubuf.ll
llvm/test/CodeGen/AMDGPU/noclobber-barrier.ll
llvm/test/CodeGen/AMDGPU/shl_add_ptr_global.ll
Removed:
################################################################################
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_udec_wrap.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_udec_wrap.ll
index cb9e35556ff7c6..feb65a5210d59d 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_udec_wrap.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_udec_wrap.ll
@@ -93,7 +93,7 @@ define amdgpu_kernel void @lds_atomic_dec_ret_i32(ptr addrspace(1) %out, ptr add
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
- %result = atomicrmw udec_wrap ptr addrspace(3) %ptr, i32 42 seq_cst, align 4
+ %result = atomicrmw udec_wrap ptr addrspace(3) %ptr, i32 42 syncscope("agent") seq_cst, align 4
store i32 %result, ptr addrspace(1) %out, align 4
ret void
}
@@ -179,7 +179,7 @@ define amdgpu_kernel void @lds_atomic_dec_ret_i32_offset(ptr addrspace(1) %out,
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%gep = getelementptr i32, ptr addrspace(3) %ptr, i32 4
- %result = atomicrmw udec_wrap ptr addrspace(3) %gep, i32 42 seq_cst, align 4
+ %result = atomicrmw udec_wrap ptr addrspace(3) %gep, i32 42 syncscope("agent") seq_cst, align 4
store i32 %result, ptr addrspace(1) %out, align 4
ret void
}
@@ -244,7 +244,7 @@ define amdgpu_kernel void @lds_atomic_dec_noret_i32(ptr addrspace(3) %ptr) #1 {
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: buffer_gl0_inv
; GFX11-NEXT: s_endpgm
- %result = atomicrmw udec_wrap ptr addrspace(3) %ptr, i32 42 seq_cst, align 4
+ %result = atomicrmw udec_wrap ptr addrspace(3) %ptr, i32 42 syncscope("agent") seq_cst, align 4
ret void
}
@@ -309,7 +309,7 @@ define amdgpu_kernel void @lds_atomic_dec_noret_i32_offset(ptr addrspace(3) %ptr
; GFX11-NEXT: buffer_gl0_inv
; GFX11-NEXT: s_endpgm
%gep = getelementptr i32, ptr addrspace(3) %ptr, i32 4
- %result = atomicrmw udec_wrap ptr addrspace(3) %gep, i32 42 seq_cst, align 4
+ %result = atomicrmw udec_wrap ptr addrspace(3) %gep, i32 42 syncscope("agent") seq_cst, align 4
ret void
}
@@ -386,7 +386,7 @@ define amdgpu_kernel void @global_atomic_dec_ret_i32(ptr addrspace(1) %out, ptr
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
- %result = atomicrmw udec_wrap ptr addrspace(1) %ptr, i32 42 seq_cst, align 4
+ %result = atomicrmw udec_wrap ptr addrspace(1) %ptr, i32 42 syncscope("agent") seq_cst, align 4
store i32 %result, ptr addrspace(1) %out, align 4
ret void
}
@@ -467,6 +467,89 @@ define amdgpu_kernel void @global_atomic_dec_ret_i32_offset(ptr addrspace(1) %ou
; GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT: s_endpgm
+ %gep = getelementptr i32, ptr addrspace(1) %ptr, i32 4
+ %result = atomicrmw udec_wrap ptr addrspace(1) %gep, i32 42 syncscope("agent") seq_cst, align 4
+ store i32 %result, ptr addrspace(1) %out, align 4
+ ret void
+}
+
+define amdgpu_kernel void @global_atomic_dec_ret_i32_offset_system(ptr addrspace(1) %out, ptr addrspace(1) %ptr) #1 {
+; CI-LABEL: global_atomic_dec_ret_i32_offset_system:
+; CI: ; %bb.0:
+; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
+; CI-NEXT: v_mov_b32_e32 v2, 42
+; CI-NEXT: s_waitcnt lgkmcnt(0)
+; CI-NEXT: s_add_u32 s2, s2, 16
+; CI-NEXT: s_addc_u32 s3, s3, 0
+; CI-NEXT: v_mov_b32_e32 v0, s2
+; CI-NEXT: v_mov_b32_e32 v1, s3
+; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; CI-NEXT: flat_atomic_dec v2, v[0:1], v2 glc
+; CI-NEXT: s_waitcnt vmcnt(0)
+; CI-NEXT: buffer_wbinvl1_vol
+; CI-NEXT: v_mov_b32_e32 v0, s0
+; CI-NEXT: v_mov_b32_e32 v1, s1
+; CI-NEXT: flat_store_dword v[0:1], v2
+; CI-NEXT: s_endpgm
+;
+; VI-LABEL: global_atomic_dec_ret_i32_offset_system:
+; VI: ; %bb.0:
+; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
+; VI-NEXT: v_mov_b32_e32 v2, 42
+; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: s_add_u32 s2, s2, 16
+; VI-NEXT: s_addc_u32 s3, s3, 0
+; VI-NEXT: v_mov_b32_e32 v0, s2
+; VI-NEXT: v_mov_b32_e32 v1, s3
+; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; VI-NEXT: flat_atomic_dec v2, v[0:1], v2 glc
+; VI-NEXT: s_waitcnt vmcnt(0)
+; VI-NEXT: buffer_wbinvl1_vol
+; VI-NEXT: v_mov_b32_e32 v0, s0
+; VI-NEXT: v_mov_b32_e32 v1, s1
+; VI-NEXT: flat_store_dword v[0:1], v2
+; VI-NEXT: s_endpgm
+;
+; GFX9-LABEL: global_atomic_dec_ret_i32_offset_system:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
+; GFX9-NEXT: v_mov_b32_e32 v0, 42
+; GFX9-NEXT: v_mov_b32_e32 v1, 0
+; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX9-NEXT: global_atomic_dec v0, v1, v0, s[2:3] offset:16 glc
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: buffer_wbinvl1_vol
+; GFX9-NEXT: global_store_dword v1, v0, s[0:1]
+; GFX9-NEXT: s_endpgm
+;
+; GFX10-LABEL: global_atomic_dec_ret_i32_offset_system:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
+; GFX10-NEXT: v_mov_b32_e32 v0, 42
+; GFX10-NEXT: v_mov_b32_e32 v1, 0
+; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT: global_atomic_dec v0, v1, v0, s[2:3] offset:16 glc
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: buffer_gl0_inv
+; GFX10-NEXT: buffer_gl1_inv
+; GFX10-NEXT: global_store_dword v1, v0, s[0:1]
+; GFX10-NEXT: s_endpgm
+;
+; GFX11-LABEL: global_atomic_dec_ret_i32_offset_system:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x0
+; GFX11-NEXT: v_dual_mov_b32 v0, 42 :: v_dual_mov_b32 v1, 0
+; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-NEXT: global_atomic_dec_u32 v0, v1, v0, s[2:3] offset:16 glc
+; GFX11-NEXT: s_waitcnt vmcnt(0)
+; GFX11-NEXT: buffer_gl0_inv
+; GFX11-NEXT: buffer_gl1_inv
+; GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11-NEXT: s_nop 0
+; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%gep = getelementptr i32, ptr addrspace(1) %ptr, i32 4
%result = atomicrmw udec_wrap ptr addrspace(1) %gep, i32 42 seq_cst, align 4
@@ -536,7 +619,7 @@ define amdgpu_kernel void @global_atomic_dec_noret_i32(ptr addrspace(1) %ptr) #1
; GFX11-NEXT: buffer_gl0_inv
; GFX11-NEXT: buffer_gl1_inv
; GFX11-NEXT: s_endpgm
- %result = atomicrmw udec_wrap ptr addrspace(1) %ptr, i32 42 seq_cst, align 4
+ %result = atomicrmw udec_wrap ptr addrspace(1) %ptr, i32 42 syncscope("agent") seq_cst, align 4
ret void
}
@@ -605,6 +688,77 @@ define amdgpu_kernel void @global_atomic_dec_noret_i32_offset(ptr addrspace(1) %
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_gl0_inv
; GFX11-NEXT: buffer_gl1_inv
+; GFX11-NEXT: s_endpgm
+ %gep = getelementptr i32, ptr addrspace(1) %ptr, i32 4
+ %result = atomicrmw udec_wrap ptr addrspace(1) %gep, i32 42 syncscope("agent") seq_cst, align 4
+ ret void
+}
+
+define amdgpu_kernel void @global_atomic_dec_noret_i32_offset_system(ptr addrspace(1) %ptr) #1 {
+; CI-LABEL: global_atomic_dec_noret_i32_offset_system:
+; CI: ; %bb.0:
+; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
+; CI-NEXT: v_mov_b32_e32 v2, 42
+; CI-NEXT: s_waitcnt lgkmcnt(0)
+; CI-NEXT: s_add_u32 s0, s0, 16
+; CI-NEXT: s_addc_u32 s1, s1, 0
+; CI-NEXT: v_mov_b32_e32 v0, s0
+; CI-NEXT: v_mov_b32_e32 v1, s1
+; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; CI-NEXT: flat_atomic_dec v[0:1], v2
+; CI-NEXT: s_waitcnt vmcnt(0)
+; CI-NEXT: buffer_wbinvl1_vol
+; CI-NEXT: s_endpgm
+;
+; VI-LABEL: global_atomic_dec_noret_i32_offset_system:
+; VI: ; %bb.0:
+; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
+; VI-NEXT: v_mov_b32_e32 v2, 42
+; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: s_add_u32 s0, s0, 16
+; VI-NEXT: s_addc_u32 s1, s1, 0
+; VI-NEXT: v_mov_b32_e32 v0, s0
+; VI-NEXT: v_mov_b32_e32 v1, s1
+; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; VI-NEXT: flat_atomic_dec v[0:1], v2
+; VI-NEXT: s_waitcnt vmcnt(0)
+; VI-NEXT: buffer_wbinvl1_vol
+; VI-NEXT: s_endpgm
+;
+; GFX9-LABEL: global_atomic_dec_noret_i32_offset_system:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX9-NEXT: v_mov_b32_e32 v0, 42
+; GFX9-NEXT: v_mov_b32_e32 v1, 0
+; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX9-NEXT: global_atomic_dec v1, v0, s[0:1] offset:16
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: buffer_wbinvl1_vol
+; GFX9-NEXT: s_endpgm
+;
+; GFX10-LABEL: global_atomic_dec_noret_i32_offset_system:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX10-NEXT: v_mov_b32_e32 v0, 42
+; GFX10-NEXT: v_mov_b32_e32 v1, 0
+; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT: global_atomic_dec v1, v0, s[0:1] offset:16
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT: buffer_gl0_inv
+; GFX10-NEXT: buffer_gl1_inv
+; GFX10-NEXT: s_endpgm
+;
+; GFX11-LABEL: global_atomic_dec_noret_i32_offset_system:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
+; GFX11-NEXT: v_dual_mov_b32 v0, 42 :: v_dual_mov_b32 v1, 0
+; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-NEXT: global_atomic_dec_u32 v1, v0, s[0:1] offset:16
+; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-NEXT: buffer_gl0_inv
+; GFX11-NEXT: buffer_gl1_inv
; GFX11-NEXT: s_endpgm
%gep = getelementptr i32, ptr addrspace(1) %ptr, i32 4
%result = atomicrmw udec_wrap ptr addrspace(1) %gep, i32 42 seq_cst, align 4
@@ -702,7 +856,7 @@ define amdgpu_kernel void @global_atomic_dec_ret_i32_offset_addr64(ptr addrspace
%gep.tid = getelementptr i32, ptr addrspace(1) %ptr, i32 %id
%out.gep = getelementptr i32, ptr addrspace(1) %out, i32 %id
%gep = getelementptr i32, ptr addrspace(1) %gep.tid, i32 5
- %result = atomicrmw udec_wrap ptr addrspace(1) %gep, i32 42 seq_cst, align 4
+ %result = atomicrmw udec_wrap ptr addrspace(1) %gep, i32 42 syncscope("agent") seq_cst, align 4
store i32 %result, ptr addrspace(1) %out.gep, align 4
ret void
}
@@ -782,7 +936,7 @@ define amdgpu_kernel void @global_atomic_dec_noret_i32_offset_addr64(ptr addrspa
%id = call i32 @llvm.amdgcn.workitem.id.x()
%gep.tid = getelementptr i32, ptr addrspace(1) %ptr, i32 %id
%gep = getelementptr i32, ptr addrspace(1) %gep.tid, i32 5
- %result = atomicrmw udec_wrap ptr addrspace(1) %gep, i32 42 seq_cst, align 4
+ %result = atomicrmw udec_wrap ptr addrspace(1) %gep, i32 42 syncscope("agent") seq_cst, align 4
ret void
}
@@ -868,7 +1022,7 @@ define amdgpu_kernel void @flat_atomic_dec_ret_i32(ptr %out, ptr %ptr) #1 {
; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX11-NEXT: flat_store_b32 v[0:1], v2
; GFX11-NEXT: s_endpgm
- %result = atomicrmw udec_wrap ptr %ptr, i32 42 seq_cst, align 4
+ %result = atomicrmw udec_wrap ptr %ptr, i32 42 syncscope("agent") seq_cst, align 4
store i32 %result, ptr %out, align 4
ret void
}
@@ -960,6 +1114,100 @@ define amdgpu_kernel void @flat_atomic_dec_ret_i32_offset(ptr %out, ptr %ptr) #1
; GFX11-NEXT: buffer_gl1_inv
; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX11-NEXT: flat_store_b32 v[0:1], v2
+; GFX11-NEXT: s_endpgm
+ %gep = getelementptr i32, ptr %ptr, i32 4
+ %result = atomicrmw udec_wrap ptr %gep, i32 42 syncscope("agent") seq_cst, align 4
+ store i32 %result, ptr %out, align 4
+ ret void
+}
+
+define amdgpu_kernel void @flat_atomic_dec_ret_i32_offset_system(ptr %out, ptr %ptr) #1 {
+; CI-LABEL: flat_atomic_dec_ret_i32_offset_system:
+; CI: ; %bb.0:
+; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
+; CI-NEXT: v_mov_b32_e32 v2, 42
+; CI-NEXT: s_waitcnt lgkmcnt(0)
+; CI-NEXT: s_add_u32 s2, s2, 16
+; CI-NEXT: s_addc_u32 s3, s3, 0
+; CI-NEXT: v_mov_b32_e32 v0, s2
+; CI-NEXT: v_mov_b32_e32 v1, s3
+; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; CI-NEXT: flat_atomic_dec v2, v[0:1], v2 glc
+; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; CI-NEXT: buffer_wbinvl1_vol
+; CI-NEXT: v_mov_b32_e32 v0, s0
+; CI-NEXT: v_mov_b32_e32 v1, s1
+; CI-NEXT: flat_store_dword v[0:1], v2
+; CI-NEXT: s_endpgm
+;
+; VI-LABEL: flat_atomic_dec_ret_i32_offset_system:
+; VI: ; %bb.0:
+; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
+; VI-NEXT: v_mov_b32_e32 v2, 42
+; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: s_add_u32 s2, s2, 16
+; VI-NEXT: s_addc_u32 s3, s3, 0
+; VI-NEXT: v_mov_b32_e32 v0, s2
+; VI-NEXT: v_mov_b32_e32 v1, s3
+; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; VI-NEXT: flat_atomic_dec v2, v[0:1], v2 glc
+; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; VI-NEXT: buffer_wbinvl1_vol
+; VI-NEXT: v_mov_b32_e32 v0, s0
+; VI-NEXT: v_mov_b32_e32 v1, s1
+; VI-NEXT: flat_store_dword v[0:1], v2
+; VI-NEXT: s_endpgm
+;
+; GFX9-LABEL: flat_atomic_dec_ret_i32_offset_system:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
+; GFX9-NEXT: v_mov_b32_e32 v2, 42
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: v_mov_b32_e32 v0, s2
+; GFX9-NEXT: v_mov_b32_e32 v1, s3
+; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX9-NEXT: flat_atomic_dec v2, v[0:1], v2 offset:16 glc
+; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX9-NEXT: buffer_wbinvl1_vol
+; GFX9-NEXT: v_mov_b32_e32 v0, s0
+; GFX9-NEXT: v_mov_b32_e32 v1, s1
+; GFX9-NEXT: flat_store_dword v[0:1], v2
+; GFX9-NEXT: s_endpgm
+;
+; GFX10-LABEL: flat_atomic_dec_ret_i32_offset_system:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
+; GFX10-NEXT: v_mov_b32_e32 v2, 42
+; GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-NEXT: s_add_u32 s2, s2, 16
+; GFX10-NEXT: s_addc_u32 s3, s3, 0
+; GFX10-NEXT: v_mov_b32_e32 v0, s2
+; GFX10-NEXT: v_mov_b32_e32 v1, s3
+; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT: flat_atomic_dec v2, v[0:1], v2 glc
+; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX10-NEXT: buffer_gl0_inv
+; GFX10-NEXT: buffer_gl1_inv
+; GFX10-NEXT: v_mov_b32_e32 v0, s0
+; GFX10-NEXT: v_mov_b32_e32 v1, s1
+; GFX10-NEXT: flat_store_dword v[0:1], v2
+; GFX10-NEXT: s_endpgm
+;
+; GFX11-LABEL: flat_atomic_dec_ret_i32_offset_system:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x0
+; GFX11-NEXT: v_mov_b32_e32 v2, 42
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
+; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-NEXT: flat_atomic_dec_u32 v2, v[0:1], v2 offset:16 glc
+; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX11-NEXT: buffer_gl0_inv
+; GFX11-NEXT: buffer_gl1_inv
+; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
+; GFX11-NEXT: flat_store_b32 v[0:1], v2
; GFX11-NEXT: s_endpgm
%gep = getelementptr i32, ptr %ptr, i32 4
%result = atomicrmw udec_wrap ptr %gep, i32 42 seq_cst, align 4
@@ -994,7 +1242,85 @@ define amdgpu_kernel void @flat_atomic_dec_noret_i32(ptr %ptr) #1 {
; VI-NEXT: buffer_wbinvl1_vol
; VI-NEXT: s_endpgm
;
-; GFX9-LABEL: flat_atomic_dec_noret_i32:
+; GFX9-LABEL: flat_atomic_dec_noret_i32:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX9-NEXT: v_mov_b32_e32 v2, 42
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: v_mov_b32_e32 v0, s0
+; GFX9-NEXT: v_mov_b32_e32 v1, s1
+; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX9-NEXT: flat_atomic_dec v[0:1], v2
+; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX9-NEXT: buffer_wbinvl1_vol
+; GFX9-NEXT: s_endpgm
+;
+; GFX10-LABEL: flat_atomic_dec_noret_i32:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX10-NEXT: v_mov_b32_e32 v2, 42
+; GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-NEXT: v_mov_b32_e32 v0, s0
+; GFX10-NEXT: v_mov_b32_e32 v1, s1
+; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT: flat_atomic_dec v[0:1], v2
+; GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT: buffer_gl0_inv
+; GFX10-NEXT: buffer_gl1_inv
+; GFX10-NEXT: s_endpgm
+;
+; GFX11-LABEL: flat_atomic_dec_noret_i32:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
+; GFX11-NEXT: v_mov_b32_e32 v2, 42
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
+; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-NEXT: flat_atomic_dec_u32 v[0:1], v2
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-NEXT: buffer_gl0_inv
+; GFX11-NEXT: buffer_gl1_inv
+; GFX11-NEXT: s_endpgm
+ %result = atomicrmw udec_wrap ptr %ptr, i32 42 syncscope("agent") seq_cst, align 4
+ ret void
+}
+
+define amdgpu_kernel void @flat_atomic_dec_noret_i32_offset(ptr %ptr) #1 {
+; CI-LABEL: flat_atomic_dec_noret_i32_offset:
+; CI: ; %bb.0:
+; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
+; CI-NEXT: v_mov_b32_e32 v2, 42
+; CI-NEXT: s_waitcnt lgkmcnt(0)
+; CI-NEXT: s_add_u32 s0, s0, 16
+; CI-NEXT: s_addc_u32 s1, s1, 0
+; CI-NEXT: v_mov_b32_e32 v0, s0
+; CI-NEXT: v_mov_b32_e32 v1, s1
+; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; CI-NEXT: flat_atomic_dec v[0:1], v2
+; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; CI-NEXT: buffer_wbinvl1_vol
+; CI-NEXT: s_endpgm
+;
+; VI-LABEL: flat_atomic_dec_noret_i32_offset:
+; VI: ; %bb.0:
+; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
+; VI-NEXT: v_mov_b32_e32 v2, 42
+; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: s_add_u32 s0, s0, 16
+; VI-NEXT: s_addc_u32 s1, s1, 0
+; VI-NEXT: v_mov_b32_e32 v0, s0
+; VI-NEXT: v_mov_b32_e32 v1, s1
+; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; VI-NEXT: flat_atomic_dec v[0:1], v2
+; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; VI-NEXT: buffer_wbinvl1_vol
+; VI-NEXT: s_endpgm
+;
+; GFX9-LABEL: flat_atomic_dec_noret_i32_offset:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX9-NEXT: v_mov_b32_e32 v2, 42
@@ -1002,16 +1328,18 @@ define amdgpu_kernel void @flat_atomic_dec_noret_i32(ptr %ptr) #1 {
; GFX9-NEXT: v_mov_b32_e32 v0, s0
; GFX9-NEXT: v_mov_b32_e32 v1, s1
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX9-NEXT: flat_atomic_dec v[0:1], v2
+; GFX9-NEXT: flat_atomic_dec v[0:1], v2 offset:16
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-NEXT: buffer_wbinvl1_vol
; GFX9-NEXT: s_endpgm
;
-; GFX10-LABEL: flat_atomic_dec_noret_i32:
+; GFX10-LABEL: flat_atomic_dec_noret_i32_offset:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX10-NEXT: v_mov_b32_e32 v2, 42
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-NEXT: s_add_u32 s0, s0, 16
+; GFX10-NEXT: s_addc_u32 s1, s1, 0
; GFX10-NEXT: v_mov_b32_e32 v0, s0
; GFX10-NEXT: v_mov_b32_e32 v1, s1
; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
@@ -1023,7 +1351,7 @@ define amdgpu_kernel void @flat_atomic_dec_noret_i32(ptr %ptr) #1 {
; GFX10-NEXT: buffer_gl1_inv
; GFX10-NEXT: s_endpgm
;
-; GFX11-LABEL: flat_atomic_dec_noret_i32:
+; GFX11-LABEL: flat_atomic_dec_noret_i32_offset:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; GFX11-NEXT: v_mov_b32_e32 v2, 42
@@ -1031,18 +1359,19 @@ define amdgpu_kernel void @flat_atomic_dec_noret_i32(ptr %ptr) #1 {
; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX11-NEXT: flat_atomic_dec_u32 v[0:1], v2
+; GFX11-NEXT: flat_atomic_dec_u32 v[0:1], v2 offset:16
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_gl0_inv
; GFX11-NEXT: buffer_gl1_inv
; GFX11-NEXT: s_endpgm
- %result = atomicrmw udec_wrap ptr %ptr, i32 42 seq_cst, align 4
+ %gep = getelementptr i32, ptr %ptr, i32 4
+ %result = atomicrmw udec_wrap ptr %gep, i32 42 syncscope("agent") seq_cst, align 4
ret void
}
-define amdgpu_kernel void @flat_atomic_dec_noret_i32_offset(ptr %ptr) #1 {
-; CI-LABEL: flat_atomic_dec_noret_i32_offset:
+define amdgpu_kernel void @flat_atomic_dec_noret_i32_offset_system(ptr %ptr) #1 {
+; CI-LABEL: flat_atomic_dec_noret_i32_offset_system:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; CI-NEXT: v_mov_b32_e32 v2, 42
@@ -1057,7 +1386,7 @@ define amdgpu_kernel void @flat_atomic_dec_noret_i32_offset(ptr %ptr) #1 {
; CI-NEXT: buffer_wbinvl1_vol
; CI-NEXT: s_endpgm
;
-; VI-LABEL: flat_atomic_dec_noret_i32_offset:
+; VI-LABEL: flat_atomic_dec_noret_i32_offset_system:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; VI-NEXT: v_mov_b32_e32 v2, 42
@@ -1072,7 +1401,7 @@ define amdgpu_kernel void @flat_atomic_dec_noret_i32_offset(ptr %ptr) #1 {
; VI-NEXT: buffer_wbinvl1_vol
; VI-NEXT: s_endpgm
;
-; GFX9-LABEL: flat_atomic_dec_noret_i32_offset:
+; GFX9-LABEL: flat_atomic_dec_noret_i32_offset_system:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX9-NEXT: v_mov_b32_e32 v2, 42
@@ -1085,7 +1414,7 @@ define amdgpu_kernel void @flat_atomic_dec_noret_i32_offset(ptr %ptr) #1 {
; GFX9-NEXT: buffer_wbinvl1_vol
; GFX9-NEXT: s_endpgm
;
-; GFX10-LABEL: flat_atomic_dec_noret_i32_offset:
+; GFX10-LABEL: flat_atomic_dec_noret_i32_offset_system:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX10-NEXT: v_mov_b32_e32 v2, 42
@@ -1103,7 +1432,7 @@ define amdgpu_kernel void @flat_atomic_dec_noret_i32_offset(ptr %ptr) #1 {
; GFX10-NEXT: buffer_gl1_inv
; GFX10-NEXT: s_endpgm
;
-; GFX11-LABEL: flat_atomic_dec_noret_i32_offset:
+; GFX11-LABEL: flat_atomic_dec_noret_i32_offset_system:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; GFX11-NEXT: v_mov_b32_e32 v2, 42
@@ -1240,7 +1569,7 @@ define amdgpu_kernel void @flat_atomic_dec_ret_i32_offset_addr64(ptr %out, ptr %
%gep.tid = getelementptr i32, ptr %ptr, i32 %id
%out.gep = getelementptr i32, ptr %out, i32 %id
%gep = getelementptr i32, ptr %gep.tid, i32 5
- %result = atomicrmw udec_wrap ptr %gep, i32 42 seq_cst, align 4
+ %result = atomicrmw udec_wrap ptr %gep, i32 42 syncscope("agent") seq_cst, align 4
store i32 %result, ptr %out.gep, align 4
ret void
}
@@ -1340,7 +1669,7 @@ define amdgpu_kernel void @flat_atomic_dec_noret_i32_offset_addr64(ptr %ptr) #1
%id = call i32 @llvm.amdgcn.workitem.id.x()
%gep.tid = getelementptr i32, ptr %ptr, i32 %id
%gep = getelementptr i32, ptr %gep.tid, i32 5
- %result = atomicrmw udec_wrap ptr %gep, i32 42 seq_cst, align 4
+ %result = atomicrmw udec_wrap ptr %gep, i32 42 syncscope("agent") seq_cst, align 4
ret void
}
@@ -1441,7 +1770,7 @@ define amdgpu_kernel void @flat_atomic_dec_ret_i64(ptr %out, ptr %ptr) #1 {
; GFX11-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
; GFX11-NEXT: flat_store_b64 v[2:3], v[0:1]
; GFX11-NEXT: s_endpgm
- %result = atomicrmw udec_wrap ptr %ptr, i64 42 seq_cst, align 8
+ %result = atomicrmw udec_wrap ptr %ptr, i64 42 syncscope("agent") seq_cst, align 8
store i64 %result, ptr %out, align 4
ret void
}
@@ -1550,7 +1879,7 @@ define amdgpu_kernel void @flat_atomic_dec_ret_i64_offset(ptr %out, ptr %ptr) #1
; GFX11-NEXT: flat_store_b64 v[2:3], v[0:1]
; GFX11-NEXT: s_endpgm
%gep = getelementptr i64, ptr %ptr, i32 4
- %result = atomicrmw udec_wrap ptr %gep, i64 42 seq_cst, align 8
+ %result = atomicrmw udec_wrap ptr %gep, i64 42 syncscope("agent") seq_cst, align 8
store i64 %result, ptr %out, align 4
ret void
}
@@ -1630,7 +1959,7 @@ define amdgpu_kernel void @flat_atomic_dec_noret_i64(ptr %ptr) #1 {
; GFX11-NEXT: buffer_gl0_inv
; GFX11-NEXT: buffer_gl1_inv
; GFX11-NEXT: s_endpgm
- %result = atomicrmw udec_wrap ptr %ptr, i64 42 seq_cst, align 8
+ %result = atomicrmw udec_wrap ptr %ptr, i64 42 syncscope("agent") seq_cst, align 8
ret void
}
@@ -1714,6 +2043,92 @@ define amdgpu_kernel void @flat_atomic_dec_noret_i64_offset(ptr %ptr) #1 {
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_gl0_inv
; GFX11-NEXT: buffer_gl1_inv
+; GFX11-NEXT: s_endpgm
+ %gep = getelementptr i64, ptr %ptr, i32 4
+ %result = atomicrmw udec_wrap ptr %gep, i64 42 syncscope("agent") seq_cst, align 8
+ ret void
+}
+
+define amdgpu_kernel void @flat_atomic_dec_noret_i64_offset_system(ptr %ptr) #1 {
+; CI-LABEL: flat_atomic_dec_noret_i64_offset_system:
+; CI: ; %bb.0:
+; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
+; CI-NEXT: v_mov_b32_e32 v0, 42
+; CI-NEXT: v_mov_b32_e32 v1, 0
+; CI-NEXT: s_waitcnt lgkmcnt(0)
+; CI-NEXT: s_add_u32 s0, s0, 32
+; CI-NEXT: s_addc_u32 s1, s1, 0
+; CI-NEXT: v_mov_b32_e32 v3, s1
+; CI-NEXT: v_mov_b32_e32 v2, s0
+; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; CI-NEXT: flat_atomic_dec_x2 v[2:3], v[0:1]
+; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; CI-NEXT: buffer_wbinvl1_vol
+; CI-NEXT: s_endpgm
+;
+; VI-LABEL: flat_atomic_dec_noret_i64_offset_system:
+; VI: ; %bb.0:
+; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
+; VI-NEXT: v_mov_b32_e32 v0, 42
+; VI-NEXT: v_mov_b32_e32 v1, 0
+; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: s_add_u32 s0, s0, 32
+; VI-NEXT: s_addc_u32 s1, s1, 0
+; VI-NEXT: v_mov_b32_e32 v3, s1
+; VI-NEXT: v_mov_b32_e32 v2, s0
+; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; VI-NEXT: flat_atomic_dec_x2 v[2:3], v[0:1]
+; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; VI-NEXT: buffer_wbinvl1_vol
+; VI-NEXT: s_endpgm
+;
+; GFX9-LABEL: flat_atomic_dec_noret_i64_offset_system:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX9-NEXT: v_mov_b32_e32 v0, 42
+; GFX9-NEXT: v_mov_b32_e32 v1, 0
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: v_mov_b32_e32 v3, s1
+; GFX9-NEXT: v_mov_b32_e32 v2, s0
+; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX9-NEXT: flat_atomic_dec_x2 v[2:3], v[0:1] offset:32
+; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX9-NEXT: buffer_wbinvl1_vol
+; GFX9-NEXT: s_endpgm
+;
+; GFX10-LABEL: flat_atomic_dec_noret_i64_offset_system:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX10-NEXT: v_mov_b32_e32 v0, 42
+; GFX10-NEXT: v_mov_b32_e32 v1, 0
+; GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-NEXT: s_add_u32 s0, s0, 32
+; GFX10-NEXT: s_addc_u32 s1, s1, 0
+; GFX10-NEXT: v_mov_b32_e32 v3, s1
+; GFX10-NEXT: v_mov_b32_e32 v2, s0
+; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT: flat_atomic_dec_x2 v[2:3], v[0:1]
+; GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT: buffer_gl0_inv
+; GFX10-NEXT: buffer_gl1_inv
+; GFX10-NEXT: s_endpgm
+;
+; GFX11-LABEL: flat_atomic_dec_noret_i64_offset_system:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
+; GFX11-NEXT: v_mov_b32_e32 v0, 42
+; GFX11-NEXT: v_mov_b32_e32 v1, 0
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
+; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-NEXT: flat_atomic_dec_u64 v[2:3], v[0:1] offset:32
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-NEXT: buffer_gl0_inv
+; GFX11-NEXT: buffer_gl1_inv
; GFX11-NEXT: s_endpgm
%gep = getelementptr i64, ptr %ptr, i32 4
%result = atomicrmw udec_wrap ptr %gep, i64 42 seq_cst, align 8
@@ -1850,7 +2265,7 @@ define amdgpu_kernel void @flat_atomic_dec_ret_i64_offset_addr64(ptr %out, ptr %
%gep.tid = getelementptr i64, ptr %ptr, i32 %id
%out.gep = getelementptr i64, ptr %out, i32 %id
%gep = getelementptr i64, ptr %gep.tid, i32 5
- %result = atomicrmw udec_wrap ptr %gep, i64 42 seq_cst, align 8
+ %result = atomicrmw udec_wrap ptr %gep, i64 42 syncscope("agent") seq_cst, align 8
store i64 %result, ptr %out.gep, align 4
ret void
}
@@ -1955,7 +2370,7 @@ define amdgpu_kernel void @flat_atomic_dec_noret_i64_offset_addr64(ptr %ptr) #1
%id = call i32 @llvm.amdgcn.workitem.id.x()
%gep.tid = getelementptr i64, ptr %ptr, i32 %id
%gep = getelementptr i64, ptr %gep.tid, i32 5
- %result = atomicrmw udec_wrap ptr %gep, i64 42 seq_cst, align 8
+ %result = atomicrmw udec_wrap ptr %gep, i64 42 syncscope("agent") seq_cst, align 8
ret void
}
@@ -2046,7 +2461,7 @@ define amdgpu_kernel void @atomic_dec_shl_base_lds_0(ptr addrspace(1) %out, ptr
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #2
%idx.0 = add nsw i32 %tid.x, 2
%arrayidx0 = getelementptr inbounds [512 x i32], ptr addrspace(3) @lds0, i32 0, i32 %idx.0
- %result = atomicrmw udec_wrap ptr addrspace(3) %arrayidx0, i32 9 seq_cst, align 4
+ %result = atomicrmw udec_wrap ptr addrspace(3) %arrayidx0, i32 9 syncscope("agent") seq_cst, align 4
store i32 %idx.0, ptr addrspace(1) %add_use, align 4
store i32 %result, ptr addrspace(1) %out, align 4
ret void
@@ -2137,7 +2552,7 @@ define amdgpu_kernel void @lds_atomic_dec_ret_i64(ptr addrspace(1) %out, ptr add
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
- %result = atomicrmw udec_wrap ptr addrspace(3) %ptr, i64 42 seq_cst, align 8
+ %result = atomicrmw udec_wrap ptr addrspace(3) %ptr, i64 42 syncscope("agent") seq_cst, align 8
store i64 %result, ptr addrspace(1) %out, align 4
ret void
}
@@ -2228,7 +2643,7 @@ define amdgpu_kernel void @lds_atomic_dec_ret_i64_offset(ptr addrspace(1) %out,
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%gep = getelementptr i64, ptr addrspace(3) %ptr, i32 4
- %result = atomicrmw udec_wrap ptr addrspace(3) %gep, i64 42 seq_cst, align 8
+ %result = atomicrmw udec_wrap ptr addrspace(3) %gep, i64 42 syncscope("agent") seq_cst, align 8
store i64 %result, ptr addrspace(1) %out, align 4
ret void
}
@@ -2298,7 +2713,7 @@ define amdgpu_kernel void @lds_atomic_dec_noret_i64(ptr addrspace(3) %ptr) #1 {
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: buffer_gl0_inv
; GFX11-NEXT: s_endpgm
- %result = atomicrmw udec_wrap ptr addrspace(3) %ptr, i64 42 seq_cst, align 8
+ %result = atomicrmw udec_wrap ptr addrspace(3) %ptr, i64 42 syncscope("agent") seq_cst, align 8
ret void
}
@@ -2368,7 +2783,7 @@ define amdgpu_kernel void @lds_atomic_dec_noret_i64_offset(ptr addrspace(3) %ptr
; GFX11-NEXT: buffer_gl0_inv
; GFX11-NEXT: s_endpgm
%gep = getelementptr i64, ptr addrspace(3) %ptr, i32 4
- %result = atomicrmw udec_wrap ptr addrspace(3) %gep, i64 42 seq_cst, align 8
+ %result = atomicrmw udec_wrap ptr addrspace(3) %gep, i64 42 syncscope("agent") seq_cst, align 8
ret void
}
@@ -2450,7 +2865,7 @@ define amdgpu_kernel void @global_atomic_dec_ret_i64(ptr addrspace(1) %out, ptr
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
- %result = atomicrmw udec_wrap ptr addrspace(1) %ptr, i64 42 seq_cst, align 8
+ %result = atomicrmw udec_wrap ptr addrspace(1) %ptr, i64 42 syncscope("agent") seq_cst, align 8
store i64 %result, ptr addrspace(1) %out, align 4
ret void
}
@@ -2536,6 +2951,94 @@ define amdgpu_kernel void @global_atomic_dec_ret_i64_offset(ptr addrspace(1) %ou
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT: s_endpgm
+ %gep = getelementptr i64, ptr addrspace(1) %ptr, i32 4
+ %result = atomicrmw udec_wrap ptr addrspace(1) %gep, i64 42 syncscope("agent") seq_cst, align 8
+ store i64 %result, ptr addrspace(1) %out, align 4
+ ret void
+}
+
+define amdgpu_kernel void @global_atomic_dec_ret_i64_offset_system(ptr addrspace(1) %out, ptr addrspace(1) %ptr) #1 {
+; CI-LABEL: global_atomic_dec_ret_i64_offset_system:
+; CI: ; %bb.0:
+; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
+; CI-NEXT: v_mov_b32_e32 v0, 42
+; CI-NEXT: v_mov_b32_e32 v1, 0
+; CI-NEXT: s_waitcnt lgkmcnt(0)
+; CI-NEXT: s_add_u32 s2, s2, 32
+; CI-NEXT: s_addc_u32 s3, s3, 0
+; CI-NEXT: v_mov_b32_e32 v2, s2
+; CI-NEXT: v_mov_b32_e32 v3, s3
+; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; CI-NEXT: flat_atomic_dec_x2 v[0:1], v[2:3], v[0:1] glc
+; CI-NEXT: s_waitcnt vmcnt(0)
+; CI-NEXT: buffer_wbinvl1_vol
+; CI-NEXT: v_mov_b32_e32 v3, s1
+; CI-NEXT: v_mov_b32_e32 v2, s0
+; CI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
+; CI-NEXT: s_endpgm
+;
+; VI-LABEL: global_atomic_dec_ret_i64_offset_system:
+; VI: ; %bb.0:
+; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
+; VI-NEXT: v_mov_b32_e32 v0, 42
+; VI-NEXT: v_mov_b32_e32 v1, 0
+; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: s_add_u32 s2, s2, 32
+; VI-NEXT: s_addc_u32 s3, s3, 0
+; VI-NEXT: v_mov_b32_e32 v2, s2
+; VI-NEXT: v_mov_b32_e32 v3, s3
+; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; VI-NEXT: flat_atomic_dec_x2 v[0:1], v[2:3], v[0:1] glc
+; VI-NEXT: s_waitcnt vmcnt(0)
+; VI-NEXT: buffer_wbinvl1_vol
+; VI-NEXT: v_mov_b32_e32 v3, s1
+; VI-NEXT: v_mov_b32_e32 v2, s0
+; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
+; VI-NEXT: s_endpgm
+;
+; GFX9-LABEL: global_atomic_dec_ret_i64_offset_system:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
+; GFX9-NEXT: v_mov_b32_e32 v0, 42
+; GFX9-NEXT: v_mov_b32_e32 v1, 0
+; GFX9-NEXT: v_mov_b32_e32 v2, 0
+; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX9-NEXT: global_atomic_dec_x2 v[0:1], v2, v[0:1], s[2:3] offset:32 glc
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: buffer_wbinvl1_vol
+; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
+; GFX9-NEXT: s_endpgm
+;
+; GFX10-LABEL: global_atomic_dec_ret_i64_offset_system:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
+; GFX10-NEXT: v_mov_b32_e32 v0, 42
+; GFX10-NEXT: v_mov_b32_e32 v1, 0
+; GFX10-NEXT: v_mov_b32_e32 v2, 0
+; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT: global_atomic_dec_x2 v[0:1], v2, v[0:1], s[2:3] offset:32 glc
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: buffer_gl0_inv
+; GFX10-NEXT: buffer_gl1_inv
+; GFX10-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
+; GFX10-NEXT: s_endpgm
+;
+; GFX11-LABEL: global_atomic_dec_ret_i64_offset_system:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x0
+; GFX11-NEXT: v_mov_b32_e32 v0, 42
+; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, 0
+; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-NEXT: global_atomic_dec_u64 v[0:1], v2, v[0:1], s[2:3] offset:32 glc
+; GFX11-NEXT: s_waitcnt vmcnt(0)
+; GFX11-NEXT: buffer_gl0_inv
+; GFX11-NEXT: buffer_gl1_inv
+; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
+; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%gep = getelementptr i64, ptr addrspace(1) %ptr, i32 4
%result = atomicrmw udec_wrap ptr addrspace(1) %gep, i64 42 seq_cst, align 8
@@ -2610,7 +3113,7 @@ define amdgpu_kernel void @global_atomic_dec_noret_i64(ptr addrspace(1) %ptr) #1
; GFX11-NEXT: buffer_gl0_inv
; GFX11-NEXT: buffer_gl1_inv
; GFX11-NEXT: s_endpgm
- %result = atomicrmw udec_wrap ptr addrspace(1) %ptr, i64 42 seq_cst, align 8
+ %result = atomicrmw udec_wrap ptr addrspace(1) %ptr, i64 42 syncscope("agent") seq_cst, align 8
ret void
}
@@ -2684,6 +3187,82 @@ define amdgpu_kernel void @global_atomic_dec_noret_i64_offset(ptr addrspace(1) %
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_gl0_inv
; GFX11-NEXT: buffer_gl1_inv
+; GFX11-NEXT: s_endpgm
+ %gep = getelementptr i64, ptr addrspace(1) %ptr, i32 4
+ %result = atomicrmw udec_wrap ptr addrspace(1) %gep, i64 42 syncscope("agent") seq_cst, align 8
+ ret void
+}
+
+define amdgpu_kernel void @global_atomic_dec_noret_i64_offset_system(ptr addrspace(1) %ptr) #1 {
+; CI-LABEL: global_atomic_dec_noret_i64_offset_system:
+; CI: ; %bb.0:
+; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
+; CI-NEXT: v_mov_b32_e32 v0, 42
+; CI-NEXT: v_mov_b32_e32 v1, 0
+; CI-NEXT: s_waitcnt lgkmcnt(0)
+; CI-NEXT: s_add_u32 s0, s0, 32
+; CI-NEXT: s_addc_u32 s1, s1, 0
+; CI-NEXT: v_mov_b32_e32 v3, s1
+; CI-NEXT: v_mov_b32_e32 v2, s0
+; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; CI-NEXT: flat_atomic_dec_x2 v[2:3], v[0:1]
+; CI-NEXT: s_waitcnt vmcnt(0)
+; CI-NEXT: buffer_wbinvl1_vol
+; CI-NEXT: s_endpgm
+;
+; VI-LABEL: global_atomic_dec_noret_i64_offset_system:
+; VI: ; %bb.0:
+; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
+; VI-NEXT: v_mov_b32_e32 v0, 42
+; VI-NEXT: v_mov_b32_e32 v1, 0
+; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: s_add_u32 s0, s0, 32
+; VI-NEXT: s_addc_u32 s1, s1, 0
+; VI-NEXT: v_mov_b32_e32 v3, s1
+; VI-NEXT: v_mov_b32_e32 v2, s0
+; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; VI-NEXT: flat_atomic_dec_x2 v[2:3], v[0:1]
+; VI-NEXT: s_waitcnt vmcnt(0)
+; VI-NEXT: buffer_wbinvl1_vol
+; VI-NEXT: s_endpgm
+;
+; GFX9-LABEL: global_atomic_dec_noret_i64_offset_system:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX9-NEXT: v_mov_b32_e32 v0, 42
+; GFX9-NEXT: v_mov_b32_e32 v1, 0
+; GFX9-NEXT: v_mov_b32_e32 v2, 0
+; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX9-NEXT: global_atomic_dec_x2 v2, v[0:1], s[0:1] offset:32
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: buffer_wbinvl1_vol
+; GFX9-NEXT: s_endpgm
+;
+; GFX10-LABEL: global_atomic_dec_noret_i64_offset_system:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX10-NEXT: v_mov_b32_e32 v0, 42
+; GFX10-NEXT: v_mov_b32_e32 v1, 0
+; GFX10-NEXT: v_mov_b32_e32 v2, 0
+; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT: global_atomic_dec_x2 v2, v[0:1], s[0:1] offset:32
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT: buffer_gl0_inv
+; GFX10-NEXT: buffer_gl1_inv
+; GFX10-NEXT: s_endpgm
+;
+; GFX11-LABEL: global_atomic_dec_noret_i64_offset_system:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
+; GFX11-NEXT: v_mov_b32_e32 v0, 42
+; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, 0
+; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-NEXT: global_atomic_dec_u64 v2, v[0:1], s[0:1] offset:32
+; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-NEXT: buffer_gl0_inv
+; GFX11-NEXT: buffer_gl1_inv
; GFX11-NEXT: s_endpgm
%gep = getelementptr i64, ptr addrspace(1) %ptr, i32 4
%result = atomicrmw udec_wrap ptr addrspace(1) %gep, i64 42 seq_cst, align 8
@@ -2786,7 +3365,7 @@ define amdgpu_kernel void @global_atomic_dec_ret_i64_offset_addr64(ptr addrspace
%gep.tid = getelementptr i64, ptr addrspace(1) %ptr, i32 %id
%out.gep = getelementptr i64, ptr addrspace(1) %out, i32 %id
%gep = getelementptr i64, ptr addrspace(1) %gep.tid, i32 5
- %result = atomicrmw udec_wrap ptr addrspace(1) %gep, i64 42 seq_cst, align 8
+ %result = atomicrmw udec_wrap ptr addrspace(1) %gep, i64 42 syncscope("agent") seq_cst, align 8
store i64 %result, ptr addrspace(1) %out.gep, align 4
ret void
}
@@ -2871,7 +3450,7 @@ define amdgpu_kernel void @global_atomic_dec_noret_i64_offset_addr64(ptr addrspa
%id = call i32 @llvm.amdgcn.workitem.id.x()
%gep.tid = getelementptr i64, ptr addrspace(1) %ptr, i32 %id
%gep = getelementptr i64, ptr addrspace(1) %gep.tid, i32 5
- %result = atomicrmw udec_wrap ptr addrspace(1) %gep, i64 42 seq_cst, align 8
+ %result = atomicrmw udec_wrap ptr addrspace(1) %gep, i64 42 syncscope("agent") seq_cst, align 8
ret void
}
@@ -2967,7 +3546,7 @@ define amdgpu_kernel void @atomic_dec_shl_base_lds_0_i64(ptr addrspace(1) %out,
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #2
%idx.0 = add nsw i32 %tid.x, 2
%arrayidx0 = getelementptr inbounds [512 x i64], ptr addrspace(3) @lds1, i32 0, i32 %idx.0
- %result = atomicrmw udec_wrap ptr addrspace(3) %arrayidx0, i64 9 seq_cst, align 8
+ %result = atomicrmw udec_wrap ptr addrspace(3) %arrayidx0, i64 9 syncscope("agent") seq_cst, align 8
store i32 %idx.0, ptr addrspace(1) %add_use, align 4
store i64 %result, ptr addrspace(1) %out, align 4
ret void
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_uinc_wrap.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_uinc_wrap.ll
index cabb67d073702a..009a66721de159 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_uinc_wrap.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_uinc_wrap.ll
@@ -93,7 +93,7 @@ define amdgpu_kernel void @lds_atomic_inc_ret_i32(ptr addrspace(1) %out, ptr add
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
- %result = atomicrmw uinc_wrap ptr addrspace(3) %ptr, i32 42 seq_cst, align 4
+ %result = atomicrmw uinc_wrap ptr addrspace(3) %ptr, i32 42 syncscope("agent") seq_cst, align 4
store i32 %result, ptr addrspace(1) %out, align 4
ret void
}
@@ -179,7 +179,7 @@ define amdgpu_kernel void @lds_atomic_inc_ret_i32_offset(ptr addrspace(1) %out,
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%gep = getelementptr i32, ptr addrspace(3) %ptr, i32 4
- %result = atomicrmw uinc_wrap ptr addrspace(3) %gep, i32 42 seq_cst, align 4
+ %result = atomicrmw uinc_wrap ptr addrspace(3) %gep, i32 42 syncscope("agent") seq_cst, align 4
store i32 %result, ptr addrspace(1) %out, align 4
ret void
}
@@ -244,7 +244,7 @@ define amdgpu_kernel void @lds_atomic_inc_noret_i32(ptr addrspace(3) %ptr) #1 {
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: buffer_gl0_inv
; GFX11-NEXT: s_endpgm
- %result = atomicrmw uinc_wrap ptr addrspace(3) %ptr, i32 42 seq_cst, align 4
+ %result = atomicrmw uinc_wrap ptr addrspace(3) %ptr, i32 42 syncscope("agent") seq_cst, align 4
ret void
}
@@ -309,7 +309,7 @@ define amdgpu_kernel void @lds_atomic_inc_noret_i32_offset(ptr addrspace(3) %ptr
; GFX11-NEXT: buffer_gl0_inv
; GFX11-NEXT: s_endpgm
%gep = getelementptr i32, ptr addrspace(3) %ptr, i32 4
- %result = atomicrmw uinc_wrap ptr addrspace(3) %gep, i32 42 seq_cst, align 4
+ %result = atomicrmw uinc_wrap ptr addrspace(3) %gep, i32 42 syncscope("agent") seq_cst, align 4
ret void
}
@@ -386,7 +386,7 @@ define amdgpu_kernel void @global_atomic_inc_ret_i32(ptr addrspace(1) %out, ptr
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
- %result = atomicrmw uinc_wrap ptr addrspace(1) %ptr, i32 42 seq_cst, align 4
+ %result = atomicrmw uinc_wrap ptr addrspace(1) %ptr, i32 42 syncscope("agent") seq_cst, align 4
store i32 %result, ptr addrspace(1) %out, align 4
ret void
}
@@ -467,6 +467,89 @@ define amdgpu_kernel void @global_atomic_inc_ret_i32_offset(ptr addrspace(1) %ou
; GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT: s_endpgm
+ %gep = getelementptr i32, ptr addrspace(1) %ptr, i32 4
+ %result = atomicrmw uinc_wrap ptr addrspace(1) %gep, i32 42 syncscope("agent") seq_cst, align 4
+ store i32 %result, ptr addrspace(1) %out, align 4
+ ret void
+}
+
+define amdgpu_kernel void @global_atomic_inc_ret_i32_offset_system(ptr addrspace(1) %out, ptr addrspace(1) %ptr) #1 {
+; CI-LABEL: global_atomic_inc_ret_i32_offset_system:
+; CI: ; %bb.0:
+; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
+; CI-NEXT: v_mov_b32_e32 v2, 42
+; CI-NEXT: s_waitcnt lgkmcnt(0)
+; CI-NEXT: s_add_u32 s2, s2, 16
+; CI-NEXT: s_addc_u32 s3, s3, 0
+; CI-NEXT: v_mov_b32_e32 v0, s2
+; CI-NEXT: v_mov_b32_e32 v1, s3
+; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; CI-NEXT: flat_atomic_inc v2, v[0:1], v2 glc
+; CI-NEXT: s_waitcnt vmcnt(0)
+; CI-NEXT: buffer_wbinvl1_vol
+; CI-NEXT: v_mov_b32_e32 v0, s0
+; CI-NEXT: v_mov_b32_e32 v1, s1
+; CI-NEXT: flat_store_dword v[0:1], v2
+; CI-NEXT: s_endpgm
+;
+; VI-LABEL: global_atomic_inc_ret_i32_offset_system:
+; VI: ; %bb.0:
+; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
+; VI-NEXT: v_mov_b32_e32 v2, 42
+; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: s_add_u32 s2, s2, 16
+; VI-NEXT: s_addc_u32 s3, s3, 0
+; VI-NEXT: v_mov_b32_e32 v0, s2
+; VI-NEXT: v_mov_b32_e32 v1, s3
+; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; VI-NEXT: flat_atomic_inc v2, v[0:1], v2 glc
+; VI-NEXT: s_waitcnt vmcnt(0)
+; VI-NEXT: buffer_wbinvl1_vol
+; VI-NEXT: v_mov_b32_e32 v0, s0
+; VI-NEXT: v_mov_b32_e32 v1, s1
+; VI-NEXT: flat_store_dword v[0:1], v2
+; VI-NEXT: s_endpgm
+;
+; GFX9-LABEL: global_atomic_inc_ret_i32_offset_system:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
+; GFX9-NEXT: v_mov_b32_e32 v0, 42
+; GFX9-NEXT: v_mov_b32_e32 v1, 0
+; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX9-NEXT: global_atomic_inc v0, v1, v0, s[2:3] offset:16 glc
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: buffer_wbinvl1_vol
+; GFX9-NEXT: global_store_dword v1, v0, s[0:1]
+; GFX9-NEXT: s_endpgm
+;
+; GFX10-LABEL: global_atomic_inc_ret_i32_offset_system:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
+; GFX10-NEXT: v_mov_b32_e32 v0, 42
+; GFX10-NEXT: v_mov_b32_e32 v1, 0
+; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT: global_atomic_inc v0, v1, v0, s[2:3] offset:16 glc
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: buffer_gl0_inv
+; GFX10-NEXT: buffer_gl1_inv
+; GFX10-NEXT: global_store_dword v1, v0, s[0:1]
+; GFX10-NEXT: s_endpgm
+;
+; GFX11-LABEL: global_atomic_inc_ret_i32_offset_system:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x0
+; GFX11-NEXT: v_dual_mov_b32 v0, 42 :: v_dual_mov_b32 v1, 0
+; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-NEXT: global_atomic_inc_u32 v0, v1, v0, s[2:3] offset:16 glc
+; GFX11-NEXT: s_waitcnt vmcnt(0)
+; GFX11-NEXT: buffer_gl0_inv
+; GFX11-NEXT: buffer_gl1_inv
+; GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11-NEXT: s_nop 0
+; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%gep = getelementptr i32, ptr addrspace(1) %ptr, i32 4
%result = atomicrmw uinc_wrap ptr addrspace(1) %gep, i32 42 seq_cst, align 4
@@ -536,7 +619,7 @@ define amdgpu_kernel void @global_atomic_inc_noret_i32(ptr addrspace(1) %ptr) #1
; GFX11-NEXT: buffer_gl0_inv
; GFX11-NEXT: buffer_gl1_inv
; GFX11-NEXT: s_endpgm
- %result = atomicrmw uinc_wrap ptr addrspace(1) %ptr, i32 42 seq_cst, align 4
+ %result = atomicrmw uinc_wrap ptr addrspace(1) %ptr, i32 42 syncscope("agent") seq_cst, align 4
ret void
}
@@ -605,6 +688,77 @@ define amdgpu_kernel void @global_atomic_inc_noret_i32_offset(ptr addrspace(1) %
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_gl0_inv
; GFX11-NEXT: buffer_gl1_inv
+; GFX11-NEXT: s_endpgm
+ %gep = getelementptr i32, ptr addrspace(1) %ptr, i32 4
+ %result = atomicrmw uinc_wrap ptr addrspace(1) %gep, i32 42 syncscope("agent") seq_cst, align 4
+ ret void
+}
+
+define amdgpu_kernel void @global_atomic_inc_noret_i32_offset_system(ptr addrspace(1) %ptr) #1 {
+; CI-LABEL: global_atomic_inc_noret_i32_offset_system:
+; CI: ; %bb.0:
+; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
+; CI-NEXT: v_mov_b32_e32 v2, 42
+; CI-NEXT: s_waitcnt lgkmcnt(0)
+; CI-NEXT: s_add_u32 s0, s0, 16
+; CI-NEXT: s_addc_u32 s1, s1, 0
+; CI-NEXT: v_mov_b32_e32 v0, s0
+; CI-NEXT: v_mov_b32_e32 v1, s1
+; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; CI-NEXT: flat_atomic_inc v[0:1], v2
+; CI-NEXT: s_waitcnt vmcnt(0)
+; CI-NEXT: buffer_wbinvl1_vol
+; CI-NEXT: s_endpgm
+;
+; VI-LABEL: global_atomic_inc_noret_i32_offset_system:
+; VI: ; %bb.0:
+; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
+; VI-NEXT: v_mov_b32_e32 v2, 42
+; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: s_add_u32 s0, s0, 16
+; VI-NEXT: s_addc_u32 s1, s1, 0
+; VI-NEXT: v_mov_b32_e32 v0, s0
+; VI-NEXT: v_mov_b32_e32 v1, s1
+; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; VI-NEXT: flat_atomic_inc v[0:1], v2
+; VI-NEXT: s_waitcnt vmcnt(0)
+; VI-NEXT: buffer_wbinvl1_vol
+; VI-NEXT: s_endpgm
+;
+; GFX9-LABEL: global_atomic_inc_noret_i32_offset_system:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX9-NEXT: v_mov_b32_e32 v0, 42
+; GFX9-NEXT: v_mov_b32_e32 v1, 0
+; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX9-NEXT: global_atomic_inc v1, v0, s[0:1] offset:16
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: buffer_wbinvl1_vol
+; GFX9-NEXT: s_endpgm
+;
+; GFX10-LABEL: global_atomic_inc_noret_i32_offset_system:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX10-NEXT: v_mov_b32_e32 v0, 42
+; GFX10-NEXT: v_mov_b32_e32 v1, 0
+; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT: global_atomic_inc v1, v0, s[0:1] offset:16
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT: buffer_gl0_inv
+; GFX10-NEXT: buffer_gl1_inv
+; GFX10-NEXT: s_endpgm
+;
+; GFX11-LABEL: global_atomic_inc_noret_i32_offset_system:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
+; GFX11-NEXT: v_dual_mov_b32 v0, 42 :: v_dual_mov_b32 v1, 0
+; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-NEXT: global_atomic_inc_u32 v1, v0, s[0:1] offset:16
+; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-NEXT: buffer_gl0_inv
+; GFX11-NEXT: buffer_gl1_inv
; GFX11-NEXT: s_endpgm
%gep = getelementptr i32, ptr addrspace(1) %ptr, i32 4
%result = atomicrmw uinc_wrap ptr addrspace(1) %gep, i32 42 seq_cst, align 4
@@ -702,7 +856,7 @@ define amdgpu_kernel void @global_atomic_inc_ret_i32_offset_addr64(ptr addrspace
%gep.tid = getelementptr i32, ptr addrspace(1) %ptr, i32 %id
%out.gep = getelementptr i32, ptr addrspace(1) %out, i32 %id
%gep = getelementptr i32, ptr addrspace(1) %gep.tid, i32 5
- %result = atomicrmw uinc_wrap ptr addrspace(1) %gep, i32 42 seq_cst, align 4
+ %result = atomicrmw uinc_wrap ptr addrspace(1) %gep, i32 42 syncscope("agent") seq_cst, align 4
store i32 %result, ptr addrspace(1) %out.gep, align 4
ret void
}
@@ -782,7 +936,7 @@ define amdgpu_kernel void @global_atomic_inc_noret_i32_offset_addr64(ptr addrspa
%id = call i32 @llvm.amdgcn.workitem.id.x()
%gep.tid = getelementptr i32, ptr addrspace(1) %ptr, i32 %id
%gep = getelementptr i32, ptr addrspace(1) %gep.tid, i32 5
- %result = atomicrmw uinc_wrap ptr addrspace(1) %gep, i32 42 seq_cst, align 4
+ %result = atomicrmw uinc_wrap ptr addrspace(1) %gep, i32 42 syncscope("agent") seq_cst, align 4
ret void
}
@@ -873,7 +1027,7 @@ define amdgpu_kernel void @atomic_inc_shl_base_lds_0_i32(ptr addrspace(1) %out,
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #2
%idx.0 = add nsw i32 %tid.x, 2
%arrayidx0 = getelementptr inbounds [512 x i32], ptr addrspace(3) @lds0, i32 0, i32 %idx.0
- %result = atomicrmw uinc_wrap ptr addrspace(3) %arrayidx0, i32 9 seq_cst, align 4
+ %result = atomicrmw uinc_wrap ptr addrspace(3) %arrayidx0, i32 9 syncscope("agent") seq_cst, align 4
store i32 %idx.0, ptr addrspace(1) %add_use, align 4
store i32 %result, ptr addrspace(1) %out, align 4
ret void
@@ -964,7 +1118,7 @@ define amdgpu_kernel void @lds_atomic_inc_ret_i64(ptr addrspace(1) %out, ptr add
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
- %result = atomicrmw uinc_wrap ptr addrspace(3) %ptr, i64 42 seq_cst, align 8
+ %result = atomicrmw uinc_wrap ptr addrspace(3) %ptr, i64 42 syncscope("agent") seq_cst, align 8
store i64 %result, ptr addrspace(1) %out, align 4
ret void
}
@@ -1055,7 +1209,7 @@ define amdgpu_kernel void @lds_atomic_inc_ret_i64_offset(ptr addrspace(1) %out,
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%gep = getelementptr i64, ptr addrspace(3) %ptr, i32 4
- %result = atomicrmw uinc_wrap ptr addrspace(3) %gep, i64 42 seq_cst, align 8
+ %result = atomicrmw uinc_wrap ptr addrspace(3) %gep, i64 42 syncscope("agent") seq_cst, align 8
store i64 %result, ptr addrspace(1) %out, align 4
ret void
}
@@ -1125,7 +1279,7 @@ define amdgpu_kernel void @lds_atomic_inc_noret_i64(ptr addrspace(3) %ptr) #1 {
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: buffer_gl0_inv
; GFX11-NEXT: s_endpgm
- %result = atomicrmw uinc_wrap ptr addrspace(3) %ptr, i64 42 seq_cst, align 8
+ %result = atomicrmw uinc_wrap ptr addrspace(3) %ptr, i64 42 syncscope("agent") seq_cst, align 8
ret void
}
@@ -1195,7 +1349,7 @@ define amdgpu_kernel void @lds_atomic_inc_noret_i64_offset(ptr addrspace(3) %ptr
; GFX11-NEXT: buffer_gl0_inv
; GFX11-NEXT: s_endpgm
%gep = getelementptr i64, ptr addrspace(3) %ptr, i32 4
- %result = atomicrmw uinc_wrap ptr addrspace(3) %gep, i64 42 seq_cst, align 8
+ %result = atomicrmw uinc_wrap ptr addrspace(3) %gep, i64 42 syncscope("agent") seq_cst, align 8
ret void
}
@@ -1277,7 +1431,7 @@ define amdgpu_kernel void @global_atomic_inc_ret_i64(ptr addrspace(1) %out, ptr
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
- %result = atomicrmw uinc_wrap ptr addrspace(1) %ptr, i64 42 seq_cst, align 8
+ %result = atomicrmw uinc_wrap ptr addrspace(1) %ptr, i64 42 syncscope("agent") seq_cst, align 8
store i64 %result, ptr addrspace(1) %out, align 4
ret void
}
@@ -1363,6 +1517,94 @@ define amdgpu_kernel void @global_atomic_inc_ret_i64_offset(ptr addrspace(1) %ou
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT: s_endpgm
+ %gep = getelementptr i64, ptr addrspace(1) %ptr, i32 4
+ %result = atomicrmw uinc_wrap ptr addrspace(1) %gep, i64 42 syncscope("agent") seq_cst, align 8
+ store i64 %result, ptr addrspace(1) %out, align 4
+ ret void
+}
+
+define amdgpu_kernel void @global_atomic_inc_ret_i64_offset_system(ptr addrspace(1) %out, ptr addrspace(1) %ptr) #1 {
+; CI-LABEL: global_atomic_inc_ret_i64_offset_system:
+; CI: ; %bb.0:
+; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
+; CI-NEXT: v_mov_b32_e32 v0, 42
+; CI-NEXT: v_mov_b32_e32 v1, 0
+; CI-NEXT: s_waitcnt lgkmcnt(0)
+; CI-NEXT: s_add_u32 s2, s2, 32
+; CI-NEXT: s_addc_u32 s3, s3, 0
+; CI-NEXT: v_mov_b32_e32 v2, s2
+; CI-NEXT: v_mov_b32_e32 v3, s3
+; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; CI-NEXT: flat_atomic_inc_x2 v[0:1], v[2:3], v[0:1] glc
+; CI-NEXT: s_waitcnt vmcnt(0)
+; CI-NEXT: buffer_wbinvl1_vol
+; CI-NEXT: v_mov_b32_e32 v3, s1
+; CI-NEXT: v_mov_b32_e32 v2, s0
+; CI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
+; CI-NEXT: s_endpgm
+;
+; VI-LABEL: global_atomic_inc_ret_i64_offset_system:
+; VI: ; %bb.0:
+; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
+; VI-NEXT: v_mov_b32_e32 v0, 42
+; VI-NEXT: v_mov_b32_e32 v1, 0
+; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: s_add_u32 s2, s2, 32
+; VI-NEXT: s_addc_u32 s3, s3, 0
+; VI-NEXT: v_mov_b32_e32 v2, s2
+; VI-NEXT: v_mov_b32_e32 v3, s3
+; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; VI-NEXT: flat_atomic_inc_x2 v[0:1], v[2:3], v[0:1] glc
+; VI-NEXT: s_waitcnt vmcnt(0)
+; VI-NEXT: buffer_wbinvl1_vol
+; VI-NEXT: v_mov_b32_e32 v3, s1
+; VI-NEXT: v_mov_b32_e32 v2, s0
+; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
+; VI-NEXT: s_endpgm
+;
+; GFX9-LABEL: global_atomic_inc_ret_i64_offset_system:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
+; GFX9-NEXT: v_mov_b32_e32 v0, 42
+; GFX9-NEXT: v_mov_b32_e32 v1, 0
+; GFX9-NEXT: v_mov_b32_e32 v2, 0
+; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX9-NEXT: global_atomic_inc_x2 v[0:1], v2, v[0:1], s[2:3] offset:32 glc
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: buffer_wbinvl1_vol
+; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
+; GFX9-NEXT: s_endpgm
+;
+; GFX10-LABEL: global_atomic_inc_ret_i64_offset_system:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
+; GFX10-NEXT: v_mov_b32_e32 v0, 42
+; GFX10-NEXT: v_mov_b32_e32 v1, 0
+; GFX10-NEXT: v_mov_b32_e32 v2, 0
+; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT: global_atomic_inc_x2 v[0:1], v2, v[0:1], s[2:3] offset:32 glc
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: buffer_gl0_inv
+; GFX10-NEXT: buffer_gl1_inv
+; GFX10-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
+; GFX10-NEXT: s_endpgm
+;
+; GFX11-LABEL: global_atomic_inc_ret_i64_offset_system:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x0
+; GFX11-NEXT: v_mov_b32_e32 v0, 42
+; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, 0
+; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-NEXT: global_atomic_inc_u64 v[0:1], v2, v[0:1], s[2:3] offset:32 glc
+; GFX11-NEXT: s_waitcnt vmcnt(0)
+; GFX11-NEXT: buffer_gl0_inv
+; GFX11-NEXT: buffer_gl1_inv
+; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
+; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%gep = getelementptr i64, ptr addrspace(1) %ptr, i32 4
%result = atomicrmw uinc_wrap ptr addrspace(1) %gep, i64 42 seq_cst, align 8
@@ -1437,7 +1679,7 @@ define amdgpu_kernel void @global_atomic_inc_noret_i64(ptr addrspace(1) %ptr) #1
; GFX11-NEXT: buffer_gl0_inv
; GFX11-NEXT: buffer_gl1_inv
; GFX11-NEXT: s_endpgm
- %result = atomicrmw uinc_wrap ptr addrspace(1) %ptr, i64 42 seq_cst, align 8
+ %result = atomicrmw uinc_wrap ptr addrspace(1) %ptr, i64 42 syncscope("agent") seq_cst, align 8
ret void
}
@@ -1511,6 +1753,82 @@ define amdgpu_kernel void @global_atomic_inc_noret_i64_offset(ptr addrspace(1) %
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_gl0_inv
; GFX11-NEXT: buffer_gl1_inv
+; GFX11-NEXT: s_endpgm
+ %gep = getelementptr i64, ptr addrspace(1) %ptr, i32 4
+ %result = atomicrmw uinc_wrap ptr addrspace(1) %gep, i64 42 syncscope("agent") seq_cst, align 8
+ ret void
+}
+
+define amdgpu_kernel void @global_atomic_inc_noret_i64_offset_system(ptr addrspace(1) %ptr) #1 {
+; CI-LABEL: global_atomic_inc_noret_i64_offset_system:
+; CI: ; %bb.0:
+; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
+; CI-NEXT: v_mov_b32_e32 v0, 42
+; CI-NEXT: v_mov_b32_e32 v1, 0
+; CI-NEXT: s_waitcnt lgkmcnt(0)
+; CI-NEXT: s_add_u32 s0, s0, 32
+; CI-NEXT: s_addc_u32 s1, s1, 0
+; CI-NEXT: v_mov_b32_e32 v3, s1
+; CI-NEXT: v_mov_b32_e32 v2, s0
+; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; CI-NEXT: flat_atomic_inc_x2 v[2:3], v[0:1]
+; CI-NEXT: s_waitcnt vmcnt(0)
+; CI-NEXT: buffer_wbinvl1_vol
+; CI-NEXT: s_endpgm
+;
+; VI-LABEL: global_atomic_inc_noret_i64_offset_system:
+; VI: ; %bb.0:
+; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
+; VI-NEXT: v_mov_b32_e32 v0, 42
+; VI-NEXT: v_mov_b32_e32 v1, 0
+; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: s_add_u32 s0, s0, 32
+; VI-NEXT: s_addc_u32 s1, s1, 0
+; VI-NEXT: v_mov_b32_e32 v3, s1
+; VI-NEXT: v_mov_b32_e32 v2, s0
+; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; VI-NEXT: flat_atomic_inc_x2 v[2:3], v[0:1]
+; VI-NEXT: s_waitcnt vmcnt(0)
+; VI-NEXT: buffer_wbinvl1_vol
+; VI-NEXT: s_endpgm
+;
+; GFX9-LABEL: global_atomic_inc_noret_i64_offset_system:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX9-NEXT: v_mov_b32_e32 v0, 42
+; GFX9-NEXT: v_mov_b32_e32 v1, 0
+; GFX9-NEXT: v_mov_b32_e32 v2, 0
+; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX9-NEXT: global_atomic_inc_x2 v2, v[0:1], s[0:1] offset:32
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: buffer_wbinvl1_vol
+; GFX9-NEXT: s_endpgm
+;
+; GFX10-LABEL: global_atomic_inc_noret_i64_offset_system:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX10-NEXT: v_mov_b32_e32 v0, 42
+; GFX10-NEXT: v_mov_b32_e32 v1, 0
+; GFX10-NEXT: v_mov_b32_e32 v2, 0
+; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT: global_atomic_inc_x2 v2, v[0:1], s[0:1] offset:32
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT: buffer_gl0_inv
+; GFX10-NEXT: buffer_gl1_inv
+; GFX10-NEXT: s_endpgm
+;
+; GFX11-LABEL: global_atomic_inc_noret_i64_offset_system:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
+; GFX11-NEXT: v_mov_b32_e32 v0, 42
+; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, 0
+; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-NEXT: global_atomic_inc_u64 v2, v[0:1], s[0:1] offset:32
+; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-NEXT: buffer_gl0_inv
+; GFX11-NEXT: buffer_gl1_inv
; GFX11-NEXT: s_endpgm
%gep = getelementptr i64, ptr addrspace(1) %ptr, i32 4
%result = atomicrmw uinc_wrap ptr addrspace(1) %gep, i64 42 seq_cst, align 8
@@ -1613,7 +1931,7 @@ define amdgpu_kernel void @global_atomic_inc_ret_i64_offset_addr64(ptr addrspace
%gep.tid = getelementptr i64, ptr addrspace(1) %ptr, i32 %id
%out.gep = getelementptr i64, ptr addrspace(1) %out, i32 %id
%gep = getelementptr i64, ptr addrspace(1) %gep.tid, i32 5
- %result = atomicrmw uinc_wrap ptr addrspace(1) %gep, i64 42 seq_cst, align 8
+ %result = atomicrmw uinc_wrap ptr addrspace(1) %gep, i64 42 syncscope("agent") seq_cst, align 8
store i64 %result, ptr addrspace(1) %out.gep, align 4
ret void
}
@@ -1698,7 +2016,7 @@ define amdgpu_kernel void @global_atomic_inc_noret_i64_offset_addr64(ptr addrspa
%id = call i32 @llvm.amdgcn.workitem.id.x()
%gep.tid = getelementptr i64, ptr addrspace(1) %ptr, i32 %id
%gep = getelementptr i64, ptr addrspace(1) %gep.tid, i32 5
- %result = atomicrmw uinc_wrap ptr addrspace(1) %gep, i64 42 seq_cst, align 8
+ %result = atomicrmw uinc_wrap ptr addrspace(1) %gep, i64 42 syncscope("agent") seq_cst, align 8
ret void
}
@@ -1784,7 +2102,7 @@ define amdgpu_kernel void @flat_atomic_inc_ret_i32(ptr %out, ptr %ptr) #1 {
; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX11-NEXT: flat_store_b32 v[0:1], v2
; GFX11-NEXT: s_endpgm
- %result = atomicrmw uinc_wrap ptr %ptr, i32 42 seq_cst, align 4
+ %result = atomicrmw uinc_wrap ptr %ptr, i32 42 syncscope("agent") seq_cst, align 4
store i32 %result, ptr %out, align 4
ret void
}
@@ -1834,61 +2152,231 @@ define amdgpu_kernel void @flat_atomic_inc_ret_i32_offset(ptr %out, ptr %ptr) #1
; GFX9-NEXT: v_mov_b32_e32 v0, s2
; GFX9-NEXT: v_mov_b32_e32 v1, s3
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX9-NEXT: flat_atomic_inc v2, v[0:1], v2 offset:16 glc
+; GFX9-NEXT: flat_atomic_inc v2, v[0:1], v2 offset:16 glc
+; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX9-NEXT: buffer_wbinvl1_vol
+; GFX9-NEXT: v_mov_b32_e32 v0, s0
+; GFX9-NEXT: v_mov_b32_e32 v1, s1
+; GFX9-NEXT: flat_store_dword v[0:1], v2
+; GFX9-NEXT: s_endpgm
+;
+; GFX10-LABEL: flat_atomic_inc_ret_i32_offset:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
+; GFX10-NEXT: v_mov_b32_e32 v2, 42
+; GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-NEXT: s_add_u32 s2, s2, 16
+; GFX10-NEXT: s_addc_u32 s3, s3, 0
+; GFX10-NEXT: v_mov_b32_e32 v0, s2
+; GFX10-NEXT: v_mov_b32_e32 v1, s3
+; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT: flat_atomic_inc v2, v[0:1], v2 glc
+; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX10-NEXT: buffer_gl0_inv
+; GFX10-NEXT: buffer_gl1_inv
+; GFX10-NEXT: v_mov_b32_e32 v0, s0
+; GFX10-NEXT: v_mov_b32_e32 v1, s1
+; GFX10-NEXT: flat_store_dword v[0:1], v2
+; GFX10-NEXT: s_endpgm
+;
+; GFX11-LABEL: flat_atomic_inc_ret_i32_offset:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x0
+; GFX11-NEXT: v_mov_b32_e32 v2, 42
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
+; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-NEXT: flat_atomic_inc_u32 v2, v[0:1], v2 offset:16 glc
+; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX11-NEXT: buffer_gl0_inv
+; GFX11-NEXT: buffer_gl1_inv
+; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
+; GFX11-NEXT: flat_store_b32 v[0:1], v2
+; GFX11-NEXT: s_endpgm
+ %gep = getelementptr i32, ptr %ptr, i32 4
+ %result = atomicrmw uinc_wrap ptr %gep, i32 42 syncscope("agent") seq_cst, align 4
+ store i32 %result, ptr %out, align 4
+ ret void
+}
+
+define amdgpu_kernel void @flat_atomic_inc_ret_i32_offset_system(ptr %out, ptr %ptr) #1 {
+; CI-LABEL: flat_atomic_inc_ret_i32_offset_system:
+; CI: ; %bb.0:
+; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
+; CI-NEXT: v_mov_b32_e32 v2, 42
+; CI-NEXT: s_waitcnt lgkmcnt(0)
+; CI-NEXT: s_add_u32 s2, s2, 16
+; CI-NEXT: s_addc_u32 s3, s3, 0
+; CI-NEXT: v_mov_b32_e32 v0, s2
+; CI-NEXT: v_mov_b32_e32 v1, s3
+; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; CI-NEXT: flat_atomic_inc v2, v[0:1], v2 glc
+; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; CI-NEXT: buffer_wbinvl1_vol
+; CI-NEXT: v_mov_b32_e32 v0, s0
+; CI-NEXT: v_mov_b32_e32 v1, s1
+; CI-NEXT: flat_store_dword v[0:1], v2
+; CI-NEXT: s_endpgm
+;
+; VI-LABEL: flat_atomic_inc_ret_i32_offset_system:
+; VI: ; %bb.0:
+; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
+; VI-NEXT: v_mov_b32_e32 v2, 42
+; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: s_add_u32 s2, s2, 16
+; VI-NEXT: s_addc_u32 s3, s3, 0
+; VI-NEXT: v_mov_b32_e32 v0, s2
+; VI-NEXT: v_mov_b32_e32 v1, s3
+; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; VI-NEXT: flat_atomic_inc v2, v[0:1], v2 glc
+; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; VI-NEXT: buffer_wbinvl1_vol
+; VI-NEXT: v_mov_b32_e32 v0, s0
+; VI-NEXT: v_mov_b32_e32 v1, s1
+; VI-NEXT: flat_store_dword v[0:1], v2
+; VI-NEXT: s_endpgm
+;
+; GFX9-LABEL: flat_atomic_inc_ret_i32_offset_system:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
+; GFX9-NEXT: v_mov_b32_e32 v2, 42
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: v_mov_b32_e32 v0, s2
+; GFX9-NEXT: v_mov_b32_e32 v1, s3
+; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX9-NEXT: flat_atomic_inc v2, v[0:1], v2 offset:16 glc
+; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX9-NEXT: buffer_wbinvl1_vol
+; GFX9-NEXT: v_mov_b32_e32 v0, s0
+; GFX9-NEXT: v_mov_b32_e32 v1, s1
+; GFX9-NEXT: flat_store_dword v[0:1], v2
+; GFX9-NEXT: s_endpgm
+;
+; GFX10-LABEL: flat_atomic_inc_ret_i32_offset_system:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
+; GFX10-NEXT: v_mov_b32_e32 v2, 42
+; GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-NEXT: s_add_u32 s2, s2, 16
+; GFX10-NEXT: s_addc_u32 s3, s3, 0
+; GFX10-NEXT: v_mov_b32_e32 v0, s2
+; GFX10-NEXT: v_mov_b32_e32 v1, s3
+; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT: flat_atomic_inc v2, v[0:1], v2 glc
+; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX10-NEXT: buffer_gl0_inv
+; GFX10-NEXT: buffer_gl1_inv
+; GFX10-NEXT: v_mov_b32_e32 v0, s0
+; GFX10-NEXT: v_mov_b32_e32 v1, s1
+; GFX10-NEXT: flat_store_dword v[0:1], v2
+; GFX10-NEXT: s_endpgm
+;
+; GFX11-LABEL: flat_atomic_inc_ret_i32_offset_system:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x0
+; GFX11-NEXT: v_mov_b32_e32 v2, 42
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
+; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-NEXT: flat_atomic_inc_u32 v2, v[0:1], v2 offset:16 glc
+; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX11-NEXT: buffer_gl0_inv
+; GFX11-NEXT: buffer_gl1_inv
+; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
+; GFX11-NEXT: flat_store_b32 v[0:1], v2
+; GFX11-NEXT: s_endpgm
+ %gep = getelementptr i32, ptr %ptr, i32 4
+ %result = atomicrmw uinc_wrap ptr %gep, i32 42 seq_cst, align 4
+ store i32 %result, ptr %out, align 4
+ ret void
+}
+
+define amdgpu_kernel void @flat_atomic_inc_noret_i32(ptr %ptr) #1 {
+; CI-LABEL: flat_atomic_inc_noret_i32:
+; CI: ; %bb.0:
+; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
+; CI-NEXT: v_mov_b32_e32 v2, 42
+; CI-NEXT: s_waitcnt lgkmcnt(0)
+; CI-NEXT: v_mov_b32_e32 v0, s0
+; CI-NEXT: v_mov_b32_e32 v1, s1
+; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; CI-NEXT: flat_atomic_inc v[0:1], v2
+; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; CI-NEXT: buffer_wbinvl1_vol
+; CI-NEXT: s_endpgm
+;
+; VI-LABEL: flat_atomic_inc_noret_i32:
+; VI: ; %bb.0:
+; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
+; VI-NEXT: v_mov_b32_e32 v2, 42
+; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: v_mov_b32_e32 v0, s0
+; VI-NEXT: v_mov_b32_e32 v1, s1
+; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; VI-NEXT: flat_atomic_inc v[0:1], v2
+; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; VI-NEXT: buffer_wbinvl1_vol
+; VI-NEXT: s_endpgm
+;
+; GFX9-LABEL: flat_atomic_inc_noret_i32:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX9-NEXT: v_mov_b32_e32 v2, 42
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: v_mov_b32_e32 v0, s0
+; GFX9-NEXT: v_mov_b32_e32 v1, s1
+; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX9-NEXT: flat_atomic_inc v[0:1], v2
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-NEXT: buffer_wbinvl1_vol
-; GFX9-NEXT: v_mov_b32_e32 v0, s0
-; GFX9-NEXT: v_mov_b32_e32 v1, s1
-; GFX9-NEXT: flat_store_dword v[0:1], v2
; GFX9-NEXT: s_endpgm
;
-; GFX10-LABEL: flat_atomic_inc_ret_i32_offset:
+; GFX10-LABEL: flat_atomic_inc_noret_i32:
; GFX10: ; %bb.0:
-; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
+; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX10-NEXT: v_mov_b32_e32 v2, 42
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-NEXT: s_add_u32 s2, s2, 16
-; GFX10-NEXT: s_addc_u32 s3, s3, 0
-; GFX10-NEXT: v_mov_b32_e32 v0, s2
-; GFX10-NEXT: v_mov_b32_e32 v1, s3
+; GFX10-NEXT: v_mov_b32_e32 v0, s0
+; GFX10-NEXT: v_mov_b32_e32 v1, s1
; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: flat_atomic_inc v2, v[0:1], v2 glc
-; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX10-NEXT: flat_atomic_inc v[0:1], v2
+; GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: buffer_gl0_inv
; GFX10-NEXT: buffer_gl1_inv
-; GFX10-NEXT: v_mov_b32_e32 v0, s0
-; GFX10-NEXT: v_mov_b32_e32 v1, s1
-; GFX10-NEXT: flat_store_dword v[0:1], v2
; GFX10-NEXT: s_endpgm
;
-; GFX11-LABEL: flat_atomic_inc_ret_i32_offset:
+; GFX11-LABEL: flat_atomic_inc_noret_i32:
; GFX11: ; %bb.0:
-; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x0
+; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; GFX11-NEXT: v_mov_b32_e32 v2, 42
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
+; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX11-NEXT: flat_atomic_inc_u32 v2, v[0:1], v2 offset:16 glc
-; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX11-NEXT: flat_atomic_inc_u32 v[0:1], v2
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_gl0_inv
; GFX11-NEXT: buffer_gl1_inv
-; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
-; GFX11-NEXT: flat_store_b32 v[0:1], v2
; GFX11-NEXT: s_endpgm
- %gep = getelementptr i32, ptr %ptr, i32 4
- %result = atomicrmw uinc_wrap ptr %gep, i32 42 seq_cst, align 4
- store i32 %result, ptr %out, align 4
+ %result = atomicrmw uinc_wrap ptr %ptr, i32 42 syncscope("agent") seq_cst, align 4
ret void
}
-define amdgpu_kernel void @flat_atomic_inc_noret_i32(ptr %ptr) #1 {
-; CI-LABEL: flat_atomic_inc_noret_i32:
+define amdgpu_kernel void @flat_atomic_inc_noret_i32_offset(ptr %ptr) #1 {
+; CI-LABEL: flat_atomic_inc_noret_i32_offset:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; CI-NEXT: v_mov_b32_e32 v2, 42
; CI-NEXT: s_waitcnt lgkmcnt(0)
+; CI-NEXT: s_add_u32 s0, s0, 16
+; CI-NEXT: s_addc_u32 s1, s1, 0
; CI-NEXT: v_mov_b32_e32 v0, s0
; CI-NEXT: v_mov_b32_e32 v1, s1
; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
@@ -1897,11 +2385,13 @@ define amdgpu_kernel void @flat_atomic_inc_noret_i32(ptr %ptr) #1 {
; CI-NEXT: buffer_wbinvl1_vol
; CI-NEXT: s_endpgm
;
-; VI-LABEL: flat_atomic_inc_noret_i32:
+; VI-LABEL: flat_atomic_inc_noret_i32_offset:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; VI-NEXT: v_mov_b32_e32 v2, 42
; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: s_add_u32 s0, s0, 16
+; VI-NEXT: s_addc_u32 s1, s1, 0
; VI-NEXT: v_mov_b32_e32 v0, s0
; VI-NEXT: v_mov_b32_e32 v1, s1
; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
@@ -1910,7 +2400,7 @@ define amdgpu_kernel void @flat_atomic_inc_noret_i32(ptr %ptr) #1 {
; VI-NEXT: buffer_wbinvl1_vol
; VI-NEXT: s_endpgm
;
-; GFX9-LABEL: flat_atomic_inc_noret_i32:
+; GFX9-LABEL: flat_atomic_inc_noret_i32_offset:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX9-NEXT: v_mov_b32_e32 v2, 42
@@ -1918,16 +2408,18 @@ define amdgpu_kernel void @flat_atomic_inc_noret_i32(ptr %ptr) #1 {
; GFX9-NEXT: v_mov_b32_e32 v0, s0
; GFX9-NEXT: v_mov_b32_e32 v1, s1
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX9-NEXT: flat_atomic_inc v[0:1], v2
+; GFX9-NEXT: flat_atomic_inc v[0:1], v2 offset:16
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-NEXT: buffer_wbinvl1_vol
; GFX9-NEXT: s_endpgm
;
-; GFX10-LABEL: flat_atomic_inc_noret_i32:
+; GFX10-LABEL: flat_atomic_inc_noret_i32_offset:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX10-NEXT: v_mov_b32_e32 v2, 42
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-NEXT: s_add_u32 s0, s0, 16
+; GFX10-NEXT: s_addc_u32 s1, s1, 0
; GFX10-NEXT: v_mov_b32_e32 v0, s0
; GFX10-NEXT: v_mov_b32_e32 v1, s1
; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
@@ -1939,7 +2431,7 @@ define amdgpu_kernel void @flat_atomic_inc_noret_i32(ptr %ptr) #1 {
; GFX10-NEXT: buffer_gl1_inv
; GFX10-NEXT: s_endpgm
;
-; GFX11-LABEL: flat_atomic_inc_noret_i32:
+; GFX11-LABEL: flat_atomic_inc_noret_i32_offset:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; GFX11-NEXT: v_mov_b32_e32 v2, 42
@@ -1947,18 +2439,19 @@ define amdgpu_kernel void @flat_atomic_inc_noret_i32(ptr %ptr) #1 {
; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX11-NEXT: flat_atomic_inc_u32 v[0:1], v2
+; GFX11-NEXT: flat_atomic_inc_u32 v[0:1], v2 offset:16
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_gl0_inv
; GFX11-NEXT: buffer_gl1_inv
; GFX11-NEXT: s_endpgm
- %result = atomicrmw uinc_wrap ptr %ptr, i32 42 seq_cst, align 4
+ %gep = getelementptr i32, ptr %ptr, i32 4
+ %result = atomicrmw uinc_wrap ptr %gep, i32 42 syncscope("agent") seq_cst, align 4
ret void
}
-define amdgpu_kernel void @flat_atomic_inc_noret_i32_offset(ptr %ptr) #1 {
-; CI-LABEL: flat_atomic_inc_noret_i32_offset:
+define amdgpu_kernel void @flat_atomic_inc_noret_i32_offset_system(ptr %ptr) #1 {
+; CI-LABEL: flat_atomic_inc_noret_i32_offset_system:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; CI-NEXT: v_mov_b32_e32 v2, 42
@@ -1973,7 +2466,7 @@ define amdgpu_kernel void @flat_atomic_inc_noret_i32_offset(ptr %ptr) #1 {
; CI-NEXT: buffer_wbinvl1_vol
; CI-NEXT: s_endpgm
;
-; VI-LABEL: flat_atomic_inc_noret_i32_offset:
+; VI-LABEL: flat_atomic_inc_noret_i32_offset_system:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; VI-NEXT: v_mov_b32_e32 v2, 42
@@ -1988,7 +2481,7 @@ define amdgpu_kernel void @flat_atomic_inc_noret_i32_offset(ptr %ptr) #1 {
; VI-NEXT: buffer_wbinvl1_vol
; VI-NEXT: s_endpgm
;
-; GFX9-LABEL: flat_atomic_inc_noret_i32_offset:
+; GFX9-LABEL: flat_atomic_inc_noret_i32_offset_system:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX9-NEXT: v_mov_b32_e32 v2, 42
@@ -2001,7 +2494,7 @@ define amdgpu_kernel void @flat_atomic_inc_noret_i32_offset(ptr %ptr) #1 {
; GFX9-NEXT: buffer_wbinvl1_vol
; GFX9-NEXT: s_endpgm
;
-; GFX10-LABEL: flat_atomic_inc_noret_i32_offset:
+; GFX10-LABEL: flat_atomic_inc_noret_i32_offset_system:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX10-NEXT: v_mov_b32_e32 v2, 42
@@ -2019,7 +2512,7 @@ define amdgpu_kernel void @flat_atomic_inc_noret_i32_offset(ptr %ptr) #1 {
; GFX10-NEXT: buffer_gl1_inv
; GFX10-NEXT: s_endpgm
;
-; GFX11-LABEL: flat_atomic_inc_noret_i32_offset:
+; GFX11-LABEL: flat_atomic_inc_noret_i32_offset_system:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; GFX11-NEXT: v_mov_b32_e32 v2, 42
@@ -2156,7 +2649,7 @@ define amdgpu_kernel void @flat_atomic_inc_ret_i32_offset_addr64(ptr %out, ptr %
%gep.tid = getelementptr i32, ptr %ptr, i32 %id
%out.gep = getelementptr i32, ptr %out, i32 %id
%gep = getelementptr i32, ptr %gep.tid, i32 5
- %result = atomicrmw uinc_wrap ptr %gep, i32 42 seq_cst, align 4
+ %result = atomicrmw uinc_wrap ptr %gep, i32 42 syncscope("agent") seq_cst, align 4
store i32 %result, ptr %out.gep, align 4
ret void
}
@@ -2256,7 +2749,7 @@ define amdgpu_kernel void @flat_atomic_inc_noret_i32_offset_addr64(ptr %ptr) #1
%id = call i32 @llvm.amdgcn.workitem.id.x()
%gep.tid = getelementptr i32, ptr %ptr, i32 %id
%gep = getelementptr i32, ptr %gep.tid, i32 5
- %result = atomicrmw uinc_wrap ptr %gep, i32 42 seq_cst, align 4
+ %result = atomicrmw uinc_wrap ptr %gep, i32 42 syncscope("agent") seq_cst, align 4
ret void
}
@@ -2352,7 +2845,7 @@ define amdgpu_kernel void @atomic_inc_shl_base_lds_0_i64(ptr addrspace(1) %out,
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #2
%idx.0 = add nsw i32 %tid.x, 2
%arrayidx0 = getelementptr inbounds [512 x i64], ptr addrspace(3) @lds1, i32 0, i32 %idx.0
- %result = atomicrmw uinc_wrap ptr addrspace(3) %arrayidx0, i64 9 seq_cst, align 8
+ %result = atomicrmw uinc_wrap ptr addrspace(3) %arrayidx0, i64 9 syncscope("agent") seq_cst, align 8
store i32 %idx.0, ptr addrspace(1) %add_use, align 4
store i64 %result, ptr addrspace(1) %out, align 4
ret void
@@ -2455,7 +2948,7 @@ define amdgpu_kernel void @flat_atomic_inc_ret_i64(ptr %out, ptr %ptr) #1 {
; GFX11-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
; GFX11-NEXT: flat_store_b64 v[2:3], v[0:1]
; GFX11-NEXT: s_endpgm
- %result = atomicrmw uinc_wrap ptr %ptr, i64 42 seq_cst, align 8
+ %result = atomicrmw uinc_wrap ptr %ptr, i64 42 syncscope("agent") seq_cst, align 8
store i64 %result, ptr %out, align 4
ret void
}
@@ -2562,6 +3055,115 @@ define amdgpu_kernel void @flat_atomic_inc_ret_i64_offset(ptr %out, ptr %ptr) #1
; GFX11-NEXT: buffer_gl1_inv
; GFX11-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
; GFX11-NEXT: flat_store_b64 v[2:3], v[0:1]
+; GFX11-NEXT: s_endpgm
+ %gep = getelementptr i64, ptr %ptr, i32 4
+ %result = atomicrmw uinc_wrap ptr %gep, i64 42 syncscope("agent") seq_cst, align 8
+ store i64 %result, ptr %out, align 4
+ ret void
+}
+
+define amdgpu_kernel void @flat_atomic_inc_ret_i64_offset_system(ptr %out, ptr %ptr) #1 {
+; CI-LABEL: flat_atomic_inc_ret_i64_offset_system:
+; CI: ; %bb.0:
+; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
+; CI-NEXT: v_mov_b32_e32 v0, 42
+; CI-NEXT: v_mov_b32_e32 v1, 0
+; CI-NEXT: s_waitcnt lgkmcnt(0)
+; CI-NEXT: s_add_u32 s2, s2, 32
+; CI-NEXT: s_addc_u32 s3, s3, 0
+; CI-NEXT: v_mov_b32_e32 v2, s2
+; CI-NEXT: v_mov_b32_e32 v3, s3
+; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; CI-NEXT: flat_atomic_inc_x2 v[0:1], v[2:3], v[0:1] glc
+; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; CI-NEXT: buffer_wbinvl1_vol
+; CI-NEXT: v_mov_b32_e32 v3, s1
+; CI-NEXT: v_mov_b32_e32 v2, s0
+; CI-NEXT: s_add_u32 s0, s0, 4
+; CI-NEXT: s_addc_u32 s1, s1, 0
+; CI-NEXT: v_mov_b32_e32 v5, s1
+; CI-NEXT: v_mov_b32_e32 v4, s0
+; CI-NEXT: flat_store_dword v[2:3], v0
+; CI-NEXT: flat_store_dword v[4:5], v1
+; CI-NEXT: s_endpgm
+;
+; VI-LABEL: flat_atomic_inc_ret_i64_offset_system:
+; VI: ; %bb.0:
+; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
+; VI-NEXT: v_mov_b32_e32 v0, 42
+; VI-NEXT: v_mov_b32_e32 v1, 0
+; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: s_add_u32 s2, s2, 32
+; VI-NEXT: s_addc_u32 s3, s3, 0
+; VI-NEXT: v_mov_b32_e32 v2, s2
+; VI-NEXT: v_mov_b32_e32 v3, s3
+; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; VI-NEXT: flat_atomic_inc_x2 v[0:1], v[2:3], v[0:1] glc
+; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; VI-NEXT: buffer_wbinvl1_vol
+; VI-NEXT: v_mov_b32_e32 v3, s1
+; VI-NEXT: v_mov_b32_e32 v2, s0
+; VI-NEXT: s_add_u32 s0, s0, 4
+; VI-NEXT: s_addc_u32 s1, s1, 0
+; VI-NEXT: v_mov_b32_e32 v5, s1
+; VI-NEXT: v_mov_b32_e32 v4, s0
+; VI-NEXT: flat_store_dword v[2:3], v0
+; VI-NEXT: flat_store_dword v[4:5], v1
+; VI-NEXT: s_endpgm
+;
+; GFX9-LABEL: flat_atomic_inc_ret_i64_offset_system:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
+; GFX9-NEXT: v_mov_b32_e32 v0, 42
+; GFX9-NEXT: v_mov_b32_e32 v1, 0
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: v_mov_b32_e32 v2, s2
+; GFX9-NEXT: v_mov_b32_e32 v3, s3
+; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX9-NEXT: flat_atomic_inc_x2 v[0:1], v[2:3], v[0:1] offset:32 glc
+; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX9-NEXT: buffer_wbinvl1_vol
+; GFX9-NEXT: v_mov_b32_e32 v3, s1
+; GFX9-NEXT: v_mov_b32_e32 v2, s0
+; GFX9-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
+; GFX9-NEXT: s_endpgm
+;
+; GFX10-LABEL: flat_atomic_inc_ret_i64_offset_system:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
+; GFX10-NEXT: v_mov_b32_e32 v0, 42
+; GFX10-NEXT: v_mov_b32_e32 v1, 0
+; GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-NEXT: s_add_u32 s2, s2, 32
+; GFX10-NEXT: s_addc_u32 s3, s3, 0
+; GFX10-NEXT: v_mov_b32_e32 v2, s2
+; GFX10-NEXT: v_mov_b32_e32 v3, s3
+; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT: flat_atomic_inc_x2 v[0:1], v[2:3], v[0:1] glc
+; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX10-NEXT: buffer_gl0_inv
+; GFX10-NEXT: buffer_gl1_inv
+; GFX10-NEXT: v_mov_b32_e32 v3, s1
+; GFX10-NEXT: v_mov_b32_e32 v2, s0
+; GFX10-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
+; GFX10-NEXT: s_endpgm
+;
+; GFX11-LABEL: flat_atomic_inc_ret_i64_offset_system:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x0
+; GFX11-NEXT: v_mov_b32_e32 v0, 42
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s2
+; GFX11-NEXT: v_mov_b32_e32 v3, s3
+; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-NEXT: flat_atomic_inc_u64 v[0:1], v[2:3], v[0:1] offset:32 glc
+; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX11-NEXT: buffer_gl0_inv
+; GFX11-NEXT: buffer_gl1_inv
+; GFX11-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
+; GFX11-NEXT: flat_store_b64 v[2:3], v[0:1]
; GFX11-NEXT: s_endpgm
%gep = getelementptr i64, ptr %ptr, i32 4
%result = atomicrmw uinc_wrap ptr %gep, i64 42 seq_cst, align 8
@@ -2644,7 +3246,7 @@ define amdgpu_kernel void @flat_atomic_inc_noret_i64(ptr %ptr) #1 {
; GFX11-NEXT: buffer_gl0_inv
; GFX11-NEXT: buffer_gl1_inv
; GFX11-NEXT: s_endpgm
- %result = atomicrmw uinc_wrap ptr %ptr, i64 42 seq_cst, align 8
+ %result = atomicrmw uinc_wrap ptr %ptr, i64 42 syncscope("agent") seq_cst, align 8
ret void
}
@@ -2728,6 +3330,92 @@ define amdgpu_kernel void @flat_atomic_inc_noret_i64_offset(ptr %ptr) #1 {
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_gl0_inv
; GFX11-NEXT: buffer_gl1_inv
+; GFX11-NEXT: s_endpgm
+ %gep = getelementptr i64, ptr %ptr, i32 4
+ %result = atomicrmw uinc_wrap ptr %gep, i64 42 syncscope("agent") seq_cst, align 8
+ ret void
+}
+
+define amdgpu_kernel void @flat_atomic_inc_noret_i64_offset_system(ptr %ptr) #1 {
+; CI-LABEL: flat_atomic_inc_noret_i64_offset_system:
+; CI: ; %bb.0:
+; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
+; CI-NEXT: v_mov_b32_e32 v0, 42
+; CI-NEXT: v_mov_b32_e32 v1, 0
+; CI-NEXT: s_waitcnt lgkmcnt(0)
+; CI-NEXT: s_add_u32 s0, s0, 32
+; CI-NEXT: s_addc_u32 s1, s1, 0
+; CI-NEXT: v_mov_b32_e32 v3, s1
+; CI-NEXT: v_mov_b32_e32 v2, s0
+; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; CI-NEXT: flat_atomic_inc_x2 v[2:3], v[0:1]
+; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; CI-NEXT: buffer_wbinvl1_vol
+; CI-NEXT: s_endpgm
+;
+; VI-LABEL: flat_atomic_inc_noret_i64_offset_system:
+; VI: ; %bb.0:
+; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
+; VI-NEXT: v_mov_b32_e32 v0, 42
+; VI-NEXT: v_mov_b32_e32 v1, 0
+; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: s_add_u32 s0, s0, 32
+; VI-NEXT: s_addc_u32 s1, s1, 0
+; VI-NEXT: v_mov_b32_e32 v3, s1
+; VI-NEXT: v_mov_b32_e32 v2, s0
+; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; VI-NEXT: flat_atomic_inc_x2 v[2:3], v[0:1]
+; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; VI-NEXT: buffer_wbinvl1_vol
+; VI-NEXT: s_endpgm
+;
+; GFX9-LABEL: flat_atomic_inc_noret_i64_offset_system:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX9-NEXT: v_mov_b32_e32 v0, 42
+; GFX9-NEXT: v_mov_b32_e32 v1, 0
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: v_mov_b32_e32 v3, s1
+; GFX9-NEXT: v_mov_b32_e32 v2, s0
+; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX9-NEXT: flat_atomic_inc_x2 v[2:3], v[0:1] offset:32
+; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX9-NEXT: buffer_wbinvl1_vol
+; GFX9-NEXT: s_endpgm
+;
+; GFX10-LABEL: flat_atomic_inc_noret_i64_offset_system:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX10-NEXT: v_mov_b32_e32 v0, 42
+; GFX10-NEXT: v_mov_b32_e32 v1, 0
+; GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-NEXT: s_add_u32 s0, s0, 32
+; GFX10-NEXT: s_addc_u32 s1, s1, 0
+; GFX10-NEXT: v_mov_b32_e32 v3, s1
+; GFX10-NEXT: v_mov_b32_e32 v2, s0
+; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT: flat_atomic_inc_x2 v[2:3], v[0:1]
+; GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT: buffer_gl0_inv
+; GFX10-NEXT: buffer_gl1_inv
+; GFX10-NEXT: s_endpgm
+;
+; GFX11-LABEL: flat_atomic_inc_noret_i64_offset_system:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
+; GFX11-NEXT: v_mov_b32_e32 v0, 42
+; GFX11-NEXT: v_mov_b32_e32 v1, 0
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
+; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-NEXT: flat_atomic_inc_u64 v[2:3], v[0:1] offset:32
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-NEXT: buffer_gl0_inv
+; GFX11-NEXT: buffer_gl1_inv
; GFX11-NEXT: s_endpgm
%gep = getelementptr i64, ptr %ptr, i32 4
%result = atomicrmw uinc_wrap ptr %gep, i64 42 seq_cst, align 8
@@ -2864,7 +3552,7 @@ define amdgpu_kernel void @flat_atomic_inc_ret_i64_offset_addr64(ptr %out, ptr %
%gep.tid = getelementptr i64, ptr %ptr, i32 %id
%out.gep = getelementptr i64, ptr %out, i32 %id
%gep = getelementptr i64, ptr %gep.tid, i32 5
- %result = atomicrmw uinc_wrap ptr %gep, i64 42 seq_cst, align 8
+ %result = atomicrmw uinc_wrap ptr %gep, i64 42 syncscope("agent") seq_cst, align 8
store i64 %result, ptr %out.gep, align 4
ret void
}
@@ -2969,7 +3657,7 @@ define amdgpu_kernel void @flat_atomic_inc_noret_i64_offset_addr64(ptr %ptr) #1
%id = call i32 @llvm.amdgcn.workitem.id.x()
%gep.tid = getelementptr i64, ptr %ptr, i32 %id
%gep = getelementptr i64, ptr %gep.tid, i32 5
- %result = atomicrmw uinc_wrap ptr %gep, i64 42 seq_cst, align 8
+ %result = atomicrmw uinc_wrap ptr %gep, i64 42 syncscope("agent") seq_cst, align 8
ret void
}
@@ -3078,8 +3766,8 @@ define amdgpu_kernel void @nocse_lds_atomic_inc_ret_i32(ptr addrspace(1) %out0,
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
- %result0 = atomicrmw uinc_wrap ptr addrspace(3) %ptr, i32 42 seq_cst, align 4
- %result1 = atomicrmw uinc_wrap ptr addrspace(3) %ptr, i32 42 seq_cst, align 4
+ %result0 = atomicrmw uinc_wrap ptr addrspace(3) %ptr, i32 42 syncscope("agent") seq_cst, align 4
+ %result1 = atomicrmw uinc_wrap ptr addrspace(3) %ptr, i32 42 syncscope("agent") seq_cst, align 4
store i32 %result0, ptr addrspace(1) %out0, align 4
store i32 %result1, ptr addrspace(1) %out1, align 4
ret void
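
(For reference, a minimal sketch of the two atomicrmw forms these tests exercise, using a hypothetical pointer %p. Without a syncscope the operation defaults to "system" scope, ordered against all agents in the system; "agent" restricts ordering to threads on the same GPU. The *_system test variants keep the default form.)

  ; system (default) scope, as kept in the *_system variants:
  %r0 = atomicrmw uinc_wrap ptr addrspace(1) %p, i32 42 seq_cst, align 4
  ; agent scope, as the remaining tests now use:
  %r1 = atomicrmw uinc_wrap ptr addrspace(1) %p, i32 42 syncscope("agent") seq_cst, align 4
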
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/mubuf-global.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/mubuf-global.ll
index 17a0e93dd4a221..c0a9cdf3a9344a 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/mubuf-global.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/mubuf-global.ll
@@ -927,7 +927,7 @@ define amdgpu_ps float @mubuf_atomicrmw_sgpr_ptr_offset4095(ptr addrspace(1) inr
; GFX7-NEXT: buffer_wbinvl1
; GFX7-NEXT: ; return to shader part epilog
%gep = getelementptr i32, ptr addrspace(1) %ptr, i64 4095
- %result = atomicrmw add ptr addrspace(1) %gep, i32 2 seq_cst
+ %result = atomicrmw add ptr addrspace(1) %gep, i32 2 syncscope("agent") seq_cst
%cast = bitcast i32 %result to float
ret float %cast
}
@@ -968,7 +968,7 @@ define amdgpu_ps float @mubuf_atomicrmw_sgpr_ptr_offset4294967296(ptr addrspace(
; GFX7-NEXT: buffer_wbinvl1
; GFX7-NEXT: ; return to shader part epilog
%gep = getelementptr i32, ptr addrspace(1) %ptr, i64 4294967296
- %result = atomicrmw add ptr addrspace(1) %gep, i32 2 seq_cst
+ %result = atomicrmw add ptr addrspace(1) %gep, i32 2 syncscope("agent") seq_cst
%cast = bitcast i32 %result to float
ret float %cast
}
@@ -1003,7 +1003,7 @@ define amdgpu_ps float @mubuf_atomicrmw_vgpr_ptr_offset4095(ptr addrspace(1) %pt
; GFX7-NEXT: v_mov_b32_e32 v0, v2
; GFX7-NEXT: ; return to shader part epilog
%gep = getelementptr i32, ptr addrspace(1) %ptr, i64 4095
- %result = atomicrmw add ptr addrspace(1) %gep, i32 2 seq_cst
+ %result = atomicrmw add ptr addrspace(1) %gep, i32 2 syncscope("agent") seq_cst
%cast = bitcast i32 %result to float
ret float %cast
}
@@ -1038,7 +1038,7 @@ define amdgpu_ps float @mubuf_atomicrmw_vgpr_ptr_offset4294967296(ptr addrspace(
; GFX7-NEXT: v_mov_b32_e32 v0, v2
; GFX7-NEXT: ; return to shader part epilog
%gep = getelementptr i32, ptr addrspace(1) %ptr, i64 4294967296
- %result = atomicrmw add ptr addrspace(1) %gep, i32 2 seq_cst
+ %result = atomicrmw add ptr addrspace(1) %gep, i32 2 syncscope("agent") seq_cst
%cast = bitcast i32 %result to float
ret float %cast
}
@@ -1077,7 +1077,7 @@ define amdgpu_ps float @mubuf_atomicrmw_sgpr_ptr_vgpr_offset(ptr addrspace(1) in
; GFX7-NEXT: v_mov_b32_e32 v0, v2
; GFX7-NEXT: ; return to shader part epilog
%gep = getelementptr i32, ptr addrspace(1) %ptr, i32 %voffset
- %result = atomicrmw add ptr addrspace(1) %gep, i32 2 seq_cst
+ %result = atomicrmw add ptr addrspace(1) %gep, i32 2 syncscope("agent") seq_cst
%cast = bitcast i32 %result to float
ret float %cast
}
@@ -1114,7 +1114,7 @@ define amdgpu_ps float @mubuf_cmpxchg_sgpr_ptr_offset4095(ptr addrspace(1) inreg
; GFX7-NEXT: v_mov_b32_e32 v0, v1
; GFX7-NEXT: ; return to shader part epilog
%gep = getelementptr i32, ptr addrspace(1) %ptr, i64 4095
- %result.struct = cmpxchg ptr addrspace(1) %gep, i32 %old, i32 %in seq_cst seq_cst
+ %result.struct = cmpxchg ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent") seq_cst seq_cst
%result = extractvalue { i32, i1 } %result.struct, 0
%cast = bitcast i32 %result to float
ret float %cast
@@ -1158,7 +1158,7 @@ define amdgpu_ps float @mubuf_cmpxchg_sgpr_ptr_offset4294967296(ptr addrspace(1)
; GFX7-NEXT: v_mov_b32_e32 v0, v1
; GFX7-NEXT: ; return to shader part epilog
%gep = getelementptr i32, ptr addrspace(1) %ptr, i64 4294967296
- %result.struct = cmpxchg ptr addrspace(1) %gep, i32 %old, i32 %in seq_cst seq_cst
+ %result.struct = cmpxchg ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent") seq_cst seq_cst
%result = extractvalue { i32, i1 } %result.struct, 0
%cast = bitcast i32 %result to float
ret float %cast
@@ -1194,7 +1194,7 @@ define amdgpu_ps float @mubuf_cmpxchg_vgpr_ptr_offset4095(ptr addrspace(1) %ptr,
; GFX7-NEXT: v_mov_b32_e32 v0, v3
; GFX7-NEXT: ; return to shader part epilog
%gep = getelementptr i32, ptr addrspace(1) %ptr, i64 4095
- %result.struct = cmpxchg ptr addrspace(1) %gep, i32 %old, i32 %in seq_cst seq_cst
+ %result.struct = cmpxchg ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent") seq_cst seq_cst
%result = extractvalue { i32, i1 } %result.struct, 0
%cast = bitcast i32 %result to float
ret float %cast
@@ -1230,7 +1230,7 @@ define amdgpu_ps float @mubuf_cmpxchg_vgpr_ptr_offset4294967296(ptr addrspace(1)
; GFX7-NEXT: v_mov_b32_e32 v0, v3
; GFX7-NEXT: ; return to shader part epilog
%gep = getelementptr i32, ptr addrspace(1) %ptr, i64 4294967296
- %result.struct = cmpxchg ptr addrspace(1) %gep, i32 %old, i32 %in seq_cst seq_cst
+ %result.struct = cmpxchg ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent") seq_cst seq_cst
%result = extractvalue { i32, i1 } %result.struct, 0
%cast = bitcast i32 %result to float
ret float %cast
@@ -1270,7 +1270,7 @@ define amdgpu_ps float @mubuf_cmpxchg_sgpr_ptr_vgpr_offset(ptr addrspace(1) inre
; GFX7-NEXT: v_mov_b32_e32 v0, v2
; GFX7-NEXT: ; return to shader part epilog
%gep = getelementptr i32, ptr addrspace(1) %ptr, i32 %voffset
- %result.struct = cmpxchg ptr addrspace(1) %gep, i32 %old, i32 %in seq_cst seq_cst
+ %result.struct = cmpxchg ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent") seq_cst seq_cst
%result = extractvalue { i32, i1 } %result.struct, 0
%cast = bitcast i32 %result to float
ret float %cast
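
(The same scope annotation applies to cmpxchg, as in the hunks above; a minimal sketch with hypothetical operands %p, %old, and %new. Both the success and failure orderings follow the syncscope.)

  %pair = cmpxchg ptr addrspace(1) %p, i32 %old, i32 %new syncscope("agent") seq_cst seq_cst
  %val  = extractvalue { i32, i1 } %pair, 0   ; loaded value
  %ok   = extractvalue { i32, i1 } %pair, 1   ; success bit
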
diff --git a/llvm/test/CodeGen/AMDGPU/acc-ldst.ll b/llvm/test/CodeGen/AMDGPU/acc-ldst.ll
index 9e4569afd6f2e1..1d829c056bd976 100644
--- a/llvm/test/CodeGen/AMDGPU/acc-ldst.ll
+++ b/llvm/test/CodeGen/AMDGPU/acc-ldst.ll
@@ -202,14 +202,14 @@ define amdgpu_kernel void @test_atomic_mfma_4xi32_atomic_store(ptr addrspace(1)
bb:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr inbounds i32, ptr addrspace(1) %arg, i32 %tid
- %in.1 = atomicrmw volatile sub ptr addrspace(1) %gep, i32 1 seq_cst
+ %in.1 = atomicrmw volatile sub ptr addrspace(1) %gep, i32 1 syncscope("agent") seq_cst
%tmp0 = insertelement <4 x i32> undef, i32 %in.1, i32 0
%tmp1 = insertelement <4 x i32> %tmp0, i32 0, i32 1
%tmp2 = insertelement <4 x i32> %tmp1, i32 0, i32 2
%tmp3 = insertelement <4 x i32> %tmp2, i32 0, i32 3
%mai.1 = tail call <4 x i32> @llvm.amdgcn.mfma.i32.4x4x4i8(i32 1, i32 2, <4 x i32> %tmp3, i32 0, i32 0, i32 0)
%elt = extractelement <4 x i32> %mai.1, i32 0
- %val = atomicrmw volatile add ptr addrspace(1) %gep, i32 %elt seq_cst
+ %val = atomicrmw volatile add ptr addrspace(1) %gep, i32 %elt syncscope("agent") seq_cst
store i32 %val, ptr addrspace(1) %arg
ret void
}
@@ -225,7 +225,7 @@ define amdgpu_kernel void @test_atomic_mfma_4xi32_atomic64_store(ptr addrspace(1
bb:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr inbounds i64, ptr addrspace(1) %arg, i32 %tid
- %in.1 = atomicrmw volatile sub ptr addrspace(1) %gep, i64 1 seq_cst
+ %in.1 = atomicrmw volatile sub ptr addrspace(1) %gep, i64 1 syncscope("agent") seq_cst
%tmp0 = insertelement <2 x i64> undef, i64 %in.1, i32 0
%tmp1 = insertelement <2 x i64> %tmp0, i64 0, i32 1
%tmp2 = bitcast <2 x i64> %tmp0 to <4 x i32>
@@ -235,7 +235,7 @@ bb:
%v2.1 = insertelement <2 x i32> undef, i32 %elt.1, i32 0
%v2.2 = insertelement <2 x i32> %v2.1, i32 %elt.2, i32 1
%v2 = bitcast <2 x i32> %v2.2 to i64
- %val = atomicrmw volatile add ptr addrspace(1) %gep, i64 %v2 seq_cst
+ %val = atomicrmw volatile add ptr addrspace(1) %gep, i64 %v2 syncscope("agent") seq_cst
store i64 %val, ptr addrspace(1) %arg
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_global_pointer.ll b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_global_pointer.ll
index e54d6e0d470d88..81fd166e3779f8 100644
--- a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_global_pointer.ll
+++ b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_global_pointer.ll
@@ -226,7 +226,7 @@ define amdgpu_kernel void @add_i32_constant(ptr addrspace(1) %out, ptr addrspace
; GFX1132-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1132-NEXT: s_endpgm
entry:
- %old = atomicrmw add ptr addrspace(1) %inout, i32 5 acq_rel
+ %old = atomicrmw add ptr addrspace(1) %inout, i32 5 syncscope("agent") acq_rel
store i32 %old, ptr addrspace(1) %out
ret void
}
@@ -493,7 +493,7 @@ define amdgpu_kernel void @add_i32_uniform(ptr addrspace(1) %out, ptr addrspace(
; GFX1132-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1132-NEXT: s_endpgm
entry:
- %old = atomicrmw add ptr addrspace(1) %inout, i32 %additive acq_rel
+ %old = atomicrmw add ptr addrspace(1) %inout, i32 %additive syncscope("agent") acq_rel
store i32 %old, ptr addrspace(1) %out
ret void
}
@@ -823,7 +823,7 @@ define amdgpu_kernel void @add_i32_varying(ptr addrspace(1) %out, ptr addrspace(
; GFX1132-NEXT: s_endpgm
entry:
%lane = call i32 @llvm.amdgcn.workitem.id.x()
- %old = atomicrmw add ptr addrspace(1) %inout, i32 %lane acq_rel
+ %old = atomicrmw add ptr addrspace(1) %inout, i32 %lane syncscope("agent") acq_rel
store i32 %old, ptr addrspace(1) %out
ret void
}
@@ -1062,7 +1062,7 @@ define amdgpu_kernel void @add_i64_constant(ptr addrspace(1) %out, ptr addrspace
; GFX1132-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1132-NEXT: s_endpgm
entry:
- %old = atomicrmw add ptr addrspace(1) %inout, i64 5 acq_rel
+ %old = atomicrmw add ptr addrspace(1) %inout, i64 5 syncscope("agent") acq_rel
store i64 %old, ptr addrspace(1) %out
ret void
}
@@ -1381,7 +1381,7 @@ define amdgpu_kernel void @add_i64_uniform(ptr addrspace(1) %out, ptr addrspace(
; GFX1132-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1132-NEXT: s_endpgm
entry:
- %old = atomicrmw add ptr addrspace(1) %inout, i64 %additive acq_rel
+ %old = atomicrmw add ptr addrspace(1) %inout, i64 %additive syncscope("agent") acq_rel
store i64 %old, ptr addrspace(1) %out
ret void
}
@@ -1475,7 +1475,7 @@ define amdgpu_kernel void @add_i64_varying(ptr addrspace(1) %out, ptr addrspace(
entry:
%lane = call i32 @llvm.amdgcn.workitem.id.x()
%zext = zext i32 %lane to i64
- %old = atomicrmw add ptr addrspace(1) %inout, i64 %zext acq_rel
+ %old = atomicrmw add ptr addrspace(1) %inout, i64 %zext syncscope("agent") acq_rel
store i64 %old, ptr addrspace(1) %out
ret void
}
@@ -1735,7 +1735,7 @@ define amdgpu_kernel void @sub_i32_constant(ptr addrspace(1) %out, ptr addrspace
; GFX1132-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1132-NEXT: s_endpgm
entry:
- %old = atomicrmw sub ptr addrspace(1) %inout, i32 5 acq_rel
+ %old = atomicrmw sub ptr addrspace(1) %inout, i32 5 syncscope("agent") acq_rel
store i32 %old, ptr addrspace(1) %out
ret void
}
@@ -2006,7 +2006,7 @@ define amdgpu_kernel void @sub_i32_uniform(ptr addrspace(1) %out, ptr addrspace(
; GFX1132-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1132-NEXT: s_endpgm
entry:
- %old = atomicrmw sub ptr addrspace(1) %inout, i32 %subitive acq_rel
+ %old = atomicrmw sub ptr addrspace(1) %inout, i32 %subitive syncscope("agent") acq_rel
store i32 %old, ptr addrspace(1) %out
ret void
}
@@ -2336,7 +2336,7 @@ define amdgpu_kernel void @sub_i32_varying(ptr addrspace(1) %out, ptr addrspace(
; GFX1132-NEXT: s_endpgm
entry:
%lane = call i32 @llvm.amdgcn.workitem.id.x()
- %old = atomicrmw sub ptr addrspace(1) %inout, i32 %lane acq_rel
+ %old = atomicrmw sub ptr addrspace(1) %inout, i32 %lane syncscope("agent") acq_rel
store i32 %old, ptr addrspace(1) %out
ret void
}
@@ -2627,7 +2627,7 @@ define amdgpu_kernel void @sub_i64_constant(ptr addrspace(1) %out, ptr addrspace
; GFX1132-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1132-NEXT: s_endpgm
entry:
- %old = atomicrmw sub ptr addrspace(1) %inout, i64 5 acq_rel
+ %old = atomicrmw sub ptr addrspace(1) %inout, i64 5 syncscope("agent") acq_rel
store i64 %old, ptr addrspace(1) %out
ret void
}
@@ -2959,7 +2959,7 @@ define amdgpu_kernel void @sub_i64_uniform(ptr addrspace(1) %out, ptr addrspace(
; GFX1132-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1132-NEXT: s_endpgm
entry:
- %old = atomicrmw sub ptr addrspace(1) %inout, i64 %subitive acq_rel
+ %old = atomicrmw sub ptr addrspace(1) %inout, i64 %subitive syncscope("agent") acq_rel
store i64 %old, ptr addrspace(1) %out
ret void
}
@@ -3053,7 +3053,7 @@ define amdgpu_kernel void @sub_i64_varying(ptr addrspace(1) %out, ptr addrspace(
entry:
%lane = call i32 @llvm.amdgcn.workitem.id.x()
%zext = zext i32 %lane to i64
- %old = atomicrmw sub ptr addrspace(1) %inout, i64 %zext acq_rel
+ %old = atomicrmw sub ptr addrspace(1) %inout, i64 %zext syncscope("agent") acq_rel
store i64 %old, ptr addrspace(1) %out
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/dag-divergence-atomic.ll b/llvm/test/CodeGen/AMDGPU/dag-divergence-atomic.ll
index feaca01e520c79..2e56839587f329 100644
--- a/llvm/test/CodeGen/AMDGPU/dag-divergence-atomic.ll
+++ b/llvm/test/CodeGen/AMDGPU/dag-divergence-atomic.ll
@@ -20,7 +20,7 @@ define protected amdgpu_kernel void @add(ptr addrspace(1) %p, ptr addrspace(1) %
; CHECK-NEXT: v_mov_b32_e32 v2, 1.0
; CHECK-NEXT: global_store_dword v[0:1], v2, off
; CHECK-NEXT: s_endpgm
- %n32 = atomicrmw add ptr addrspace(1) %p, i32 1 monotonic
+ %n32 = atomicrmw add ptr addrspace(1) %p, i32 1 syncscope("agent") monotonic
%n64 = zext i32 %n32 to i64
%p1 = getelementptr inbounds %S, ptr addrspace(1) %q, i64 %n64, i32 0
store float 1.0, ptr addrspace(1) %p1
@@ -42,7 +42,7 @@ define protected amdgpu_kernel void @sub(ptr addrspace(1) %p, ptr addrspace(1) %
; CHECK-NEXT: v_mov_b32_e32 v2, 1.0
; CHECK-NEXT: global_store_dword v[0:1], v2, off
; CHECK-NEXT: s_endpgm
- %n32 = atomicrmw sub ptr addrspace(1) %p, i32 1 monotonic
+ %n32 = atomicrmw sub ptr addrspace(1) %p, i32 1 syncscope("agent") monotonic
%n64 = zext i32 %n32 to i64
%p1 = getelementptr inbounds %S, ptr addrspace(1) %q, i64 %n64, i32 0
store float 1.0, ptr addrspace(1) %p1
@@ -64,7 +64,7 @@ define protected amdgpu_kernel void @and(ptr addrspace(1) %p, ptr addrspace(1) %
; CHECK-NEXT: v_mov_b32_e32 v2, 1.0
; CHECK-NEXT: global_store_dword v[0:1], v2, off
; CHECK-NEXT: s_endpgm
- %n32 = atomicrmw and ptr addrspace(1) %p, i32 1 monotonic
+ %n32 = atomicrmw and ptr addrspace(1) %p, i32 1 syncscope("agent") monotonic
%n64 = zext i32 %n32 to i64
%p1 = getelementptr inbounds %S, ptr addrspace(1) %q, i64 %n64, i32 0
store float 1.0, ptr addrspace(1) %p1
@@ -86,7 +86,7 @@ define protected amdgpu_kernel void @or(ptr addrspace(1) %p, ptr addrspace(1) %q
; CHECK-NEXT: v_mov_b32_e32 v2, 1.0
; CHECK-NEXT: global_store_dword v[0:1], v2, off
; CHECK-NEXT: s_endpgm
- %n32 = atomicrmw or ptr addrspace(1) %p, i32 1 monotonic
+ %n32 = atomicrmw or ptr addrspace(1) %p, i32 1 syncscope("agent") monotonic
%n64 = zext i32 %n32 to i64
%p1 = getelementptr inbounds %S, ptr addrspace(1) %q, i64 %n64, i32 0
store float 1.0, ptr addrspace(1) %p1
@@ -108,7 +108,7 @@ define protected amdgpu_kernel void @xor(ptr addrspace(1) %p, ptr addrspace(1) %
; CHECK-NEXT: v_mov_b32_e32 v2, 1.0
; CHECK-NEXT: global_store_dword v[0:1], v2, off
; CHECK-NEXT: s_endpgm
- %n32 = atomicrmw xor ptr addrspace(1) %p, i32 1 monotonic
+ %n32 = atomicrmw xor ptr addrspace(1) %p, i32 1 syncscope("agent") monotonic
%n64 = zext i32 %n32 to i64
%p1 = getelementptr inbounds %S, ptr addrspace(1) %q, i64 %n64, i32 0
store float 1.0, ptr addrspace(1) %p1
@@ -144,7 +144,7 @@ define protected amdgpu_kernel void @nand(ptr addrspace(1) %p, ptr addrspace(1)
; CHECK-NEXT: v_mov_b32_e32 v2, 1.0
; CHECK-NEXT: global_store_dword v[0:1], v2, off
; CHECK-NEXT: s_endpgm
- %n32 = atomicrmw nand ptr addrspace(1) %p, i32 1 monotonic
+ %n32 = atomicrmw nand ptr addrspace(1) %p, i32 1 syncscope("agent") monotonic
%n64 = zext i32 %n32 to i64
%p1 = getelementptr inbounds %S, ptr addrspace(1) %q, i64 %n64, i32 0
store float 1.0, ptr addrspace(1) %p1
@@ -177,31 +177,18 @@ define protected amdgpu_kernel void @max(ptr addrspace(1) %p, ptr addrspace(1) %
; CHECK-LABEL: max:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; CHECK-NEXT: s_mov_b64 s[4:5], 0
-; CHECK-NEXT: v_mov_b32_e32 v1, 0
-; CHECK-NEXT: s_waitcnt lgkmcnt(0)
-; CHECK-NEXT: s_load_dword s6, s[0:1], 0x0
+; CHECK-NEXT: v_mov_b32_e32 v0, 0
+; CHECK-NEXT: v_mov_b32_e32 v1, 1
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
-; CHECK-NEXT: v_mov_b32_e32 v0, s6
-; CHECK-NEXT: .LBB7_1: ; %atomicrmw.start
-; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: v_mov_b32_e32 v3, v0
-; CHECK-NEXT: v_max_i32_e32 v2, 1, v3
-; CHECK-NEXT: global_atomic_cmpswap v0, v1, v[2:3], s[0:1] glc
+; CHECK-NEXT: global_atomic_smax v2, v0, v1, s[0:1] glc
+; CHECK-NEXT: v_mov_b32_e32 v0, s2
+; CHECK-NEXT: v_mov_b32_e32 v1, s3
; CHECK-NEXT: s_waitcnt vmcnt(0)
-; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, v0, v3
-; CHECK-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
-; CHECK-NEXT: s_andn2_b64 exec, exec, s[4:5]
-; CHECK-NEXT: s_cbranch_execnz .LBB7_1
-; CHECK-NEXT: ; %bb.2: ; %atomicrmw.end
-; CHECK-NEXT: s_or_b64 exec, exec, s[4:5]
-; CHECK-NEXT: v_mov_b32_e32 v2, s2
-; CHECK-NEXT: v_mov_b32_e32 v3, s3
-; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v0, 12, v[2:3]
+; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v2, 12, v[0:1]
; CHECK-NEXT: v_mov_b32_e32 v2, 1.0
; CHECK-NEXT: global_store_dword v[0:1], v2, off
; CHECK-NEXT: s_endpgm
- %n32 = atomicrmw max ptr addrspace(1) %p, i32 1 monotonic
+ %n32 = atomicrmw max ptr addrspace(1) %p, i32 1 syncscope("agent") monotonic
%n64 = zext i32 %n32 to i64
%p1 = getelementptr inbounds %S, ptr addrspace(1) %q, i64 %n64, i32 0
store float 1.0, ptr addrspace(1) %p1
@@ -234,31 +221,18 @@ define protected amdgpu_kernel void @min(ptr addrspace(1) %p, ptr addrspace(1) %
; CHECK-LABEL: min:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; CHECK-NEXT: s_mov_b64 s[4:5], 0
-; CHECK-NEXT: v_mov_b32_e32 v1, 0
-; CHECK-NEXT: s_waitcnt lgkmcnt(0)
-; CHECK-NEXT: s_load_dword s6, s[0:1], 0x0
+; CHECK-NEXT: v_mov_b32_e32 v0, 0
+; CHECK-NEXT: v_mov_b32_e32 v1, 1
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
-; CHECK-NEXT: v_mov_b32_e32 v0, s6
-; CHECK-NEXT: .LBB9_1: ; %atomicrmw.start
-; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: v_mov_b32_e32 v3, v0
-; CHECK-NEXT: v_min_i32_e32 v2, 1, v3
-; CHECK-NEXT: global_atomic_cmpswap v0, v1, v[2:3], s[0:1] glc
+; CHECK-NEXT: global_atomic_smin v2, v0, v1, s[0:1] glc
+; CHECK-NEXT: v_mov_b32_e32 v0, s2
+; CHECK-NEXT: v_mov_b32_e32 v1, s3
; CHECK-NEXT: s_waitcnt vmcnt(0)
-; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, v0, v3
-; CHECK-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
-; CHECK-NEXT: s_andn2_b64 exec, exec, s[4:5]
-; CHECK-NEXT: s_cbranch_execnz .LBB9_1
-; CHECK-NEXT: ; %bb.2: ; %atomicrmw.end
-; CHECK-NEXT: s_or_b64 exec, exec, s[4:5]
-; CHECK-NEXT: v_mov_b32_e32 v2, s2
-; CHECK-NEXT: v_mov_b32_e32 v3, s3
-; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v0, 12, v[2:3]
+; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v2, 12, v[0:1]
; CHECK-NEXT: v_mov_b32_e32 v2, 1.0
; CHECK-NEXT: global_store_dword v[0:1], v2, off
; CHECK-NEXT: s_endpgm
- %n32 = atomicrmw min ptr addrspace(1) %p, i32 1 monotonic
+ %n32 = atomicrmw min ptr addrspace(1) %p, i32 1 syncscope("agent") monotonic
%n64 = zext i32 %n32 to i64
%p1 = getelementptr inbounds %S, ptr addrspace(1) %q, i64 %n64, i32 0
store float 1.0, ptr addrspace(1) %p1
@@ -291,31 +265,18 @@ define protected amdgpu_kernel void @umax(ptr addrspace(1) %p, ptr addrspace(1)
; CHECK-LABEL: umax:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; CHECK-NEXT: s_mov_b64 s[4:5], 0
-; CHECK-NEXT: v_mov_b32_e32 v1, 0
-; CHECK-NEXT: s_waitcnt lgkmcnt(0)
-; CHECK-NEXT: s_load_dword s6, s[0:1], 0x0
+; CHECK-NEXT: v_mov_b32_e32 v0, 0
+; CHECK-NEXT: v_mov_b32_e32 v1, 1
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
-; CHECK-NEXT: v_mov_b32_e32 v0, s6
-; CHECK-NEXT: .LBB11_1: ; %atomicrmw.start
-; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: v_mov_b32_e32 v3, v0
-; CHECK-NEXT: v_max_u32_e32 v2, 1, v3
-; CHECK-NEXT: global_atomic_cmpswap v0, v1, v[2:3], s[0:1] glc
+; CHECK-NEXT: global_atomic_umax v2, v0, v1, s[0:1] glc
+; CHECK-NEXT: v_mov_b32_e32 v0, s2
+; CHECK-NEXT: v_mov_b32_e32 v1, s3
; CHECK-NEXT: s_waitcnt vmcnt(0)
-; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, v0, v3
-; CHECK-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
-; CHECK-NEXT: s_andn2_b64 exec, exec, s[4:5]
-; CHECK-NEXT: s_cbranch_execnz .LBB11_1
-; CHECK-NEXT: ; %bb.2: ; %atomicrmw.end
-; CHECK-NEXT: s_or_b64 exec, exec, s[4:5]
-; CHECK-NEXT: v_mov_b32_e32 v2, s2
-; CHECK-NEXT: v_mov_b32_e32 v3, s3
-; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v0, 12, v[2:3]
+; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v2, 12, v[0:1]
; CHECK-NEXT: v_mov_b32_e32 v2, 1.0
; CHECK-NEXT: global_store_dword v[0:1], v2, off
; CHECK-NEXT: s_endpgm
- %n32 = atomicrmw umax ptr addrspace(1) %p, i32 1 monotonic
+ %n32 = atomicrmw umax ptr addrspace(1) %p, i32 1 syncscope("agent") monotonic
%n64 = zext i32 %n32 to i64
%p1 = getelementptr inbounds %S, ptr addrspace(1) %q, i64 %n64, i32 0
store float 1.0, ptr addrspace(1) %p1
@@ -348,31 +309,18 @@ define protected amdgpu_kernel void @umin(ptr addrspace(1) %p, ptr addrspace(1)
; CHECK-LABEL: umin:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; CHECK-NEXT: s_mov_b64 s[4:5], 0
-; CHECK-NEXT: v_mov_b32_e32 v1, 0
-; CHECK-NEXT: s_waitcnt lgkmcnt(0)
-; CHECK-NEXT: s_load_dword s6, s[0:1], 0x0
+; CHECK-NEXT: v_mov_b32_e32 v0, 0
+; CHECK-NEXT: v_mov_b32_e32 v1, 1
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
-; CHECK-NEXT: v_mov_b32_e32 v0, s6
-; CHECK-NEXT: .LBB13_1: ; %atomicrmw.start
-; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: v_mov_b32_e32 v3, v0
-; CHECK-NEXT: v_min_u32_e32 v2, 1, v3
-; CHECK-NEXT: global_atomic_cmpswap v0, v1, v[2:3], s[0:1] glc
+; CHECK-NEXT: global_atomic_umin v2, v0, v1, s[0:1] glc
+; CHECK-NEXT: v_mov_b32_e32 v0, s2
+; CHECK-NEXT: v_mov_b32_e32 v1, s3
; CHECK-NEXT: s_waitcnt vmcnt(0)
-; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, v0, v3
-; CHECK-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
-; CHECK-NEXT: s_andn2_b64 exec, exec, s[4:5]
-; CHECK-NEXT: s_cbranch_execnz .LBB13_1
-; CHECK-NEXT: ; %bb.2: ; %atomicrmw.end
-; CHECK-NEXT: s_or_b64 exec, exec, s[4:5]
-; CHECK-NEXT: v_mov_b32_e32 v2, s2
-; CHECK-NEXT: v_mov_b32_e32 v3, s3
-; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v0, 12, v[2:3]
+; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v2, 12, v[0:1]
; CHECK-NEXT: v_mov_b32_e32 v2, 1.0
; CHECK-NEXT: global_store_dword v[0:1], v2, off
; CHECK-NEXT: s_endpgm
- %n32 = atomicrmw umin ptr addrspace(1) %p, i32 1 monotonic
+ %n32 = atomicrmw umin ptr addrspace(1) %p, i32 1 syncscope("agent") monotonic
%n64 = zext i32 %n32 to i64
%p1 = getelementptr inbounds %S, ptr addrspace(1) %q, i64 %n64, i32 0
store float 1.0, ptr addrspace(1) %p1
@@ -418,7 +366,7 @@ define protected amdgpu_kernel void @xchg(ptr addrspace(1) %p, ptr addrspace(1)
; CHECK-NEXT: v_mov_b32_e32 v2, 1.0
; CHECK-NEXT: global_store_dword v[0:1], v2, off
; CHECK-NEXT: s_endpgm
- %n32 = atomicrmw xchg ptr addrspace(1) %p, i32 1 monotonic
+ %n32 = atomicrmw xchg ptr addrspace(1) %p, i32 1 syncscope("agent") monotonic
%n64 = zext i32 %n32 to i64
%p1 = getelementptr inbounds %S, ptr addrspace(1) %q, i64 %n64, i32 0
store float 1.0, ptr addrspace(1) %p1
@@ -440,7 +388,7 @@ define protected amdgpu_kernel void @inc(ptr addrspace(1) %p, ptr addrspace(1) %
; CHECK-NEXT: v_mov_b32_e32 v2, 1.0
; CHECK-NEXT: global_store_dword v[0:1], v2, off
; CHECK-NEXT: s_endpgm
- %n32 = atomicrmw uinc_wrap ptr addrspace(1) %p, i32 1 monotonic
+ %n32 = atomicrmw uinc_wrap ptr addrspace(1) %p, i32 1 syncscope("agent") monotonic
%n64 = zext i32 %n32 to i64
%p1 = getelementptr inbounds %S, ptr addrspace(1) %q, i64 %n64, i32 0
store float 1.0, ptr addrspace(1) %p1
@@ -462,7 +410,7 @@ define protected amdgpu_kernel void @dec(ptr addrspace(1) %p, ptr addrspace(1) %
; CHECK-NEXT: v_mov_b32_e32 v2, 1.0
; CHECK-NEXT: global_store_dword v[0:1], v2, off
; CHECK-NEXT: s_endpgm
- %n32 = atomicrmw udec_wrap ptr addrspace(1) %p, i32 1 monotonic
+ %n32 = atomicrmw udec_wrap ptr addrspace(1) %p, i32 1 syncscope("agent") monotonic
%n64 = zext i32 %n32 to i64
%p1 = getelementptr inbounds %S, ptr addrspace(1) %q, i64 %n64, i32 0
store float 1.0, ptr addrspace(1) %p1
@@ -498,7 +446,7 @@ define protected amdgpu_kernel void @fadd(ptr addrspace(1) %p, ptr addrspace(1)
; CHECK-NEXT: v_mov_b32_e32 v2, 1.0
; CHECK-NEXT: global_store_dword v[0:1], v2, off
; CHECK-NEXT: s_endpgm
- %f32 = atomicrmw fadd ptr addrspace(1) %p, float 1.0 monotonic
+ %f32 = atomicrmw fadd ptr addrspace(1) %p, float 1.0 syncscope("agent") monotonic
%n32 = fptoui float %f32 to i32
%n64 = zext i32 %n32 to i64
%p1 = getelementptr inbounds %S, ptr addrspace(1) %q, i64 %n64, i32 0
@@ -535,7 +483,7 @@ define protected amdgpu_kernel void @fsub(ptr addrspace(1) %p, ptr addrspace(1)
; CHECK-NEXT: v_mov_b32_e32 v2, 1.0
; CHECK-NEXT: global_store_dword v[0:1], v2, off
; CHECK-NEXT: s_endpgm
- %f32 = atomicrmw fsub ptr addrspace(1) %p, float 1.0 monotonic
+ %f32 = atomicrmw fsub ptr addrspace(1) %p, float 1.0 syncscope("agent") monotonic
%n32 = fptoui float %f32 to i32
%n64 = zext i32 %n32 to i64
%p1 = getelementptr inbounds %S, ptr addrspace(1) %q, i64 %n64, i32 0
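
(The min/max hunks above also show why the scope matters for selection: at the default system scope, the signed/unsigned min/max atomicrmw is expanded to the compare-and-swap loop being deleted (the .LBB*_1 blocks), while at agent scope it selects directly to global_atomic_smax and friends. A minimal reproducer, assuming a global pointer %p:)

  %a = atomicrmw max ptr addrspace(1) %p, i32 1 monotonic                      ; expands to a CAS loop
  %b = atomicrmw max ptr addrspace(1) %p, i32 1 syncscope("agent") monotonic   ; selects global_atomic_smax
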
diff --git a/llvm/test/CodeGen/AMDGPU/flat_atomics.ll b/llvm/test/CodeGen/AMDGPU/flat_atomics.ll
index b3046851c9ad10..451fa3ba89b7d6 100644
--- a/llvm/test/CodeGen/AMDGPU/flat_atomics.ll
+++ b/llvm/test/CodeGen/AMDGPU/flat_atomics.ll
@@ -51,7 +51,7 @@ define amdgpu_kernel void @atomic_add_i32_offset(ptr %out, i32 %in) {
; GCN3-NEXT: s_endpgm
entry:
%gep = getelementptr i32, ptr %out, i32 4
- %val = atomicrmw volatile add ptr %gep, i32 %in seq_cst
+ %val = atomicrmw add ptr %gep, i32 %in syncscope("agent") seq_cst
ret void
}
@@ -103,7 +103,7 @@ define amdgpu_kernel void @atomic_add_i32_max_offset(ptr %out, i32 %in) {
; GCN3-NEXT: s_endpgm
entry:
%gep = getelementptr i32, ptr %out, i32 1023
- %val = atomicrmw volatile add ptr %gep, i32 %in seq_cst
+ %val = atomicrmw volatile add ptr %gep, i32 %in syncscope("agent") seq_cst
ret void
}
@@ -157,7 +157,7 @@ define amdgpu_kernel void @atomic_add_i32_max_offset_p1(ptr %out, i32 %in) {
; GCN3-NEXT: s_endpgm
entry:
%gep = getelementptr i32, ptr %out, i32 1024
- %val = atomicrmw volatile add ptr %gep, i32 %in seq_cst
+ %val = atomicrmw volatile add ptr %gep, i32 %in syncscope("agent") seq_cst
ret void
}
@@ -218,7 +218,7 @@ define amdgpu_kernel void @atomic_add_i32_ret_offset(ptr %out, ptr %out2, i32 %i
; GCN3-NEXT: s_endpgm
entry:
%gep = getelementptr i32, ptr %out, i32 4
- %val = atomicrmw volatile add ptr %gep, i32 %in seq_cst
+ %val = atomicrmw volatile add ptr %gep, i32 %in syncscope("agent") seq_cst
store i32 %val, ptr %out2
ret void
}
@@ -284,7 +284,7 @@ define amdgpu_kernel void @atomic_add_i32_addr64_offset(ptr %out, i32 %in, i64 %
entry:
%ptr = getelementptr i32, ptr %out, i64 %index
%gep = getelementptr i32, ptr %ptr, i32 4
- %val = atomicrmw volatile add ptr %gep, i32 %in seq_cst
+ %val = atomicrmw volatile add ptr %gep, i32 %in syncscope("agent") seq_cst
ret void
}
@@ -358,7 +358,7 @@ define amdgpu_kernel void @atomic_add_i32_ret_addr64_offset(ptr %out, ptr %out2,
entry:
%ptr = getelementptr i32, ptr %out, i64 %index
%gep = getelementptr i32, ptr %ptr, i32 4
- %val = atomicrmw volatile add ptr %gep, i32 %in seq_cst
+ %val = atomicrmw volatile add ptr %gep, i32 %in syncscope("agent") seq_cst
store i32 %val, ptr %out2
ret void
}
@@ -406,7 +406,7 @@ define amdgpu_kernel void @atomic_add_i32(ptr %out, i32 %in) {
; GCN3-NEXT: buffer_wbinvl1_vol
; GCN3-NEXT: s_endpgm
entry:
- %val = atomicrmw volatile add ptr %out, i32 %in seq_cst
+ %val = atomicrmw volatile add ptr %out, i32 %in syncscope("agent") seq_cst
ret void
}
@@ -462,7 +462,7 @@ define amdgpu_kernel void @atomic_add_i32_ret(ptr %out, ptr %out2, i32 %in) {
; GCN3-NEXT: flat_store_dword v[0:1], v2
; GCN3-NEXT: s_endpgm
entry:
- %val = atomicrmw volatile add ptr %out, i32 %in seq_cst
+ %val = atomicrmw volatile add ptr %out, i32 %in syncscope("agent") seq_cst
store i32 %val, ptr %out2
ret void
}
@@ -523,7 +523,7 @@ define amdgpu_kernel void @atomic_add_i32_addr64(ptr %out, i32 %in, i64 %index)
; GCN3-NEXT: s_endpgm
entry:
%ptr = getelementptr i32, ptr %out, i64 %index
- %val = atomicrmw volatile add ptr %ptr, i32 %in seq_cst
+ %val = atomicrmw volatile add ptr %ptr, i32 %in syncscope("agent") seq_cst
ret void
}
@@ -592,7 +592,7 @@ define amdgpu_kernel void @atomic_add_i32_ret_addr64(ptr %out, ptr %out2, i32 %i
; GCN3-NEXT: s_endpgm
entry:
%ptr = getelementptr i32, ptr %out, i64 %index
- %val = atomicrmw volatile add ptr %ptr, i32 %in seq_cst
+ %val = atomicrmw volatile add ptr %ptr, i32 %in syncscope("agent") seq_cst
store i32 %val, ptr %out2
ret void
}
@@ -645,7 +645,7 @@ define amdgpu_kernel void @atomic_and_i32_offset(ptr %out, i32 %in) {
; GCN3-NEXT: s_endpgm
entry:
%gep = getelementptr i32, ptr %out, i32 4
- %val = atomicrmw volatile and ptr %gep, i32 %in seq_cst
+ %val = atomicrmw volatile and ptr %gep, i32 %in syncscope("agent") seq_cst
ret void
}
@@ -706,7 +706,7 @@ define amdgpu_kernel void @atomic_and_i32_ret_offset(ptr %out, ptr %out2, i32 %i
; GCN3-NEXT: s_endpgm
entry:
%gep = getelementptr i32, ptr %out, i32 4
- %val = atomicrmw volatile and ptr %gep, i32 %in seq_cst
+ %val = atomicrmw volatile and ptr %gep, i32 %in syncscope("agent") seq_cst
store i32 %val, ptr %out2
ret void
}
@@ -772,7 +772,7 @@ define amdgpu_kernel void @atomic_and_i32_addr64_offset(ptr %out, i32 %in, i64 %
entry:
%ptr = getelementptr i32, ptr %out, i64 %index
%gep = getelementptr i32, ptr %ptr, i32 4
- %val = atomicrmw volatile and ptr %gep, i32 %in seq_cst
+ %val = atomicrmw volatile and ptr %gep, i32 %in syncscope("agent") seq_cst
ret void
}
@@ -846,7 +846,7 @@ define amdgpu_kernel void @atomic_and_i32_ret_addr64_offset(ptr %out, ptr %out2,
entry:
%ptr = getelementptr i32, ptr %out, i64 %index
%gep = getelementptr i32, ptr %ptr, i32 4
- %val = atomicrmw volatile and ptr %gep, i32 %in seq_cst
+ %val = atomicrmw volatile and ptr %gep, i32 %in syncscope("agent") seq_cst
store i32 %val, ptr %out2
ret void
}
@@ -894,7 +894,7 @@ define amdgpu_kernel void @atomic_and_i32(ptr %out, i32 %in) {
; GCN3-NEXT: buffer_wbinvl1_vol
; GCN3-NEXT: s_endpgm
entry:
- %val = atomicrmw volatile and ptr %out, i32 %in seq_cst
+ %val = atomicrmw volatile and ptr %out, i32 %in syncscope("agent") seq_cst
ret void
}
@@ -950,7 +950,7 @@ define amdgpu_kernel void @atomic_and_i32_ret(ptr %out, ptr %out2, i32 %in) {
; GCN3-NEXT: flat_store_dword v[0:1], v2
; GCN3-NEXT: s_endpgm
entry:
- %val = atomicrmw volatile and ptr %out, i32 %in seq_cst
+ %val = atomicrmw volatile and ptr %out, i32 %in syncscope("agent") seq_cst
store i32 %val, ptr %out2
ret void
}
@@ -1011,7 +1011,7 @@ define amdgpu_kernel void @atomic_and_i32_addr64(ptr %out, i32 %in, i64 %index)
; GCN3-NEXT: s_endpgm
entry:
%ptr = getelementptr i32, ptr %out, i64 %index
- %val = atomicrmw volatile and ptr %ptr, i32 %in seq_cst
+ %val = atomicrmw volatile and ptr %ptr, i32 %in syncscope("agent") seq_cst
ret void
}
@@ -1080,7 +1080,7 @@ define amdgpu_kernel void @atomic_and_i32_ret_addr64(ptr %out, ptr %out2, i32 %i
; GCN3-NEXT: s_endpgm
entry:
%ptr = getelementptr i32, ptr %out, i64 %index
- %val = atomicrmw volatile and ptr %ptr, i32 %in seq_cst
+ %val = atomicrmw volatile and ptr %ptr, i32 %in syncscope("agent") seq_cst
store i32 %val, ptr %out2
ret void
}
@@ -1133,7 +1133,7 @@ define amdgpu_kernel void @atomic_sub_i32_offset(ptr %out, i32 %in) {
; GCN3-NEXT: s_endpgm
entry:
%gep = getelementptr i32, ptr %out, i32 4
- %val = atomicrmw volatile sub ptr %gep, i32 %in seq_cst
+ %val = atomicrmw volatile sub ptr %gep, i32 %in syncscope("agent") seq_cst
ret void
}
@@ -1194,7 +1194,7 @@ define amdgpu_kernel void @atomic_sub_i32_ret_offset(ptr %out, ptr %out2, i32 %i
; GCN3-NEXT: s_endpgm
entry:
%gep = getelementptr i32, ptr %out, i32 4
- %val = atomicrmw volatile sub ptr %gep, i32 %in seq_cst
+ %val = atomicrmw volatile sub ptr %gep, i32 %in syncscope("agent") seq_cst
store i32 %val, ptr %out2
ret void
}
@@ -1260,7 +1260,7 @@ define amdgpu_kernel void @atomic_sub_i32_addr64_offset(ptr %out, i32 %in, i64 %
entry:
%ptr = getelementptr i32, ptr %out, i64 %index
%gep = getelementptr i32, ptr %ptr, i32 4
- %val = atomicrmw volatile sub ptr %gep, i32 %in seq_cst
+ %val = atomicrmw volatile sub ptr %gep, i32 %in syncscope("agent") seq_cst
ret void
}
@@ -1334,7 +1334,7 @@ define amdgpu_kernel void @atomic_sub_i32_ret_addr64_offset(ptr %out, ptr %out2,
entry:
%ptr = getelementptr i32, ptr %out, i64 %index
%gep = getelementptr i32, ptr %ptr, i32 4
- %val = atomicrmw volatile sub ptr %gep, i32 %in seq_cst
+ %val = atomicrmw volatile sub ptr %gep, i32 %in syncscope("agent") seq_cst
store i32 %val, ptr %out2
ret void
}
@@ -1382,7 +1382,7 @@ define amdgpu_kernel void @atomic_sub_i32(ptr %out, i32 %in) {
; GCN3-NEXT: buffer_wbinvl1_vol
; GCN3-NEXT: s_endpgm
entry:
- %val = atomicrmw volatile sub ptr %out, i32 %in seq_cst
+ %val = atomicrmw volatile sub ptr %out, i32 %in syncscope("agent") seq_cst
ret void
}
@@ -1438,7 +1438,7 @@ define amdgpu_kernel void @atomic_sub_i32_ret(ptr %out, ptr %out2, i32 %in) {
; GCN3-NEXT: flat_store_dword v[0:1], v2
; GCN3-NEXT: s_endpgm
entry:
- %val = atomicrmw volatile sub ptr %out, i32 %in seq_cst
+ %val = atomicrmw volatile sub ptr %out, i32 %in syncscope("agent") seq_cst
store i32 %val, ptr %out2
ret void
}
@@ -1499,7 +1499,7 @@ define amdgpu_kernel void @atomic_sub_i32_addr64(ptr %out, i32 %in, i64 %index)
; GCN3-NEXT: s_endpgm
entry:
%ptr = getelementptr i32, ptr %out, i64 %index
- %val = atomicrmw volatile sub ptr %ptr, i32 %in seq_cst
+ %val = atomicrmw volatile sub ptr %ptr, i32 %in syncscope("agent") seq_cst
ret void
}
@@ -1568,7 +1568,7 @@ define amdgpu_kernel void @atomic_sub_i32_ret_addr64(ptr %out, ptr %out2, i32 %i
; GCN3-NEXT: s_endpgm
entry:
%ptr = getelementptr i32, ptr %out, i64 %index
- %val = atomicrmw volatile sub ptr %ptr, i32 %in seq_cst
+ %val = atomicrmw volatile sub ptr %ptr, i32 %in syncscope("agent") seq_cst
store i32 %val, ptr %out2
ret void
}
@@ -3525,7 +3525,7 @@ define amdgpu_kernel void @atomic_or_i32_offset(ptr %out, i32 %in) {
; GCN3-NEXT: s_endpgm
entry:
%gep = getelementptr i32, ptr %out, i32 4
- %val = atomicrmw volatile or ptr %gep, i32 %in seq_cst
+ %val = atomicrmw volatile or ptr %gep, i32 %in syncscope("agent") seq_cst
ret void
}
@@ -3586,7 +3586,7 @@ define amdgpu_kernel void @atomic_or_i32_ret_offset(ptr %out, ptr %out2, i32 %in
; GCN3-NEXT: s_endpgm
entry:
%gep = getelementptr i32, ptr %out, i32 4
- %val = atomicrmw volatile or ptr %gep, i32 %in seq_cst
+ %val = atomicrmw volatile or ptr %gep, i32 %in syncscope("agent") seq_cst
store i32 %val, ptr %out2
ret void
}
@@ -3652,7 +3652,7 @@ define amdgpu_kernel void @atomic_or_i32_addr64_offset(ptr %out, i32 %in, i64 %i
entry:
%ptr = getelementptr i32, ptr %out, i64 %index
%gep = getelementptr i32, ptr %ptr, i32 4
- %val = atomicrmw volatile or ptr %gep, i32 %in seq_cst
+ %val = atomicrmw volatile or ptr %gep, i32 %in syncscope("agent") seq_cst
ret void
}
@@ -3726,7 +3726,7 @@ define amdgpu_kernel void @atomic_or_i32_ret_addr64_offset(ptr %out, ptr %out2,
entry:
%ptr = getelementptr i32, ptr %out, i64 %index
%gep = getelementptr i32, ptr %ptr, i32 4
- %val = atomicrmw volatile or ptr %gep, i32 %in seq_cst
+ %val = atomicrmw volatile or ptr %gep, i32 %in syncscope("agent") seq_cst
store i32 %val, ptr %out2
ret void
}
@@ -3774,7 +3774,7 @@ define amdgpu_kernel void @atomic_or_i32(ptr %out, i32 %in) {
; GCN3-NEXT: buffer_wbinvl1_vol
; GCN3-NEXT: s_endpgm
entry:
- %val = atomicrmw volatile or ptr %out, i32 %in seq_cst
+ %val = atomicrmw volatile or ptr %out, i32 %in syncscope("agent") seq_cst
ret void
}
@@ -3830,7 +3830,7 @@ define amdgpu_kernel void @atomic_or_i32_ret(ptr %out, ptr %out2, i32 %in) {
; GCN3-NEXT: flat_store_dword v[0:1], v2
; GCN3-NEXT: s_endpgm
entry:
- %val = atomicrmw volatile or ptr %out, i32 %in seq_cst
+ %val = atomicrmw volatile or ptr %out, i32 %in syncscope("agent") seq_cst
store i32 %val, ptr %out2
ret void
}
@@ -3891,7 +3891,7 @@ define amdgpu_kernel void @atomic_or_i32_addr64(ptr %out, i32 %in, i64 %index) {
; GCN3-NEXT: s_endpgm
entry:
%ptr = getelementptr i32, ptr %out, i64 %index
- %val = atomicrmw volatile or ptr %ptr, i32 %in seq_cst
+ %val = atomicrmw volatile or ptr %ptr, i32 %in syncscope("agent") seq_cst
ret void
}
@@ -3960,7 +3960,7 @@ define amdgpu_kernel void @atomic_or_i32_ret_addr64(ptr %out, ptr %out2, i32 %in
; GCN3-NEXT: s_endpgm
entry:
%ptr = getelementptr i32, ptr %out, i64 %index
- %val = atomicrmw volatile or ptr %ptr, i32 %in seq_cst
+ %val = atomicrmw volatile or ptr %ptr, i32 %in syncscope("agent") seq_cst
store i32 %val, ptr %out2
ret void
}
@@ -4013,7 +4013,7 @@ define amdgpu_kernel void @atomic_xchg_i32_offset(ptr %out, i32 %in) {
; GCN3-NEXT: s_endpgm
entry:
%gep = getelementptr i32, ptr %out, i32 4
- %val = atomicrmw volatile xchg ptr %gep, i32 %in seq_cst
+ %val = atomicrmw volatile xchg ptr %gep, i32 %in syncscope("agent") seq_cst
ret void
}
@@ -4065,7 +4065,7 @@ define amdgpu_kernel void @atomic_xchg_f32_offset(ptr %out, float %in) {
; GCN3-NEXT: s_endpgm
entry:
%gep = getelementptr float, ptr %out, i32 4
- %val = atomicrmw volatile xchg ptr %gep, float %in seq_cst
+ %val = atomicrmw volatile xchg ptr %gep, float %in syncscope("agent") seq_cst
ret void
}
@@ -4126,7 +4126,7 @@ define amdgpu_kernel void @atomic_xchg_i32_ret_offset(ptr %out, ptr %out2, i32 %
; GCN3-NEXT: s_endpgm
entry:
%gep = getelementptr i32, ptr %out, i32 4
- %val = atomicrmw volatile xchg ptr %gep, i32 %in seq_cst
+ %val = atomicrmw volatile xchg ptr %gep, i32 %in syncscope("agent") seq_cst
store i32 %val, ptr %out2
ret void
}
@@ -4192,7 +4192,7 @@ define amdgpu_kernel void @atomic_xchg_i32_addr64_offset(ptr %out, i32 %in, i64
entry:
%ptr = getelementptr i32, ptr %out, i64 %index
%gep = getelementptr i32, ptr %ptr, i32 4
- %val = atomicrmw volatile xchg ptr %gep, i32 %in seq_cst
+ %val = atomicrmw volatile xchg ptr %gep, i32 %in syncscope("agent") seq_cst
ret void
}
@@ -4266,7 +4266,7 @@ define amdgpu_kernel void @atomic_xchg_i32_ret_addr64_offset(ptr %out, ptr %out2
entry:
%ptr = getelementptr i32, ptr %out, i64 %index
%gep = getelementptr i32, ptr %ptr, i32 4
- %val = atomicrmw volatile xchg ptr %gep, i32 %in seq_cst
+ %val = atomicrmw volatile xchg ptr %gep, i32 %in syncscope("agent") seq_cst
store i32 %val, ptr %out2
ret void
}
@@ -4314,7 +4314,7 @@ define amdgpu_kernel void @atomic_xchg_i32(ptr %out, i32 %in) {
; GCN3-NEXT: buffer_wbinvl1_vol
; GCN3-NEXT: s_endpgm
entry:
- %val = atomicrmw volatile xchg ptr %out, i32 %in seq_cst
+ %val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("agent") seq_cst
ret void
}
@@ -4370,7 +4370,7 @@ define amdgpu_kernel void @atomic_xchg_i32_ret(ptr %out, ptr %out2, i32 %in) {
; GCN3-NEXT: flat_store_dword v[0:1], v2
; GCN3-NEXT: s_endpgm
entry:
- %val = atomicrmw volatile xchg ptr %out, i32 %in seq_cst
+ %val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("agent") seq_cst
store i32 %val, ptr %out2
ret void
}
@@ -4431,7 +4431,7 @@ define amdgpu_kernel void @atomic_xchg_i32_addr64(ptr %out, i32 %in, i64 %index)
; GCN3-NEXT: s_endpgm
entry:
%ptr = getelementptr i32, ptr %out, i64 %index
- %val = atomicrmw volatile xchg ptr %ptr, i32 %in seq_cst
+ %val = atomicrmw volatile xchg ptr %ptr, i32 %in syncscope("agent") seq_cst
ret void
}
@@ -4500,7 +4500,7 @@ define amdgpu_kernel void @atomic_xchg_i32_ret_addr64(ptr %out, ptr %out2, i32 %
; GCN3-NEXT: s_endpgm
entry:
%ptr = getelementptr i32, ptr %out, i64 %index
- %val = atomicrmw volatile xchg ptr %ptr, i32 %in seq_cst
+ %val = atomicrmw volatile xchg ptr %ptr, i32 %in syncscope("agent") seq_cst
store i32 %val, ptr %out2
ret void
}
@@ -4555,7 +4555,7 @@ define amdgpu_kernel void @atomic_cmpxchg_i32_offset(ptr %out, i32 %in, i32 %old
; GCN3-NEXT: s_endpgm
entry:
%gep = getelementptr i32, ptr %out, i32 4
- %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in seq_cst seq_cst
+ %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent") seq_cst seq_cst
ret void
}
@@ -4619,7 +4619,7 @@ define amdgpu_kernel void @atomic_cmpxchg_i32_ret_offset(ptr %out, ptr %out2, i3
; GCN3-NEXT: s_endpgm
entry:
%gep = getelementptr i32, ptr %out, i32 4
- %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in seq_cst seq_cst
+ %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent") seq_cst seq_cst
%flag = extractvalue { i32, i1 } %val, 0
store i32 %flag, ptr %out2
ret void
@@ -4692,7 +4692,7 @@ define amdgpu_kernel void @atomic_cmpxchg_i32_addr64_offset(ptr %out, i32 %in, i
entry:
%ptr = getelementptr i32, ptr %out, i64 %index
%gep = getelementptr i32, ptr %ptr, i32 4
- %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in seq_cst seq_cst
+ %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent") seq_cst seq_cst
ret void
}
@@ -4772,7 +4772,7 @@ define amdgpu_kernel void @atomic_cmpxchg_i32_ret_addr64_offset(ptr %out, ptr %o
entry:
%ptr = getelementptr i32, ptr %out, i64 %index
%gep = getelementptr i32, ptr %ptr, i32 4
- %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in seq_cst seq_cst
+ %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent") seq_cst seq_cst
%flag = extractvalue { i32, i1 } %val, 0
store i32 %flag, ptr %out2
ret void
@@ -4821,7 +4821,7 @@ define amdgpu_kernel void @atomic_cmpxchg_i32(ptr %out, i32 %in, i32 %old) {
; GCN3-NEXT: buffer_wbinvl1_vol
; GCN3-NEXT: s_endpgm
entry:
- %val = cmpxchg volatile ptr %out, i32 %old, i32 %in seq_cst seq_cst
+ %val = cmpxchg volatile ptr %out, i32 %old, i32 %in syncscope("agent") seq_cst seq_cst
ret void
}
@@ -4880,7 +4880,7 @@ define amdgpu_kernel void @atomic_cmpxchg_i32_ret(ptr %out, ptr %out2, i32 %in,
; GCN3-NEXT: flat_store_dword v[0:1], v2
; GCN3-NEXT: s_endpgm
entry:
- %val = cmpxchg volatile ptr %out, i32 %old, i32 %in seq_cst seq_cst
+ %val = cmpxchg volatile ptr %out, i32 %old, i32 %in syncscope("agent") seq_cst seq_cst
%flag = extractvalue { i32, i1 } %val, 0
store i32 %flag, ptr %out2
ret void
@@ -4948,7 +4948,7 @@ define amdgpu_kernel void @atomic_cmpxchg_i32_addr64(ptr %out, i32 %in, i64 %ind
; GCN3-NEXT: s_endpgm
entry:
%ptr = getelementptr i32, ptr %out, i64 %index
- %val = cmpxchg volatile ptr %ptr, i32 %old, i32 %in seq_cst seq_cst
+ %val = cmpxchg volatile ptr %ptr, i32 %old, i32 %in syncscope("agent") seq_cst seq_cst
ret void
}
@@ -5023,7 +5023,7 @@ define amdgpu_kernel void @atomic_cmpxchg_i32_ret_addr64(ptr %out, ptr %out2, i3
; GCN3-NEXT: s_endpgm
entry:
%ptr = getelementptr i32, ptr %out, i64 %index
- %val = cmpxchg volatile ptr %ptr, i32 %old, i32 %in seq_cst seq_cst
+ %val = cmpxchg volatile ptr %ptr, i32 %old, i32 %in syncscope("agent") seq_cst seq_cst
%flag = extractvalue { i32, i1 } %val, 0
store i32 %flag, ptr %out2
ret void
@@ -5077,7 +5077,7 @@ define amdgpu_kernel void @atomic_xor_i32_offset(ptr %out, i32 %in) {
; GCN3-NEXT: s_endpgm
entry:
%gep = getelementptr i32, ptr %out, i32 4
- %val = atomicrmw volatile xor ptr %gep, i32 %in seq_cst
+ %val = atomicrmw volatile xor ptr %gep, i32 %in syncscope("agent") seq_cst
ret void
}
@@ -5138,7 +5138,7 @@ define amdgpu_kernel void @atomic_xor_i32_ret_offset(ptr %out, ptr %out2, i32 %i
; GCN3-NEXT: s_endpgm
entry:
%gep = getelementptr i32, ptr %out, i32 4
- %val = atomicrmw volatile xor ptr %gep, i32 %in seq_cst
+ %val = atomicrmw volatile xor ptr %gep, i32 %in syncscope("agent") seq_cst
store i32 %val, ptr %out2
ret void
}
@@ -5204,7 +5204,7 @@ define amdgpu_kernel void @atomic_xor_i32_addr64_offset(ptr %out, i32 %in, i64 %
entry:
%ptr = getelementptr i32, ptr %out, i64 %index
%gep = getelementptr i32, ptr %ptr, i32 4
- %val = atomicrmw volatile xor ptr %gep, i32 %in seq_cst
+ %val = atomicrmw volatile xor ptr %gep, i32 %in syncscope("agent") seq_cst
ret void
}
@@ -5278,7 +5278,7 @@ define amdgpu_kernel void @atomic_xor_i32_ret_addr64_offset(ptr %out, ptr %out2,
entry:
%ptr = getelementptr i32, ptr %out, i64 %index
%gep = getelementptr i32, ptr %ptr, i32 4
- %val = atomicrmw volatile xor ptr %gep, i32 %in seq_cst
+ %val = atomicrmw volatile xor ptr %gep, i32 %in syncscope("agent") seq_cst
store i32 %val, ptr %out2
ret void
}
@@ -5326,7 +5326,7 @@ define amdgpu_kernel void @atomic_xor_i32(ptr %out, i32 %in) {
; GCN3-NEXT: buffer_wbinvl1_vol
; GCN3-NEXT: s_endpgm
entry:
- %val = atomicrmw volatile xor ptr %out, i32 %in seq_cst
+ %val = atomicrmw volatile xor ptr %out, i32 %in syncscope("agent") seq_cst
ret void
}
@@ -5382,7 +5382,7 @@ define amdgpu_kernel void @atomic_xor_i32_ret(ptr %out, ptr %out2, i32 %in) {
; GCN3-NEXT: flat_store_dword v[0:1], v2
; GCN3-NEXT: s_endpgm
entry:
- %val = atomicrmw volatile xor ptr %out, i32 %in seq_cst
+ %val = atomicrmw volatile xor ptr %out, i32 %in syncscope("agent") seq_cst
store i32 %val, ptr %out2
ret void
}
@@ -5443,7 +5443,7 @@ define amdgpu_kernel void @atomic_xor_i32_addr64(ptr %out, i32 %in, i64 %index)
; GCN3-NEXT: s_endpgm
entry:
%ptr = getelementptr i32, ptr %out, i64 %index
- %val = atomicrmw volatile xor ptr %ptr, i32 %in seq_cst
+ %val = atomicrmw volatile xor ptr %ptr, i32 %in syncscope("agent") seq_cst
ret void
}
@@ -5512,7 +5512,7 @@ define amdgpu_kernel void @atomic_xor_i32_ret_addr64(ptr %out, ptr %out2, i32 %i
; GCN3-NEXT: s_endpgm
entry:
%ptr = getelementptr i32, ptr %out, i64 %index
- %val = atomicrmw volatile xor ptr %ptr, i32 %in seq_cst
+ %val = atomicrmw volatile xor ptr %ptr, i32 %in syncscope("agent") seq_cst
store i32 %val, ptr %out2
ret void
}
@@ -7152,7 +7152,7 @@ define amdgpu_kernel void @atomic_inc_i32_offset(ptr %out, i32 %in) {
; GCN3-NEXT: s_endpgm
entry:
%gep = getelementptr i32, ptr %out, i32 4
- %val = atomicrmw volatile uinc_wrap ptr %gep, i32 %in seq_cst
+ %val = atomicrmw volatile uinc_wrap ptr %gep, i32 %in syncscope("agent") seq_cst
ret void
}
@@ -7204,7 +7204,7 @@ define amdgpu_kernel void @atomic_inc_i32_max_offset(ptr %out, i32 %in) {
; GCN3-NEXT: s_endpgm
entry:
%gep = getelementptr i32, ptr %out, i32 1023
- %val = atomicrmw volatile uinc_wrap ptr %gep, i32 %in seq_cst
+ %val = atomicrmw volatile uinc_wrap ptr %gep, i32 %in syncscope("agent") seq_cst
ret void
}
@@ -7258,7 +7258,7 @@ define amdgpu_kernel void @atomic_inc_i32_max_offset_p1(ptr %out, i32 %in) {
; GCN3-NEXT: s_endpgm
entry:
%gep = getelementptr i32, ptr %out, i32 1024
- %val = atomicrmw volatile uinc_wrap ptr %gep, i32 %in seq_cst
+ %val = atomicrmw volatile uinc_wrap ptr %gep, i32 %in syncscope("agent") seq_cst
ret void
}
@@ -7319,7 +7319,7 @@ define amdgpu_kernel void @atomic_inc_i32_ret_offset(ptr %out, ptr %out2, i32 %i
; GCN3-NEXT: s_endpgm
entry:
%gep = getelementptr i32, ptr %out, i32 4
- %val = atomicrmw volatile uinc_wrap ptr %gep, i32 %in seq_cst
+ %val = atomicrmw volatile uinc_wrap ptr %gep, i32 %in syncscope("agent") seq_cst
store i32 %val, ptr %out2
ret void
}
@@ -7385,7 +7385,7 @@ define amdgpu_kernel void @atomic_inc_i32_incr64_offset(ptr %out, i32 %in, i64 %
entry:
%ptr = getelementptr i32, ptr %out, i64 %index
%gep = getelementptr i32, ptr %ptr, i32 4
- %val = atomicrmw volatile uinc_wrap ptr %gep, i32 %in seq_cst
+ %val = atomicrmw volatile uinc_wrap ptr %gep, i32 %in syncscope("agent") seq_cst
ret void
}
@@ -7459,7 +7459,7 @@ define amdgpu_kernel void @atomic_inc_i32_ret_incr64_offset(ptr %out, ptr %out2,
entry:
%ptr = getelementptr i32, ptr %out, i64 %index
%gep = getelementptr i32, ptr %ptr, i32 4
- %val = atomicrmw volatile uinc_wrap ptr %gep, i32 %in seq_cst
+ %val = atomicrmw volatile uinc_wrap ptr %gep, i32 %in syncscope("agent") seq_cst
store i32 %val, ptr %out2
ret void
}
@@ -7507,7 +7507,7 @@ define amdgpu_kernel void @atomic_inc_i32(ptr %out, i32 %in) {
; GCN3-NEXT: buffer_wbinvl1_vol
; GCN3-NEXT: s_endpgm
entry:
- %val = atomicrmw volatile uinc_wrap ptr %out, i32 %in seq_cst
+ %val = atomicrmw volatile uinc_wrap ptr %out, i32 %in syncscope("agent") seq_cst
ret void
}
@@ -7563,7 +7563,7 @@ define amdgpu_kernel void @atomic_inc_i32_ret(ptr %out, ptr %out2, i32 %in) {
; GCN3-NEXT: flat_store_dword v[0:1], v2
; GCN3-NEXT: s_endpgm
entry:
- %val = atomicrmw volatile uinc_wrap ptr %out, i32 %in seq_cst
+ %val = atomicrmw volatile uinc_wrap ptr %out, i32 %in syncscope("agent") seq_cst
store i32 %val, ptr %out2
ret void
}
@@ -7624,7 +7624,7 @@ define amdgpu_kernel void @atomic_inc_i32_incr64(ptr %out, i32 %in, i64 %index)
; GCN3-NEXT: s_endpgm
entry:
%ptr = getelementptr i32, ptr %out, i64 %index
- %val = atomicrmw volatile uinc_wrap ptr %ptr, i32 %in seq_cst
+ %val = atomicrmw volatile uinc_wrap ptr %ptr, i32 %in syncscope("agent") seq_cst
ret void
}
@@ -7693,7 +7693,7 @@ define amdgpu_kernel void @atomic_inc_i32_ret_incr64(ptr %out, ptr %out2, i32 %i
; GCN3-NEXT: s_endpgm
entry:
%ptr = getelementptr i32, ptr %out, i64 %index
- %val = atomicrmw volatile uinc_wrap ptr %ptr, i32 %in seq_cst
+ %val = atomicrmw volatile uinc_wrap ptr %ptr, i32 %in syncscope("agent") seq_cst
store i32 %val, ptr %out2
ret void
}
@@ -7746,7 +7746,7 @@ define amdgpu_kernel void @atomic_dec_i32_offset(ptr %out, i32 %in) {
; GCN3-NEXT: s_endpgm
entry:
%gep = getelementptr i32, ptr %out, i32 4
- %val = atomicrmw volatile udec_wrap ptr %gep, i32 %in seq_cst
+ %val = atomicrmw volatile udec_wrap ptr %gep, i32 %in syncscope("agent") seq_cst
ret void
}
@@ -7798,7 +7798,7 @@ define amdgpu_kernel void @atomic_dec_i32_max_offset(ptr %out, i32 %in) {
; GCN3-NEXT: s_endpgm
entry:
%gep = getelementptr i32, ptr %out, i32 1023
- %val = atomicrmw volatile udec_wrap ptr %gep, i32 %in seq_cst
+ %val = atomicrmw volatile udec_wrap ptr %gep, i32 %in syncscope("agent") seq_cst
ret void
}
@@ -7852,7 +7852,7 @@ define amdgpu_kernel void @atomic_dec_i32_max_offset_p1(ptr %out, i32 %in) {
; GCN3-NEXT: s_endpgm
entry:
%gep = getelementptr i32, ptr %out, i32 1024
- %val = atomicrmw volatile udec_wrap ptr %gep, i32 %in seq_cst
+ %val = atomicrmw volatile udec_wrap ptr %gep, i32 %in syncscope("agent") seq_cst
ret void
}
@@ -7913,7 +7913,7 @@ define amdgpu_kernel void @atomic_dec_i32_ret_offset(ptr %out, ptr %out2, i32 %i
; GCN3-NEXT: s_endpgm
entry:
%gep = getelementptr i32, ptr %out, i32 4
- %val = atomicrmw volatile udec_wrap ptr %gep, i32 %in seq_cst
+ %val = atomicrmw volatile udec_wrap ptr %gep, i32 %in syncscope("agent") seq_cst
store i32 %val, ptr %out2
ret void
}
@@ -7979,7 +7979,7 @@ define amdgpu_kernel void @atomic_dec_i32_decr64_offset(ptr %out, i32 %in, i64 %
entry:
%ptr = getelementptr i32, ptr %out, i64 %index
%gep = getelementptr i32, ptr %ptr, i32 4
- %val = atomicrmw volatile udec_wrap ptr %gep, i32 %in seq_cst
+ %val = atomicrmw volatile udec_wrap ptr %gep, i32 %in syncscope("agent") seq_cst
ret void
}
@@ -8053,7 +8053,7 @@ define amdgpu_kernel void @atomic_dec_i32_ret_decr64_offset(ptr %out, ptr %out2,
entry:
%ptr = getelementptr i32, ptr %out, i64 %index
%gep = getelementptr i32, ptr %ptr, i32 4
- %val = atomicrmw volatile udec_wrap ptr %gep, i32 %in seq_cst
+ %val = atomicrmw volatile udec_wrap ptr %gep, i32 %in syncscope("agent") seq_cst
store i32 %val, ptr %out2
ret void
}
@@ -8101,7 +8101,7 @@ define amdgpu_kernel void @atomic_dec_i32(ptr %out, i32 %in) {
; GCN3-NEXT: buffer_wbinvl1_vol
; GCN3-NEXT: s_endpgm
entry:
- %val = atomicrmw volatile udec_wrap ptr %out, i32 %in seq_cst
+ %val = atomicrmw volatile udec_wrap ptr %out, i32 %in syncscope("agent") seq_cst
ret void
}
@@ -8157,7 +8157,7 @@ define amdgpu_kernel void @atomic_dec_i32_ret(ptr %out, ptr %out2, i32 %in) {
; GCN3-NEXT: flat_store_dword v[0:1], v2
; GCN3-NEXT: s_endpgm
entry:
- %val = atomicrmw volatile udec_wrap ptr %out, i32 %in seq_cst
+ %val = atomicrmw volatile udec_wrap ptr %out, i32 %in syncscope("agent") seq_cst
store i32 %val, ptr %out2
ret void
}
@@ -8218,7 +8218,7 @@ define amdgpu_kernel void @atomic_dec_i32_decr64(ptr %out, i32 %in, i64 %index)
; GCN3-NEXT: s_endpgm
entry:
%ptr = getelementptr i32, ptr %out, i64 %index
- %val = atomicrmw volatile udec_wrap ptr %ptr, i32 %in seq_cst
+ %val = atomicrmw volatile udec_wrap ptr %ptr, i32 %in syncscope("agent") seq_cst
ret void
}
@@ -8287,7 +8287,7 @@ define amdgpu_kernel void @atomic_dec_i32_ret_decr64(ptr %out, ptr %out2, i32 %i
; GCN3-NEXT: s_endpgm
entry:
%ptr = getelementptr i32, ptr %out, i64 %index
- %val = atomicrmw volatile udec_wrap ptr %ptr, i32 %in seq_cst
+ %val = atomicrmw volatile udec_wrap ptr %ptr, i32 %in syncscope("agent") seq_cst
store i32 %val, ptr %out2
ret void
}
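
The i64 file that follows applies the same rewrite to the 64-bit operations. As a compact sketch of the pattern used throughout these hunks (the names %p and %v here are illustrative, not taken from any single test above): the operation and its seq_cst ordering are unchanged, and only a syncscope("agent") is inserted before the ordering.

  ; unscoped form, as the tests previously read (defaults to system scope)
  %old = atomicrmw volatile add ptr %p, i32 %v seq_cst
  ; agent-scoped form, as rewritten in the hunks above
  %old = atomicrmw volatile add ptr %p, i32 %v syncscope("agent") seq_cst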
diff --git a/llvm/test/CodeGen/AMDGPU/flat_atomics_i64.ll b/llvm/test/CodeGen/AMDGPU/flat_atomics_i64.ll
index fea4645e6cd488..3186a06be22fbe 100644
--- a/llvm/test/CodeGen/AMDGPU/flat_atomics_i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/flat_atomics_i64.ll
@@ -36,7 +36,7 @@ define amdgpu_kernel void @atomic_add_i64_offset(ptr %out, i64 %in) {
; GCN2-NEXT: s_endpgm
entry:
%gep = getelementptr i64, ptr %out, i64 4
- %tmp0 = atomicrmw volatile add ptr %gep, i64 %in seq_cst
+ %tmp0 = atomicrmw volatile add ptr %gep, i64 %in syncscope("agent") seq_cst
ret void
}
@@ -82,7 +82,7 @@ define amdgpu_kernel void @atomic_add_i64_ret_offset(ptr %out, ptr %out2, i64 %i
; GCN2-NEXT: s_endpgm
entry:
%gep = getelementptr i64, ptr %out, i64 4
- %tmp0 = atomicrmw volatile add ptr %gep, i64 %in seq_cst
+ %tmp0 = atomicrmw volatile add ptr %gep, i64 %in syncscope("agent") seq_cst
store i64 %tmp0, ptr %out2
ret void
}
@@ -130,7 +130,7 @@ define amdgpu_kernel void @atomic_add_i64_addr64_offset(ptr %out, i64 %in, i64 %
entry:
%ptr = getelementptr i64, ptr %out, i64 %index
%gep = getelementptr i64, ptr %ptr, i64 4
- %tmp0 = atomicrmw volatile add ptr %gep, i64 %in seq_cst
+ %tmp0 = atomicrmw volatile add ptr %gep, i64 %in syncscope("agent") seq_cst
ret void
}
@@ -181,7 +181,7 @@ define amdgpu_kernel void @atomic_add_i64_ret_addr64_offset(ptr %out, ptr %out2,
entry:
%ptr = getelementptr i64, ptr %out, i64 %index
%gep = getelementptr i64, ptr %ptr, i64 4
- %tmp0 = atomicrmw volatile add ptr %gep, i64 %in seq_cst
+ %tmp0 = atomicrmw volatile add ptr %gep, i64 %in syncscope("agent") seq_cst
store i64 %tmp0, ptr %out2
ret void
}
@@ -215,7 +215,7 @@ define amdgpu_kernel void @atomic_add_i64(ptr %out, i64 %in) {
; GCN2-NEXT: buffer_wbinvl1_vol
; GCN2-NEXT: s_endpgm
entry:
- %tmp0 = atomicrmw volatile add ptr %out, i64 %in seq_cst
+ %tmp0 = atomicrmw volatile add ptr %out, i64 %in syncscope("agent") seq_cst
ret void
}
@@ -256,7 +256,7 @@ define amdgpu_kernel void @atomic_add_i64_ret(ptr %out, ptr %out2, i64 %in) {
; GCN2-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; GCN2-NEXT: s_endpgm
entry:
- %tmp0 = atomicrmw volatile add ptr %out, i64 %in seq_cst
+ %tmp0 = atomicrmw volatile add ptr %out, i64 %in syncscope("agent") seq_cst
store i64 %tmp0, ptr %out2
ret void
}
@@ -299,7 +299,7 @@ define amdgpu_kernel void @atomic_add_i64_addr64(ptr %out, i64 %in, i64 %index)
; GCN2-NEXT: s_endpgm
entry:
%ptr = getelementptr i64, ptr %out, i64 %index
- %tmp0 = atomicrmw volatile add ptr %ptr, i64 %in seq_cst
+ %tmp0 = atomicrmw volatile add ptr %ptr, i64 %in syncscope("agent") seq_cst
ret void
}
@@ -345,7 +345,7 @@ define amdgpu_kernel void @atomic_add_i64_ret_addr64(ptr %out, ptr %out2, i64 %i
; GCN2-NEXT: s_endpgm
entry:
%ptr = getelementptr i64, ptr %out, i64 %index
- %tmp0 = atomicrmw volatile add ptr %ptr, i64 %in seq_cst
+ %tmp0 = atomicrmw volatile add ptr %ptr, i64 %in syncscope("agent") seq_cst
store i64 %tmp0, ptr %out2
ret void
}
@@ -384,7 +384,7 @@ define amdgpu_kernel void @atomic_and_i64_offset(ptr %out, i64 %in) {
; GCN2-NEXT: s_endpgm
entry:
%gep = getelementptr i64, ptr %out, i64 4
- %tmp0 = atomicrmw volatile and ptr %gep, i64 %in seq_cst
+ %tmp0 = atomicrmw volatile and ptr %gep, i64 %in syncscope("agent") seq_cst
ret void
}
@@ -430,7 +430,7 @@ define amdgpu_kernel void @atomic_and_i64_ret_offset(ptr %out, ptr %out2, i64 %i
; GCN2-NEXT: s_endpgm
entry:
%gep = getelementptr i64, ptr %out, i64 4
- %tmp0 = atomicrmw volatile and ptr %gep, i64 %in seq_cst
+ %tmp0 = atomicrmw volatile and ptr %gep, i64 %in syncscope("agent") seq_cst
store i64 %tmp0, ptr %out2
ret void
}
@@ -478,7 +478,7 @@ define amdgpu_kernel void @atomic_and_i64_addr64_offset(ptr %out, i64 %in, i64 %
entry:
%ptr = getelementptr i64, ptr %out, i64 %index
%gep = getelementptr i64, ptr %ptr, i64 4
- %tmp0 = atomicrmw volatile and ptr %gep, i64 %in seq_cst
+ %tmp0 = atomicrmw volatile and ptr %gep, i64 %in syncscope("agent") seq_cst
ret void
}
@@ -529,7 +529,7 @@ define amdgpu_kernel void @atomic_and_i64_ret_addr64_offset(ptr %out, ptr %out2,
entry:
%ptr = getelementptr i64, ptr %out, i64 %index
%gep = getelementptr i64, ptr %ptr, i64 4
- %tmp0 = atomicrmw volatile and ptr %gep, i64 %in seq_cst
+ %tmp0 = atomicrmw volatile and ptr %gep, i64 %in syncscope("agent") seq_cst
store i64 %tmp0, ptr %out2
ret void
}
@@ -563,7 +563,7 @@ define amdgpu_kernel void @atomic_and_i64(ptr %out, i64 %in) {
; GCN2-NEXT: buffer_wbinvl1_vol
; GCN2-NEXT: s_endpgm
entry:
- %tmp0 = atomicrmw volatile and ptr %out, i64 %in seq_cst
+ %tmp0 = atomicrmw volatile and ptr %out, i64 %in syncscope("agent") seq_cst
ret void
}
@@ -604,7 +604,7 @@ define amdgpu_kernel void @atomic_and_i64_ret(ptr %out, ptr %out2, i64 %in) {
; GCN2-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; GCN2-NEXT: s_endpgm
entry:
- %tmp0 = atomicrmw volatile and ptr %out, i64 %in seq_cst
+ %tmp0 = atomicrmw volatile and ptr %out, i64 %in syncscope("agent") seq_cst
store i64 %tmp0, ptr %out2
ret void
}
@@ -647,7 +647,7 @@ define amdgpu_kernel void @atomic_and_i64_addr64(ptr %out, i64 %in, i64 %index)
; GCN2-NEXT: s_endpgm
entry:
%ptr = getelementptr i64, ptr %out, i64 %index
- %tmp0 = atomicrmw volatile and ptr %ptr, i64 %in seq_cst
+ %tmp0 = atomicrmw volatile and ptr %ptr, i64 %in syncscope("agent") seq_cst
ret void
}
@@ -693,7 +693,7 @@ define amdgpu_kernel void @atomic_and_i64_ret_addr64(ptr %out, ptr %out2, i64 %i
; GCN2-NEXT: s_endpgm
entry:
%ptr = getelementptr i64, ptr %out, i64 %index
- %tmp0 = atomicrmw volatile and ptr %ptr, i64 %in seq_cst
+ %tmp0 = atomicrmw volatile and ptr %ptr, i64 %in syncscope("agent") seq_cst
store i64 %tmp0, ptr %out2
ret void
}
@@ -732,7 +732,7 @@ define amdgpu_kernel void @atomic_sub_i64_offset(ptr %out, i64 %in) {
; GCN2-NEXT: s_endpgm
entry:
%gep = getelementptr i64, ptr %out, i64 4
- %tmp0 = atomicrmw volatile sub ptr %gep, i64 %in seq_cst
+ %tmp0 = atomicrmw volatile sub ptr %gep, i64 %in syncscope("agent") seq_cst
ret void
}
@@ -778,7 +778,7 @@ define amdgpu_kernel void @atomic_sub_i64_ret_offset(ptr %out, ptr %out2, i64 %i
; GCN2-NEXT: s_endpgm
entry:
%gep = getelementptr i64, ptr %out, i64 4
- %tmp0 = atomicrmw volatile sub ptr %gep, i64 %in seq_cst
+ %tmp0 = atomicrmw volatile sub ptr %gep, i64 %in syncscope("agent") seq_cst
store i64 %tmp0, ptr %out2
ret void
}
@@ -826,7 +826,7 @@ define amdgpu_kernel void @atomic_sub_i64_addr64_offset(ptr %out, i64 %in, i64 %
entry:
%ptr = getelementptr i64, ptr %out, i64 %index
%gep = getelementptr i64, ptr %ptr, i64 4
- %tmp0 = atomicrmw volatile sub ptr %gep, i64 %in seq_cst
+ %tmp0 = atomicrmw volatile sub ptr %gep, i64 %in syncscope("agent") seq_cst
ret void
}
@@ -877,7 +877,7 @@ define amdgpu_kernel void @atomic_sub_i64_ret_addr64_offset(ptr %out, ptr %out2,
entry:
%ptr = getelementptr i64, ptr %out, i64 %index
%gep = getelementptr i64, ptr %ptr, i64 4
- %tmp0 = atomicrmw volatile sub ptr %gep, i64 %in seq_cst
+ %tmp0 = atomicrmw volatile sub ptr %gep, i64 %in syncscope("agent") seq_cst
store i64 %tmp0, ptr %out2
ret void
}
@@ -911,7 +911,7 @@ define amdgpu_kernel void @atomic_sub_i64(ptr %out, i64 %in) {
; GCN2-NEXT: buffer_wbinvl1_vol
; GCN2-NEXT: s_endpgm
entry:
- %tmp0 = atomicrmw volatile sub ptr %out, i64 %in seq_cst
+ %tmp0 = atomicrmw volatile sub ptr %out, i64 %in syncscope("agent") seq_cst
ret void
}
@@ -952,7 +952,7 @@ define amdgpu_kernel void @atomic_sub_i64_ret(ptr %out, ptr %out2, i64 %in) {
; GCN2-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; GCN2-NEXT: s_endpgm
entry:
- %tmp0 = atomicrmw volatile sub ptr %out, i64 %in seq_cst
+ %tmp0 = atomicrmw volatile sub ptr %out, i64 %in syncscope("agent") seq_cst
store i64 %tmp0, ptr %out2
ret void
}
@@ -995,7 +995,7 @@ define amdgpu_kernel void @atomic_sub_i64_addr64(ptr %out, i64 %in, i64 %index)
; GCN2-NEXT: s_endpgm
entry:
%ptr = getelementptr i64, ptr %out, i64 %index
- %tmp0 = atomicrmw volatile sub ptr %ptr, i64 %in seq_cst
+ %tmp0 = atomicrmw volatile sub ptr %ptr, i64 %in syncscope("agent") seq_cst
ret void
}
@@ -1041,7 +1041,7 @@ define amdgpu_kernel void @atomic_sub_i64_ret_addr64(ptr %out, ptr %out2, i64 %i
; GCN2-NEXT: s_endpgm
entry:
%ptr = getelementptr i64, ptr %out, i64 %index
- %tmp0 = atomicrmw volatile sub ptr %ptr, i64 %in seq_cst
+ %tmp0 = atomicrmw volatile sub ptr %ptr, i64 %in syncscope("agent") seq_cst
store i64 %tmp0, ptr %out2
ret void
}
@@ -2440,7 +2440,7 @@ define amdgpu_kernel void @atomic_or_i64_offset(ptr %out, i64 %in) {
; GCN2-NEXT: s_endpgm
entry:
%gep = getelementptr i64, ptr %out, i64 4
- %tmp0 = atomicrmw volatile or ptr %gep, i64 %in seq_cst
+ %tmp0 = atomicrmw volatile or ptr %gep, i64 %in syncscope("agent") seq_cst
ret void
}
@@ -2486,7 +2486,7 @@ define amdgpu_kernel void @atomic_or_i64_ret_offset(ptr %out, ptr %out2, i64 %in
; GCN2-NEXT: s_endpgm
entry:
%gep = getelementptr i64, ptr %out, i64 4
- %tmp0 = atomicrmw volatile or ptr %gep, i64 %in seq_cst
+ %tmp0 = atomicrmw volatile or ptr %gep, i64 %in syncscope("agent") seq_cst
store i64 %tmp0, ptr %out2
ret void
}
@@ -2534,7 +2534,7 @@ define amdgpu_kernel void @atomic_or_i64_addr64_offset(ptr %out, i64 %in, i64 %i
entry:
%ptr = getelementptr i64, ptr %out, i64 %index
%gep = getelementptr i64, ptr %ptr, i64 4
- %tmp0 = atomicrmw volatile or ptr %gep, i64 %in seq_cst
+ %tmp0 = atomicrmw volatile or ptr %gep, i64 %in syncscope("agent") seq_cst
ret void
}
@@ -2585,7 +2585,7 @@ define amdgpu_kernel void @atomic_or_i64_ret_addr64_offset(ptr %out, ptr %out2,
entry:
%ptr = getelementptr i64, ptr %out, i64 %index
%gep = getelementptr i64, ptr %ptr, i64 4
- %tmp0 = atomicrmw volatile or ptr %gep, i64 %in seq_cst
+ %tmp0 = atomicrmw volatile or ptr %gep, i64 %in syncscope("agent") seq_cst
store i64 %tmp0, ptr %out2
ret void
}
@@ -2619,7 +2619,7 @@ define amdgpu_kernel void @atomic_or_i64(ptr %out, i64 %in) {
; GCN2-NEXT: buffer_wbinvl1_vol
; GCN2-NEXT: s_endpgm
entry:
- %tmp0 = atomicrmw volatile or ptr %out, i64 %in seq_cst
+ %tmp0 = atomicrmw volatile or ptr %out, i64 %in syncscope("agent") seq_cst
ret void
}
@@ -2660,7 +2660,7 @@ define amdgpu_kernel void @atomic_or_i64_ret(ptr %out, ptr %out2, i64 %in) {
; GCN2-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; GCN2-NEXT: s_endpgm
entry:
- %tmp0 = atomicrmw volatile or ptr %out, i64 %in seq_cst
+ %tmp0 = atomicrmw volatile or ptr %out, i64 %in syncscope("agent") seq_cst
store i64 %tmp0, ptr %out2
ret void
}
@@ -2703,7 +2703,7 @@ define amdgpu_kernel void @atomic_or_i64_addr64(ptr %out, i64 %in, i64 %index) {
; GCN2-NEXT: s_endpgm
entry:
%ptr = getelementptr i64, ptr %out, i64 %index
- %tmp0 = atomicrmw volatile or ptr %ptr, i64 %in seq_cst
+ %tmp0 = atomicrmw volatile or ptr %ptr, i64 %in syncscope("agent") seq_cst
ret void
}
@@ -2749,7 +2749,7 @@ define amdgpu_kernel void @atomic_or_i64_ret_addr64(ptr %out, ptr %out2, i64 %in
; GCN2-NEXT: s_endpgm
entry:
%ptr = getelementptr i64, ptr %out, i64 %index
- %tmp0 = atomicrmw volatile or ptr %ptr, i64 %in seq_cst
+ %tmp0 = atomicrmw volatile or ptr %ptr, i64 %in syncscope("agent") seq_cst
store i64 %tmp0, ptr %out2
ret void
}
@@ -2788,7 +2788,7 @@ define amdgpu_kernel void @atomic_xchg_i64_offset(ptr %out, i64 %in) {
; GCN2-NEXT: s_endpgm
entry:
%gep = getelementptr i64, ptr %out, i64 4
- %tmp0 = atomicrmw volatile xchg ptr %gep, i64 %in seq_cst
+ %tmp0 = atomicrmw volatile xchg ptr %gep, i64 %in syncscope("agent") seq_cst
ret void
}
@@ -2826,7 +2826,7 @@ define amdgpu_kernel void @atomic_xchg_f64_offset(ptr %out, double %in) {
; GCN2-NEXT: s_endpgm
entry:
%gep = getelementptr double, ptr %out, i64 4
- %tmp0 = atomicrmw volatile xchg ptr %gep, double %in seq_cst
+ %tmp0 = atomicrmw volatile xchg ptr %gep, double %in syncscope("agent") seq_cst
ret void
}
@@ -2864,7 +2864,7 @@ define amdgpu_kernel void @atomic_xchg_pointer_offset(ptr %out, ptr %in) {
; GCN2-NEXT: s_endpgm
entry:
%gep = getelementptr ptr, ptr %out, i32 4
- %val = atomicrmw volatile xchg ptr %gep, ptr %in seq_cst
+ %val = atomicrmw volatile xchg ptr %gep, ptr %in syncscope("agent") seq_cst
ret void
}
@@ -2910,7 +2910,7 @@ define amdgpu_kernel void @atomic_xchg_i64_ret_offset(ptr %out, ptr %out2, i64 %
; GCN2-NEXT: s_endpgm
entry:
%gep = getelementptr i64, ptr %out, i64 4
- %tmp0 = atomicrmw volatile xchg ptr %gep, i64 %in seq_cst
+ %tmp0 = atomicrmw volatile xchg ptr %gep, i64 %in syncscope("agent") seq_cst
store i64 %tmp0, ptr %out2
ret void
}
@@ -2958,7 +2958,7 @@ define amdgpu_kernel void @atomic_xchg_i64_addr64_offset(ptr %out, i64 %in, i64
entry:
%ptr = getelementptr i64, ptr %out, i64 %index
%gep = getelementptr i64, ptr %ptr, i64 4
- %tmp0 = atomicrmw volatile xchg ptr %gep, i64 %in seq_cst
+ %tmp0 = atomicrmw volatile xchg ptr %gep, i64 %in syncscope("agent") seq_cst
ret void
}
@@ -3009,7 +3009,7 @@ define amdgpu_kernel void @atomic_xchg_i64_ret_addr64_offset(ptr %out, ptr %out2
entry:
%ptr = getelementptr i64, ptr %out, i64 %index
%gep = getelementptr i64, ptr %ptr, i64 4
- %tmp0 = atomicrmw volatile xchg ptr %gep, i64 %in seq_cst
+ %tmp0 = atomicrmw volatile xchg ptr %gep, i64 %in syncscope("agent") seq_cst
store i64 %tmp0, ptr %out2
ret void
}
@@ -3043,7 +3043,7 @@ define amdgpu_kernel void @atomic_xchg_i64(ptr %out, i64 %in) {
; GCN2-NEXT: buffer_wbinvl1_vol
; GCN2-NEXT: s_endpgm
entry:
- %tmp0 = atomicrmw volatile xchg ptr %out, i64 %in seq_cst
+ %tmp0 = atomicrmw volatile xchg ptr %out, i64 %in syncscope("agent") seq_cst
ret void
}
@@ -3084,7 +3084,7 @@ define amdgpu_kernel void @atomic_xchg_i64_ret(ptr %out, ptr %out2, i64 %in) {
; GCN2-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; GCN2-NEXT: s_endpgm
entry:
- %tmp0 = atomicrmw volatile xchg ptr %out, i64 %in seq_cst
+ %tmp0 = atomicrmw volatile xchg ptr %out, i64 %in syncscope("agent") seq_cst
store i64 %tmp0, ptr %out2
ret void
}
@@ -3127,7 +3127,7 @@ define amdgpu_kernel void @atomic_xchg_i64_addr64(ptr %out, i64 %in, i64 %index)
; GCN2-NEXT: s_endpgm
entry:
%ptr = getelementptr i64, ptr %out, i64 %index
- %tmp0 = atomicrmw volatile xchg ptr %ptr, i64 %in seq_cst
+ %tmp0 = atomicrmw volatile xchg ptr %ptr, i64 %in syncscope("agent") seq_cst
ret void
}
@@ -3173,7 +3173,7 @@ define amdgpu_kernel void @atomic_xchg_i64_ret_addr64(ptr %out, ptr %out2, i64 %
; GCN2-NEXT: s_endpgm
entry:
%ptr = getelementptr i64, ptr %out, i64 %index
- %tmp0 = atomicrmw volatile xchg ptr %ptr, i64 %in seq_cst
+ %tmp0 = atomicrmw volatile xchg ptr %ptr, i64 %in syncscope("agent") seq_cst
store i64 %tmp0, ptr %out2
ret void
}
@@ -3212,7 +3212,7 @@ define amdgpu_kernel void @atomic_xor_i64_offset(ptr %out, i64 %in) {
; GCN2-NEXT: s_endpgm
entry:
%gep = getelementptr i64, ptr %out, i64 4
- %tmp0 = atomicrmw volatile xor ptr %gep, i64 %in seq_cst
+ %tmp0 = atomicrmw volatile xor ptr %gep, i64 %in syncscope("agent") seq_cst
ret void
}
@@ -3258,7 +3258,7 @@ define amdgpu_kernel void @atomic_xor_i64_ret_offset(ptr %out, ptr %out2, i64 %i
; GCN2-NEXT: s_endpgm
entry:
%gep = getelementptr i64, ptr %out, i64 4
- %tmp0 = atomicrmw volatile xor ptr %gep, i64 %in seq_cst
+ %tmp0 = atomicrmw volatile xor ptr %gep, i64 %in syncscope("agent") seq_cst
store i64 %tmp0, ptr %out2
ret void
}
@@ -3306,7 +3306,7 @@ define amdgpu_kernel void @atomic_xor_i64_addr64_offset(ptr %out, i64 %in, i64 %
entry:
%ptr = getelementptr i64, ptr %out, i64 %index
%gep = getelementptr i64, ptr %ptr, i64 4
- %tmp0 = atomicrmw volatile xor ptr %gep, i64 %in seq_cst
+ %tmp0 = atomicrmw volatile xor ptr %gep, i64 %in syncscope("agent") seq_cst
ret void
}
@@ -3357,7 +3357,7 @@ define amdgpu_kernel void @atomic_xor_i64_ret_addr64_offset(ptr %out, ptr %out2,
entry:
%ptr = getelementptr i64, ptr %out, i64 %index
%gep = getelementptr i64, ptr %ptr, i64 4
- %tmp0 = atomicrmw volatile xor ptr %gep, i64 %in seq_cst
+ %tmp0 = atomicrmw volatile xor ptr %gep, i64 %in syncscope("agent") seq_cst
store i64 %tmp0, ptr %out2
ret void
}
@@ -3391,7 +3391,7 @@ define amdgpu_kernel void @atomic_xor_i64(ptr %out, i64 %in) {
; GCN2-NEXT: buffer_wbinvl1_vol
; GCN2-NEXT: s_endpgm
entry:
- %tmp0 = atomicrmw volatile xor ptr %out, i64 %in seq_cst
+ %tmp0 = atomicrmw volatile xor ptr %out, i64 %in syncscope("agent") seq_cst
ret void
}
@@ -3432,7 +3432,7 @@ define amdgpu_kernel void @atomic_xor_i64_ret(ptr %out, ptr %out2, i64 %in) {
; GCN2-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; GCN2-NEXT: s_endpgm
entry:
- %tmp0 = atomicrmw volatile xor ptr %out, i64 %in seq_cst
+ %tmp0 = atomicrmw volatile xor ptr %out, i64 %in syncscope("agent") seq_cst
store i64 %tmp0, ptr %out2
ret void
}
@@ -3475,7 +3475,7 @@ define amdgpu_kernel void @atomic_xor_i64_addr64(ptr %out, i64 %in, i64 %index)
; GCN2-NEXT: s_endpgm
entry:
%ptr = getelementptr i64, ptr %out, i64 %index
- %tmp0 = atomicrmw volatile xor ptr %ptr, i64 %in seq_cst
+ %tmp0 = atomicrmw volatile xor ptr %ptr, i64 %in syncscope("agent") seq_cst
ret void
}
@@ -3521,7 +3521,7 @@ define amdgpu_kernel void @atomic_xor_i64_ret_addr64(ptr %out, ptr %out2, i64 %i
; GCN2-NEXT: s_endpgm
entry:
%ptr = getelementptr i64, ptr %out, i64 %index
- %tmp0 = atomicrmw volatile xor ptr %ptr, i64 %in seq_cst
+ %tmp0 = atomicrmw volatile xor ptr %ptr, i64 %in syncscope("agent") seq_cst
store i64 %tmp0, ptr %out2
ret void
}
@@ -3598,7 +3598,7 @@ define amdgpu_kernel void @atomic_load_i64(ptr %in, ptr %out) {
; GCN2-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; GCN2-NEXT: s_endpgm
entry:
- %val = load atomic i64, ptr %in seq_cst, align 8
+ %val = load atomic i64, ptr %in syncscope("agent") seq_cst, align 8
store i64 %val, ptr %out
ret void
}
@@ -3882,7 +3882,7 @@ define amdgpu_kernel void @atomic_cmpxchg_i64_offset(ptr %out, i64 %in, i64 %old
; GCN2-NEXT: s_endpgm
entry:
%gep = getelementptr i64, ptr %out, i64 4
- %val = cmpxchg volatile ptr %gep, i64 %old, i64 %in seq_cst seq_cst
+ %val = cmpxchg volatile ptr %gep, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst
ret void
}
@@ -3926,7 +3926,7 @@ define amdgpu_kernel void @atomic_cmpxchg_i64_soffset(ptr %out, i64 %in, i64 %ol
; GCN2-NEXT: s_endpgm
entry:
%gep = getelementptr i64, ptr %out, i64 9000
- %val = cmpxchg volatile ptr %gep, i64 %old, i64 %in seq_cst seq_cst
+ %val = cmpxchg volatile ptr %gep, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst
ret void
}
@@ -3974,7 +3974,7 @@ define amdgpu_kernel void @atomic_cmpxchg_i64_ret_offset(ptr %out, ptr %out2, i6
; GCN2-NEXT: s_endpgm
entry:
%gep = getelementptr i64, ptr %out, i64 4
- %val = cmpxchg volatile ptr %gep, i64 %old, i64 %in seq_cst seq_cst
+ %val = cmpxchg volatile ptr %gep, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst
%extract0 = extractvalue { i64, i1 } %val, 0
store i64 %extract0, ptr %out2
ret void
@@ -4025,7 +4025,7 @@ define amdgpu_kernel void @atomic_cmpxchg_i64_addr64_offset(ptr %out, i64 %in, i
entry:
%ptr = getelementptr i64, ptr %out, i64 %index
%gep = getelementptr i64, ptr %ptr, i64 4
- %val = cmpxchg volatile ptr %gep, i64 %old, i64 %in seq_cst seq_cst
+ %val = cmpxchg volatile ptr %gep, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst
ret void
}
@@ -4082,7 +4082,7 @@ define amdgpu_kernel void @atomic_cmpxchg_i64_ret_addr64_offset(ptr %out, ptr %o
entry:
%ptr = getelementptr i64, ptr %out, i64 %index
%gep = getelementptr i64, ptr %ptr, i64 4
- %val = cmpxchg volatile ptr %gep, i64 %old, i64 %in seq_cst seq_cst
+ %val = cmpxchg volatile ptr %gep, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst
%extract0 = extractvalue { i64, i1 } %val, 0
store i64 %extract0, ptr %out2
ret void
@@ -4123,7 +4123,7 @@ define amdgpu_kernel void @atomic_cmpxchg_i64(ptr %out, i64 %in, i64 %old) {
; GCN2-NEXT: buffer_wbinvl1_vol
; GCN2-NEXT: s_endpgm
entry:
- %val = cmpxchg volatile ptr %out, i64 %old, i64 %in seq_cst seq_cst
+ %val = cmpxchg volatile ptr %out, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst
ret void
}
@@ -4166,7 +4166,7 @@ define amdgpu_kernel void @atomic_cmpxchg_i64_ret(ptr %out, ptr %out2, i64 %in,
; GCN2-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; GCN2-NEXT: s_endpgm
entry:
- %val = cmpxchg volatile ptr %out, i64 %old, i64 %in seq_cst seq_cst
+ %val = cmpxchg volatile ptr %out, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst
%extract0 = extractvalue { i64, i1 } %val, 0
store i64 %extract0, ptr %out2
ret void
@@ -4212,7 +4212,7 @@ define amdgpu_kernel void @atomic_cmpxchg_i64_addr64(ptr %out, i64 %in, i64 %ind
; GCN2-NEXT: s_endpgm
entry:
%ptr = getelementptr i64, ptr %out, i64 %index
- %val = cmpxchg volatile ptr %ptr, i64 %old, i64 %in seq_cst seq_cst
+ %val = cmpxchg volatile ptr %ptr, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst
ret void
}
@@ -4264,7 +4264,7 @@ define amdgpu_kernel void @atomic_cmpxchg_i64_ret_addr64(ptr %out, ptr %out2, i6
; GCN2-NEXT: s_endpgm
entry:
%ptr = getelementptr i64, ptr %out, i64 %index
- %val = cmpxchg volatile ptr %ptr, i64 %old, i64 %in seq_cst seq_cst
+ %val = cmpxchg volatile ptr %ptr, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst
%extract0 = extractvalue { i64, i1 } %val, 0
store i64 %extract0, ptr %out2
ret void
@@ -4342,7 +4342,7 @@ define amdgpu_kernel void @atomic_load_f64(ptr %in, ptr %out) {
; GCN2-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; GCN2-NEXT: s_endpgm
entry:
- %val = load atomic double, ptr %in seq_cst, align 8
+ %val = load atomic double, ptr %in syncscope("agent") seq_cst, align 8
store double %val, ptr %out
ret void
}
@@ -4620,7 +4620,7 @@ define amdgpu_kernel void @atomic_inc_i64_offset(ptr %out, i64 %in) {
; GCN2-NEXT: s_endpgm
entry:
%gep = getelementptr i64, ptr %out, i64 4
- %tmp0 = atomicrmw volatile uinc_wrap ptr %gep, i64 %in seq_cst
+ %tmp0 = atomicrmw volatile uinc_wrap ptr %gep, i64 %in syncscope("agent") seq_cst
ret void
}
@@ -4666,7 +4666,7 @@ define amdgpu_kernel void @atomic_inc_i64_ret_offset(ptr %out, ptr %out2, i64 %i
; GCN2-NEXT: s_endpgm
entry:
%gep = getelementptr i64, ptr %out, i64 4
- %tmp0 = atomicrmw volatile uinc_wrap ptr %gep, i64 %in seq_cst
+ %tmp0 = atomicrmw volatile uinc_wrap ptr %gep, i64 %in syncscope("agent") seq_cst
store i64 %tmp0, ptr %out2
ret void
}
@@ -4714,7 +4714,7 @@ define amdgpu_kernel void @atomic_inc_i64_incr64_offset(ptr %out, i64 %in, i64 %
entry:
%ptr = getelementptr i64, ptr %out, i64 %index
%gep = getelementptr i64, ptr %ptr, i64 4
- %tmp0 = atomicrmw volatile uinc_wrap ptr %gep, i64 %in seq_cst
+ %tmp0 = atomicrmw volatile uinc_wrap ptr %gep, i64 %in syncscope("agent") seq_cst
ret void
}
@@ -4765,7 +4765,7 @@ define amdgpu_kernel void @atomic_inc_i64_ret_incr64_offset(ptr %out, ptr %out2,
entry:
%ptr = getelementptr i64, ptr %out, i64 %index
%gep = getelementptr i64, ptr %ptr, i64 4
- %tmp0 = atomicrmw volatile uinc_wrap ptr %gep, i64 %in seq_cst
+ %tmp0 = atomicrmw volatile uinc_wrap ptr %gep, i64 %in syncscope("agent") seq_cst
store i64 %tmp0, ptr %out2
ret void
}
@@ -4799,7 +4799,7 @@ define amdgpu_kernel void @atomic_inc_i64(ptr %out, i64 %in) {
; GCN2-NEXT: buffer_wbinvl1_vol
; GCN2-NEXT: s_endpgm
entry:
- %tmp0 = atomicrmw volatile uinc_wrap ptr %out, i64 %in seq_cst
+ %tmp0 = atomicrmw volatile uinc_wrap ptr %out, i64 %in syncscope("agent") seq_cst
ret void
}
@@ -4840,7 +4840,7 @@ define amdgpu_kernel void @atomic_inc_i64_ret(ptr %out, ptr %out2, i64 %in) {
; GCN2-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; GCN2-NEXT: s_endpgm
entry:
- %tmp0 = atomicrmw volatile uinc_wrap ptr %out, i64 %in seq_cst
+ %tmp0 = atomicrmw volatile uinc_wrap ptr %out, i64 %in syncscope("agent") seq_cst
store i64 %tmp0, ptr %out2
ret void
}
@@ -4883,7 +4883,7 @@ define amdgpu_kernel void @atomic_inc_i64_incr64(ptr %out, i64 %in, i64 %index)
; GCN2-NEXT: s_endpgm
entry:
%ptr = getelementptr i64, ptr %out, i64 %index
- %tmp0 = atomicrmw volatile uinc_wrap ptr %ptr, i64 %in seq_cst
+ %tmp0 = atomicrmw volatile uinc_wrap ptr %ptr, i64 %in syncscope("agent") seq_cst
ret void
}
@@ -4929,7 +4929,7 @@ define amdgpu_kernel void @atomic_inc_i64_ret_incr64(ptr %out, ptr %out2, i64 %i
; GCN2-NEXT: s_endpgm
entry:
%ptr = getelementptr i64, ptr %out, i64 %index
- %tmp0 = atomicrmw volatile uinc_wrap ptr %ptr, i64 %in seq_cst
+ %tmp0 = atomicrmw volatile uinc_wrap ptr %ptr, i64 %in syncscope("agent") seq_cst
store i64 %tmp0, ptr %out2
ret void
}
@@ -4968,7 +4968,7 @@ define amdgpu_kernel void @atomic_dec_i64_offset(ptr %out, i64 %in) {
; GCN2-NEXT: s_endpgm
entry:
%gep = getelementptr i64, ptr %out, i64 4
- %tmp0 = atomicrmw volatile udec_wrap ptr %gep, i64 %in seq_cst
+ %tmp0 = atomicrmw volatile udec_wrap ptr %gep, i64 %in syncscope("agent") seq_cst
ret void
}
@@ -5014,7 +5014,7 @@ define amdgpu_kernel void @atomic_dec_i64_ret_offset(ptr %out, ptr %out2, i64 %i
; GCN2-NEXT: s_endpgm
entry:
%gep = getelementptr i64, ptr %out, i64 4
- %tmp0 = atomicrmw volatile udec_wrap ptr %gep, i64 %in seq_cst
+ %tmp0 = atomicrmw volatile udec_wrap ptr %gep, i64 %in syncscope("agent") seq_cst
store i64 %tmp0, ptr %out2
ret void
}
@@ -5062,7 +5062,7 @@ define amdgpu_kernel void @atomic_dec_i64_decr64_offset(ptr %out, i64 %in, i64 %
entry:
%ptr = getelementptr i64, ptr %out, i64 %index
%gep = getelementptr i64, ptr %ptr, i64 4
- %tmp0 = atomicrmw volatile udec_wrap ptr %gep, i64 %in seq_cst
+ %tmp0 = atomicrmw volatile udec_wrap ptr %gep, i64 %in syncscope("agent") seq_cst
ret void
}
@@ -5113,7 +5113,7 @@ define amdgpu_kernel void @atomic_dec_i64_ret_decr64_offset(ptr %out, ptr %out2,
entry:
%ptr = getelementptr i64, ptr %out, i64 %index
%gep = getelementptr i64, ptr %ptr, i64 4
- %tmp0 = atomicrmw volatile udec_wrap ptr %gep, i64 %in seq_cst
+ %tmp0 = atomicrmw volatile udec_wrap ptr %gep, i64 %in syncscope("agent") seq_cst
store i64 %tmp0, ptr %out2
ret void
}
@@ -5147,7 +5147,7 @@ define amdgpu_kernel void @atomic_dec_i64(ptr %out, i64 %in) {
; GCN2-NEXT: buffer_wbinvl1_vol
; GCN2-NEXT: s_endpgm
entry:
- %tmp0 = atomicrmw volatile udec_wrap ptr %out, i64 %in seq_cst
+ %tmp0 = atomicrmw volatile udec_wrap ptr %out, i64 %in syncscope("agent") seq_cst
ret void
}
@@ -5188,7 +5188,7 @@ define amdgpu_kernel void @atomic_dec_i64_ret(ptr %out, ptr %out2, i64 %in) {
; GCN2-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; GCN2-NEXT: s_endpgm
entry:
- %tmp0 = atomicrmw volatile udec_wrap ptr %out, i64 %in seq_cst
+ %tmp0 = atomicrmw volatile udec_wrap ptr %out, i64 %in syncscope("agent") seq_cst
store i64 %tmp0, ptr %out2
ret void
}
@@ -5231,7 +5231,7 @@ define amdgpu_kernel void @atomic_dec_i64_decr64(ptr %out, i64 %in, i64 %index)
; GCN2-NEXT: s_endpgm
entry:
%ptr = getelementptr i64, ptr %out, i64 %index
- %tmp0 = atomicrmw volatile udec_wrap ptr %ptr, i64 %in seq_cst
+ %tmp0 = atomicrmw volatile udec_wrap ptr %ptr, i64 %in syncscope("agent") seq_cst
ret void
}
@@ -5277,7 +5277,7 @@ define amdgpu_kernel void @atomic_dec_i64_ret_decr64(ptr %out, ptr %out2, i64 %i
; GCN2-NEXT: s_endpgm
entry:
%ptr = getelementptr i64, ptr %out, i64 %index
- %tmp0 = atomicrmw volatile udec_wrap ptr %ptr, i64 %in seq_cst
+ %tmp0 = atomicrmw volatile udec_wrap ptr %ptr, i64 %in syncscope("agent") seq_cst
store i64 %tmp0, ptr %out2
ret void
}
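
For the cmpxchg tests the scope is written once, ahead of both the success and the failure ordering, as in the hunks above. A minimal sketch with illustrative names (%p, %old, %new are placeholders, not from any one test):

  %pair = cmpxchg volatile ptr %p, i64 %old, i64 %new syncscope("agent") seq_cst seq_cst
  %loaded = extractvalue { i64, i1 } %pair, 0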
diff --git a/llvm/test/CodeGen/AMDGPU/global-saddr-atomics.ll b/llvm/test/CodeGen/AMDGPU/global-saddr-atomics.ll
index 91f23b0e9eeb58..7d772de1e164ec 100644
--- a/llvm/test/CodeGen/AMDGPU/global-saddr-atomics.ll
+++ b/llvm/test/CodeGen/AMDGPU/global-saddr-atomics.ll
@@ -35,7 +35,7 @@ define amdgpu_ps void @global_xchg_saddr_i32_nortn(ptr addrspace(1) inreg %sbase
; GFX11-NEXT: s_endpgm
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
- %unused = atomicrmw xchg ptr addrspace(1) %gep0, i32 %data seq_cst
+ %unused = atomicrmw xchg ptr addrspace(1) %gep0, i32 %data syncscope("agent") seq_cst
ret void
}
@@ -71,7 +71,7 @@ define amdgpu_ps void @global_xchg_saddr_i32_nortn_offset_2047(ptr addrspace(1)
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
%gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 2047
- %unused = atomicrmw xchg ptr addrspace(1) %gep1, i32 %data seq_cst
+ %unused = atomicrmw xchg ptr addrspace(1) %gep1, i32 %data syncscope("agent") seq_cst
ret void
}
@@ -107,7 +107,7 @@ define amdgpu_ps void @global_xchg_saddr_i32_nortn_offset_neg2048(ptr addrspace(
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
%gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -2048
- %unused = atomicrmw xchg ptr addrspace(1) %gep1, i32 %data seq_cst
+ %unused = atomicrmw xchg ptr addrspace(1) %gep1, i32 %data syncscope("agent") seq_cst
ret void
}
@@ -141,7 +141,7 @@ define amdgpu_ps float @global_xchg_saddr_i32_rtn(ptr addrspace(1) inreg %sbase,
; GFX11-NEXT: ; return to shader part epilog
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
- %rtn = atomicrmw xchg ptr addrspace(1) %gep0, i32 %data seq_cst
+ %rtn = atomicrmw xchg ptr addrspace(1) %gep0, i32 %data syncscope("agent") seq_cst
%cast.rtn = bitcast i32 %rtn to float
ret float %cast.rtn
}
@@ -181,7 +181,7 @@ define amdgpu_ps float @global_xchg_saddr_i32_rtn_2048(ptr addrspace(1) inreg %s
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
%gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 2048
- %rtn = atomicrmw xchg ptr addrspace(1) %gep1, i32 %data seq_cst
+ %rtn = atomicrmw xchg ptr addrspace(1) %gep1, i32 %data syncscope("agent") seq_cst
%cast.rtn = bitcast i32 %rtn to float
ret float %cast.rtn
}
@@ -217,7 +217,7 @@ define amdgpu_ps float @global_xchg_saddr_i32_rtn_neg2048(ptr addrspace(1) inreg
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
%gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -2048
- %rtn = atomicrmw xchg ptr addrspace(1) %gep1, i32 %data seq_cst
+ %rtn = atomicrmw xchg ptr addrspace(1) %gep1, i32 %data syncscope("agent") seq_cst
%cast.rtn = bitcast i32 %rtn to float
ret float %cast.rtn
}
@@ -276,7 +276,7 @@ define amdgpu_ps float @global_xchg_saddr_uniform_ptr_in_vgprs_rtn(i32 %voffset,
%sbase = load ptr addrspace(1), ptr addrspace(3) @ptr.in.lds
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
- %rtn = atomicrmw xchg ptr addrspace(1) %gep0, i32 %data seq_cst
+ %rtn = atomicrmw xchg ptr addrspace(1) %gep0, i32 %data syncscope("agent") seq_cst
%cast.rtn = bitcast i32 %rtn to float
ret float %cast.rtn
}
@@ -330,7 +330,7 @@ define amdgpu_ps float @global_xchg_saddr_uniform_ptr_in_vgprs_rtn_immoffset(i32
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
%gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 42
- %rtn = atomicrmw xchg ptr addrspace(1) %gep1, i32 %data seq_cst
+ %rtn = atomicrmw xchg ptr addrspace(1) %gep1, i32 %data syncscope("agent") seq_cst
%cast.rtn = bitcast i32 %rtn to float
ret float %cast.rtn
}
@@ -383,7 +383,7 @@ define amdgpu_ps void @global_xchg_saddr_uniform_ptr_in_vgprs_nortn(i32 %voffset
%sbase = load ptr addrspace(1), ptr addrspace(3) @ptr.in.lds
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
- %unused = atomicrmw xchg ptr addrspace(1) %gep0, i32 %data seq_cst
+ %unused = atomicrmw xchg ptr addrspace(1) %gep0, i32 %data syncscope("agent") seq_cst
ret void
}
@@ -436,7 +436,7 @@ define amdgpu_ps void @global_xchg_saddr_uniform_ptr_in_vgprs_nortn_immoffset(i3
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
%gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 42
- %unused = atomicrmw xchg ptr addrspace(1) %gep1, i32 %data seq_cst
+ %unused = atomicrmw xchg ptr addrspace(1) %gep1, i32 %data syncscope("agent") seq_cst
ret void
}
@@ -478,7 +478,7 @@ define amdgpu_ps <2 x float> @global_xchg_saddr_i64_rtn(ptr addrspace(1) inreg %
; GFX11-NEXT: ; return to shader part epilog
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
- %rtn = atomicrmw xchg ptr addrspace(1) %gep0, i64 %data seq_cst
+ %rtn = atomicrmw xchg ptr addrspace(1) %gep0, i64 %data syncscope("agent") seq_cst
%cast.rtn = bitcast i64 %rtn to <2 x float>
ret <2 x float> %cast.rtn
}
@@ -514,7 +514,7 @@ define amdgpu_ps <2 x float> @global_xchg_saddr_i64_rtn_neg128(ptr addrspace(1)
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
%gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
- %rtn = atomicrmw xchg ptr addrspace(1) %gep1, i64 %data seq_cst
+ %rtn = atomicrmw xchg ptr addrspace(1) %gep1, i64 %data syncscope("agent") seq_cst
%cast.rtn = bitcast i64 %rtn to <2 x float>
ret <2 x float> %cast.rtn
}
@@ -549,7 +549,7 @@ define amdgpu_ps void @global_xchg_saddr_i64_nortn(ptr addrspace(1) inreg %sbase
; GFX11-NEXT: s_endpgm
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
- %unused = atomicrmw xchg ptr addrspace(1) %gep0, i64 %data seq_cst
+ %unused = atomicrmw xchg ptr addrspace(1) %gep0, i64 %data syncscope("agent") seq_cst
ret void
}
@@ -584,7 +584,7 @@ define amdgpu_ps void @global_xchg_saddr_i64_nortn_neg128(ptr addrspace(1) inreg
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
%gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
- %unused = atomicrmw xchg ptr addrspace(1) %gep1, i64 %data seq_cst
+ %unused = atomicrmw xchg ptr addrspace(1) %gep1, i64 %data syncscope("agent") seq_cst
ret void
}
@@ -622,7 +622,7 @@ define amdgpu_ps float @global_add_saddr_i32_rtn(ptr addrspace(1) inreg %sbase,
; GFX11-NEXT: ; return to shader part epilog
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
- %rtn = atomicrmw add ptr addrspace(1) %gep0, i32 %data seq_cst
+ %rtn = atomicrmw add ptr addrspace(1) %gep0, i32 %data syncscope("agent") seq_cst
%cast.rtn = bitcast i32 %rtn to float
ret float %cast.rtn
}
@@ -658,7 +658,7 @@ define amdgpu_ps float @global_add_saddr_i32_rtn_neg128(ptr addrspace(1) inreg %
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
%gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
- %rtn = atomicrmw add ptr addrspace(1) %gep1, i32 %data seq_cst
+ %rtn = atomicrmw add ptr addrspace(1) %gep1, i32 %data syncscope("agent") seq_cst
%cast.rtn = bitcast i32 %rtn to float
ret float %cast.rtn
}
@@ -693,7 +693,7 @@ define amdgpu_ps void @global_add_saddr_i32_nortn(ptr addrspace(1) inreg %sbase,
; GFX11-NEXT: s_endpgm
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
- %unused = atomicrmw add ptr addrspace(1) %gep0, i32 %data seq_cst
+ %unused = atomicrmw add ptr addrspace(1) %gep0, i32 %data syncscope("agent") seq_cst
ret void
}
@@ -728,7 +728,7 @@ define amdgpu_ps void @global_add_saddr_i32_nortn_neg128(ptr addrspace(1) inreg
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
%gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
- %unused = atomicrmw add ptr addrspace(1) %gep1, i32 %data seq_cst
+ %unused = atomicrmw add ptr addrspace(1) %gep1, i32 %data syncscope("agent") seq_cst
ret void
}
@@ -762,7 +762,7 @@ define amdgpu_ps <2 x float> @global_add_saddr_i64_rtn(ptr addrspace(1) inreg %s
; GFX11-NEXT: ; return to shader part epilog
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
- %rtn = atomicrmw add ptr addrspace(1) %gep0, i64 %data seq_cst
+ %rtn = atomicrmw add ptr addrspace(1) %gep0, i64 %data syncscope("agent") seq_cst
%cast.rtn = bitcast i64 %rtn to <2 x float>
ret <2 x float> %cast.rtn
}
@@ -798,7 +798,7 @@ define amdgpu_ps <2 x float> @global_add_saddr_i64_rtn_neg128(ptr addrspace(1) i
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
%gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
- %rtn = atomicrmw add ptr addrspace(1) %gep1, i64 %data seq_cst
+ %rtn = atomicrmw add ptr addrspace(1) %gep1, i64 %data syncscope("agent") seq_cst
%cast.rtn = bitcast i64 %rtn to <2 x float>
ret <2 x float> %cast.rtn
}
@@ -833,7 +833,7 @@ define amdgpu_ps void @global_add_saddr_i64_nortn(ptr addrspace(1) inreg %sbase,
; GFX11-NEXT: s_endpgm
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
- %unused = atomicrmw add ptr addrspace(1) %gep0, i64 %data seq_cst
+ %unused = atomicrmw add ptr addrspace(1) %gep0, i64 %data syncscope("agent") seq_cst
ret void
}
@@ -868,7 +868,7 @@ define amdgpu_ps void @global_add_saddr_i64_nortn_neg128(ptr addrspace(1) inreg
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
%gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
- %unused = atomicrmw add ptr addrspace(1) %gep1, i64 %data seq_cst
+ %unused = atomicrmw add ptr addrspace(1) %gep1, i64 %data syncscope("agent") seq_cst
ret void
}
@@ -906,7 +906,7 @@ define amdgpu_ps float @global_sub_saddr_i32_rtn(ptr addrspace(1) inreg %sbase,
; GFX11-NEXT: ; return to shader part epilog
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
- %rtn = atomicrmw sub ptr addrspace(1) %gep0, i32 %data seq_cst
+ %rtn = atomicrmw sub ptr addrspace(1) %gep0, i32 %data syncscope("agent") seq_cst
%cast.rtn = bitcast i32 %rtn to float
ret float %cast.rtn
}
@@ -942,7 +942,7 @@ define amdgpu_ps float @global_sub_saddr_i32_rtn_neg128(ptr addrspace(1) inreg %
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
%gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
- %rtn = atomicrmw sub ptr addrspace(1) %gep1, i32 %data seq_cst
+ %rtn = atomicrmw sub ptr addrspace(1) %gep1, i32 %data syncscope("agent") seq_cst
%cast.rtn = bitcast i32 %rtn to float
ret float %cast.rtn
}
@@ -977,7 +977,7 @@ define amdgpu_ps void @global_sub_saddr_i32_nortn(ptr addrspace(1) inreg %sbase,
; GFX11-NEXT: s_endpgm
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
- %unused = atomicrmw sub ptr addrspace(1) %gep0, i32 %data seq_cst
+ %unused = atomicrmw sub ptr addrspace(1) %gep0, i32 %data syncscope("agent") seq_cst
ret void
}
@@ -1012,7 +1012,7 @@ define amdgpu_ps void @global_sub_saddr_i32_nortn_neg128(ptr addrspace(1) inreg
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
%gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
- %unused = atomicrmw sub ptr addrspace(1) %gep1, i32 %data seq_cst
+ %unused = atomicrmw sub ptr addrspace(1) %gep1, i32 %data syncscope("agent") seq_cst
ret void
}
@@ -1046,7 +1046,7 @@ define amdgpu_ps <2 x float> @global_sub_saddr_i64_rtn(ptr addrspace(1) inreg %s
; GFX11-NEXT: ; return to shader part epilog
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
- %rtn = atomicrmw sub ptr addrspace(1) %gep0, i64 %data seq_cst
+ %rtn = atomicrmw sub ptr addrspace(1) %gep0, i64 %data syncscope("agent") seq_cst
%cast.rtn = bitcast i64 %rtn to <2 x float>
ret <2 x float> %cast.rtn
}
@@ -1082,7 +1082,7 @@ define amdgpu_ps <2 x float> @global_sub_saddr_i64_rtn_neg128(ptr addrspace(1) i
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
%gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
- %rtn = atomicrmw sub ptr addrspace(1) %gep1, i64 %data seq_cst
+ %rtn = atomicrmw sub ptr addrspace(1) %gep1, i64 %data syncscope("agent") seq_cst
%cast.rtn = bitcast i64 %rtn to <2 x float>
ret <2 x float> %cast.rtn
}
@@ -1117,7 +1117,7 @@ define amdgpu_ps void @global_sub_saddr_i64_nortn(ptr addrspace(1) inreg %sbase,
; GFX11-NEXT: s_endpgm
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
- %unused = atomicrmw sub ptr addrspace(1) %gep0, i64 %data seq_cst
+ %unused = atomicrmw sub ptr addrspace(1) %gep0, i64 %data syncscope("agent") seq_cst
ret void
}
@@ -1152,7 +1152,7 @@ define amdgpu_ps void @global_sub_saddr_i64_nortn_neg128(ptr addrspace(1) inreg
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
%gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
- %unused = atomicrmw sub ptr addrspace(1) %gep1, i64 %data seq_cst
+ %unused = atomicrmw sub ptr addrspace(1) %gep1, i64 %data syncscope("agent") seq_cst
ret void
}
@@ -1190,7 +1190,7 @@ define amdgpu_ps float @global_and_saddr_i32_rtn(ptr addrspace(1) inreg %sbase,
; GFX11-NEXT: ; return to shader part epilog
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
- %rtn = atomicrmw and ptr addrspace(1) %gep0, i32 %data seq_cst
+ %rtn = atomicrmw and ptr addrspace(1) %gep0, i32 %data syncscope("agent") seq_cst
%cast.rtn = bitcast i32 %rtn to float
ret float %cast.rtn
}
@@ -1226,7 +1226,7 @@ define amdgpu_ps float @global_and_saddr_i32_rtn_neg128(ptr addrspace(1) inreg %
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
%gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
- %rtn = atomicrmw and ptr addrspace(1) %gep1, i32 %data seq_cst
+ %rtn = atomicrmw and ptr addrspace(1) %gep1, i32 %data syncscope("agent") seq_cst
%cast.rtn = bitcast i32 %rtn to float
ret float %cast.rtn
}
@@ -1261,7 +1261,7 @@ define amdgpu_ps void @global_and_saddr_i32_nortn(ptr addrspace(1) inreg %sbase,
; GFX11-NEXT: s_endpgm
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
- %unused = atomicrmw and ptr addrspace(1) %gep0, i32 %data seq_cst
+ %unused = atomicrmw and ptr addrspace(1) %gep0, i32 %data syncscope("agent") seq_cst
ret void
}
@@ -1296,7 +1296,7 @@ define amdgpu_ps void @global_and_saddr_i32_nortn_neg128(ptr addrspace(1) inreg
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
%gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
- %unused = atomicrmw and ptr addrspace(1) %gep1, i32 %data seq_cst
+ %unused = atomicrmw and ptr addrspace(1) %gep1, i32 %data syncscope("agent") seq_cst
ret void
}
@@ -1330,7 +1330,7 @@ define amdgpu_ps <2 x float> @global_and_saddr_i64_rtn(ptr addrspace(1) inreg %s
; GFX11-NEXT: ; return to shader part epilog
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
- %rtn = atomicrmw and ptr addrspace(1) %gep0, i64 %data seq_cst
+ %rtn = atomicrmw and ptr addrspace(1) %gep0, i64 %data syncscope("agent") seq_cst
%cast.rtn = bitcast i64 %rtn to <2 x float>
ret <2 x float> %cast.rtn
}
@@ -1366,7 +1366,7 @@ define amdgpu_ps <2 x float> @global_and_saddr_i64_rtn_neg128(ptr addrspace(1) i
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
%gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
- %rtn = atomicrmw and ptr addrspace(1) %gep1, i64 %data seq_cst
+ %rtn = atomicrmw and ptr addrspace(1) %gep1, i64 %data syncscope("agent") seq_cst
%cast.rtn = bitcast i64 %rtn to <2 x float>
ret <2 x float> %cast.rtn
}
@@ -1401,7 +1401,7 @@ define amdgpu_ps void @global_and_saddr_i64_nortn(ptr addrspace(1) inreg %sbase,
; GFX11-NEXT: s_endpgm
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
- %unused = atomicrmw and ptr addrspace(1) %gep0, i64 %data seq_cst
+ %unused = atomicrmw and ptr addrspace(1) %gep0, i64 %data syncscope("agent") seq_cst
ret void
}
@@ -1436,7 +1436,7 @@ define amdgpu_ps void @global_and_saddr_i64_nortn_neg128(ptr addrspace(1) inreg
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
%gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
- %unused = atomicrmw and ptr addrspace(1) %gep1, i64 %data seq_cst
+ %unused = atomicrmw and ptr addrspace(1) %gep1, i64 %data syncscope("agent") seq_cst
ret void
}
@@ -1474,7 +1474,7 @@ define amdgpu_ps float @global_or_saddr_i32_rtn(ptr addrspace(1) inreg %sbase, i
; GFX11-NEXT: ; return to shader part epilog
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
- %rtn = atomicrmw or ptr addrspace(1) %gep0, i32 %data seq_cst
+ %rtn = atomicrmw or ptr addrspace(1) %gep0, i32 %data syncscope("agent") seq_cst
%cast.rtn = bitcast i32 %rtn to float
ret float %cast.rtn
}
@@ -1510,7 +1510,7 @@ define amdgpu_ps float @global_or_saddr_i32_rtn_neg128(ptr addrspace(1) inreg %s
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
%gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
- %rtn = atomicrmw or ptr addrspace(1) %gep1, i32 %data seq_cst
+ %rtn = atomicrmw or ptr addrspace(1) %gep1, i32 %data syncscope("agent") seq_cst
%cast.rtn = bitcast i32 %rtn to float
ret float %cast.rtn
}
@@ -1545,7 +1545,7 @@ define amdgpu_ps void @global_or_saddr_i32_nortn(ptr addrspace(1) inreg %sbase,
; GFX11-NEXT: s_endpgm
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
- %unused = atomicrmw or ptr addrspace(1) %gep0, i32 %data seq_cst
+ %unused = atomicrmw or ptr addrspace(1) %gep0, i32 %data syncscope("agent") seq_cst
ret void
}
@@ -1580,7 +1580,7 @@ define amdgpu_ps void @global_or_saddr_i32_nortn_neg128(ptr addrspace(1) inreg %
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
%gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
- %unused = atomicrmw or ptr addrspace(1) %gep1, i32 %data seq_cst
+ %unused = atomicrmw or ptr addrspace(1) %gep1, i32 %data syncscope("agent") seq_cst
ret void
}
@@ -1614,7 +1614,7 @@ define amdgpu_ps <2 x float> @global_or_saddr_i64_rtn(ptr addrspace(1) inreg %sb
; GFX11-NEXT: ; return to shader part epilog
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
- %rtn = atomicrmw or ptr addrspace(1) %gep0, i64 %data seq_cst
+ %rtn = atomicrmw or ptr addrspace(1) %gep0, i64 %data syncscope("agent") seq_cst
%cast.rtn = bitcast i64 %rtn to <2 x float>
ret <2 x float> %cast.rtn
}
@@ -1650,7 +1650,7 @@ define amdgpu_ps <2 x float> @global_or_saddr_i64_rtn_neg128(ptr addrspace(1) in
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
%gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
- %rtn = atomicrmw or ptr addrspace(1) %gep1, i64 %data seq_cst
+ %rtn = atomicrmw or ptr addrspace(1) %gep1, i64 %data syncscope("agent") seq_cst
%cast.rtn = bitcast i64 %rtn to <2 x float>
ret <2 x float> %cast.rtn
}
@@ -1685,7 +1685,7 @@ define amdgpu_ps void @global_or_saddr_i64_nortn(ptr addrspace(1) inreg %sbase,
; GFX11-NEXT: s_endpgm
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
- %unused = atomicrmw or ptr addrspace(1) %gep0, i64 %data seq_cst
+ %unused = atomicrmw or ptr addrspace(1) %gep0, i64 %data syncscope("agent") seq_cst
ret void
}
@@ -1720,7 +1720,7 @@ define amdgpu_ps void @global_or_saddr_i64_nortn_neg128(ptr addrspace(1) inreg %
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
%gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
- %unused = atomicrmw or ptr addrspace(1) %gep1, i64 %data seq_cst
+ %unused = atomicrmw or ptr addrspace(1) %gep1, i64 %data syncscope("agent") seq_cst
ret void
}
@@ -1758,7 +1758,7 @@ define amdgpu_ps float @global_xor_saddr_i32_rtn(ptr addrspace(1) inreg %sbase,
; GFX11-NEXT: ; return to shader part epilog
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
- %rtn = atomicrmw xor ptr addrspace(1) %gep0, i32 %data seq_cst
+ %rtn = atomicrmw xor ptr addrspace(1) %gep0, i32 %data syncscope("agent") seq_cst
%cast.rtn = bitcast i32 %rtn to float
ret float %cast.rtn
}
@@ -1794,7 +1794,7 @@ define amdgpu_ps float @global_xor_saddr_i32_rtn_neg128(ptr addrspace(1) inreg %
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
%gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
- %rtn = atomicrmw xor ptr addrspace(1) %gep1, i32 %data seq_cst
+ %rtn = atomicrmw xor ptr addrspace(1) %gep1, i32 %data syncscope("agent") seq_cst
%cast.rtn = bitcast i32 %rtn to float
ret float %cast.rtn
}
@@ -1829,7 +1829,7 @@ define amdgpu_ps void @global_xor_saddr_i32_nortn(ptr addrspace(1) inreg %sbase,
; GFX11-NEXT: s_endpgm
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
- %unused = atomicrmw xor ptr addrspace(1) %gep0, i32 %data seq_cst
+ %unused = atomicrmw xor ptr addrspace(1) %gep0, i32 %data syncscope("agent") seq_cst
ret void
}
@@ -1864,7 +1864,7 @@ define amdgpu_ps void @global_xor_saddr_i32_nortn_neg128(ptr addrspace(1) inreg
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
%gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
- %unused = atomicrmw xor ptr addrspace(1) %gep1, i32 %data seq_cst
+ %unused = atomicrmw xor ptr addrspace(1) %gep1, i32 %data syncscope("agent") seq_cst
ret void
}
@@ -1898,7 +1898,7 @@ define amdgpu_ps <2 x float> @global_xor_saddr_i64_rtn(ptr addrspace(1) inreg %s
; GFX11-NEXT: ; return to shader part epilog
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
- %rtn = atomicrmw xor ptr addrspace(1) %gep0, i64 %data seq_cst
+ %rtn = atomicrmw xor ptr addrspace(1) %gep0, i64 %data syncscope("agent") seq_cst
%cast.rtn = bitcast i64 %rtn to <2 x float>
ret <2 x float> %cast.rtn
}
@@ -1934,7 +1934,7 @@ define amdgpu_ps <2 x float> @global_xor_saddr_i64_rtn_neg128(ptr addrspace(1) i
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
%gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
- %rtn = atomicrmw xor ptr addrspace(1) %gep1, i64 %data seq_cst
+ %rtn = atomicrmw xor ptr addrspace(1) %gep1, i64 %data syncscope("agent") seq_cst
%cast.rtn = bitcast i64 %rtn to <2 x float>
ret <2 x float> %cast.rtn
}
@@ -1969,7 +1969,7 @@ define amdgpu_ps void @global_xor_saddr_i64_nortn(ptr addrspace(1) inreg %sbase,
; GFX11-NEXT: s_endpgm
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
- %unused = atomicrmw xor ptr addrspace(1) %gep0, i64 %data seq_cst
+ %unused = atomicrmw xor ptr addrspace(1) %gep0, i64 %data syncscope("agent") seq_cst
ret void
}
@@ -2004,7 +2004,7 @@ define amdgpu_ps void @global_xor_saddr_i64_nortn_neg128(ptr addrspace(1) inreg
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
%gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
- %unused = atomicrmw xor ptr addrspace(1) %gep1, i64 %data seq_cst
+ %unused = atomicrmw xor ptr addrspace(1) %gep1, i64 %data syncscope("agent") seq_cst
ret void
}
@@ -3374,7 +3374,7 @@ define amdgpu_ps float @global_inc_saddr_i32_rtn(ptr addrspace(1) inreg %sbase,
; GFX11-NEXT: ; return to shader part epilog
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
- %rtn = atomicrmw uinc_wrap ptr addrspace(1) %gep0, i32 %data monotonic
+ %rtn = atomicrmw uinc_wrap ptr addrspace(1) %gep0, i32 %data syncscope("agent") monotonic
%cast.rtn = bitcast i32 %rtn to float
ret float %cast.rtn
}
@@ -3394,7 +3394,7 @@ define amdgpu_ps float @global_inc_saddr_i32_rtn_neg128(ptr addrspace(1) inreg %
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
%gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
- %rtn = atomicrmw uinc_wrap ptr addrspace(1) %gep1, i32 %data monotonic
+ %rtn = atomicrmw uinc_wrap ptr addrspace(1) %gep1, i32 %data syncscope("agent") monotonic
%cast.rtn = bitcast i32 %rtn to float
ret float %cast.rtn
}
@@ -3413,7 +3413,7 @@ define amdgpu_ps void @global_inc_saddr_i32_nortn(ptr addrspace(1) inreg %sbase,
; GFX11-NEXT: s_endpgm
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
- %unused = atomicrmw uinc_wrap ptr addrspace(1) %gep0, i32 %data monotonic
+ %unused = atomicrmw uinc_wrap ptr addrspace(1) %gep0, i32 %data syncscope("agent") monotonic
ret void
}
@@ -3432,7 +3432,7 @@ define amdgpu_ps void @global_inc_saddr_i32_nortn_neg128(ptr addrspace(1) inreg
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
%gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
- %unused = atomicrmw uinc_wrap ptr addrspace(1) %gep1, i32 %data monotonic
+ %unused = atomicrmw uinc_wrap ptr addrspace(1) %gep1, i32 %data syncscope("agent") monotonic
ret void
}
@@ -3450,7 +3450,7 @@ define amdgpu_ps <2 x float> @global_inc_saddr_i64_rtn(ptr addrspace(1) inreg %s
; GFX11-NEXT: ; return to shader part epilog
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
- %rtn = atomicrmw uinc_wrap ptr addrspace(1) %gep0, i64 %data monotonic
+ %rtn = atomicrmw uinc_wrap ptr addrspace(1) %gep0, i64 %data syncscope("agent") monotonic
%cast.rtn = bitcast i64 %rtn to <2 x float>
ret <2 x float> %cast.rtn
}
@@ -3470,7 +3470,7 @@ define amdgpu_ps <2 x float> @global_inc_saddr_i64_rtn_neg128(ptr addrspace(1) i
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
%gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
- %rtn = atomicrmw uinc_wrap ptr addrspace(1) %gep1, i64 %data monotonic
+ %rtn = atomicrmw uinc_wrap ptr addrspace(1) %gep1, i64 %data syncscope("agent") monotonic
%cast.rtn = bitcast i64 %rtn to <2 x float>
ret <2 x float> %cast.rtn
}
@@ -3489,7 +3489,7 @@ define amdgpu_ps void @global_inc_saddr_i64_nortn(ptr addrspace(1) inreg %sbase,
; GFX11-NEXT: s_endpgm
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
- %unused = atomicrmw uinc_wrap ptr addrspace(1) %gep0, i64 %data monotonic
+ %unused = atomicrmw uinc_wrap ptr addrspace(1) %gep0, i64 %data syncscope("agent") monotonic
ret void
}
@@ -3508,7 +3508,7 @@ define amdgpu_ps void @global_inc_saddr_i64_nortn_neg128(ptr addrspace(1) inreg
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
%gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
- %unused = atomicrmw uinc_wrap ptr addrspace(1) %gep1, i64 %data monotonic
+ %unused = atomicrmw uinc_wrap ptr addrspace(1) %gep1, i64 %data syncscope("agent") monotonic
ret void
}
@@ -3531,7 +3531,7 @@ define amdgpu_ps float @global_dec_saddr_i32_rtn(ptr addrspace(1) inreg %sbase,
; GFX11-NEXT: ; return to shader part epilog
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
- %rtn = atomicrmw udec_wrap ptr addrspace(1) %gep0, i32 %data monotonic
+ %rtn = atomicrmw udec_wrap ptr addrspace(1) %gep0, i32 %data syncscope("agent") monotonic
%cast.rtn = bitcast i32 %rtn to float
ret float %cast.rtn
}
@@ -3551,7 +3551,7 @@ define amdgpu_ps float @global_dec_saddr_i32_rtn_neg128(ptr addrspace(1) inreg %
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
%gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
- %rtn = atomicrmw udec_wrap ptr addrspace(1) %gep1, i32 %data monotonic
+ %rtn = atomicrmw udec_wrap ptr addrspace(1) %gep1, i32 %data syncscope("agent") monotonic
%cast.rtn = bitcast i32 %rtn to float
ret float %cast.rtn
}
@@ -3570,7 +3570,7 @@ define amdgpu_ps void @global_dec_saddr_i32_nortn(ptr addrspace(1) inreg %sbase,
; GFX11-NEXT: s_endpgm
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
- %unused = atomicrmw udec_wrap ptr addrspace(1) %gep0, i32 %data monotonic
+ %unused = atomicrmw udec_wrap ptr addrspace(1) %gep0, i32 %data syncscope("agent") monotonic
ret void
}
@@ -3589,7 +3589,7 @@ define amdgpu_ps void @global_dec_saddr_i32_nortn_neg128(ptr addrspace(1) inreg
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
%gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
- %unused = atomicrmw udec_wrap ptr addrspace(1) %gep1, i32 %data monotonic
+ %unused = atomicrmw udec_wrap ptr addrspace(1) %gep1, i32 %data syncscope("agent") monotonic
ret void
}
@@ -3607,7 +3607,7 @@ define amdgpu_ps <2 x float> @global_dec_saddr_i64_rtn(ptr addrspace(1) inreg %s
; GFX11-NEXT: ; return to shader part epilog
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
- %rtn = atomicrmw udec_wrap ptr addrspace(1) %gep0, i64 %data monotonic
+ %rtn = atomicrmw udec_wrap ptr addrspace(1) %gep0, i64 %data syncscope("agent") monotonic
%cast.rtn = bitcast i64 %rtn to <2 x float>
ret <2 x float> %cast.rtn
}
@@ -3627,7 +3627,7 @@ define amdgpu_ps <2 x float> @global_dec_saddr_i64_rtn_neg128(ptr addrspace(1) i
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
%gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
- %rtn = atomicrmw udec_wrap ptr addrspace(1) %gep1, i64 %data monotonic
+ %rtn = atomicrmw udec_wrap ptr addrspace(1) %gep1, i64 %data syncscope("agent") monotonic
%cast.rtn = bitcast i64 %rtn to <2 x float>
ret <2 x float> %cast.rtn
}
@@ -3646,7 +3646,7 @@ define amdgpu_ps void @global_dec_saddr_i64_nortn(ptr addrspace(1) inreg %sbase,
; GFX11-NEXT: s_endpgm
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
- %unused = atomicrmw udec_wrap ptr addrspace(1) %gep0, i64 %data monotonic
+ %unused = atomicrmw udec_wrap ptr addrspace(1) %gep0, i64 %data syncscope("agent") monotonic
ret void
}
@@ -3665,7 +3665,7 @@ define amdgpu_ps void @global_dec_saddr_i64_nortn_neg128(ptr addrspace(1) inreg
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
%gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
- %unused = atomicrmw udec_wrap ptr addrspace(1) %gep1, i64 %data monotonic
+ %unused = atomicrmw udec_wrap ptr addrspace(1) %gep1, i64 %data syncscope("agent") monotonic
ret void
}
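
The pattern across all of the hunks above is the same: the tests previously used the default (system) synchronization scope, and they now name syncscope("agent") explicitly. As a rough sketch of the distinction (a standalone example written for illustration, not taken from the patch; the function and value names are made up):

    define void @scope_sketch(ptr addrspace(1) %p, i32 %v) {
      ; Default scope is "system": the operation must be ordered against
      ; every agent that can observe the memory, including the host CPU.
      %sys = atomicrmw add ptr addrspace(1) %p, i32 %v seq_cst
      ; "agent" scope only orders against other work on the same GPU,
      ; which is the case the backend can map onto a single hardware
      ; global atomic instruction.
      %agt = atomicrmw add ptr addrspace(1) %p, i32 %v syncscope("agent") seq_cst
      ret void
    }
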
diff --git a/llvm/test/CodeGen/AMDGPU/global_atomics.ll b/llvm/test/CodeGen/AMDGPU/global_atomics.ll
index 8dab061aab4789..08e06d4dd015a5 100644
--- a/llvm/test/CodeGen/AMDGPU/global_atomics.ll
+++ b/llvm/test/CodeGen/AMDGPU/global_atomics.ll
@@ -46,7 +46,7 @@ define amdgpu_kernel void @atomic_add_i32_offset(ptr addrspace(1) %out, i32 %in)
; GFX9-NEXT: s_endpgm
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i64 4
- %val = atomicrmw volatile add ptr addrspace(1) %gep, i32 %in seq_cst
+ %val = atomicrmw volatile add ptr addrspace(1) %gep, i32 %in syncscope("agent") seq_cst
ret void
}
@@ -97,7 +97,7 @@ define amdgpu_kernel void @atomic_add_i32_max_neg_offset(ptr addrspace(1) %out,
; GFX9-NEXT: s_endpgm
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i64 -1024
- %val = atomicrmw volatile add ptr addrspace(1) %gep, i32 %in seq_cst
+ %val = atomicrmw volatile add ptr addrspace(1) %gep, i32 %in syncscope("agent") seq_cst
ret void
}
@@ -146,7 +146,7 @@ define amdgpu_kernel void @atomic_add_i32_soffset(ptr addrspace(1) %out, i32 %in
; GFX9-NEXT: s_endpgm
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i64 9000
- %val = atomicrmw volatile add ptr addrspace(1) %gep, i32 %in seq_cst
+ %val = atomicrmw volatile add ptr addrspace(1) %gep, i32 %in syncscope("agent") seq_cst
ret void
}
@@ -200,7 +200,7 @@ define amdgpu_kernel void @atomic_add_i32_huge_offset(ptr addrspace(1) %out, i32
; GFX9-NEXT: s_endpgm
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i64 47224239175595
- %val = atomicrmw volatile add ptr addrspace(1) %gep, i32 %in seq_cst
+ %val = atomicrmw volatile add ptr addrspace(1) %gep, i32 %in syncscope("agent") seq_cst
ret void
}
@@ -258,7 +258,7 @@ define amdgpu_kernel void @atomic_add_i32_ret_offset(ptr addrspace(1) %out, ptr
; GFX9-NEXT: s_endpgm
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i64 4
- %val = atomicrmw volatile add ptr addrspace(1) %gep, i32 %in seq_cst
+ %val = atomicrmw volatile add ptr addrspace(1) %gep, i32 %in syncscope("agent") seq_cst
store i32 %val, ptr addrspace(1) %out2
ret void
}
@@ -321,7 +321,7 @@ define amdgpu_kernel void @atomic_add_i32_addr64_offset(ptr addrspace(1) %out, i
entry:
%ptr = getelementptr i32, ptr addrspace(1) %out, i64 %index
%gep = getelementptr i32, ptr addrspace(1) %ptr, i64 4
- %val = atomicrmw volatile add ptr addrspace(1) %gep, i32 %in seq_cst
+ %val = atomicrmw volatile add ptr addrspace(1) %gep, i32 %in syncscope("agent") seq_cst
ret void
}
@@ -394,7 +394,7 @@ define amdgpu_kernel void @atomic_add_i32_ret_addr64_offset(ptr addrspace(1) %ou
entry:
%ptr = getelementptr i32, ptr addrspace(1) %out, i64 %index
%gep = getelementptr i32, ptr addrspace(1) %ptr, i64 4
- %val = atomicrmw volatile add ptr addrspace(1) %gep, i32 %in seq_cst
+ %val = atomicrmw volatile add ptr addrspace(1) %gep, i32 %in syncscope("agent") seq_cst
store i32 %val, ptr addrspace(1) %out2
ret void
}
@@ -441,7 +441,7 @@ define amdgpu_kernel void @atomic_add_i32(ptr addrspace(1) %out, i32 %in) {
; GFX9-NEXT: buffer_wbinvl1_vol
; GFX9-NEXT: s_endpgm
entry:
- %val = atomicrmw volatile add ptr addrspace(1) %out, i32 %in seq_cst
+ %val = atomicrmw volatile add ptr addrspace(1) %out, i32 %in syncscope("agent") seq_cst
ret void
}
@@ -498,7 +498,7 @@ define amdgpu_kernel void @atomic_add_i32_ret(ptr addrspace(1) %out, ptr addrspa
; GFX9-NEXT: global_store_dword v0, v1, s[6:7]
; GFX9-NEXT: s_endpgm
entry:
- %val = atomicrmw volatile add ptr addrspace(1) %out, i32 %in seq_cst
+ %val = atomicrmw volatile add ptr addrspace(1) %out, i32 %in syncscope("agent") seq_cst
store i32 %val, ptr addrspace(1) %out2
ret void
}
@@ -558,7 +558,7 @@ define amdgpu_kernel void @atomic_add_i32_addr64(ptr addrspace(1) %out, i32 %in,
; GFX9-NEXT: s_endpgm
entry:
%ptr = getelementptr i32, ptr addrspace(1) %out, i64 %index
- %val = atomicrmw volatile add ptr addrspace(1) %ptr, i32 %in seq_cst
+ %val = atomicrmw volatile add ptr addrspace(1) %ptr, i32 %in syncscope("agent") seq_cst
ret void
}
@@ -628,7 +628,7 @@ define amdgpu_kernel void @atomic_add_i32_ret_addr64(ptr addrspace(1) %out, ptr
; GFX9-NEXT: s_endpgm
entry:
%ptr = getelementptr i32, ptr addrspace(1) %out, i64 %index
- %val = atomicrmw volatile add ptr addrspace(1) %ptr, i32 %in seq_cst
+ %val = atomicrmw volatile add ptr addrspace(1) %ptr, i32 %in syncscope("agent") seq_cst
store i32 %val, ptr addrspace(1) %out2
ret void
}
@@ -676,7 +676,7 @@ define amdgpu_kernel void @atomic_and_i32_offset(ptr addrspace(1) %out, i32 %in)
; GFX9-NEXT: s_endpgm
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i64 4
- %val = atomicrmw volatile and ptr addrspace(1) %gep, i32 %in seq_cst
+ %val = atomicrmw volatile and ptr addrspace(1) %gep, i32 %in syncscope("agent") seq_cst
ret void
}
@@ -734,7 +734,7 @@ define amdgpu_kernel void @atomic_and_i32_ret_offset(ptr addrspace(1) %out, ptr
; GFX9-NEXT: s_endpgm
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i64 4
- %val = atomicrmw volatile and ptr addrspace(1) %gep, i32 %in seq_cst
+ %val = atomicrmw volatile and ptr addrspace(1) %gep, i32 %in syncscope("agent") seq_cst
store i32 %val, ptr addrspace(1) %out2
ret void
}
@@ -797,7 +797,7 @@ define amdgpu_kernel void @atomic_and_i32_addr64_offset(ptr addrspace(1) %out, i
entry:
%ptr = getelementptr i32, ptr addrspace(1) %out, i64 %index
%gep = getelementptr i32, ptr addrspace(1) %ptr, i64 4
- %val = atomicrmw volatile and ptr addrspace(1) %gep, i32 %in seq_cst
+ %val = atomicrmw volatile and ptr addrspace(1) %gep, i32 %in syncscope("agent") seq_cst
ret void
}
@@ -870,7 +870,7 @@ define amdgpu_kernel void @atomic_and_i32_ret_addr64_offset(ptr addrspace(1) %ou
entry:
%ptr = getelementptr i32, ptr addrspace(1) %out, i64 %index
%gep = getelementptr i32, ptr addrspace(1) %ptr, i64 4
- %val = atomicrmw volatile and ptr addrspace(1) %gep, i32 %in seq_cst
+ %val = atomicrmw volatile and ptr addrspace(1) %gep, i32 %in syncscope("agent") seq_cst
store i32 %val, ptr addrspace(1) %out2
ret void
}
@@ -917,7 +917,7 @@ define amdgpu_kernel void @atomic_and_i32(ptr addrspace(1) %out, i32 %in) {
; GFX9-NEXT: buffer_wbinvl1_vol
; GFX9-NEXT: s_endpgm
entry:
- %val = atomicrmw volatile and ptr addrspace(1) %out, i32 %in seq_cst
+ %val = atomicrmw volatile and ptr addrspace(1) %out, i32 %in syncscope("agent") seq_cst
ret void
}
@@ -974,7 +974,7 @@ define amdgpu_kernel void @atomic_and_i32_ret(ptr addrspace(1) %out, ptr addrspa
; GFX9-NEXT: global_store_dword v0, v1, s[6:7]
; GFX9-NEXT: s_endpgm
entry:
- %val = atomicrmw volatile and ptr addrspace(1) %out, i32 %in seq_cst
+ %val = atomicrmw volatile and ptr addrspace(1) %out, i32 %in syncscope("agent") seq_cst
store i32 %val, ptr addrspace(1) %out2
ret void
}
@@ -1034,7 +1034,7 @@ define amdgpu_kernel void @atomic_and_i32_addr64(ptr addrspace(1) %out, i32 %in,
; GFX9-NEXT: s_endpgm
entry:
%ptr = getelementptr i32, ptr addrspace(1) %out, i64 %index
- %val = atomicrmw volatile and ptr addrspace(1) %ptr, i32 %in seq_cst
+ %val = atomicrmw volatile and ptr addrspace(1) %ptr, i32 %in syncscope("agent") seq_cst
ret void
}
@@ -1104,7 +1104,7 @@ define amdgpu_kernel void @atomic_and_i32_ret_addr64(ptr addrspace(1) %out, ptr
; GFX9-NEXT: s_endpgm
entry:
%ptr = getelementptr i32, ptr addrspace(1) %out, i64 %index
- %val = atomicrmw volatile and ptr addrspace(1) %ptr, i32 %in seq_cst
+ %val = atomicrmw volatile and ptr addrspace(1) %ptr, i32 %in syncscope("agent") seq_cst
store i32 %val, ptr addrspace(1) %out2
ret void
}
@@ -1152,7 +1152,7 @@ define amdgpu_kernel void @atomic_sub_i32_offset(ptr addrspace(1) %out, i32 %in)
; GFX9-NEXT: s_endpgm
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i64 4
- %val = atomicrmw volatile sub ptr addrspace(1) %gep, i32 %in seq_cst
+ %val = atomicrmw volatile sub ptr addrspace(1) %gep, i32 %in syncscope("agent") seq_cst
ret void
}
@@ -1210,7 +1210,7 @@ define amdgpu_kernel void @atomic_sub_i32_ret_offset(ptr addrspace(1) %out, ptr
; GFX9-NEXT: s_endpgm
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i64 4
- %val = atomicrmw volatile sub ptr addrspace(1) %gep, i32 %in seq_cst
+ %val = atomicrmw volatile sub ptr addrspace(1) %gep, i32 %in syncscope("agent") seq_cst
store i32 %val, ptr addrspace(1) %out2
ret void
}
@@ -1273,7 +1273,7 @@ define amdgpu_kernel void @atomic_sub_i32_addr64_offset(ptr addrspace(1) %out, i
entry:
%ptr = getelementptr i32, ptr addrspace(1) %out, i64 %index
%gep = getelementptr i32, ptr addrspace(1) %ptr, i64 4
- %val = atomicrmw volatile sub ptr addrspace(1) %gep, i32 %in seq_cst
+ %val = atomicrmw volatile sub ptr addrspace(1) %gep, i32 %in syncscope("agent") seq_cst
ret void
}
@@ -1346,7 +1346,7 @@ define amdgpu_kernel void @atomic_sub_i32_ret_addr64_offset(ptr addrspace(1) %ou
entry:
%ptr = getelementptr i32, ptr addrspace(1) %out, i64 %index
%gep = getelementptr i32, ptr addrspace(1) %ptr, i64 4
- %val = atomicrmw volatile sub ptr addrspace(1) %gep, i32 %in seq_cst
+ %val = atomicrmw volatile sub ptr addrspace(1) %gep, i32 %in syncscope("agent") seq_cst
store i32 %val, ptr addrspace(1) %out2
ret void
}
@@ -1393,7 +1393,7 @@ define amdgpu_kernel void @atomic_sub_i32(ptr addrspace(1) %out, i32 %in) {
; GFX9-NEXT: buffer_wbinvl1_vol
; GFX9-NEXT: s_endpgm
entry:
- %val = atomicrmw volatile sub ptr addrspace(1) %out, i32 %in seq_cst
+ %val = atomicrmw volatile sub ptr addrspace(1) %out, i32 %in syncscope("agent") seq_cst
ret void
}
@@ -1450,7 +1450,7 @@ define amdgpu_kernel void @atomic_sub_i32_ret(ptr addrspace(1) %out, ptr addrspa
; GFX9-NEXT: global_store_dword v0, v1, s[6:7]
; GFX9-NEXT: s_endpgm
entry:
- %val = atomicrmw volatile sub ptr addrspace(1) %out, i32 %in seq_cst
+ %val = atomicrmw volatile sub ptr addrspace(1) %out, i32 %in syncscope("agent") seq_cst
store i32 %val, ptr addrspace(1) %out2
ret void
}
@@ -1510,7 +1510,7 @@ define amdgpu_kernel void @atomic_sub_i32_addr64(ptr addrspace(1) %out, i32 %in,
; GFX9-NEXT: s_endpgm
entry:
%ptr = getelementptr i32, ptr addrspace(1) %out, i64 %index
- %val = atomicrmw volatile sub ptr addrspace(1) %ptr, i32 %in seq_cst
+ %val = atomicrmw volatile sub ptr addrspace(1) %ptr, i32 %in syncscope("agent") seq_cst
ret void
}
@@ -1580,7 +1580,7 @@ define amdgpu_kernel void @atomic_sub_i32_ret_addr64(ptr addrspace(1) %out, ptr
; GFX9-NEXT: s_endpgm
entry:
%ptr = getelementptr i32, ptr addrspace(1) %out, i64 %index
- %val = atomicrmw volatile sub ptr addrspace(1) %ptr, i32 %in seq_cst
+ %val = atomicrmw volatile sub ptr addrspace(1) %ptr, i32 %in syncscope("agent") seq_cst
store i32 %val, ptr addrspace(1) %out2
ret void
}
@@ -1588,90 +1588,47 @@ entry:
define amdgpu_kernel void @atomic_max_i32_offset(ptr addrspace(1) %out, i32 %in) {
; SI-LABEL: atomic_max_i32_offset:
; SI: ; %bb.0: ; %entry
-; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
-; SI-NEXT: s_load_dword s2, s[0:1], 0xb
-; SI-NEXT: s_waitcnt lgkmcnt(0)
-; SI-NEXT: s_load_dword s3, s[4:5], 0x4
-; SI-NEXT: s_mov_b64 s[0:1], 0
-; SI-NEXT: s_mov_b32 s7, 0xf000
+; SI-NEXT: s_load_dword s4, s[0:1], 0xb
+; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SI-NEXT: s_mov_b32 s3, 0xf000
+; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: s_waitcnt lgkmcnt(0)
-; SI-NEXT: v_mov_b32_e32 v1, s3
-; SI-NEXT: s_mov_b32 s6, -1
-; SI-NEXT: .LBB27_1: ; %atomicrmw.start
-; SI-NEXT: ; =>This Inner Loop Header: Depth=1
-; SI-NEXT: v_max_i32_e32 v0, s2, v1
-; SI-NEXT: s_waitcnt expcnt(0)
-; SI-NEXT: v_mov_b32_e32 v3, v1
-; SI-NEXT: v_mov_b32_e32 v2, v0
+; SI-NEXT: v_mov_b32_e32 v0, s4
; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; SI-NEXT: buffer_atomic_cmpswap v[2:3], off, s[4:7], 0 offset:16 glc
+; SI-NEXT: buffer_atomic_smax v0, off, s[0:3], 0 offset:16
; SI-NEXT: s_waitcnt vmcnt(0)
; SI-NEXT: buffer_wbinvl1
-; SI-NEXT: v_cmp_eq_u32_e32 vcc, v2, v1
-; SI-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
-; SI-NEXT: v_mov_b32_e32 v1, v2
-; SI-NEXT: s_andn2_b64 exec, exec, s[0:1]
-; SI-NEXT: s_cbranch_execnz .LBB27_1
-; SI-NEXT: ; %bb.2: ; %atomicrmw.end
; SI-NEXT: s_endpgm
;
; VI-LABEL: atomic_max_i32_offset:
; VI: ; %bb.0: ; %entry
-; VI-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x24
-; VI-NEXT: s_load_dword s6, s[0:1], 0x2c
-; VI-NEXT: s_mov_b64 s[4:5], 0
+; VI-NEXT: s_load_dword s4, s[0:1], 0x2c
+; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT: s_mov_b32 s3, 0xf000
-; VI-NEXT: s_waitcnt lgkmcnt(0)
-; VI-NEXT: s_load_dword s2, s[8:9], 0x10
-; VI-NEXT: s_add_u32 s0, s8, 16
-; VI-NEXT: s_addc_u32 s1, s9, 0
-; VI-NEXT: s_waitcnt lgkmcnt(0)
-; VI-NEXT: v_mov_b32_e32 v1, s2
; VI-NEXT: s_mov_b32 s2, -1
-; VI-NEXT: .LBB27_1: ; %atomicrmw.start
-; VI-NEXT: ; =>This Inner Loop Header: Depth=1
-; VI-NEXT: v_max_i32_e32 v0, s6, v1
-; VI-NEXT: v_mov_b32_e32 v3, v1
-; VI-NEXT: v_mov_b32_e32 v2, v0
+; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: v_mov_b32_e32 v0, s4
; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; VI-NEXT: buffer_atomic_cmpswap v[2:3], off, s[0:3], 0 glc
+; VI-NEXT: buffer_atomic_smax v0, off, s[0:3], 0 offset:16
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_wbinvl1_vol
-; VI-NEXT: v_cmp_eq_u32_e32 vcc, v2, v1
-; VI-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
-; VI-NEXT: v_mov_b32_e32 v1, v2
-; VI-NEXT: s_andn2_b64 exec, exec, s[4:5]
-; VI-NEXT: s_cbranch_execnz .LBB27_1
-; VI-NEXT: ; %bb.2: ; %atomicrmw.end
; VI-NEXT: s_endpgm
;
; GFX9-LABEL: atomic_max_i32_offset:
; GFX9: ; %bb.0: ; %entry
-; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
; GFX9-NEXT: s_load_dword s4, s[0:1], 0x2c
-; GFX9-NEXT: s_mov_b64 s[0:1], 0
-; GFX9-NEXT: v_mov_b32_e32 v2, 0
-; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: s_load_dword s5, s[2:3], 0x10
+; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GFX9-NEXT: v_mov_b32_e32 v0, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: v_mov_b32_e32 v1, s5
-; GFX9-NEXT: .LBB27_1: ; %atomicrmw.start
-; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX9-NEXT: v_max_i32_e32 v0, s4, v1
+; GFX9-NEXT: v_mov_b32_e32 v1, s4
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX9-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[2:3] offset:16 glc
+; GFX9-NEXT: global_atomic_smax v0, v1, s[2:3] offset:16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_wbinvl1_vol
-; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v0, v1
-; GFX9-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
-; GFX9-NEXT: v_mov_b32_e32 v1, v0
-; GFX9-NEXT: s_andn2_b64 exec, exec, s[0:1]
-; GFX9-NEXT: s_cbranch_execnz .LBB27_1
-; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX9-NEXT: s_endpgm
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i64 4
- %val = atomicrmw volatile max ptr addrspace(1) %gep, i32 %in seq_cst
+ %val = atomicrmw volatile max ptr addrspace(1) %gep, i32 %in syncscope("agent") seq_cst
ret void
}
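
This hunk is the one place in the file where the effect on instruction selection is visible in the checks themselves: at the default (system) scope, the seq_cst signed max was expanded into the .LBB27_1 compare-and-swap retry loop that the removed lines spell out, while at agent scope it becomes a single atomic. The GFX9 change, condensed from the lines above:

    - global_atomic_cmpswap v0, v2, v[0:1], s[2:3] offset:16 glc   (inside a retry loop)
    + global_atomic_smax v0, v1, s[2:3] offset:16
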
@@ -3437,7 +3394,7 @@ define amdgpu_kernel void @atomic_or_i32_offset(ptr addrspace(1) %out, i32 %in)
; GFX9-NEXT: s_endpgm
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i64 4
- %val = atomicrmw volatile or ptr addrspace(1) %gep, i32 %in seq_cst
+ %val = atomicrmw volatile or ptr addrspace(1) %gep, i32 %in syncscope("agent") seq_cst
ret void
}
@@ -3495,7 +3452,7 @@ define amdgpu_kernel void @atomic_or_i32_ret_offset(ptr addrspace(1) %out, ptr a
; GFX9-NEXT: s_endpgm
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i64 4
- %val = atomicrmw volatile or ptr addrspace(1) %gep, i32 %in seq_cst
+ %val = atomicrmw volatile or ptr addrspace(1) %gep, i32 %in syncscope("agent") seq_cst
store i32 %val, ptr addrspace(1) %out2
ret void
}
@@ -3558,7 +3515,7 @@ define amdgpu_kernel void @atomic_or_i32_addr64_offset(ptr addrspace(1) %out, i3
entry:
%ptr = getelementptr i32, ptr addrspace(1) %out, i64 %index
%gep = getelementptr i32, ptr addrspace(1) %ptr, i64 4
- %val = atomicrmw volatile or ptr addrspace(1) %gep, i32 %in seq_cst
+ %val = atomicrmw volatile or ptr addrspace(1) %gep, i32 %in syncscope("agent") seq_cst
ret void
}
@@ -3631,7 +3588,7 @@ define amdgpu_kernel void @atomic_or_i32_ret_addr64_offset(ptr addrspace(1) %out
entry:
%ptr = getelementptr i32, ptr addrspace(1) %out, i64 %index
%gep = getelementptr i32, ptr addrspace(1) %ptr, i64 4
- %val = atomicrmw volatile or ptr addrspace(1) %gep, i32 %in seq_cst
+ %val = atomicrmw volatile or ptr addrspace(1) %gep, i32 %in syncscope("agent") seq_cst
store i32 %val, ptr addrspace(1) %out2
ret void
}
@@ -3678,7 +3635,7 @@ define amdgpu_kernel void @atomic_or_i32(ptr addrspace(1) %out, i32 %in) {
; GFX9-NEXT: buffer_wbinvl1_vol
; GFX9-NEXT: s_endpgm
entry:
- %val = atomicrmw volatile or ptr addrspace(1) %out, i32 %in seq_cst
+ %val = atomicrmw volatile or ptr addrspace(1) %out, i32 %in syncscope("agent") seq_cst
ret void
}
@@ -3735,7 +3692,7 @@ define amdgpu_kernel void @atomic_or_i32_ret(ptr addrspace(1) %out, ptr addrspac
; GFX9-NEXT: global_store_dword v0, v1, s[6:7]
; GFX9-NEXT: s_endpgm
entry:
- %val = atomicrmw volatile or ptr addrspace(1) %out, i32 %in seq_cst
+ %val = atomicrmw volatile or ptr addrspace(1) %out, i32 %in syncscope("agent") seq_cst
store i32 %val, ptr addrspace(1) %out2
ret void
}
@@ -3795,7 +3752,7 @@ define amdgpu_kernel void @atomic_or_i32_addr64(ptr addrspace(1) %out, i32 %in,
; GFX9-NEXT: s_endpgm
entry:
%ptr = getelementptr i32, ptr addrspace(1) %out, i64 %index
- %val = atomicrmw volatile or ptr addrspace(1) %ptr, i32 %in seq_cst
+ %val = atomicrmw volatile or ptr addrspace(1) %ptr, i32 %in syncscope("agent") seq_cst
ret void
}
@@ -3865,7 +3822,7 @@ define amdgpu_kernel void @atomic_or_i32_ret_addr64(ptr addrspace(1) %out, ptr a
; GFX9-NEXT: s_endpgm
entry:
%ptr = getelementptr i32, ptr addrspace(1) %out, i64 %index
- %val = atomicrmw volatile or ptr addrspace(1) %ptr, i32 %in seq_cst
+ %val = atomicrmw volatile or ptr addrspace(1) %ptr, i32 %in syncscope("agent") seq_cst
store i32 %val, ptr addrspace(1) %out2
ret void
}
@@ -3913,7 +3870,7 @@ define amdgpu_kernel void @atomic_xchg_i32_offset(ptr addrspace(1) %out, i32 %in
; GFX9-NEXT: s_endpgm
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i64 4
- %val = atomicrmw volatile xchg ptr addrspace(1) %gep, i32 %in seq_cst
+ %val = atomicrmw volatile xchg ptr addrspace(1) %gep, i32 %in syncscope("agent") seq_cst
ret void
}
@@ -3960,7 +3917,7 @@ define amdgpu_kernel void @atomic_xchg_f32_offset(ptr addrspace(1) %out, float %
; GFX9-NEXT: s_endpgm
entry:
%gep = getelementptr float, ptr addrspace(1) %out, i64 4
- %val = atomicrmw volatile xchg ptr addrspace(1) %gep, float %in seq_cst
+ %val = atomicrmw volatile xchg ptr addrspace(1) %gep, float %in syncscope("agent") seq_cst
ret void
}
@@ -4018,7 +3975,7 @@ define amdgpu_kernel void @atomic_xchg_i32_ret_offset(ptr addrspace(1) %out, ptr
; GFX9-NEXT: s_endpgm
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i64 4
- %val = atomicrmw volatile xchg ptr addrspace(1) %gep, i32 %in seq_cst
+ %val = atomicrmw volatile xchg ptr addrspace(1) %gep, i32 %in syncscope("agent") seq_cst
store i32 %val, ptr addrspace(1) %out2
ret void
}
@@ -4081,7 +4038,7 @@ define amdgpu_kernel void @atomic_xchg_i32_addr64_offset(ptr addrspace(1) %out,
entry:
%ptr = getelementptr i32, ptr addrspace(1) %out, i64 %index
%gep = getelementptr i32, ptr addrspace(1) %ptr, i64 4
- %val = atomicrmw volatile xchg ptr addrspace(1) %gep, i32 %in seq_cst
+ %val = atomicrmw volatile xchg ptr addrspace(1) %gep, i32 %in syncscope("agent") seq_cst
ret void
}
@@ -4154,7 +4111,7 @@ define amdgpu_kernel void @atomic_xchg_i32_ret_addr64_offset(ptr addrspace(1) %o
entry:
%ptr = getelementptr i32, ptr addrspace(1) %out, i64 %index
%gep = getelementptr i32, ptr addrspace(1) %ptr, i64 4
- %val = atomicrmw volatile xchg ptr addrspace(1) %gep, i32 %in seq_cst
+ %val = atomicrmw volatile xchg ptr addrspace(1) %gep, i32 %in syncscope("agent") seq_cst
store i32 %val, ptr addrspace(1) %out2
ret void
}
@@ -4201,7 +4158,7 @@ define amdgpu_kernel void @atomic_xchg_i32(ptr addrspace(1) %out, i32 %in) {
; GFX9-NEXT: buffer_wbinvl1_vol
; GFX9-NEXT: s_endpgm
entry:
- %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in seq_cst
+ %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("agent") seq_cst
ret void
}
@@ -4258,7 +4215,7 @@ define amdgpu_kernel void @atomic_xchg_i32_ret(ptr addrspace(1) %out, ptr addrsp
; GFX9-NEXT: global_store_dword v0, v1, s[6:7]
; GFX9-NEXT: s_endpgm
entry:
- %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in seq_cst
+ %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("agent") seq_cst
store i32 %val, ptr addrspace(1) %out2
ret void
}
@@ -4318,7 +4275,7 @@ define amdgpu_kernel void @atomic_xchg_i32_addr64(ptr addrspace(1) %out, i32 %in
; GFX9-NEXT: s_endpgm
entry:
%ptr = getelementptr i32, ptr addrspace(1) %out, i64 %index
- %val = atomicrmw volatile xchg ptr addrspace(1) %ptr, i32 %in seq_cst
+ %val = atomicrmw volatile xchg ptr addrspace(1) %ptr, i32 %in syncscope("agent") seq_cst
ret void
}
@@ -4388,7 +4345,7 @@ define amdgpu_kernel void @atomic_xchg_i32_ret_addr64(ptr addrspace(1) %out, ptr
; GFX9-NEXT: s_endpgm
entry:
%ptr = getelementptr i32, ptr addrspace(1) %out, i64 %index
- %val = atomicrmw volatile xchg ptr addrspace(1) %ptr, i32 %in seq_cst
+ %val = atomicrmw volatile xchg ptr addrspace(1) %ptr, i32 %in syncscope("agent") seq_cst
store i32 %val, ptr addrspace(1) %out2
ret void
}
@@ -4440,7 +4397,7 @@ define amdgpu_kernel void @atomic_cmpxchg_i32_offset(ptr addrspace(1) %out, i32
; GFX9-NEXT: s_endpgm
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i64 4
- %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in seq_cst seq_cst
+ %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent") seq_cst seq_cst
ret void
}
@@ -4501,7 +4458,7 @@ define amdgpu_kernel void @atomic_cmpxchg_i32_ret_offset(ptr addrspace(1) %out,
; GFX9-NEXT: s_endpgm
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i64 4
- %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in seq_cst seq_cst
+ %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent") seq_cst seq_cst
%extract0 = extractvalue { i32, i1 } %val, 0
store i32 %extract0, ptr addrspace(1) %out2
ret void
@@ -4571,7 +4528,7 @@ define amdgpu_kernel void @atomic_cmpxchg_i32_addr64_offset(ptr addrspace(1) %ou
entry:
%ptr = getelementptr i32, ptr addrspace(1) %out, i64 %index
%gep = getelementptr i32, ptr addrspace(1) %ptr, i64 4
- %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in seq_cst seq_cst
+ %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent") seq_cst seq_cst
ret void
}
@@ -4650,7 +4607,7 @@ define amdgpu_kernel void @atomic_cmpxchg_i32_ret_addr64_offset(ptr addrspace(1)
entry:
%ptr = getelementptr i32, ptr addrspace(1) %out, i64 %index
%gep = getelementptr i32, ptr addrspace(1) %ptr, i64 4
- %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in seq_cst seq_cst
+ %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent") seq_cst seq_cst
%extract0 = extractvalue { i32, i1 } %val, 0
store i32 %extract0, ptr addrspace(1) %out2
ret void
@@ -4702,7 +4659,7 @@ define amdgpu_kernel void @atomic_cmpxchg_i32(ptr addrspace(1) %out, i32 %in, i3
; GFX9-NEXT: buffer_wbinvl1_vol
; GFX9-NEXT: s_endpgm
entry:
- %val = cmpxchg volatile ptr addrspace(1) %out, i32 %old, i32 %in seq_cst seq_cst
+ %val = cmpxchg volatile ptr addrspace(1) %out, i32 %old, i32 %in syncscope("agent") seq_cst seq_cst
ret void
}
@@ -4762,7 +4719,7 @@ define amdgpu_kernel void @atomic_cmpxchg_i32_ret(ptr addrspace(1) %out, ptr add
; GFX9-NEXT: global_store_dword v2, v0, s[6:7]
; GFX9-NEXT: s_endpgm
entry:
- %val = cmpxchg volatile ptr addrspace(1) %out, i32 %old, i32 %in seq_cst seq_cst
+ %val = cmpxchg volatile ptr addrspace(1) %out, i32 %old, i32 %in syncscope("agent") seq_cst seq_cst
%extract0 = extractvalue { i32, i1 } %val, 0
store i32 %extract0, ptr addrspace(1) %out2
ret void
@@ -4829,7 +4786,7 @@ define amdgpu_kernel void @atomic_cmpxchg_i32_addr64(ptr addrspace(1) %out, i32
; GFX9-NEXT: s_endpgm
entry:
%ptr = getelementptr i32, ptr addrspace(1) %out, i64 %index
- %val = cmpxchg volatile ptr addrspace(1) %ptr, i32 %old, i32 %in seq_cst seq_cst
+ %val = cmpxchg volatile ptr addrspace(1) %ptr, i32 %old, i32 %in syncscope("agent") seq_cst seq_cst
ret void
}
@@ -4905,7 +4862,7 @@ define amdgpu_kernel void @atomic_cmpxchg_i32_ret_addr64(ptr addrspace(1) %out,
; GFX9-NEXT: s_endpgm
entry:
%ptr = getelementptr i32, ptr addrspace(1) %out, i64 %index
- %val = cmpxchg volatile ptr addrspace(1) %ptr, i32 %old, i32 %in seq_cst seq_cst
+ %val = cmpxchg volatile ptr addrspace(1) %ptr, i32 %old, i32 %in syncscope("agent") seq_cst seq_cst
%extract0 = extractvalue { i32, i1 } %val, 0
store i32 %extract0, ptr addrspace(1) %out2
ret void
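
For cmpxchg the scope slots into the same position, just before the two orderings: cmpxchg names a success ordering and then a failure ordering, and yields a { i32, i1 } pair of loaded value and success flag, which is why the ret variants above extract field 0 before storing. A minimal sketch (illustrative names, not from the patch):

    define i32 @cas_sketch(ptr addrspace(1) %p, i32 %old, i32 %new) {
      %pair = cmpxchg volatile ptr addrspace(1) %p, i32 %old, i32 %new syncscope("agent") seq_cst seq_cst
      ; Field 0 is the value loaded from memory; field 1 is the i1 success bit.
      %loaded = extractvalue { i32, i1 } %pair, 0
      ret i32 %loaded
    }
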
@@ -4954,7 +4911,7 @@ define amdgpu_kernel void @atomic_xor_i32_offset(ptr addrspace(1) %out, i32 %in)
; GFX9-NEXT: s_endpgm
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i64 4
- %val = atomicrmw volatile xor ptr addrspace(1) %gep, i32 %in seq_cst
+ %val = atomicrmw volatile xor ptr addrspace(1) %gep, i32 %in syncscope("agent") seq_cst
ret void
}
@@ -5012,7 +4969,7 @@ define amdgpu_kernel void @atomic_xor_i32_ret_offset(ptr addrspace(1) %out, ptr
; GFX9-NEXT: s_endpgm
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i64 4
- %val = atomicrmw volatile xor ptr addrspace(1) %gep, i32 %in seq_cst
+ %val = atomicrmw volatile xor ptr addrspace(1) %gep, i32 %in syncscope("agent") seq_cst
store i32 %val, ptr addrspace(1) %out2
ret void
}
@@ -5075,7 +5032,7 @@ define amdgpu_kernel void @atomic_xor_i32_addr64_offset(ptr addrspace(1) %out, i
entry:
%ptr = getelementptr i32, ptr addrspace(1) %out, i64 %index
%gep = getelementptr i32, ptr addrspace(1) %ptr, i64 4
- %val = atomicrmw volatile xor ptr addrspace(1) %gep, i32 %in seq_cst
+ %val = atomicrmw volatile xor ptr addrspace(1) %gep, i32 %in syncscope("agent") seq_cst
ret void
}
@@ -5148,7 +5105,7 @@ define amdgpu_kernel void @atomic_xor_i32_ret_addr64_offset(ptr addrspace(1) %ou
entry:
%ptr = getelementptr i32, ptr addrspace(1) %out, i64 %index
%gep = getelementptr i32, ptr addrspace(1) %ptr, i64 4
- %val = atomicrmw volatile xor ptr addrspace(1) %gep, i32 %in seq_cst
+ %val = atomicrmw volatile xor ptr addrspace(1) %gep, i32 %in syncscope("agent") seq_cst
store i32 %val, ptr addrspace(1) %out2
ret void
}
@@ -5195,7 +5152,7 @@ define amdgpu_kernel void @atomic_xor_i32(ptr addrspace(1) %out, i32 %in) {
; GFX9-NEXT: buffer_wbinvl1_vol
; GFX9-NEXT: s_endpgm
entry:
- %val = atomicrmw volatile xor ptr addrspace(1) %out, i32 %in seq_cst
+ %val = atomicrmw volatile xor ptr addrspace(1) %out, i32 %in syncscope("agent") seq_cst
ret void
}
@@ -5252,7 +5209,7 @@ define amdgpu_kernel void @atomic_xor_i32_ret(ptr addrspace(1) %out, ptr addrspa
; GFX9-NEXT: global_store_dword v0, v1, s[6:7]
; GFX9-NEXT: s_endpgm
entry:
- %val = atomicrmw volatile xor ptr addrspace(1) %out, i32 %in seq_cst
+ %val = atomicrmw volatile xor ptr addrspace(1) %out, i32 %in syncscope("agent") seq_cst
store i32 %val, ptr addrspace(1) %out2
ret void
}
@@ -5312,7 +5269,7 @@ define amdgpu_kernel void @atomic_xor_i32_addr64(ptr addrspace(1) %out, i32 %in,
; GFX9-NEXT: s_endpgm
entry:
%ptr = getelementptr i32, ptr addrspace(1) %out, i64 %index
- %val = atomicrmw volatile xor ptr addrspace(1) %ptr, i32 %in seq_cst
+ %val = atomicrmw volatile xor ptr addrspace(1) %ptr, i32 %in syncscope("agent") seq_cst
ret void
}
@@ -5382,7 +5339,7 @@ define amdgpu_kernel void @atomic_xor_i32_ret_addr64(ptr addrspace(1) %out, ptr
; GFX9-NEXT: s_endpgm
entry:
%ptr = getelementptr i32, ptr addrspace(1) %out, i64 %index
- %val = atomicrmw volatile xor ptr addrspace(1) %ptr, i32 %in seq_cst
+ %val = atomicrmw volatile xor ptr addrspace(1) %ptr, i32 %in syncscope("agent") seq_cst
store i32 %val, ptr addrspace(1) %out2
ret void
}
@@ -5597,7 +5554,7 @@ define amdgpu_kernel void @atomic_load_i32(ptr addrspace(1) %in, ptr addrspace(1
; GFX9-NEXT: global_store_dword v0, v1, s[2:3]
; GFX9-NEXT: s_endpgm
entry:
- %val = load atomic i32, ptr addrspace(1) %in seq_cst, align 4
+ %val = load atomic i32, ptr addrspace(1) %in syncscope("agent") seq_cst, align 4
store i32 %val, ptr addrspace(1) %out
ret void
}
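
Atomic loads place the scope and ordering after the pointer operand and require an explicit alignment, so the agent-scoped form reads as in this one-line sketch (illustrative, not from the patch):

    %val = load atomic i32, ptr addrspace(1) %p syncscope("agent") seq_cst, align 4
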
@@ -6642,7 +6599,7 @@ define amdgpu_kernel void @atomic_inc_i32_offset(ptr addrspace(1) %out, i32 %in)
; GFX9-NEXT: s_endpgm
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i64 4
- %val = atomicrmw volatile uinc_wrap ptr addrspace(1) %gep, i32 %in seq_cst
+ %val = atomicrmw volatile uinc_wrap ptr addrspace(1) %gep, i32 %in syncscope("agent") seq_cst
ret void
}
@@ -6693,7 +6650,7 @@ define amdgpu_kernel void @atomic_inc_i32_max_neg_offset(ptr addrspace(1) %out,
; GFX9-NEXT: s_endpgm
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i64 -1024
- %val = atomicrmw volatile uinc_wrap ptr addrspace(1) %gep, i32 %in seq_cst
+ %val = atomicrmw volatile uinc_wrap ptr addrspace(1) %gep, i32 %in syncscope("agent") seq_cst
ret void
}
@@ -6742,7 +6699,7 @@ define amdgpu_kernel void @atomic_inc_i32_soffset(ptr addrspace(1) %out, i32 %in
; GFX9-NEXT: s_endpgm
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i64 9000
- %val = atomicrmw volatile uinc_wrap ptr addrspace(1) %gep, i32 %in seq_cst
+ %val = atomicrmw volatile uinc_wrap ptr addrspace(1) %gep, i32 %in syncscope("agent") seq_cst
ret void
}
@@ -6795,7 +6752,7 @@ define amdgpu_kernel void @atomic_inc_i32_huge_offset(ptr addrspace(1) %out, i32
; GFX9-NEXT: s_endpgm
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i64 47224239175595
- %val = atomicrmw volatile uinc_wrap ptr addrspace(1) %gep, i32 %in seq_cst
+ %val = atomicrmw volatile uinc_wrap ptr addrspace(1) %gep, i32 %in syncscope("agent") seq_cst
ret void
}
@@ -6853,7 +6810,7 @@ define amdgpu_kernel void @atomic_inc_i32_ret_offset(ptr addrspace(1) %out, ptr
; GFX9-NEXT: s_endpgm
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i64 4
- %val = atomicrmw volatile uinc_wrap ptr addrspace(1) %gep, i32 %in seq_cst
+ %val = atomicrmw volatile uinc_wrap ptr addrspace(1) %gep, i32 %in syncscope("agent") seq_cst
store i32 %val, ptr addrspace(1) %out2
ret void
}
@@ -6916,7 +6873,7 @@ define amdgpu_kernel void @atomic_inc_i32_addr64_offset(ptr addrspace(1) %out, i
entry:
%ptr = getelementptr i32, ptr addrspace(1) %out, i64 %index
%gep = getelementptr i32, ptr addrspace(1) %ptr, i64 4
- %val = atomicrmw volatile uinc_wrap ptr addrspace(1) %gep, i32 %in seq_cst
+ %val = atomicrmw volatile uinc_wrap ptr addrspace(1) %gep, i32 %in syncscope("agent") seq_cst
ret void
}
@@ -6989,7 +6946,7 @@ define amdgpu_kernel void @atomic_inc_i32_ret_addr64_offset(ptr addrspace(1) %ou
entry:
%ptr = getelementptr i32, ptr addrspace(1) %out, i64 %index
%gep = getelementptr i32, ptr addrspace(1) %ptr, i64 4
- %val = atomicrmw volatile uinc_wrap ptr addrspace(1) %gep, i32 %in seq_cst
+ %val = atomicrmw volatile uinc_wrap ptr addrspace(1) %gep, i32 %in syncscope("agent") seq_cst
store i32 %val, ptr addrspace(1) %out2
ret void
}
@@ -7037,7 +6994,7 @@ define amdgpu_kernel void @atomic_dec_i32_offset(ptr addrspace(1) %out, i32 %in)
; GFX9-NEXT: s_endpgm
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i64 4
- %val = atomicrmw volatile udec_wrap ptr addrspace(1) %gep, i32 %in seq_cst
+ %val = atomicrmw volatile udec_wrap ptr addrspace(1) %gep, i32 %in syncscope("agent") seq_cst
ret void
}
@@ -7088,7 +7045,7 @@ define amdgpu_kernel void @atomic_dec_i32_max_neg_offset(ptr addrspace(1) %out,
; GFX9-NEXT: s_endpgm
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i64 -1024
- %val = atomicrmw volatile udec_wrap ptr addrspace(1) %gep, i32 %in seq_cst
+ %val = atomicrmw volatile udec_wrap ptr addrspace(1) %gep, i32 %in syncscope("agent") seq_cst
ret void
}
@@ -7137,7 +7094,7 @@ define amdgpu_kernel void @atomic_dec_i32_soffset(ptr addrspace(1) %out, i32 %in
; GFX9-NEXT: s_endpgm
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i64 9000
- %val = atomicrmw volatile udec_wrap ptr addrspace(1) %gep, i32 %in seq_cst
+ %val = atomicrmw volatile udec_wrap ptr addrspace(1) %gep, i32 %in syncscope("agent") seq_cst
ret void
}
@@ -7190,7 +7147,7 @@ define amdgpu_kernel void @atomic_dec_i32_huge_offset(ptr addrspace(1) %out, i32
; GFX9-NEXT: s_endpgm
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i64 47224239175595
- %val = atomicrmw volatile udec_wrap ptr addrspace(1) %gep, i32 %in seq_cst
+ %val = atomicrmw volatile udec_wrap ptr addrspace(1) %gep, i32 %in syncscope("agent") seq_cst
ret void
}
@@ -7248,7 +7205,7 @@ define amdgpu_kernel void @atomic_dec_i32_ret_offset(ptr addrspace(1) %out, ptr
; GFX9-NEXT: s_endpgm
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i64 4
- %val = atomicrmw volatile udec_wrap ptr addrspace(1) %gep, i32 %in seq_cst
+ %val = atomicrmw volatile udec_wrap ptr addrspace(1) %gep, i32 %in syncscope("agent") seq_cst
store i32 %val, ptr addrspace(1) %out2
ret void
}
@@ -7311,7 +7268,7 @@ define amdgpu_kernel void @atomic_dec_i32_addr64_offset(ptr addrspace(1) %out, i
entry:
%ptr = getelementptr i32, ptr addrspace(1) %out, i64 %index
%gep = getelementptr i32, ptr addrspace(1) %ptr, i64 4
- %val = atomicrmw volatile udec_wrap ptr addrspace(1) %gep, i32 %in seq_cst
+ %val = atomicrmw volatile udec_wrap ptr addrspace(1) %gep, i32 %in syncscope("agent") seq_cst
ret void
}
@@ -7384,7 +7341,7 @@ define amdgpu_kernel void @atomic_dec_i32_ret_addr64_offset(ptr addrspace(1) %ou
entry:
%ptr = getelementptr i32, ptr addrspace(1) %out, i64 %index
%gep = getelementptr i32, ptr addrspace(1) %ptr, i64 4
- %val = atomicrmw volatile udec_wrap ptr addrspace(1) %gep, i32 %in seq_cst
+ %val = atomicrmw volatile udec_wrap ptr addrspace(1) %gep, i32 %in syncscope("agent") seq_cst
store i32 %val, ptr addrspace(1) %out2
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/global_atomics_i64.ll b/llvm/test/CodeGen/AMDGPU/global_atomics_i64.ll
index 651ada15ffcc3b..889139450ca2f1 100644
--- a/llvm/test/CodeGen/AMDGPU/global_atomics_i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/global_atomics_i64.ll
@@ -46,7 +46,7 @@ define amdgpu_kernel void @atomic_add_i64_offset(ptr addrspace(1) %out, i64 %in)
; GFX9-NEXT: s_endpgm
entry:
%gep = getelementptr i64, ptr addrspace(1) %out, i64 4
- %tmp0 = atomicrmw volatile add ptr addrspace(1) %gep, i64 %in seq_cst
+ %tmp0 = atomicrmw volatile add ptr addrspace(1) %gep, i64 %in syncscope("agent") seq_cst
ret void
}
@@ -107,7 +107,7 @@ define amdgpu_kernel void @atomic_add_i64_ret_offset(ptr addrspace(1) %out, ptr
; GFX9-NEXT: s_endpgm
entry:
%gep = getelementptr i64, ptr addrspace(1) %out, i64 4
- %tmp0 = atomicrmw volatile add ptr addrspace(1) %gep, i64 %in seq_cst
+ %tmp0 = atomicrmw volatile add ptr addrspace(1) %gep, i64 %in syncscope("agent") seq_cst
store i64 %tmp0, ptr addrspace(1) %out2
ret void
}
@@ -170,7 +170,7 @@ define amdgpu_kernel void @atomic_add_i64_addr64_offset(ptr addrspace(1) %out, i
entry:
%ptr = getelementptr i64, ptr addrspace(1) %out, i64 %index
%gep = getelementptr i64, ptr addrspace(1) %ptr, i64 4
- %tmp0 = atomicrmw volatile add ptr addrspace(1) %gep, i64 %in seq_cst
+ %tmp0 = atomicrmw volatile add ptr addrspace(1) %gep, i64 %in syncscope("agent") seq_cst
ret void
}
@@ -240,7 +240,7 @@ define amdgpu_kernel void @atomic_add_i64_ret_addr64_offset(ptr addrspace(1) %ou
entry:
%ptr = getelementptr i64, ptr addrspace(1) %out, i64 %index
%gep = getelementptr i64, ptr addrspace(1) %ptr, i64 4
- %tmp0 = atomicrmw volatile add ptr addrspace(1) %gep, i64 %in seq_cst
+ %tmp0 = atomicrmw volatile add ptr addrspace(1) %gep, i64 %in syncscope("agent") seq_cst
store i64 %tmp0, ptr addrspace(1) %out2
ret void
}
@@ -291,7 +291,7 @@ define amdgpu_kernel void @atomic_add_i64(ptr addrspace(1) %out, i64 %in) {
; GFX9-NEXT: buffer_wbinvl1_vol
; GFX9-NEXT: s_endpgm
entry:
- %tmp0 = atomicrmw volatile add ptr addrspace(1) %out, i64 %in seq_cst
+ %tmp0 = atomicrmw volatile add ptr addrspace(1) %out, i64 %in syncscope("agent") seq_cst
ret void
}
@@ -351,7 +351,7 @@ define amdgpu_kernel void @atomic_add_i64_ret(ptr addrspace(1) %out, ptr addrspa
; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[6:7]
; GFX9-NEXT: s_endpgm
entry:
- %tmp0 = atomicrmw volatile add ptr addrspace(1) %out, i64 %in seq_cst
+ %tmp0 = atomicrmw volatile add ptr addrspace(1) %out, i64 %in syncscope("agent") seq_cst
store i64 %tmp0, ptr addrspace(1) %out2
ret void
}
@@ -411,7 +411,7 @@ define amdgpu_kernel void @atomic_add_i64_addr64(ptr addrspace(1) %out, i64 %in,
; GFX9-NEXT: s_endpgm
entry:
%ptr = getelementptr i64, ptr addrspace(1) %out, i64 %index
- %tmp0 = atomicrmw volatile add ptr addrspace(1) %ptr, i64 %in seq_cst
+ %tmp0 = atomicrmw volatile add ptr addrspace(1) %ptr, i64 %in syncscope("agent") seq_cst
ret void
}
@@ -478,7 +478,7 @@ define amdgpu_kernel void @atomic_add_i64_ret_addr64(ptr addrspace(1) %out, ptr
; GFX9-NEXT: s_endpgm
entry:
%ptr = getelementptr i64, ptr addrspace(1) %out, i64 %index
- %tmp0 = atomicrmw volatile add ptr addrspace(1) %ptr, i64 %in seq_cst
+ %tmp0 = atomicrmw volatile add ptr addrspace(1) %ptr, i64 %in syncscope("agent") seq_cst
store i64 %tmp0, ptr addrspace(1) %out2
ret void
}
@@ -526,7 +526,7 @@ define amdgpu_kernel void @atomic_and_i64_offset(ptr addrspace(1) %out, i64 %in)
; GFX9-NEXT: s_endpgm
entry:
%gep = getelementptr i64, ptr addrspace(1) %out, i64 4
- %tmp0 = atomicrmw volatile and ptr addrspace(1) %gep, i64 %in seq_cst
+ %tmp0 = atomicrmw volatile and ptr addrspace(1) %gep, i64 %in syncscope("agent") seq_cst
ret void
}
@@ -587,7 +587,7 @@ define amdgpu_kernel void @atomic_and_i64_ret_offset(ptr addrspace(1) %out, ptr
; GFX9-NEXT: s_endpgm
entry:
%gep = getelementptr i64, ptr addrspace(1) %out, i64 4
- %tmp0 = atomicrmw volatile and ptr addrspace(1) %gep, i64 %in seq_cst
+ %tmp0 = atomicrmw volatile and ptr addrspace(1) %gep, i64 %in syncscope("agent") seq_cst
store i64 %tmp0, ptr addrspace(1) %out2
ret void
}
@@ -650,7 +650,7 @@ define amdgpu_kernel void @atomic_and_i64_addr64_offset(ptr addrspace(1) %out, i
entry:
%ptr = getelementptr i64, ptr addrspace(1) %out, i64 %index
%gep = getelementptr i64, ptr addrspace(1) %ptr, i64 4
- %tmp0 = atomicrmw volatile and ptr addrspace(1) %gep, i64 %in seq_cst
+ %tmp0 = atomicrmw volatile and ptr addrspace(1) %gep, i64 %in syncscope("agent") seq_cst
ret void
}
@@ -720,7 +720,7 @@ define amdgpu_kernel void @atomic_and_i64_ret_addr64_offset(ptr addrspace(1) %ou
entry:
%ptr = getelementptr i64, ptr addrspace(1) %out, i64 %index
%gep = getelementptr i64, ptr addrspace(1) %ptr, i64 4
- %tmp0 = atomicrmw volatile and ptr addrspace(1) %gep, i64 %in seq_cst
+ %tmp0 = atomicrmw volatile and ptr addrspace(1) %gep, i64 %in syncscope("agent") seq_cst
store i64 %tmp0, ptr addrspace(1) %out2
ret void
}
@@ -771,7 +771,7 @@ define amdgpu_kernel void @atomic_and_i64(ptr addrspace(1) %out, i64 %in) {
; GFX9-NEXT: buffer_wbinvl1_vol
; GFX9-NEXT: s_endpgm
entry:
- %tmp0 = atomicrmw volatile and ptr addrspace(1) %out, i64 %in seq_cst
+ %tmp0 = atomicrmw volatile and ptr addrspace(1) %out, i64 %in syncscope("agent") seq_cst
ret void
}
@@ -831,7 +831,7 @@ define amdgpu_kernel void @atomic_and_i64_ret(ptr addrspace(1) %out, ptr addrspa
; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[6:7]
; GFX9-NEXT: s_endpgm
entry:
- %tmp0 = atomicrmw volatile and ptr addrspace(1) %out, i64 %in seq_cst
+ %tmp0 = atomicrmw volatile and ptr addrspace(1) %out, i64 %in syncscope("agent") seq_cst
store i64 %tmp0, ptr addrspace(1) %out2
ret void
}
@@ -891,7 +891,7 @@ define amdgpu_kernel void @atomic_and_i64_addr64(ptr addrspace(1) %out, i64 %in,
; GFX9-NEXT: s_endpgm
entry:
%ptr = getelementptr i64, ptr addrspace(1) %out, i64 %index
- %tmp0 = atomicrmw volatile and ptr addrspace(1) %ptr, i64 %in seq_cst
+ %tmp0 = atomicrmw volatile and ptr addrspace(1) %ptr, i64 %in syncscope("agent") seq_cst
ret void
}
@@ -958,7 +958,7 @@ define amdgpu_kernel void @atomic_and_i64_ret_addr64(ptr addrspace(1) %out, ptr
; GFX9-NEXT: s_endpgm
entry:
%ptr = getelementptr i64, ptr addrspace(1) %out, i64 %index
- %tmp0 = atomicrmw volatile and ptr addrspace(1) %ptr, i64 %in seq_cst
+ %tmp0 = atomicrmw volatile and ptr addrspace(1) %ptr, i64 %in syncscope("agent") seq_cst
store i64 %tmp0, ptr addrspace(1) %out2
ret void
}
@@ -1006,7 +1006,7 @@ define amdgpu_kernel void @atomic_sub_i64_offset(ptr addrspace(1) %out, i64 %in)
; GFX9-NEXT: s_endpgm
entry:
%gep = getelementptr i64, ptr addrspace(1) %out, i64 4
- %tmp0 = atomicrmw volatile sub ptr addrspace(1) %gep, i64 %in seq_cst
+ %tmp0 = atomicrmw volatile sub ptr addrspace(1) %gep, i64 %in syncscope("agent") seq_cst
ret void
}
@@ -1067,7 +1067,7 @@ define amdgpu_kernel void @atomic_sub_i64_ret_offset(ptr addrspace(1) %out, ptr
; GFX9-NEXT: s_endpgm
entry:
%gep = getelementptr i64, ptr addrspace(1) %out, i64 4
- %tmp0 = atomicrmw volatile sub ptr addrspace(1) %gep, i64 %in seq_cst
+ %tmp0 = atomicrmw volatile sub ptr addrspace(1) %gep, i64 %in syncscope("agent") seq_cst
store i64 %tmp0, ptr addrspace(1) %out2
ret void
}
@@ -1130,7 +1130,7 @@ define amdgpu_kernel void @atomic_sub_i64_addr64_offset(ptr addrspace(1) %out, i
entry:
%ptr = getelementptr i64, ptr addrspace(1) %out, i64 %index
%gep = getelementptr i64, ptr addrspace(1) %ptr, i64 4
- %tmp0 = atomicrmw volatile sub ptr addrspace(1) %gep, i64 %in seq_cst
+ %tmp0 = atomicrmw volatile sub ptr addrspace(1) %gep, i64 %in syncscope("agent") seq_cst
ret void
}
@@ -1200,7 +1200,7 @@ define amdgpu_kernel void @atomic_sub_i64_ret_addr64_offset(ptr addrspace(1) %ou
entry:
%ptr = getelementptr i64, ptr addrspace(1) %out, i64 %index
%gep = getelementptr i64, ptr addrspace(1) %ptr, i64 4
- %tmp0 = atomicrmw volatile sub ptr addrspace(1) %gep, i64 %in seq_cst
+ %tmp0 = atomicrmw volatile sub ptr addrspace(1) %gep, i64 %in syncscope("agent") seq_cst
store i64 %tmp0, ptr addrspace(1) %out2
ret void
}
@@ -1251,7 +1251,7 @@ define amdgpu_kernel void @atomic_sub_i64(ptr addrspace(1) %out, i64 %in) {
; GFX9-NEXT: buffer_wbinvl1_vol
; GFX9-NEXT: s_endpgm
entry:
- %tmp0 = atomicrmw volatile sub ptr addrspace(1) %out, i64 %in seq_cst
+ %tmp0 = atomicrmw volatile sub ptr addrspace(1) %out, i64 %in syncscope("agent") seq_cst
ret void
}
@@ -1311,7 +1311,7 @@ define amdgpu_kernel void @atomic_sub_i64_ret(ptr addrspace(1) %out, ptr addrspa
; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[6:7]
; GFX9-NEXT: s_endpgm
entry:
- %tmp0 = atomicrmw volatile sub ptr addrspace(1) %out, i64 %in seq_cst
+ %tmp0 = atomicrmw volatile sub ptr addrspace(1) %out, i64 %in syncscope("agent") seq_cst
store i64 %tmp0, ptr addrspace(1) %out2
ret void
}
@@ -1371,7 +1371,7 @@ define amdgpu_kernel void @atomic_sub_i64_addr64(ptr addrspace(1) %out, i64 %in,
; GFX9-NEXT: s_endpgm
entry:
%ptr = getelementptr i64, ptr addrspace(1) %out, i64 %index
- %tmp0 = atomicrmw volatile sub ptr addrspace(1) %ptr, i64 %in seq_cst
+ %tmp0 = atomicrmw volatile sub ptr addrspace(1) %ptr, i64 %in syncscope("agent") seq_cst
ret void
}
@@ -1438,7 +1438,7 @@ define amdgpu_kernel void @atomic_sub_i64_ret_addr64(ptr addrspace(1) %out, ptr
; GFX9-NEXT: s_endpgm
entry:
%ptr = getelementptr i64, ptr addrspace(1) %out, i64 %index
- %tmp0 = atomicrmw volatile sub ptr addrspace(1) %ptr, i64 %in seq_cst
+ %tmp0 = atomicrmw volatile sub ptr addrspace(1) %ptr, i64 %in syncscope("agent") seq_cst
store i64 %tmp0, ptr addrspace(1) %out2
ret void
}
@@ -3262,7 +3262,7 @@ define amdgpu_kernel void @atomic_or_i64_offset(ptr addrspace(1) %out, i64 %in)
; GFX9-NEXT: s_endpgm
entry:
%gep = getelementptr i64, ptr addrspace(1) %out, i64 4
- %tmp0 = atomicrmw volatile or ptr addrspace(1) %gep, i64 %in seq_cst
+ %tmp0 = atomicrmw volatile or ptr addrspace(1) %gep, i64 %in syncscope("agent") seq_cst
ret void
}
@@ -3323,7 +3323,7 @@ define amdgpu_kernel void @atomic_or_i64_ret_offset(ptr addrspace(1) %out, ptr a
; GFX9-NEXT: s_endpgm
entry:
%gep = getelementptr i64, ptr addrspace(1) %out, i64 4
- %tmp0 = atomicrmw volatile or ptr addrspace(1) %gep, i64 %in seq_cst
+ %tmp0 = atomicrmw volatile or ptr addrspace(1) %gep, i64 %in syncscope("agent") seq_cst
store i64 %tmp0, ptr addrspace(1) %out2
ret void
}
@@ -3386,7 +3386,7 @@ define amdgpu_kernel void @atomic_or_i64_addr64_offset(ptr addrspace(1) %out, i6
entry:
%ptr = getelementptr i64, ptr addrspace(1) %out, i64 %index
%gep = getelementptr i64, ptr addrspace(1) %ptr, i64 4
- %tmp0 = atomicrmw volatile or ptr addrspace(1) %gep, i64 %in seq_cst
+ %tmp0 = atomicrmw volatile or ptr addrspace(1) %gep, i64 %in syncscope("agent") seq_cst
ret void
}
@@ -3456,7 +3456,7 @@ define amdgpu_kernel void @atomic_or_i64_ret_addr64_offset(ptr addrspace(1) %out
entry:
%ptr = getelementptr i64, ptr addrspace(1) %out, i64 %index
%gep = getelementptr i64, ptr addrspace(1) %ptr, i64 4
- %tmp0 = atomicrmw volatile or ptr addrspace(1) %gep, i64 %in seq_cst
+ %tmp0 = atomicrmw volatile or ptr addrspace(1) %gep, i64 %in syncscope("agent") seq_cst
store i64 %tmp0, ptr addrspace(1) %out2
ret void
}
@@ -3507,7 +3507,7 @@ define amdgpu_kernel void @atomic_or_i64(ptr addrspace(1) %out, i64 %in) {
; GFX9-NEXT: buffer_wbinvl1_vol
; GFX9-NEXT: s_endpgm
entry:
- %tmp0 = atomicrmw volatile or ptr addrspace(1) %out, i64 %in seq_cst
+ %tmp0 = atomicrmw volatile or ptr addrspace(1) %out, i64 %in syncscope("agent") seq_cst
ret void
}
@@ -3567,7 +3567,7 @@ define amdgpu_kernel void @atomic_or_i64_ret(ptr addrspace(1) %out, ptr addrspac
; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[6:7]
; GFX9-NEXT: s_endpgm
entry:
- %tmp0 = atomicrmw volatile or ptr addrspace(1) %out, i64 %in seq_cst
+ %tmp0 = atomicrmw volatile or ptr addrspace(1) %out, i64 %in syncscope("agent") seq_cst
store i64 %tmp0, ptr addrspace(1) %out2
ret void
}
@@ -3627,7 +3627,7 @@ define amdgpu_kernel void @atomic_or_i64_addr64(ptr addrspace(1) %out, i64 %in,
; GFX9-NEXT: s_endpgm
entry:
%ptr = getelementptr i64, ptr addrspace(1) %out, i64 %index
- %tmp0 = atomicrmw volatile or ptr addrspace(1) %ptr, i64 %in seq_cst
+ %tmp0 = atomicrmw volatile or ptr addrspace(1) %ptr, i64 %in syncscope("agent") seq_cst
ret void
}
@@ -3694,7 +3694,7 @@ define amdgpu_kernel void @atomic_or_i64_ret_addr64(ptr addrspace(1) %out, ptr a
; GFX9-NEXT: s_endpgm
entry:
%ptr = getelementptr i64, ptr addrspace(1) %out, i64 %index
- %tmp0 = atomicrmw volatile or ptr addrspace(1) %ptr, i64 %in seq_cst
+ %tmp0 = atomicrmw volatile or ptr addrspace(1) %ptr, i64 %in syncscope("agent") seq_cst
store i64 %tmp0, ptr addrspace(1) %out2
ret void
}
@@ -3742,7 +3742,7 @@ define amdgpu_kernel void @atomic_xchg_i64_offset(ptr addrspace(1) %out, i64 %in
; GFX9-NEXT: s_endpgm
entry:
%gep = getelementptr i64, ptr addrspace(1) %out, i64 4
- %tmp0 = atomicrmw volatile xchg ptr addrspace(1) %gep, i64 %in seq_cst
+ %tmp0 = atomicrmw volatile xchg ptr addrspace(1) %gep, i64 %in syncscope("agent") seq_cst
ret void
}
@@ -3789,7 +3789,7 @@ define amdgpu_kernel void @atomic_xchg_f64_offset(ptr addrspace(1) %out, double
; GFX9-NEXT: s_endpgm
entry:
%gep = getelementptr double, ptr addrspace(1) %out, i64 4
- %tmp0 = atomicrmw volatile xchg ptr addrspace(1) %gep, double %in seq_cst
+ %tmp0 = atomicrmw volatile xchg ptr addrspace(1) %gep, double %in syncscope("agent") seq_cst
ret void
}
@@ -3836,7 +3836,7 @@ define amdgpu_kernel void @atomic_xchg_pointer_offset(ptr addrspace(1) %out, ptr
; GFX9-NEXT: s_endpgm
entry:
%gep = getelementptr ptr, ptr addrspace(1) %out, i64 4
- %tmp0 = atomicrmw volatile xchg ptr addrspace(1) %gep, ptr %in seq_cst
+ %tmp0 = atomicrmw volatile xchg ptr addrspace(1) %gep, ptr %in syncscope("agent") seq_cst
ret void
}
@@ -3897,7 +3897,7 @@ define amdgpu_kernel void @atomic_xchg_i64_ret_offset(ptr addrspace(1) %out, ptr
; GFX9-NEXT: s_endpgm
entry:
%gep = getelementptr i64, ptr addrspace(1) %out, i64 4
- %tmp0 = atomicrmw volatile xchg ptr addrspace(1) %gep, i64 %in seq_cst
+ %tmp0 = atomicrmw volatile xchg ptr addrspace(1) %gep, i64 %in syncscope("agent") seq_cst
store i64 %tmp0, ptr addrspace(1) %out2
ret void
}
@@ -3960,7 +3960,7 @@ define amdgpu_kernel void @atomic_xchg_i64_addr64_offset(ptr addrspace(1) %out,
entry:
%ptr = getelementptr i64, ptr addrspace(1) %out, i64 %index
%gep = getelementptr i64, ptr addrspace(1) %ptr, i64 4
- %tmp0 = atomicrmw volatile xchg ptr addrspace(1) %gep, i64 %in seq_cst
+ %tmp0 = atomicrmw volatile xchg ptr addrspace(1) %gep, i64 %in syncscope("agent") seq_cst
ret void
}
@@ -4030,7 +4030,7 @@ define amdgpu_kernel void @atomic_xchg_i64_ret_addr64_offset(ptr addrspace(1) %o
entry:
%ptr = getelementptr i64, ptr addrspace(1) %out, i64 %index
%gep = getelementptr i64, ptr addrspace(1) %ptr, i64 4
- %tmp0 = atomicrmw volatile xchg ptr addrspace(1) %gep, i64 %in seq_cst
+ %tmp0 = atomicrmw volatile xchg ptr addrspace(1) %gep, i64 %in syncscope("agent") seq_cst
store i64 %tmp0, ptr addrspace(1) %out2
ret void
}
@@ -4081,7 +4081,7 @@ define amdgpu_kernel void @atomic_xchg_i64(ptr addrspace(1) %out, i64 %in) {
; GFX9-NEXT: buffer_wbinvl1_vol
; GFX9-NEXT: s_endpgm
entry:
- %tmp0 = atomicrmw volatile xchg ptr addrspace(1) %out, i64 %in seq_cst
+ %tmp0 = atomicrmw volatile xchg ptr addrspace(1) %out, i64 %in syncscope("agent") seq_cst
ret void
}
@@ -4141,7 +4141,7 @@ define amdgpu_kernel void @atomic_xchg_i64_ret(ptr addrspace(1) %out, ptr addrsp
; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[6:7]
; GFX9-NEXT: s_endpgm
entry:
- %tmp0 = atomicrmw volatile xchg ptr addrspace(1) %out, i64 %in seq_cst
+ %tmp0 = atomicrmw volatile xchg ptr addrspace(1) %out, i64 %in syncscope("agent") seq_cst
store i64 %tmp0, ptr addrspace(1) %out2
ret void
}
@@ -4201,7 +4201,7 @@ define amdgpu_kernel void @atomic_xchg_i64_addr64(ptr addrspace(1) %out, i64 %in
; GFX9-NEXT: s_endpgm
entry:
%ptr = getelementptr i64, ptr addrspace(1) %out, i64 %index
- %tmp0 = atomicrmw volatile xchg ptr addrspace(1) %ptr, i64 %in seq_cst
+ %tmp0 = atomicrmw volatile xchg ptr addrspace(1) %ptr, i64 %in syncscope("agent") seq_cst
ret void
}
@@ -4268,7 +4268,7 @@ define amdgpu_kernel void @atomic_xchg_i64_ret_addr64(ptr addrspace(1) %out, ptr
; GFX9-NEXT: s_endpgm
entry:
%ptr = getelementptr i64, ptr addrspace(1) %out, i64 %index
- %tmp0 = atomicrmw volatile xchg ptr addrspace(1) %ptr, i64 %in seq_cst
+ %tmp0 = atomicrmw volatile xchg ptr addrspace(1) %ptr, i64 %in syncscope("agent") seq_cst
store i64 %tmp0, ptr addrspace(1) %out2
ret void
}
@@ -4316,7 +4316,7 @@ define amdgpu_kernel void @atomic_xor_i64_offset(ptr addrspace(1) %out, i64 %in)
; GFX9-NEXT: s_endpgm
entry:
%gep = getelementptr i64, ptr addrspace(1) %out, i64 4
- %tmp0 = atomicrmw volatile xor ptr addrspace(1) %gep, i64 %in seq_cst
+ %tmp0 = atomicrmw volatile xor ptr addrspace(1) %gep, i64 %in syncscope("agent") seq_cst
ret void
}
@@ -4377,7 +4377,7 @@ define amdgpu_kernel void @atomic_xor_i64_ret_offset(ptr addrspace(1) %out, ptr
; GFX9-NEXT: s_endpgm
entry:
%gep = getelementptr i64, ptr addrspace(1) %out, i64 4
- %tmp0 = atomicrmw volatile xor ptr addrspace(1) %gep, i64 %in seq_cst
+ %tmp0 = atomicrmw volatile xor ptr addrspace(1) %gep, i64 %in syncscope("agent") seq_cst
store i64 %tmp0, ptr addrspace(1) %out2
ret void
}
@@ -4440,7 +4440,7 @@ define amdgpu_kernel void @atomic_xor_i64_addr64_offset(ptr addrspace(1) %out, i
entry:
%ptr = getelementptr i64, ptr addrspace(1) %out, i64 %index
%gep = getelementptr i64, ptr addrspace(1) %ptr, i64 4
- %tmp0 = atomicrmw volatile xor ptr addrspace(1) %gep, i64 %in seq_cst
+ %tmp0 = atomicrmw volatile xor ptr addrspace(1) %gep, i64 %in syncscope("agent") seq_cst
ret void
}
@@ -4510,7 +4510,7 @@ define amdgpu_kernel void @atomic_xor_i64_ret_addr64_offset(ptr addrspace(1) %ou
entry:
%ptr = getelementptr i64, ptr addrspace(1) %out, i64 %index
%gep = getelementptr i64, ptr addrspace(1) %ptr, i64 4
- %tmp0 = atomicrmw volatile xor ptr addrspace(1) %gep, i64 %in seq_cst
+ %tmp0 = atomicrmw volatile xor ptr addrspace(1) %gep, i64 %in syncscope("agent") seq_cst
store i64 %tmp0, ptr addrspace(1) %out2
ret void
}
@@ -4561,7 +4561,7 @@ define amdgpu_kernel void @atomic_xor_i64(ptr addrspace(1) %out, i64 %in) {
; GFX9-NEXT: buffer_wbinvl1_vol
; GFX9-NEXT: s_endpgm
entry:
- %tmp0 = atomicrmw volatile xor ptr addrspace(1) %out, i64 %in seq_cst
+ %tmp0 = atomicrmw volatile xor ptr addrspace(1) %out, i64 %in syncscope("agent") seq_cst
ret void
}
@@ -4621,7 +4621,7 @@ define amdgpu_kernel void @atomic_xor_i64_ret(ptr addrspace(1) %out, ptr addrspa
; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[6:7]
; GFX9-NEXT: s_endpgm
entry:
- %tmp0 = atomicrmw volatile xor ptr addrspace(1) %out, i64 %in seq_cst
+ %tmp0 = atomicrmw volatile xor ptr addrspace(1) %out, i64 %in syncscope("agent") seq_cst
store i64 %tmp0, ptr addrspace(1) %out2
ret void
}
@@ -4681,7 +4681,7 @@ define amdgpu_kernel void @atomic_xor_i64_addr64(ptr addrspace(1) %out, i64 %in,
; GFX9-NEXT: s_endpgm
entry:
%ptr = getelementptr i64, ptr addrspace(1) %out, i64 %index
- %tmp0 = atomicrmw volatile xor ptr addrspace(1) %ptr, i64 %in seq_cst
+ %tmp0 = atomicrmw volatile xor ptr addrspace(1) %ptr, i64 %in syncscope("agent") seq_cst
ret void
}
@@ -4748,7 +4748,7 @@ define amdgpu_kernel void @atomic_xor_i64_ret_addr64(ptr addrspace(1) %out, ptr
; GFX9-NEXT: s_endpgm
entry:
%ptr = getelementptr i64, ptr addrspace(1) %out, i64 %index
- %tmp0 = atomicrmw volatile xor ptr addrspace(1) %ptr, i64 %in seq_cst
+ %tmp0 = atomicrmw volatile xor ptr addrspace(1) %ptr, i64 %in syncscope("agent") seq_cst
store i64 %tmp0, ptr addrspace(1) %out2
ret void
}
@@ -4809,7 +4809,7 @@ define amdgpu_kernel void @atomic_cmpxchg_i64_offset(ptr addrspace(1) %out, i64
; GFX9-NEXT: s_endpgm
entry:
%gep = getelementptr i64, ptr addrspace(1) %out, i64 4
- %val = cmpxchg volatile ptr addrspace(1) %gep, i64 %old, i64 %in seq_cst seq_cst
+ %val = cmpxchg volatile ptr addrspace(1) %gep, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst
ret void
}
@@ -4871,7 +4871,7 @@ define amdgpu_kernel void @atomic_cmpxchg_i64_soffset(ptr addrspace(1) %out, i64
; GFX9-NEXT: s_endpgm
entry:
%gep = getelementptr i64, ptr addrspace(1) %out, i64 9000
- %val = cmpxchg volatile ptr addrspace(1) %gep, i64 %old, i64 %in seq_cst seq_cst
+ %val = cmpxchg volatile ptr addrspace(1) %gep, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst
ret void
}
@@ -4935,7 +4935,7 @@ define amdgpu_kernel void @atomic_cmpxchg_i64_ret_offset(ptr addrspace(1) %out,
; GFX9-NEXT: s_endpgm
entry:
%gep = getelementptr i64, ptr addrspace(1) %out, i64 4
- %val = cmpxchg volatile ptr addrspace(1) %gep, i64 %old, i64 %in seq_cst seq_cst
+ %val = cmpxchg volatile ptr addrspace(1) %gep, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst
%extract0 = extractvalue { i64, i1 } %val, 0
store i64 %extract0, ptr addrspace(1) %out2
ret void
@@ -5003,7 +5003,7 @@ define amdgpu_kernel void @atomic_cmpxchg_i64_addr64_offset(ptr addrspace(1) %ou
entry:
%ptr = getelementptr i64, ptr addrspace(1) %out, i64 %index
%gep = getelementptr i64, ptr addrspace(1) %ptr, i64 4
- %val = cmpxchg volatile ptr addrspace(1) %gep, i64 %old, i64 %in seq_cst seq_cst
+ %val = cmpxchg volatile ptr addrspace(1) %gep, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst
ret void
}
@@ -5082,7 +5082,7 @@ define amdgpu_kernel void @atomic_cmpxchg_i64_ret_addr64_offset(ptr addrspace(1)
entry:
%ptr = getelementptr i64, ptr addrspace(1) %out, i64 %index
%gep = getelementptr i64, ptr addrspace(1) %ptr, i64 4
- %val = cmpxchg volatile ptr addrspace(1) %gep, i64 %old, i64 %in seq_cst seq_cst
+ %val = cmpxchg volatile ptr addrspace(1) %gep, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst
%extract0 = extractvalue { i64, i1 } %val, 0
store i64 %extract0, ptr addrspace(1) %out2
ret void
@@ -5143,7 +5143,7 @@ define amdgpu_kernel void @atomic_cmpxchg_i64(ptr addrspace(1) %out, i64 %in, i6
; GFX9-NEXT: buffer_wbinvl1_vol
; GFX9-NEXT: s_endpgm
entry:
- %val = cmpxchg volatile ptr addrspace(1) %out, i64 %old, i64 %in seq_cst seq_cst
+ %val = cmpxchg volatile ptr addrspace(1) %out, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst
ret void
}
@@ -5206,7 +5206,7 @@ define amdgpu_kernel void @atomic_cmpxchg_i64_ret(ptr addrspace(1) %out, ptr add
; GFX9-NEXT: global_store_dwordx2 v4, v[0:1], s[2:3]
; GFX9-NEXT: s_endpgm
entry:
- %val = cmpxchg volatile ptr addrspace(1) %out, i64 %old, i64 %in seq_cst seq_cst
+ %val = cmpxchg volatile ptr addrspace(1) %out, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst
%extract0 = extractvalue { i64, i1 } %val, 0
store i64 %extract0, ptr addrspace(1) %out2
ret void
@@ -5271,7 +5271,7 @@ define amdgpu_kernel void @atomic_cmpxchg_i64_addr64(ptr addrspace(1) %out, i64
; GFX9-NEXT: s_endpgm
entry:
%ptr = getelementptr i64, ptr addrspace(1) %out, i64 %index
- %val = cmpxchg volatile ptr addrspace(1) %ptr, i64 %old, i64 %in seq_cst seq_cst
+ %val = cmpxchg volatile ptr addrspace(1) %ptr, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst
ret void
}
@@ -5347,7 +5347,7 @@ define amdgpu_kernel void @atomic_cmpxchg_i64_ret_addr64(ptr addrspace(1) %out,
; GFX9-NEXT: s_endpgm
entry:
%ptr = getelementptr i64, ptr addrspace(1) %out, i64 %index
- %val = cmpxchg volatile ptr addrspace(1) %ptr, i64 %old, i64 %in seq_cst seq_cst
+ %val = cmpxchg volatile ptr addrspace(1) %ptr, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst
%extract0 = extractvalue { i64, i1 } %val, 0
store i64 %extract0, ptr addrspace(1) %out2
ret void
@@ -5509,7 +5509,7 @@ define amdgpu_kernel void @atomic_load_i64(ptr addrspace(1) %in, ptr addrspace(1
; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3]
; GFX9-NEXT: s_endpgm
entry:
- %val = load atomic i64, ptr addrspace(1) %in seq_cst, align 8
+ %val = load atomic i64, ptr addrspace(1) %in syncscope("agent") seq_cst, align 8
store i64 %val, ptr addrspace(1) %out
ret void
}
@@ -6013,7 +6013,7 @@ define amdgpu_kernel void @atomic_inc_i64_offset(ptr addrspace(1) %out, i64 %in)
; GFX9-NEXT: s_endpgm
entry:
%gep = getelementptr i64, ptr addrspace(1) %out, i64 4
- %tmp0 = atomicrmw volatile uinc_wrap ptr addrspace(1) %gep, i64 %in seq_cst
+ %tmp0 = atomicrmw volatile uinc_wrap ptr addrspace(1) %gep, i64 %in syncscope("agent") seq_cst
ret void
}
@@ -6074,7 +6074,7 @@ define amdgpu_kernel void @atomic_inc_i64_ret_offset(ptr addrspace(1) %out, ptr
; GFX9-NEXT: s_endpgm
entry:
%gep = getelementptr i64, ptr addrspace(1) %out, i64 4
- %tmp0 = atomicrmw volatile uinc_wrap ptr addrspace(1) %gep, i64 %in seq_cst
+ %tmp0 = atomicrmw volatile uinc_wrap ptr addrspace(1) %gep, i64 %in syncscope("agent") seq_cst
store i64 %tmp0, ptr addrspace(1) %out2
ret void
}
@@ -6137,7 +6137,7 @@ define amdgpu_kernel void @atomic_inc_i64_incr64_offset(ptr addrspace(1) %out, i
entry:
%ptr = getelementptr i64, ptr addrspace(1) %out, i64 %index
%gep = getelementptr i64, ptr addrspace(1) %ptr, i64 4
- %tmp0 = atomicrmw volatile uinc_wrap ptr addrspace(1) %gep, i64 %in seq_cst
+ %tmp0 = atomicrmw volatile uinc_wrap ptr addrspace(1) %gep, i64 %in syncscope("agent") seq_cst
ret void
}
@@ -6184,7 +6184,7 @@ define amdgpu_kernel void @atomic_dec_i64_offset(ptr addrspace(1) %out, i64 %in)
; GFX9-NEXT: s_endpgm
entry:
%gep = getelementptr i64, ptr addrspace(1) %out, i64 4
- %tmp0 = atomicrmw volatile udec_wrap ptr addrspace(1) %gep, i64 %in seq_cst
+ %tmp0 = atomicrmw volatile udec_wrap ptr addrspace(1) %gep, i64 %in syncscope("agent") seq_cst
ret void
}
@@ -6245,7 +6245,7 @@ define amdgpu_kernel void @atomic_dec_i64_ret_offset(ptr addrspace(1) %out, ptr
; GFX9-NEXT: s_endpgm
entry:
%gep = getelementptr i64, ptr addrspace(1) %out, i64 4
- %tmp0 = atomicrmw volatile udec_wrap ptr addrspace(1) %gep, i64 %in seq_cst
+ %tmp0 = atomicrmw volatile udec_wrap ptr addrspace(1) %gep, i64 %in syncscope("agent") seq_cst
store i64 %tmp0, ptr addrspace(1) %out2
ret void
}
@@ -6308,6 +6308,6 @@ define amdgpu_kernel void @atomic_dec_i64_decr64_offset(ptr addrspace(1) %out, i
entry:
%ptr = getelementptr i64, ptr addrspace(1) %out, i64 %index
%gep = getelementptr i64, ptr addrspace(1) %ptr, i64 4
- %tmp0 = atomicrmw volatile udec_wrap ptr addrspace(1) %gep, i64 %in seq_cst
+ %tmp0 = atomicrmw volatile udec_wrap ptr addrspace(1) %gep, i64 %in syncscope("agent") seq_cst
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/mubuf.ll b/llvm/test/CodeGen/AMDGPU/mubuf.ll
index a97061b8cc5e38..37bd7bc92c2ddf 100644
--- a/llvm/test/CodeGen/AMDGPU/mubuf.ll
+++ b/llvm/test/CodeGen/AMDGPU/mubuf.ll
@@ -159,7 +159,7 @@ define amdgpu_kernel void @store_sgpr_ptr_large_offset(ptr addrspace(1) %out) {
; CHECK: buffer_atomic_add v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, [[SOFFSET]]
define amdgpu_kernel void @store_sgpr_ptr_large_offset_atomic(ptr addrspace(1) %out) {
%gep = getelementptr i32, ptr addrspace(1) %out, i32 32768
- %val = atomicrmw volatile add ptr addrspace(1) %gep, i32 5 seq_cst
+ %val = atomicrmw volatile add ptr addrspace(1) %gep, i32 5 syncscope("agent") seq_cst
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/noclobber-barrier.ll b/llvm/test/CodeGen/AMDGPU/noclobber-barrier.ll
index d6d7a80ece38ad..0d9de8b33397b4 100644
--- a/llvm/test/CodeGen/AMDGPU/noclobber-barrier.ll
+++ b/llvm/test/CodeGen/AMDGPU/noclobber-barrier.ll
@@ -524,7 +524,7 @@ entry:
define protected amdgpu_kernel void @may_alias_atomic_rmw(ptr addrspace(1) %in, ptr addrspace(1) %out) {
; CHECK-LABEL: @may_alias_atomic_rmw(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[UNUSED:%.*]] = atomicrmw add ptr addrspace(1) [[OUT:%.*]], i32 5 seq_cst, align 4
+; CHECK-NEXT: [[UNUSED:%.*]] = atomicrmw add ptr addrspace(1) [[OUT:%.*]], i32 5 syncscope("agent") seq_cst, align 4
; CHECK-NEXT: fence syncscope("workgroup") release
; CHECK-NEXT: tail call void @llvm.amdgcn.s.barrier()
; CHECK-NEXT: fence syncscope("workgroup") acquire
@@ -533,7 +533,7 @@ define protected amdgpu_kernel void @may_alias_atomic_rmw(ptr addrspace(1) %in,
; CHECK-NEXT: ret void
;
entry:
- %unused = atomicrmw add ptr addrspace(1) %out, i32 5 seq_cst
+ %unused = atomicrmw add ptr addrspace(1) %out, i32 5 syncscope("agent") seq_cst
fence syncscope("workgroup") release
tail call void @llvm.amdgcn.s.barrier()
fence syncscope("workgroup") acquire
diff --git a/llvm/test/CodeGen/AMDGPU/shl_add_ptr_global.ll b/llvm/test/CodeGen/AMDGPU/shl_add_ptr_global.ll
index 30e79922935964..46ea7266f28254 100644
--- a/llvm/test/CodeGen/AMDGPU/shl_add_ptr_global.ll
+++ b/llvm/test/CodeGen/AMDGPU/shl_add_ptr_global.ll
@@ -12,7 +12,7 @@ define void @shl_base_atomicrmw_global_ptr(ptr addrspace(1) %out, ptr addrspace(
%cast = ptrtoint ptr addrspace(1) %arrayidx0 to i64
%shl = shl i64 %cast, 2
%castback = inttoptr i64 %shl to ptr addrspace(1)
- %val = atomicrmw and ptr addrspace(1) %castback, i32 3 seq_cst
+ %val = atomicrmw and ptr addrspace(1) %castback, i32 3 syncscope("agent") seq_cst
store volatile i64 %cast, ptr addrspace(1) %extra.use, align 4
ret void
}