[llvm-branch-commits] [llvm] [AMDGPU] Expand scratch atomics to flat atomics if GAS is enabled (PR #154710)

Matt Arsenault via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Wed Aug 27 07:41:55 PDT 2025


================
@@ -0,0 +1,172 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -passes=atomic-expand %s | FileCheck -check-prefixes=GFX1200 %s
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -passes=atomic-expand %s | FileCheck -check-prefixes=GFX1250 %s
+
+define void @system_atomic_store_unordered_float(ptr addrspace(5) %addr, float %val) {
+; GFX1200-LABEL: define void @system_atomic_store_unordered_float(
+; GFX1200-SAME: ptr addrspace(5) [[ADDR:%.*]], float [[VAL:%.*]]) #[[ATTR0:[0-9]+]] {
+; GFX1200-NEXT:    store float [[VAL]], ptr addrspace(5) [[ADDR]], align 4
+; GFX1200-NEXT:    ret void
+;
+; GFX1250-LABEL: define void @system_atomic_store_unordered_float(
+; GFX1250-SAME: ptr addrspace(5) [[ADDR:%.*]], float [[VAL:%.*]]) #[[ATTR0:[0-9]+]] {
+; GFX1250-NEXT:    [[SCRATCH_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ADDR]] to ptr
+; GFX1250-NEXT:    store atomic float [[VAL]], ptr [[SCRATCH_ASCAST]] unordered, align 4
+; GFX1250-NEXT:    ret void
+;
+  store atomic float %val, ptr addrspace(5) %addr unordered, align 4
+  ret void
+}
+
+define void @system_atomic_store_unordered_i32(ptr addrspace(5) %addr, i32 %val) {
+; GFX1200-LABEL: define void @system_atomic_store_unordered_i32(
+; GFX1200-SAME: ptr addrspace(5) [[ADDR:%.*]], i32 [[VAL:%.*]]) #[[ATTR0]] {
+; GFX1200-NEXT:    store i32 [[VAL]], ptr addrspace(5) [[ADDR]], align 4
+; GFX1200-NEXT:    ret void
+;
+; GFX1250-LABEL: define void @system_atomic_store_unordered_i32(
+; GFX1250-SAME: ptr addrspace(5) [[ADDR:%.*]], i32 [[VAL:%.*]]) #[[ATTR0]] {
+; GFX1250-NEXT:    [[SCRATCH_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ADDR]] to ptr
+; GFX1250-NEXT:    store atomic i32 [[VAL]], ptr [[SCRATCH_ASCAST]] unordered, align 4
+; GFX1250-NEXT:    ret void
+;
+  store atomic i32 %val, ptr addrspace(5) %addr unordered, align 4
+  ret void
+}
+
+define void @system_atomic_store_release_i32(ptr addrspace(5) %addr, i32 %val) {
+; GFX1200-LABEL: define void @system_atomic_store_release_i32(
+; GFX1200-SAME: ptr addrspace(5) [[ADDR:%.*]], i32 [[VAL:%.*]]) #[[ATTR0]] {
+; GFX1200-NEXT:    store i32 [[VAL]], ptr addrspace(5) [[ADDR]], align 4
+; GFX1200-NEXT:    ret void
+;
+; GFX1250-LABEL: define void @system_atomic_store_release_i32(
+; GFX1250-SAME: ptr addrspace(5) [[ADDR:%.*]], i32 [[VAL:%.*]]) #[[ATTR0]] {
+; GFX1250-NEXT:    [[SCRATCH_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ADDR]] to ptr
+; GFX1250-NEXT:    store atomic i32 [[VAL]], ptr [[SCRATCH_ASCAST]] release, align 4
+; GFX1250-NEXT:    ret void
+;
+  store atomic i32 %val, ptr addrspace(5) %addr release, align 4
+  ret void
+}
+
+define void @workgroup_atomic_store_release_i32(ptr addrspace(5) %addr, i32 %val) {
+; GFX1200-LABEL: define void @workgroup_atomic_store_release_i32(
+; GFX1200-SAME: ptr addrspace(5) [[ADDR:%.*]], i32 [[VAL:%.*]]) #[[ATTR0]] {
+; GFX1200-NEXT:    store i32 [[VAL]], ptr addrspace(5) [[ADDR]], align 4
+; GFX1200-NEXT:    ret void
+;
+; GFX1250-LABEL: define void @workgroup_atomic_store_release_i32(
+; GFX1250-SAME: ptr addrspace(5) [[ADDR:%.*]], i32 [[VAL:%.*]]) #[[ATTR0]] {
+; GFX1250-NEXT:    [[SCRATCH_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ADDR]] to ptr
+; GFX1250-NEXT:    store atomic i32 [[VAL]], ptr [[SCRATCH_ASCAST]] syncscope("workgroup") release, align 4
+; GFX1250-NEXT:    ret void
+;
+  store atomic i32 %val, ptr addrspace(5) %addr syncscope("workgroup") release, align 4
+  ret void
+}
+
+define float @system_atomic_load_unordered_float(ptr addrspace(5) %addr) {
+; GFX1200-LABEL: define float @system_atomic_load_unordered_float(
+; GFX1200-SAME: ptr addrspace(5) [[ADDR:%.*]]) #[[ATTR0]] {
+; GFX1200-NEXT:    [[VAL:%.*]] = load float, ptr addrspace(5) [[ADDR]], align 4
+; GFX1200-NEXT:    ret float [[VAL]]
+;
+; GFX1250-LABEL: define float @system_atomic_load_unordered_float(
+; GFX1250-SAME: ptr addrspace(5) [[ADDR:%.*]]) #[[ATTR0]] {
+; GFX1250-NEXT:    [[SCRATCH_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ADDR]] to ptr
+; GFX1250-NEXT:    [[VAL:%.*]] = load atomic float, ptr [[SCRATCH_ASCAST]] unordered, align 4
+; GFX1250-NEXT:    ret float [[VAL]]
+;
+  %val = load atomic float, ptr addrspace(5) %addr unordered, align 4
+  ret float %val
+}
+
+define i32 @system_atomic_load_unordered_i32(ptr addrspace(5) %addr) {
+; GFX1200-LABEL: define i32 @system_atomic_load_unordered_i32(
+; GFX1200-SAME: ptr addrspace(5) [[ADDR:%.*]]) #[[ATTR0]] {
+; GFX1200-NEXT:    [[VAL:%.*]] = load i32, ptr addrspace(5) [[ADDR]], align 4
+; GFX1200-NEXT:    ret i32 [[VAL]]
+;
+; GFX1250-LABEL: define i32 @system_atomic_load_unordered_i32(
+; GFX1250-SAME: ptr addrspace(5) [[ADDR:%.*]]) #[[ATTR0]] {
+; GFX1250-NEXT:    [[SCRATCH_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ADDR]] to ptr
+; GFX1250-NEXT:    [[VAL:%.*]] = load atomic i32, ptr [[SCRATCH_ASCAST]] unordered, align 4
+; GFX1250-NEXT:    ret i32 [[VAL]]
+;
+  %val = load atomic i32, ptr addrspace(5) %addr unordered, align 4
+  ret i32 %val
+}
+
+define i32 @system_atomic_load_acquire_i32(ptr addrspace(5) %addr) {
+; GFX1200-LABEL: define i32 @system_atomic_load_acquire_i32(
+; GFX1200-SAME: ptr addrspace(5) [[ADDR:%.*]]) #[[ATTR0]] {
+; GFX1200-NEXT:    [[VAL:%.*]] = load i32, ptr addrspace(5) [[ADDR]], align 4
+; GFX1200-NEXT:    ret i32 [[VAL]]
+;
+; GFX1250-LABEL: define i32 @system_atomic_load_acquire_i32(
+; GFX1250-SAME: ptr addrspace(5) [[ADDR:%.*]]) #[[ATTR0]] {
+; GFX1250-NEXT:    [[SCRATCH_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ADDR]] to ptr
+; GFX1250-NEXT:    [[VAL:%.*]] = load atomic i32, ptr [[SCRATCH_ASCAST]] acquire, align 4
+; GFX1250-NEXT:    ret i32 [[VAL]]
+;
+  %val = load atomic i32, ptr addrspace(5) %addr acquire, align 4
+  ret i32 %val
+}
+
+define i32 @workgroup_atomic_load_acquire_i32(ptr addrspace(5) %addr) {
+; GFX1200-LABEL: define i32 @workgroup_atomic_load_acquire_i32(
+; GFX1200-SAME: ptr addrspace(5) [[ADDR:%.*]]) #[[ATTR0]] {
+; GFX1200-NEXT:    [[VAL:%.*]] = load i32, ptr addrspace(5) [[ADDR]], align 4
+; GFX1200-NEXT:    ret i32 [[VAL]]
+;
+; GFX1250-LABEL: define i32 @workgroup_atomic_load_acquire_i32(
+; GFX1250-SAME: ptr addrspace(5) [[ADDR:%.*]]) #[[ATTR0]] {
+; GFX1250-NEXT:    [[SCRATCH_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ADDR]] to ptr
+; GFX1250-NEXT:    [[VAL:%.*]] = load atomic i32, ptr [[SCRATCH_ASCAST]] syncscope("workgroup") acquire, align 4
+; GFX1250-NEXT:    ret i32 [[VAL]]
+;
+  %val = load atomic i32, ptr addrspace(5) %addr syncscope("workgroup") acquire, align 4
+  ret i32 %val
+}
+
+define i32 @system_atomic_cmpxchg_acq_rel_acquire_i32(ptr addrspace(5) %addr, i32 %old, i32 %in) {
+; GFX1200-LABEL: define i32 @system_atomic_cmpxchg_acq_rel_acquire_i32(
+; GFX1200-SAME: ptr addrspace(5) [[ADDR:%.*]], i32 [[OLD:%.*]], i32 [[IN:%.*]]) #[[ATTR0]] {
+; GFX1200-NEXT:    [[TMP1:%.*]] = load i32, ptr addrspace(5) [[ADDR]], align 4
+; GFX1200-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[TMP1]], [[OLD]]
+; GFX1200-NEXT:    [[TMP3:%.*]] = select i1 [[TMP2]], i32 [[IN]], i32 [[TMP1]]
+; GFX1200-NEXT:    store i32 [[TMP3]], ptr addrspace(5) [[ADDR]], align 4
+; GFX1200-NEXT:    [[TMP4:%.*]] = insertvalue { i32, i1 } poison, i32 [[TMP1]], 0
+; GFX1200-NEXT:    [[TMP5:%.*]] = insertvalue { i32, i1 } [[TMP4]], i1 [[TMP2]], 1
+; GFX1200-NEXT:    [[RES:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
+; GFX1200-NEXT:    ret i32 [[RES]]
+;
+; GFX1250-LABEL: define i32 @system_atomic_cmpxchg_acq_rel_acquire_i32(
+; GFX1250-SAME: ptr addrspace(5) [[ADDR:%.*]], i32 [[OLD:%.*]], i32 [[IN:%.*]]) #[[ATTR0]] {
+; GFX1250-NEXT:    [[SCRATCH_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ADDR]] to ptr
+; GFX1250-NEXT:    [[VAL:%.*]] = cmpxchg volatile ptr [[SCRATCH_ASCAST]], i32 [[OLD]], i32 [[IN]] acq_rel acquire, align 4
+; GFX1250-NEXT:    [[RES:%.*]] = extractvalue { i32, i1 } [[VAL]], 0
+; GFX1250-NEXT:    ret i32 [[RES]]
+;
+  %val = cmpxchg volatile ptr addrspace(5) %addr, i32 %old, i32 %in acq_rel acquire
+  %res = extractvalue { i32, i1 } %val, 0
+  ret i32 %res
+}
+
+define i32 @system_atomicrmw_add_acq_rel_i32(ptr addrspace(5) %addr, i32 %in) {
+; GFX1200-LABEL: define i32 @system_atomicrmw_add_acq_rel_i32(
+; GFX1200-SAME: ptr addrspace(5) [[ADDR:%.*]], i32 [[IN:%.*]]) #[[ATTR0]] {
+; GFX1200-NEXT:    [[TMP1:%.*]] = load i32, ptr addrspace(5) [[ADDR]], align 4
+; GFX1200-NEXT:    store i32 [[IN]], ptr addrspace(5) [[ADDR]], align 4
+; GFX1200-NEXT:    ret i32 [[TMP1]]
+;
+; GFX1250-LABEL: define i32 @system_atomicrmw_add_acq_rel_i32(
+; GFX1250-SAME: ptr addrspace(5) [[ADDR:%.*]], i32 [[IN:%.*]]) #[[ATTR0]] {
+; GFX1250-NEXT:    [[SCRATCH_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ADDR]] to ptr
+; GFX1250-NEXT:    [[VAL:%.*]] = atomicrmw volatile xchg ptr [[SCRATCH_ASCAST]], i32 [[IN]] acq_rel, align 4
+; GFX1250-NEXT:    ret i32 [[VAL]]
+;
+  %val = atomicrmw volatile xchg ptr addrspace(5) %addr, i32 %in acq_rel
+  ret i32 %val
+}
----------------
arsenm wrote:

Can you test a few more atomicrmws, especially some FP atomics? Also test 16-bit and 64-bit cases.

https://github.com/llvm/llvm-project/pull/154710


More information about the llvm-branch-commits mailing list