[llvm] 55395f5 - [AMDGPU] Remove `nosync` from image atomic intrinsics. (#76814)

Wed Jan 3 23:22:09 PST 2024

Author: sstipanovic
Date: 2024-01-04T08:22:05+01:00
New Revision: 55395f5c8375d3fce1ccbf0ab75f3539c56d61c7

URL: https://github.com/llvm/llvm-project/commit/55395f5c8375d3fce1ccbf0ab75f3539c56d61c7
DIFF: https://github.com/llvm/llvm-project/commit/55395f5c8375d3fce1ccbf0ab75f3539c56d61c7.diff

LOG: [AMDGPU] Remove `nosync` from image atomic intrinsics. (#76814)

Remove `nosync` as discussed in
https://github.com/llvm/llvm-project/pull/73613

Added: 
    llvm/test/CodeGen/AMDGPU/image-atomic-attributes.ll

Modified: 
    llvm/include/llvm/IR/IntrinsicsAMDGPU.td

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index 531b1112354526..e5596258847f9f 100644

--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -837,7 +837,7 @@ class AMDGPUImageDimIntrinsicEval<AMDGPUDimProfile P_> {
 // All dimension-aware intrinsics are derived from this class.
 class AMDGPUImageDimIntrinsic<AMDGPUDimProfile P_,
                               list<IntrinsicProperty> props,
-                              list<SDNodeProperty> sdnodeprops> : DefaultAttrsIntrinsic<
+                              list<SDNodeProperty> sdnodeprops> : Intrinsic<
     P_.RetTypes,        // vdata(VGPR) -- for load/atomic-with-return
     !listconcat(
       !foreach(arg, P_.DataArgs, arg.Type),      // vdata(VGPR) -- for store/atomic
@@ -851,11 +851,12 @@ class AMDGPUImageDimIntrinsic<AMDGPUDimProfile P_,
                                                  //   gfx12+ imm: bits [0-2] = th, bits [3-4] = scope)
                                                  // TODO-GFX12: Update all other cachepolicy descriptions.
 
-     !listconcat(props,
+     !listconcat(props, [IntrNoCallback, IntrNoFree, IntrWillReturn],
           !if(P_.IsAtomic, [], [ImmArg<ArgIndex<AMDGPUImageDimIntrinsicEval<P_>.DmaskArgIndex>>]),
           !if(P_.IsSample, [ImmArg<ArgIndex<AMDGPUImageDimIntrinsicEval<P_>.UnormArgIndex>>], []),
           [ImmArg<ArgIndex<AMDGPUImageDimIntrinsicEval<P_>.TexFailCtrlArgIndex>>,
-           ImmArg<ArgIndex<AMDGPUImageDimIntrinsicEval<P_>.CachePolicyArgIndex>>]),
+           ImmArg<ArgIndex<AMDGPUImageDimIntrinsicEval<P_>.CachePolicyArgIndex>>],
+          !if(P_.IsAtomic, [], [IntrNoSync])),
 
 
       "", sdnodeprops>,

diff  --git a/llvm/test/CodeGen/AMDGPU/image-atomic-attributes.ll b/llvm/test/CodeGen/AMDGPU/image-atomic-attributes.ll
new file mode 100644
index 00000000000000..dd60d9d702716d
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/image-atomic-attributes.ll
@@ -0,0 +1,356 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes --check-globals all --version 4
+; RUN: opt -S -mtriple=amdgcn-unknown-unknown < %s | FileCheck -check-prefixes=CHECK %s
+
+define amdgpu_ps float @atomic_swap_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
+; CHECK-LABEL: define amdgpu_ps float @atomic_swap_1d(
+; CHECK-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[DATA:%.*]], i32 [[S:%.*]]) {
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    [[V:%.*]] = call i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i32(i32 [[DATA]], i32 [[S]], <8 x i32> [[RSRC]], i32 0, i32 0)
+; CHECK-NEXT:    [[OUT:%.*]] = bitcast i32 [[V]] to float
+; CHECK-NEXT:    ret float [[OUT]]
+;
+main_body:
+  %v = call i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+  %out = bitcast i32 %v to float
+  ret float %out
+}
+
+define amdgpu_ps <2 x float> @atomic_swap_1d_i64(<8 x i32> inreg %rsrc, i64 %data, i32 %s) {
+; CHECK-LABEL: define amdgpu_ps <2 x float> @atomic_swap_1d_i64(
+; CHECK-SAME: <8 x i32> inreg [[RSRC:%.*]], i64 [[DATA:%.*]], i32 [[S:%.*]]) {
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    [[V:%.*]] = call i64 @llvm.amdgcn.image.atomic.swap.1d.i64.i32(i64 [[DATA]], i32 [[S]], <8 x i32> [[RSRC]], i32 0, i32 0)
+; CHECK-NEXT:    [[OUT:%.*]] = bitcast i64 [[V]] to <2 x float>
+; CHECK-NEXT:    ret <2 x float> [[OUT]]
+;
+main_body:
+  %v = call i64 @llvm.amdgcn.image.atomic.swap.1d.i64.i32(i64 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+  %out = bitcast i64 %v to <2 x float>
+  ret <2 x float> %out
+}
+
+define amdgpu_ps float @atomic_add_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
+; CHECK-LABEL: define amdgpu_ps float @atomic_add_1d(
+; CHECK-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[DATA:%.*]], i32 [[S:%.*]]) {
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    [[V:%.*]] = call i32 @llvm.amdgcn.image.atomic.add.1d.i32.i32(i32 [[DATA]], i32 [[S]], <8 x i32> [[RSRC]], i32 0, i32 0)
+; CHECK-NEXT:    [[OUT:%.*]] = bitcast i32 [[V]] to float
+; CHECK-NEXT:    ret float [[OUT]]
+;
+main_body:
+  %v = call i32 @llvm.amdgcn.image.atomic.add.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+  %out = bitcast i32 %v to float
+  ret float %out
+}
+
+define amdgpu_ps float @atomic_sub_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
+; CHECK-LABEL: define amdgpu_ps float @atomic_sub_1d(
+; CHECK-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[DATA:%.*]], i32 [[S:%.*]]) {
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    [[V:%.*]] = call i32 @llvm.amdgcn.image.atomic.sub.1d.i32.i32(i32 [[DATA]], i32 [[S]], <8 x i32> [[RSRC]], i32 0, i32 0)
+; CHECK-NEXT:    [[OUT:%.*]] = bitcast i32 [[V]] to float
+; CHECK-NEXT:    ret float [[OUT]]
+;
+main_body:
+  %v = call i32 @llvm.amdgcn.image.atomic.sub.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+  %out = bitcast i32 %v to float
+  ret float %out
+}
+
+define amdgpu_ps float @atomic_smin_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
+; CHECK-LABEL: define amdgpu_ps float @atomic_smin_1d(
+; CHECK-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[DATA:%.*]], i32 [[S:%.*]]) {
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    [[V:%.*]] = call i32 @llvm.amdgcn.image.atomic.smin.1d.i32.i32(i32 [[DATA]], i32 [[S]], <8 x i32> [[RSRC]], i32 0, i32 0)
+; CHECK-NEXT:    [[OUT:%.*]] = bitcast i32 [[V]] to float
+; CHECK-NEXT:    ret float [[OUT]]
+;
+main_body:
+  %v = call i32 @llvm.amdgcn.image.atomic.smin.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+  %out = bitcast i32 %v to float
+  ret float %out
+}
+
+define amdgpu_ps float @atomic_umin_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
+; CHECK-LABEL: define amdgpu_ps float @atomic_umin_1d(
+; CHECK-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[DATA:%.*]], i32 [[S:%.*]]) {
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    [[V:%.*]] = call i32 @llvm.amdgcn.image.atomic.umin.1d.i32.i32(i32 [[DATA]], i32 [[S]], <8 x i32> [[RSRC]], i32 0, i32 0)
+; CHECK-NEXT:    [[OUT:%.*]] = bitcast i32 [[V]] to float
+; CHECK-NEXT:    ret float [[OUT]]
+;
+main_body:
+  %v = call i32 @llvm.amdgcn.image.atomic.umin.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+  %out = bitcast i32 %v to float
+  ret float %out
+}
+
+define amdgpu_ps float @atomic_smax_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
+; CHECK-LABEL: define amdgpu_ps float @atomic_smax_1d(
+; CHECK-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[DATA:%.*]], i32 [[S:%.*]]) {
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    [[V:%.*]] = call i32 @llvm.amdgcn.image.atomic.smax.1d.i32.i32(i32 [[DATA]], i32 [[S]], <8 x i32> [[RSRC]], i32 0, i32 0)
+; CHECK-NEXT:    [[OUT:%.*]] = bitcast i32 [[V]] to float
+; CHECK-NEXT:    ret float [[OUT]]
+;
+main_body:
+  %v = call i32 @llvm.amdgcn.image.atomic.smax.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+  %out = bitcast i32 %v to float
+  ret float %out
+}
+
+define amdgpu_ps float @atomic_umax_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
+; CHECK-LABEL: define amdgpu_ps float @atomic_umax_1d(
+; CHECK-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[DATA:%.*]], i32 [[S:%.*]]) {
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    [[V:%.*]] = call i32 @llvm.amdgcn.image.atomic.umax.1d.i32.i32(i32 [[DATA]], i32 [[S]], <8 x i32> [[RSRC]], i32 0, i32 0)
+; CHECK-NEXT:    [[OUT:%.*]] = bitcast i32 [[V]] to float
+; CHECK-NEXT:    ret float [[OUT]]
+;
+main_body:
+  %v = call i32 @llvm.amdgcn.image.atomic.umax.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+  %out = bitcast i32 %v to float
+  ret float %out
+}
+
+define amdgpu_ps float @atomic_and_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
+; CHECK-LABEL: define amdgpu_ps float @atomic_and_1d(
+; CHECK-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[DATA:%.*]], i32 [[S:%.*]]) {
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    [[V:%.*]] = call i32 @llvm.amdgcn.image.atomic.and.1d.i32.i32(i32 [[DATA]], i32 [[S]], <8 x i32> [[RSRC]], i32 0, i32 0)
+; CHECK-NEXT:    [[OUT:%.*]] = bitcast i32 [[V]] to float
+; CHECK-NEXT:    ret float [[OUT]]
+;
+main_body:
+  %v = call i32 @llvm.amdgcn.image.atomic.and.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+  %out = bitcast i32 %v to float
+  ret float %out
+}
+
+define amdgpu_ps float @atomic_or_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
+; CHECK-LABEL: define amdgpu_ps float @atomic_or_1d(
+; CHECK-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[DATA:%.*]], i32 [[S:%.*]]) {
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    [[V:%.*]] = call i32 @llvm.amdgcn.image.atomic.or.1d.i32.i32(i32 [[DATA]], i32 [[S]], <8 x i32> [[RSRC]], i32 0, i32 0)
+; CHECK-NEXT:    [[OUT:%.*]] = bitcast i32 [[V]] to float
+; CHECK-NEXT:    ret float [[OUT]]
+;
+main_body:
+  %v = call i32 @llvm.amdgcn.image.atomic.or.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+  %out = bitcast i32 %v to float
+  ret float %out
+}
+
+define amdgpu_ps float @atomic_xor_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
+; CHECK-LABEL: define amdgpu_ps float @atomic_xor_1d(
+; CHECK-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[DATA:%.*]], i32 [[S:%.*]]) {
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    [[V:%.*]] = call i32 @llvm.amdgcn.image.atomic.xor.1d.i32.i32(i32 [[DATA]], i32 [[S]], <8 x i32> [[RSRC]], i32 0, i32 0)
+; CHECK-NEXT:    [[OUT:%.*]] = bitcast i32 [[V]] to float
+; CHECK-NEXT:    ret float [[OUT]]
+;
+main_body:
+  %v = call i32 @llvm.amdgcn.image.atomic.xor.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+  %out = bitcast i32 %v to float
+  ret float %out
+}
+
+define amdgpu_ps float @atomic_inc_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
+; CHECK-LABEL: define amdgpu_ps float @atomic_inc_1d(
+; CHECK-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[DATA:%.*]], i32 [[S:%.*]]) {
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    [[V:%.*]] = call i32 @llvm.amdgcn.image.atomic.inc.1d.i32.i32(i32 [[DATA]], i32 [[S]], <8 x i32> [[RSRC]], i32 0, i32 0)
+; CHECK-NEXT:    [[OUT:%.*]] = bitcast i32 [[V]] to float
+; CHECK-NEXT:    ret float [[OUT]]
+;
+main_body:
+  %v = call i32 @llvm.amdgcn.image.atomic.inc.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+  %out = bitcast i32 %v to float
+  ret float %out
+}
+
+define amdgpu_ps float @atomic_dec_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
+; CHECK-LABEL: define amdgpu_ps float @atomic_dec_1d(
+; CHECK-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[DATA:%.*]], i32 [[S:%.*]]) {
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    [[V:%.*]] = call i32 @llvm.amdgcn.image.atomic.dec.1d.i32.i32(i32 [[DATA]], i32 [[S]], <8 x i32> [[RSRC]], i32 0, i32 0)
+; CHECK-NEXT:    [[OUT:%.*]] = bitcast i32 [[V]] to float
+; CHECK-NEXT:    ret float [[OUT]]
+;
+main_body:
+  %v = call i32 @llvm.amdgcn.image.atomic.dec.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+  %out = bitcast i32 %v to float
+  ret float %out
+}
+
+define amdgpu_ps float @atomic_cmpswap_1d(<8 x i32> inreg %rsrc, i32 %cmp, i32 %swap, i32 %s) {
+; CHECK-LABEL: define amdgpu_ps float @atomic_cmpswap_1d(
+; CHECK-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[CMP:%.*]], i32 [[SWAP:%.*]], i32 [[S:%.*]]) {
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    [[V:%.*]] = call i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i32(i32 [[CMP]], i32 [[SWAP]], i32 [[S]], <8 x i32> [[RSRC]], i32 0, i32 0)
+; CHECK-NEXT:    [[OUT:%.*]] = bitcast i32 [[V]] to float
+; CHECK-NEXT:    ret float [[OUT]]
+;
+main_body:
+  %v = call i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i32(i32 %cmp, i32 %swap, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+  %out = bitcast i32 %v to float
+  ret float %out
+}
+
+define amdgpu_ps <2 x float> @atomic_cmpswap_1d_64(<8 x i32> inreg %rsrc, i64 %cmp, i64 %swap, i32 %s) {
+; CHECK-LABEL: define amdgpu_ps <2 x float> @atomic_cmpswap_1d_64(
+; CHECK-SAME: <8 x i32> inreg [[RSRC:%.*]], i64 [[CMP:%.*]], i64 [[SWAP:%.*]], i32 [[S:%.*]]) {
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    [[V:%.*]] = call i64 @llvm.amdgcn.image.atomic.cmpswap.1d.i64.i32(i64 [[CMP]], i64 [[SWAP]], i32 [[S]], <8 x i32> [[RSRC]], i32 0, i32 0)
+; CHECK-NEXT:    [[OUT:%.*]] = bitcast i64 [[V]] to <2 x float>
+; CHECK-NEXT:    ret <2 x float> [[OUT]]
+;
+main_body:
+  %v = call i64 @llvm.amdgcn.image.atomic.cmpswap.1d.i64.i32(i64 %cmp, i64 %swap, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+  %out = bitcast i64 %v to <2 x float>
+  ret <2 x float> %out
+}
+
+define amdgpu_ps float @atomic_add_2d(<8 x i32> inreg %rsrc, i32 %data, i32 %s, i32 %t) {
+; CHECK-LABEL: define amdgpu_ps float @atomic_add_2d(
+; CHECK-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[DATA:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) {
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    [[V:%.*]] = call i32 @llvm.amdgcn.image.atomic.add.2d.i32.i32(i32 [[DATA]], i32 [[S]], i32 [[T]], <8 x i32> [[RSRC]], i32 0, i32 0)
+; CHECK-NEXT:    [[OUT:%.*]] = bitcast i32 [[V]] to float
+; CHECK-NEXT:    ret float [[OUT]]
+;
+main_body:
+  %v = call i32 @llvm.amdgcn.image.atomic.add.2d.i32.i32(i32 %data, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
+  %out = bitcast i32 %v to float
+  ret float %out
+}
+
+define amdgpu_ps float @atomic_add_3d(<8 x i32> inreg %rsrc, i32 %data, i32 %s, i32 %t, i32 %r) {
+; CHECK-LABEL: define amdgpu_ps float @atomic_add_3d(
+; CHECK-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[DATA:%.*]], i32 [[S:%.*]], i32 [[T:%.*]], i32 [[R:%.*]]) {
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    [[V:%.*]] = call i32 @llvm.amdgcn.image.atomic.add.3d.i32.i32(i32 [[DATA]], i32 [[S]], i32 [[T]], i32 [[R]], <8 x i32> [[RSRC]], i32 0, i32 0)
+; CHECK-NEXT:    [[OUT:%.*]] = bitcast i32 [[V]] to float
+; CHECK-NEXT:    ret float [[OUT]]
+;
+main_body:
+  %v = call i32 @llvm.amdgcn.image.atomic.add.3d.i32.i32(i32 %data, i32 %s, i32 %t, i32 %r, <8 x i32> %rsrc, i32 0, i32 0)
+  %out = bitcast i32 %v to float
+  ret float %out
+}
+
+define amdgpu_ps float @atomic_add_cube(<8 x i32> inreg %rsrc, i32 %data, i32 %s, i32 %t, i32 %face) {
+; CHECK-LABEL: define amdgpu_ps float @atomic_add_cube(
+; CHECK-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[DATA:%.*]], i32 [[S:%.*]], i32 [[T:%.*]], i32 [[FACE:%.*]]) {
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    [[V:%.*]] = call i32 @llvm.amdgcn.image.atomic.add.cube.i32.i32(i32 [[DATA]], i32 [[S]], i32 [[T]], i32 [[FACE]], <8 x i32> [[RSRC]], i32 0, i32 0)
+; CHECK-NEXT:    [[OUT:%.*]] = bitcast i32 [[V]] to float
+; CHECK-NEXT:    ret float [[OUT]]
+;
+main_body:
+  %v = call i32 @llvm.amdgcn.image.atomic.add.cube.i32.i32(i32 %data, i32 %s, i32 %t, i32 %face, <8 x i32> %rsrc, i32 0, i32 0)
+  %out = bitcast i32 %v to float
+  ret float %out
+}
+
+define amdgpu_ps float @atomic_add_1darray(<8 x i32> inreg %rsrc, i32 %data, i32 %s, i32 %slice) {
+; CHECK-LABEL: define amdgpu_ps float @atomic_add_1darray(
+; CHECK-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[DATA:%.*]], i32 [[S:%.*]], i32 [[SLICE:%.*]]) {
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    [[V:%.*]] = call i32 @llvm.amdgcn.image.atomic.add.1darray.i32.i32(i32 [[DATA]], i32 [[S]], i32 [[SLICE]], <8 x i32> [[RSRC]], i32 0, i32 0)
+; CHECK-NEXT:    [[OUT:%.*]] = bitcast i32 [[V]] to float
+; CHECK-NEXT:    ret float [[OUT]]
+;
+main_body:
+  %v = call i32 @llvm.amdgcn.image.atomic.add.1darray.i32.i32(i32 %data, i32 %s, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0)
+  %out = bitcast i32 %v to float
+  ret float %out
+}
+
+define amdgpu_ps float @atomic_add_2darray(<8 x i32> inreg %rsrc, i32 %data, i32 %s, i32 %t, i32 %slice) {
+; CHECK-LABEL: define amdgpu_ps float @atomic_add_2darray(
+; CHECK-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[DATA:%.*]], i32 [[S:%.*]], i32 [[T:%.*]], i32 [[SLICE:%.*]]) {
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    [[V:%.*]] = call i32 @llvm.amdgcn.image.atomic.add.2darray.i32.i32(i32 [[DATA]], i32 [[S]], i32 [[T]], i32 [[SLICE]], <8 x i32> [[RSRC]], i32 0, i32 0)
+; CHECK-NEXT:    [[OUT:%.*]] = bitcast i32 [[V]] to float
+; CHECK-NEXT:    ret float [[OUT]]
+;
+main_body:
+  %v = call i32 @llvm.amdgcn.image.atomic.add.2darray.i32.i32(i32 %data, i32 %s, i32 %t, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0)
+  %out = bitcast i32 %v to float
+  ret float %out
+}
+
+define amdgpu_ps float @atomic_add_2dmsaa(<8 x i32> inreg %rsrc, i32 %data, i32 %s, i32 %t, i32 %fragid) {
+; CHECK-LABEL: define amdgpu_ps float @atomic_add_2dmsaa(
+; CHECK-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[DATA:%.*]], i32 [[S:%.*]], i32 [[T:%.*]], i32 [[FRAGID:%.*]]) {
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    [[V:%.*]] = call i32 @llvm.amdgcn.image.atomic.add.2dmsaa.i32.i32(i32 [[DATA]], i32 [[S]], i32 [[T]], i32 [[FRAGID]], <8 x i32> [[RSRC]], i32 0, i32 0)
+; CHECK-NEXT:    [[OUT:%.*]] = bitcast i32 [[V]] to float
+; CHECK-NEXT:    ret float [[OUT]]
+;
+main_body:
+  %v = call i32 @llvm.amdgcn.image.atomic.add.2dmsaa.i32.i32(i32 %data, i32 %s, i32 %t, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
+  %out = bitcast i32 %v to float
+  ret float %out
+}
+
+define amdgpu_ps float @atomic_add_2darraymsaa(<8 x i32> inreg %rsrc, i32 %data, i32 %s, i32 %t, i32 %slice, i32 %fragid) {
+; CHECK-LABEL: define amdgpu_ps float @atomic_add_2darraymsaa(
+; CHECK-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[DATA:%.*]], i32 [[S:%.*]], i32 [[T:%.*]], i32 [[SLICE:%.*]], i32 [[FRAGID:%.*]]) {
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    [[V:%.*]] = call i32 @llvm.amdgcn.image.atomic.add.2darraymsaa.i32.i32(i32 [[DATA]], i32 [[S]], i32 [[T]], i32 [[SLICE]], i32 [[FRAGID]], <8 x i32> [[RSRC]], i32 0, i32 0)
+; CHECK-NEXT:    [[OUT:%.*]] = bitcast i32 [[V]] to float
+; CHECK-NEXT:    ret float [[OUT]]
+;
+main_body:
+  %v = call i32 @llvm.amdgcn.image.atomic.add.2darraymsaa.i32.i32(i32 %data, i32 %s, i32 %t, i32 %slice, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
+  %out = bitcast i32 %v to float
+  ret float %out
+}
+
+define amdgpu_ps float @atomic_add_1d_slc(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
+; CHECK-LABEL: define amdgpu_ps float @atomic_add_1d_slc(
+; CHECK-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[DATA:%.*]], i32 [[S:%.*]]) {
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    [[V:%.*]] = call i32 @llvm.amdgcn.image.atomic.add.1d.i32.i32(i32 [[DATA]], i32 [[S]], <8 x i32> [[RSRC]], i32 0, i32 2)
+; CHECK-NEXT:    [[OUT:%.*]] = bitcast i32 [[V]] to float
+; CHECK-NEXT:    ret float [[OUT]]
+;
+main_body:
+  %v = call i32 @llvm.amdgcn.image.atomic.add.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 2)
+  %out = bitcast i32 %v to float
+  ret float %out
+}
+
+declare i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0
+declare i32 @llvm.amdgcn.image.atomic.add.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0
+declare i32 @llvm.amdgcn.image.atomic.sub.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0
+declare i32 @llvm.amdgcn.image.atomic.smin.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0
+declare i32 @llvm.amdgcn.image.atomic.umin.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0
+declare i32 @llvm.amdgcn.image.atomic.smax.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0
+declare i32 @llvm.amdgcn.image.atomic.umax.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0
+declare i32 @llvm.amdgcn.image.atomic.and.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0
+declare i32 @llvm.amdgcn.image.atomic.or.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0
+declare i32 @llvm.amdgcn.image.atomic.xor.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0
+declare i32 @llvm.amdgcn.image.atomic.inc.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0
+declare i32 @llvm.amdgcn.image.atomic.dec.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0
+declare i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i32(i32, i32, i32, <8 x i32>, i32, i32) #0
+
+declare i64 @llvm.amdgcn.image.atomic.swap.1d.i64.i32(i64, i32, <8 x i32>, i32, i32) #0
+declare i64 @llvm.amdgcn.image.atomic.cmpswap.1d.i64.i32(i64, i64, i32, <8 x i32>, i32, i32) #0
+
+declare i32 @llvm.amdgcn.image.atomic.add.2d.i32.i32(i32, i32, i32, <8 x i32>, i32, i32) #0
+declare i32 @llvm.amdgcn.image.atomic.add.3d.i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #0
+declare i32 @llvm.amdgcn.image.atomic.add.cube.i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #0
+declare i32 @llvm.amdgcn.image.atomic.add.1darray.i32.i32(i32, i32, i32, <8 x i32>, i32, i32) #0
+declare i32 @llvm.amdgcn.image.atomic.add.2darray.i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #0
+declare i32 @llvm.amdgcn.image.atomic.add.2dmsaa.i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #0
+declare i32 @llvm.amdgcn.image.atomic.add.2darraymsaa.i32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readonly }
+attributes #2 = { nounwind readnone }
+;.
+; CHECK: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nounwind willreturn }
+;.