[llvm] 3e9f2af - [AMDGPU] Update atomic tests. NFC.
Stanislav Mekhanoshin via llvm-commits
llvm-commits at lists.llvm.org
Wed Feb 8 12:55:51 PST 2023
Author: Stanislav Mekhanoshin
Date: 2023-02-08T12:55:34-08:00
New Revision: 3e9f2af27ae7a2615ca360fd4785c0bfebb1b596
URL: https://github.com/llvm/llvm-project/commit/3e9f2af27ae7a2615ca360fd4785c0bfebb1b596
DIFF: https://github.com/llvm/llvm-project/commit/3e9f2af27ae7a2615ca360fd4785c0bfebb1b596.diff
LOG: [AMDGPU] Update atomic tests. NFC.
This is to precommit tests before future patch.
Added:
Modified:
llvm/test/CodeGen/AMDGPU/fp64-atomics-gfx90a.ll
llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-rmw-fadd-flat-specialization.ll
llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-rmw-fadd.ll
Removed:
################################################################################
diff --git a/llvm/test/CodeGen/AMDGPU/fp64-atomics-gfx90a.ll b/llvm/test/CodeGen/AMDGPU/fp64-atomics-gfx90a.ll
index 078387b008a0c..ae62666a6878e 100644
--- a/llvm/test/CodeGen/AMDGPU/fp64-atomics-gfx90a.ll
+++ b/llvm/test/CodeGen/AMDGPU/fp64-atomics-gfx90a.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -march=amdgcn -mcpu=gfx90a -verify-machineinstrs | FileCheck %s -check-prefix=GFX90A
+; RUN: llc < %s -march=amdgcn -mcpu=gfx940 -verify-machineinstrs | FileCheck %s -check-prefix=GFX940
declare double @llvm.amdgcn.buffer.atomic.fadd.f64(double, <4 x i32>, i32, i32, i1)
declare double @llvm.amdgcn.struct.buffer.atomic.fadd.f64(double, <4 x i32>, i32, i32, i32, i32 immarg)
@@ -28,6 +29,17 @@ define amdgpu_kernel void @buffer_atomic_add_noret_f64(<4 x i32> inreg %rsrc, do
; GFX90A-NEXT: v_mov_b32_e32 v2, s8
; GFX90A-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[4:7], 0 idxen
; GFX90A-NEXT: s_endpgm
+;
+; GFX940-LABEL: buffer_atomic_add_noret_f64:
+; GFX940: ; %bb.0: ; %main_body
+; GFX940-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x34
+; GFX940-NEXT: s_load_dword s8, s[0:1], 0x3c
+; GFX940-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
+; GFX940-NEXT: s_waitcnt lgkmcnt(0)
+; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[2:3]
+; GFX940-NEXT: v_mov_b32_e32 v2, s8
+; GFX940-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[4:7], 0 idxen
+; GFX940-NEXT: s_endpgm
main_body:
%ret = call double @llvm.amdgcn.buffer.atomic.fadd.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i1 0)
ret void
@@ -40,6 +52,13 @@ define amdgpu_ps void @buffer_atomic_add_rtn_f64(<4 x i32> inreg %rsrc, double %
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
; GFX90A-NEXT: s_endpgm
+;
+; GFX940-LABEL: buffer_atomic_add_rtn_f64:
+; GFX940: ; %bb.0: ; %main_body
+; GFX940-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 idxen sc0
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
+; GFX940-NEXT: s_endpgm
main_body:
%ret = call double @llvm.amdgcn.buffer.atomic.fadd.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i1 0)
store double %ret, ptr undef
@@ -61,6 +80,21 @@ define amdgpu_kernel void @buffer_atomic_add_rtn_f64_off4_slc(<4 x i32> inreg %r
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9]
; GFX90A-NEXT: s_endpgm
+;
+; GFX940-LABEL: buffer_atomic_add_rtn_f64_off4_slc:
+; GFX940: ; %bb.0: ; %main_body
+; GFX940-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x34
+; GFX940-NEXT: s_load_dword s10, s[0:1], 0x3c
+; GFX940-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
+; GFX940-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x44
+; GFX940-NEXT: s_waitcnt lgkmcnt(0)
+; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[2:3]
+; GFX940-NEXT: v_mov_b32_e32 v2, s10
+; GFX940-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[4:7], 0 idxen offset:4 sc0 nt
+; GFX940-NEXT: v_mov_b32_e32 v2, 0
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9]
+; GFX940-NEXT: s_endpgm
main_body:
%ret = call double @llvm.amdgcn.buffer.atomic.fadd.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i1 1)
store double %ret, ptr addrspace(1) %out, align 8
@@ -78,6 +112,17 @@ define amdgpu_kernel void @raw_buffer_atomic_add_noret_f64(<4 x i32> inreg %rsrc
; GFX90A-NEXT: v_mov_b32_e32 v2, s8
; GFX90A-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[4:7], 0 offen
; GFX90A-NEXT: s_endpgm
+;
+; GFX940-LABEL: raw_buffer_atomic_add_noret_f64:
+; GFX940: ; %bb.0: ; %main_body
+; GFX940-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x34
+; GFX940-NEXT: s_load_dword s8, s[0:1], 0x3c
+; GFX940-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
+; GFX940-NEXT: s_waitcnt lgkmcnt(0)
+; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[2:3]
+; GFX940-NEXT: v_mov_b32_e32 v2, s8
+; GFX940-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[4:7], 0 offen
+; GFX940-NEXT: s_endpgm
main_body:
%ret = call double @llvm.amdgcn.raw.buffer.atomic.fadd.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0)
ret void
@@ -90,6 +135,13 @@ define amdgpu_ps void @raw_buffer_atomic_add_rtn_f64(<4 x i32> inreg %rsrc, doub
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
; GFX90A-NEXT: s_endpgm
+;
+; GFX940-LABEL: raw_buffer_atomic_add_rtn_f64:
+; GFX940: ; %bb.0: ; %main_body
+; GFX940-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 offen sc0
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
+; GFX940-NEXT: s_endpgm
main_body:
%ret = call double @llvm.amdgcn.raw.buffer.atomic.fadd.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0)
store double %ret, ptr undef
@@ -111,6 +163,21 @@ define amdgpu_kernel void @raw_buffer_atomic_add_rtn_f64_off4_slc(<4 x i32> inre
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9]
; GFX90A-NEXT: s_endpgm
+;
+; GFX940-LABEL: raw_buffer_atomic_add_rtn_f64_off4_slc:
+; GFX940: ; %bb.0: ; %main_body
+; GFX940-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x34
+; GFX940-NEXT: s_load_dword s10, s[0:1], 0x3c
+; GFX940-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
+; GFX940-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x44
+; GFX940-NEXT: s_waitcnt lgkmcnt(0)
+; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[2:3]
+; GFX940-NEXT: v_mov_b32_e32 v2, s10
+; GFX940-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[4:7], 4 offen sc0 nt
+; GFX940-NEXT: v_mov_b32_e32 v2, 0
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9]
+; GFX940-NEXT: s_endpgm
main_body:
%ret = call double @llvm.amdgcn.raw.buffer.atomic.fadd.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i32 2)
store double %ret, ptr addrspace(1) %out, align 8
@@ -128,6 +195,17 @@ define amdgpu_kernel void @struct_buffer_atomic_add_noret_f64(<4 x i32> inreg %r
; GFX90A-NEXT: v_mov_b32_e32 v2, s8
; GFX90A-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[4:7], 0 idxen
; GFX90A-NEXT: s_endpgm
+;
+; GFX940-LABEL: struct_buffer_atomic_add_noret_f64:
+; GFX940: ; %bb.0: ; %main_body
+; GFX940-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x34
+; GFX940-NEXT: s_load_dword s8, s[0:1], 0x3c
+; GFX940-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
+; GFX940-NEXT: s_waitcnt lgkmcnt(0)
+; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[2:3]
+; GFX940-NEXT: v_mov_b32_e32 v2, s8
+; GFX940-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[4:7], 0 idxen
+; GFX940-NEXT: s_endpgm
main_body:
%ret = call double @llvm.amdgcn.struct.buffer.atomic.fadd.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
ret void
@@ -140,6 +218,13 @@ define amdgpu_ps void @struct_buffer_atomic_add_rtn_f64(<4 x i32> inreg %rsrc, d
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
; GFX90A-NEXT: s_endpgm
+;
+; GFX940-LABEL: struct_buffer_atomic_add_rtn_f64:
+; GFX940: ; %bb.0: ; %main_body
+; GFX940-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 idxen sc0
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
+; GFX940-NEXT: s_endpgm
main_body:
%ret = call double @llvm.amdgcn.struct.buffer.atomic.fadd.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
store double %ret, ptr undef
@@ -161,6 +246,21 @@ define amdgpu_kernel void @struct_buffer_atomic_add_rtn_f64_off4_slc(<4 x i32> i
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9]
; GFX90A-NEXT: s_endpgm
+;
+; GFX940-LABEL: struct_buffer_atomic_add_rtn_f64_off4_slc:
+; GFX940: ; %bb.0: ; %main_body
+; GFX940-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x34
+; GFX940-NEXT: s_load_dword s10, s[0:1], 0x3c
+; GFX940-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
+; GFX940-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x44
+; GFX940-NEXT: s_waitcnt lgkmcnt(0)
+; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[2:3]
+; GFX940-NEXT: v_mov_b32_e32 v2, s10
+; GFX940-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[4:7], 0 idxen offset:4 sc0 nt
+; GFX940-NEXT: v_mov_b32_e32 v2, 0
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9]
+; GFX940-NEXT: s_endpgm
main_body:
%ret = call double @llvm.amdgcn.struct.buffer.atomic.fadd.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i32 0, i32 2)
store double %ret, ptr addrspace(1) %out, align 8
@@ -178,6 +278,17 @@ define amdgpu_kernel void @raw_buffer_atomic_min_noret_f64(<4 x i32> inreg %rsrc
; GFX90A-NEXT: v_mov_b32_e32 v2, s8
; GFX90A-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[4:7], 0 offen
; GFX90A-NEXT: s_endpgm
+;
+; GFX940-LABEL: raw_buffer_atomic_min_noret_f64:
+; GFX940: ; %bb.0: ; %main_body
+; GFX940-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x34
+; GFX940-NEXT: s_load_dword s8, s[0:1], 0x3c
+; GFX940-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
+; GFX940-NEXT: s_waitcnt lgkmcnt(0)
+; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[2:3]
+; GFX940-NEXT: v_mov_b32_e32 v2, s8
+; GFX940-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[4:7], 0 offen
+; GFX940-NEXT: s_endpgm
main_body:
%ret = call double @llvm.amdgcn.raw.buffer.atomic.fmin.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0)
ret void
@@ -190,6 +301,13 @@ define amdgpu_ps void @raw_buffer_atomic_min_rtn_f64(<4 x i32> inreg %rsrc, doub
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
; GFX90A-NEXT: s_endpgm
+;
+; GFX940-LABEL: raw_buffer_atomic_min_rtn_f64:
+; GFX940: ; %bb.0: ; %main_body
+; GFX940-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 offen sc0
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
+; GFX940-NEXT: s_endpgm
main_body:
%ret = call double @llvm.amdgcn.raw.buffer.atomic.fmin.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0)
store double %ret, ptr undef
@@ -211,6 +329,21 @@ define amdgpu_kernel void @raw_buffer_atomic_min_rtn_f64_off4_slc(<4 x i32> inre
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9]
; GFX90A-NEXT: s_endpgm
+;
+; GFX940-LABEL: raw_buffer_atomic_min_rtn_f64_off4_slc:
+; GFX940: ; %bb.0: ; %main_body
+; GFX940-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x34
+; GFX940-NEXT: s_load_dword s10, s[0:1], 0x3c
+; GFX940-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
+; GFX940-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x44
+; GFX940-NEXT: s_waitcnt lgkmcnt(0)
+; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[2:3]
+; GFX940-NEXT: v_mov_b32_e32 v2, s10
+; GFX940-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[4:7], 4 offen sc0 nt
+; GFX940-NEXT: v_mov_b32_e32 v2, 0
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9]
+; GFX940-NEXT: s_endpgm
main_body:
%ret = call double @llvm.amdgcn.raw.buffer.atomic.fmin.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i32 2)
store double %ret, ptr addrspace(1) %out, align 8
@@ -228,6 +361,17 @@ define amdgpu_kernel void @struct_buffer_atomic_min_noret_f64(<4 x i32> inreg %r
; GFX90A-NEXT: v_mov_b32_e32 v2, s8
; GFX90A-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[4:7], 0 idxen
; GFX90A-NEXT: s_endpgm
+;
+; GFX940-LABEL: struct_buffer_atomic_min_noret_f64:
+; GFX940: ; %bb.0: ; %main_body
+; GFX940-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x34
+; GFX940-NEXT: s_load_dword s8, s[0:1], 0x3c
+; GFX940-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
+; GFX940-NEXT: s_waitcnt lgkmcnt(0)
+; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[2:3]
+; GFX940-NEXT: v_mov_b32_e32 v2, s8
+; GFX940-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[4:7], 0 idxen
+; GFX940-NEXT: s_endpgm
main_body:
%ret = call double @llvm.amdgcn.struct.buffer.atomic.fmin.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
ret void
@@ -240,6 +384,13 @@ define amdgpu_ps void @struct_buffer_atomic_min_rtn_f64(<4 x i32> inreg %rsrc, d
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
; GFX90A-NEXT: s_endpgm
+;
+; GFX940-LABEL: struct_buffer_atomic_min_rtn_f64:
+; GFX940: ; %bb.0: ; %main_body
+; GFX940-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 idxen sc0
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
+; GFX940-NEXT: s_endpgm
main_body:
%ret = call double @llvm.amdgcn.struct.buffer.atomic.fmin.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
store double %ret, ptr undef
@@ -261,6 +412,21 @@ define amdgpu_kernel void @struct_buffer_atomic_min_rtn_f64_off4_slc(<4 x i32> i
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9]
; GFX90A-NEXT: s_endpgm
+;
+; GFX940-LABEL: struct_buffer_atomic_min_rtn_f64_off4_slc:
+; GFX940: ; %bb.0: ; %main_body
+; GFX940-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x34
+; GFX940-NEXT: s_load_dword s10, s[0:1], 0x3c
+; GFX940-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
+; GFX940-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x44
+; GFX940-NEXT: s_waitcnt lgkmcnt(0)
+; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[2:3]
+; GFX940-NEXT: v_mov_b32_e32 v2, s10
+; GFX940-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[4:7], 0 idxen offset:4 sc0 nt
+; GFX940-NEXT: v_mov_b32_e32 v2, 0
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9]
+; GFX940-NEXT: s_endpgm
main_body:
%ret = call double @llvm.amdgcn.struct.buffer.atomic.fmin.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i32 0, i32 2)
store double %ret, ptr addrspace(1) %out, align 8
@@ -278,6 +444,17 @@ define amdgpu_kernel void @raw_buffer_atomic_max_noret_f64(<4 x i32> inreg %rsrc
; GFX90A-NEXT: v_mov_b32_e32 v2, s8
; GFX90A-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[4:7], 0 offen
; GFX90A-NEXT: s_endpgm
+;
+; GFX940-LABEL: raw_buffer_atomic_max_noret_f64:
+; GFX940: ; %bb.0: ; %main_body
+; GFX940-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x34
+; GFX940-NEXT: s_load_dword s8, s[0:1], 0x3c
+; GFX940-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
+; GFX940-NEXT: s_waitcnt lgkmcnt(0)
+; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[2:3]
+; GFX940-NEXT: v_mov_b32_e32 v2, s8
+; GFX940-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[4:7], 0 offen
+; GFX940-NEXT: s_endpgm
main_body:
%ret = call double @llvm.amdgcn.raw.buffer.atomic.fmax.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0)
ret void
@@ -290,6 +467,13 @@ define amdgpu_ps void @raw_buffer_atomic_max_rtn_f64(<4 x i32> inreg %rsrc, doub
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
; GFX90A-NEXT: s_endpgm
+;
+; GFX940-LABEL: raw_buffer_atomic_max_rtn_f64:
+; GFX940: ; %bb.0: ; %main_body
+; GFX940-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 offen sc0
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
+; GFX940-NEXT: s_endpgm
main_body:
%ret = call double @llvm.amdgcn.raw.buffer.atomic.fmax.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0)
store double %ret, ptr undef
@@ -311,6 +495,21 @@ define amdgpu_kernel void @raw_buffer_atomic_max_rtn_f64_off4_slc(<4 x i32> inre
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9]
; GFX90A-NEXT: s_endpgm
+;
+; GFX940-LABEL: raw_buffer_atomic_max_rtn_f64_off4_slc:
+; GFX940: ; %bb.0: ; %main_body
+; GFX940-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x34
+; GFX940-NEXT: s_load_dword s10, s[0:1], 0x3c
+; GFX940-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
+; GFX940-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x44
+; GFX940-NEXT: s_waitcnt lgkmcnt(0)
+; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[2:3]
+; GFX940-NEXT: v_mov_b32_e32 v2, s10
+; GFX940-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[4:7], 4 offen sc0 nt
+; GFX940-NEXT: v_mov_b32_e32 v2, 0
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9]
+; GFX940-NEXT: s_endpgm
main_body:
%ret = call double @llvm.amdgcn.raw.buffer.atomic.fmax.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i32 2)
store double %ret, ptr addrspace(1) %out, align 8
@@ -328,6 +527,17 @@ define amdgpu_kernel void @struct_buffer_atomic_max_noret_f64(<4 x i32> inreg %r
; GFX90A-NEXT: v_mov_b32_e32 v2, s8
; GFX90A-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[4:7], 0 idxen
; GFX90A-NEXT: s_endpgm
+;
+; GFX940-LABEL: struct_buffer_atomic_max_noret_f64:
+; GFX940: ; %bb.0: ; %main_body
+; GFX940-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x34
+; GFX940-NEXT: s_load_dword s8, s[0:1], 0x3c
+; GFX940-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
+; GFX940-NEXT: s_waitcnt lgkmcnt(0)
+; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[2:3]
+; GFX940-NEXT: v_mov_b32_e32 v2, s8
+; GFX940-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[4:7], 0 idxen
+; GFX940-NEXT: s_endpgm
main_body:
%ret = call double @llvm.amdgcn.struct.buffer.atomic.fmax.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
ret void
@@ -340,6 +550,13 @@ define amdgpu_ps void @struct_buffer_atomic_max_rtn_f64(<4 x i32> inreg %rsrc, d
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
; GFX90A-NEXT: s_endpgm
+;
+; GFX940-LABEL: struct_buffer_atomic_max_rtn_f64:
+; GFX940: ; %bb.0: ; %main_body
+; GFX940-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 idxen sc0
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
+; GFX940-NEXT: s_endpgm
main_body:
%ret = call double @llvm.amdgcn.struct.buffer.atomic.fmax.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
store double %ret, ptr undef
@@ -361,6 +578,21 @@ define amdgpu_kernel void @struct_buffer_atomic_max_rtn_f64_off4_slc(<4 x i32> i
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9]
; GFX90A-NEXT: s_endpgm
+;
+; GFX940-LABEL: struct_buffer_atomic_max_rtn_f64_off4_slc:
+; GFX940: ; %bb.0: ; %main_body
+; GFX940-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x34
+; GFX940-NEXT: s_load_dword s10, s[0:1], 0x3c
+; GFX940-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
+; GFX940-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x44
+; GFX940-NEXT: s_waitcnt lgkmcnt(0)
+; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[2:3]
+; GFX940-NEXT: v_mov_b32_e32 v2, s10
+; GFX940-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[4:7], 0 idxen offset:4 sc0 nt
+; GFX940-NEXT: v_mov_b32_e32 v2, 0
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9]
+; GFX940-NEXT: s_endpgm
main_body:
%ret = call double @llvm.amdgcn.struct.buffer.atomic.fmax.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i32 0, i32 2)
store double %ret, ptr addrspace(1) %out, align 8
@@ -377,6 +609,16 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret(ptr addrspace(1) %ptr, d
; GFX90A-NEXT: v_mov_b32_e32 v1, s3
; GFX90A-NEXT: global_atomic_add_f64 v2, v[0:1], s[0:1]
; GFX90A-NEXT: s_endpgm
+;
+; GFX940-LABEL: global_atomic_fadd_f64_noret:
+; GFX940: ; %bb.0: ; %main_body
+; GFX940-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GFX940-NEXT: v_mov_b32_e32 v2, 0
+; GFX940-NEXT: s_waitcnt lgkmcnt(0)
+; GFX940-NEXT: v_mov_b32_e32 v0, s2
+; GFX940-NEXT: v_mov_b32_e32 v1, s3
+; GFX940-NEXT: global_atomic_add_f64 v2, v[0:1], s[0:1]
+; GFX940-NEXT: s_endpgm
main_body:
%ret = call double @llvm.amdgcn.global.atomic.fadd.f64.p1.f64(ptr addrspace(1) %ptr, double %data)
ret void
@@ -392,6 +634,16 @@ define amdgpu_kernel void @global_atomic_fmin_f64_noret(ptr addrspace(1) %ptr, d
; GFX90A-NEXT: v_mov_b32_e32 v1, s3
; GFX90A-NEXT: global_atomic_min_f64 v2, v[0:1], s[0:1]
; GFX90A-NEXT: s_endpgm
+;
+; GFX940-LABEL: global_atomic_fmin_f64_noret:
+; GFX940: ; %bb.0: ; %main_body
+; GFX940-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GFX940-NEXT: v_mov_b32_e32 v2, 0
+; GFX940-NEXT: s_waitcnt lgkmcnt(0)
+; GFX940-NEXT: v_mov_b32_e32 v0, s2
+; GFX940-NEXT: v_mov_b32_e32 v1, s3
+; GFX940-NEXT: global_atomic_min_f64 v2, v[0:1], s[0:1]
+; GFX940-NEXT: s_endpgm
main_body:
%ret = call double @llvm.amdgcn.global.atomic.fmin.f64.p1.f64(ptr addrspace(1) %ptr, double %data)
ret void
@@ -407,6 +659,16 @@ define amdgpu_kernel void @global_atomic_fmax_f64_noret(ptr addrspace(1) %ptr, d
; GFX90A-NEXT: v_mov_b32_e32 v1, s3
; GFX90A-NEXT: global_atomic_max_f64 v2, v[0:1], s[0:1]
; GFX90A-NEXT: s_endpgm
+;
+; GFX940-LABEL: global_atomic_fmax_f64_noret:
+; GFX940: ; %bb.0: ; %main_body
+; GFX940-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GFX940-NEXT: v_mov_b32_e32 v2, 0
+; GFX940-NEXT: s_waitcnt lgkmcnt(0)
+; GFX940-NEXT: v_mov_b32_e32 v0, s2
+; GFX940-NEXT: v_mov_b32_e32 v1, s3
+; GFX940-NEXT: global_atomic_max_f64 v2, v[0:1], s[0:1]
+; GFX940-NEXT: s_endpgm
main_body:
%ret = call double @llvm.amdgcn.global.atomic.fmax.f64.p1.f64(ptr addrspace(1) %ptr, double %data)
ret void
@@ -438,6 +700,31 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat(ptr addrspace(1) %pt
; GFX90A-NEXT: s_cbranch_execnz .LBB24_1
; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX90A-NEXT: s_endpgm
+;
+; GFX940-LABEL: global_atomic_fadd_f64_noret_pat:
+; GFX940: ; %bb.0: ; %main_body
+; GFX940-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GFX940-NEXT: s_mov_b64 s[2:3], 0
+; GFX940-NEXT: v_mov_b32_e32 v4, 0
+; GFX940-NEXT: s_waitcnt lgkmcnt(0)
+; GFX940-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
+; GFX940-NEXT: s_waitcnt lgkmcnt(0)
+; GFX940-NEXT: v_mov_b64_e32 v[2:3], s[4:5]
+; GFX940-NEXT: .LBB24_1: ; %atomicrmw.start
+; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX940-NEXT: v_add_f64 v[0:1], v[2:3], 4.0
+; GFX940-NEXT: buffer_wbl2 sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX940-NEXT: global_atomic_cmpswap_x2 v[0:1], v4, v[0:3], s[0:1] sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: buffer_inv sc0 sc1
+; GFX940-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
+; GFX940-NEXT: s_or_b64 s[2:3], vcc, s[2:3]
+; GFX940-NEXT: v_mov_b64_e32 v[2:3], v[0:1]
+; GFX940-NEXT: s_andn2_b64 exec, exec, s[2:3]
+; GFX940-NEXT: s_cbranch_execnz .LBB24_1
+; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end
+; GFX940-NEXT: s_endpgm
main_body:
%ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 seq_cst
ret void
@@ -455,6 +742,18 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_agent(ptr addrspace(
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: buffer_wbinvl1_vol
; GFX90A-NEXT: s_endpgm
+;
+; GFX940-LABEL: global_atomic_fadd_f64_noret_pat_agent:
+; GFX940: ; %bb.0: ; %main_body
+; GFX940-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GFX940-NEXT: v_mov_b32_e32 v2, 0
+; GFX940-NEXT: v_mov_b64_e32 v[0:1], 4.0
+; GFX940-NEXT: buffer_wbl2 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX940-NEXT: global_atomic_add_f64 v2, v[0:1], s[0:1]
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: buffer_inv sc1
+; GFX940-NEXT: s_endpgm
main_body:
%ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 syncscope("agent") seq_cst
ret void
@@ -486,6 +785,31 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_system(ptr addrspace
; GFX90A-NEXT: s_cbranch_execnz .LBB26_1
; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX90A-NEXT: s_endpgm
+;
+; GFX940-LABEL: global_atomic_fadd_f64_noret_pat_system:
+; GFX940: ; %bb.0: ; %main_body
+; GFX940-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GFX940-NEXT: s_mov_b64 s[2:3], 0
+; GFX940-NEXT: v_mov_b32_e32 v4, 0
+; GFX940-NEXT: s_waitcnt lgkmcnt(0)
+; GFX940-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
+; GFX940-NEXT: s_waitcnt lgkmcnt(0)
+; GFX940-NEXT: v_mov_b64_e32 v[2:3], s[4:5]
+; GFX940-NEXT: .LBB26_1: ; %atomicrmw.start
+; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX940-NEXT: v_add_f64 v[0:1], v[2:3], 4.0
+; GFX940-NEXT: buffer_wbl2 sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: global_atomic_cmpswap_x2 v[0:1], v4, v[0:3], s[0:1] sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: buffer_inv sc0 sc1
+; GFX940-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
+; GFX940-NEXT: s_or_b64 s[2:3], vcc, s[2:3]
+; GFX940-NEXT: v_mov_b64_e32 v[2:3], v[0:1]
+; GFX940-NEXT: s_andn2_b64 exec, exec, s[2:3]
+; GFX940-NEXT: s_cbranch_execnz .LBB26_1
+; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end
+; GFX940-NEXT: s_endpgm
main_body:
%ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 syncscope("one-as") seq_cst
ret void
@@ -503,6 +827,18 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_flush(ptr addrspace(
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: buffer_wbinvl1_vol
; GFX90A-NEXT: s_endpgm
+;
+; GFX940-LABEL: global_atomic_fadd_f64_noret_pat_flush:
+; GFX940: ; %bb.0: ; %main_body
+; GFX940-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GFX940-NEXT: v_mov_b32_e32 v2, 0
+; GFX940-NEXT: v_mov_b64_e32 v[0:1], 4.0
+; GFX940-NEXT: buffer_wbl2 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX940-NEXT: global_atomic_add_f64 v2, v[0:1], s[0:1]
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: buffer_inv sc1
+; GFX940-NEXT: s_endpgm
main_body:
%ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 syncscope("agent") seq_cst
ret void
@@ -515,6 +851,13 @@ define double @global_atomic_fadd_f64_rtn(ptr addrspace(1) %ptr, double %data) {
; GFX90A-NEXT: global_atomic_add_f64 v[0:1], v[0:1], v[2:3], off glc
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX940-LABEL: global_atomic_fadd_f64_rtn:
+; GFX940: ; %bb.0: ; %main_body
+; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT: global_atomic_add_f64 v[0:1], v[0:1], v[2:3], off sc0
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: s_setpc_b64 s[30:31]
main_body:
%ret = call double @llvm.amdgcn.global.atomic.fadd.f64.p1.f64(ptr addrspace(1) %ptr, double %data)
ret double %ret
@@ -546,6 +889,31 @@ define double @global_atomic_fadd_f64_rtn_pat(ptr addrspace(1) %ptr, double %dat
; GFX90A-NEXT: v_mov_b32_e32 v0, v2
; GFX90A-NEXT: v_mov_b32_e32 v1, v3
; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX940-LABEL: global_atomic_fadd_f64_rtn_pat:
+; GFX940: ; %bb.0: ; %main_body
+; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT: global_load_dwordx2 v[2:3], v[0:1], off
+; GFX940-NEXT: s_mov_b64 s[0:1], 0
+; GFX940-NEXT: .LBB29_1: ; %atomicrmw.start
+; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: v_mov_b64_e32 v[4:5], v[2:3]
+; GFX940-NEXT: v_add_f64 v[2:3], v[4:5], 4.0
+; GFX940-NEXT: buffer_wbl2 sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX940-NEXT: global_atomic_cmpswap_x2 v[2:3], v[0:1], v[2:5], off sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: buffer_inv sc0 sc1
+; GFX940-NEXT: v_cmp_eq_u64_e32 vcc, v[2:3], v[4:5]
+; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
+; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1]
+; GFX940-NEXT: s_cbranch_execnz .LBB29_1
+; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end
+; GFX940-NEXT: s_or_b64 exec, exec, s[0:1]
+; GFX940-NEXT: v_mov_b32_e32 v0, v2
+; GFX940-NEXT: v_mov_b32_e32 v1, v3
+; GFX940-NEXT: s_setpc_b64 s[30:31]
main_body:
%ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 seq_cst
ret double %ret
@@ -562,6 +930,17 @@ define double @global_atomic_fadd_f64_rtn_pat_agent(ptr addrspace(1) %ptr, doubl
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: buffer_wbinvl1_vol
; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX940-LABEL: global_atomic_fadd_f64_rtn_pat_agent:
+; GFX940: ; %bb.0: ; %main_body
+; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT: v_mov_b64_e32 v[2:3], 4.0
+; GFX940-NEXT: buffer_wbl2 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX940-NEXT: global_atomic_add_f64 v[0:1], v[0:1], v[2:3], off sc0
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: buffer_inv sc1
+; GFX940-NEXT: s_setpc_b64 s[30:31]
main_body:
%ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 syncscope("agent") seq_cst
ret double %ret
@@ -593,6 +972,31 @@ define double @global_atomic_fadd_f64_rtn_pat_system(ptr addrspace(1) %ptr, doub
; GFX90A-NEXT: v_mov_b32_e32 v0, v2
; GFX90A-NEXT: v_mov_b32_e32 v1, v3
; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX940-LABEL: global_atomic_fadd_f64_rtn_pat_system:
+; GFX940: ; %bb.0: ; %main_body
+; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT: global_load_dwordx2 v[2:3], v[0:1], off
+; GFX940-NEXT: s_mov_b64 s[0:1], 0
+; GFX940-NEXT: .LBB31_1: ; %atomicrmw.start
+; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: v_mov_b64_e32 v[4:5], v[2:3]
+; GFX940-NEXT: v_add_f64 v[2:3], v[4:5], 4.0
+; GFX940-NEXT: buffer_wbl2 sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: global_atomic_cmpswap_x2 v[2:3], v[0:1], v[2:5], off sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: buffer_inv sc0 sc1
+; GFX940-NEXT: v_cmp_eq_u64_e32 vcc, v[2:3], v[4:5]
+; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
+; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1]
+; GFX940-NEXT: s_cbranch_execnz .LBB31_1
+; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end
+; GFX940-NEXT: s_or_b64 exec, exec, s[0:1]
+; GFX940-NEXT: v_mov_b32_e32 v0, v2
+; GFX940-NEXT: v_mov_b32_e32 v1, v3
+; GFX940-NEXT: s_setpc_b64 s[30:31]
main_body:
%ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 syncscope("one-as") seq_cst
ret double %ret
@@ -605,6 +1009,13 @@ define double @global_atomic_fmax_f64_rtn(ptr addrspace(1) %ptr, double %data) {
; GFX90A-NEXT: global_atomic_max_f64 v[0:1], v[0:1], v[2:3], off glc
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX940-LABEL: global_atomic_fmax_f64_rtn:
+; GFX940: ; %bb.0: ; %main_body
+; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT: global_atomic_max_f64 v[0:1], v[0:1], v[2:3], off sc0
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: s_setpc_b64 s[30:31]
main_body:
%ret = call double @llvm.amdgcn.global.atomic.fmax.f64.p1.f64(ptr addrspace(1) %ptr, double %data)
ret double %ret
@@ -617,6 +1028,13 @@ define double @global_atomic_fmin_f64_rtn(ptr addrspace(1) %ptr, double %data) {
; GFX90A-NEXT: global_atomic_min_f64 v[0:1], v[0:1], v[2:3], off glc
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX940-LABEL: global_atomic_fmin_f64_rtn:
+; GFX940: ; %bb.0: ; %main_body
+; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT: global_atomic_min_f64 v[0:1], v[0:1], v[2:3], off sc0
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: s_setpc_b64 s[30:31]
main_body:
%ret = call double @llvm.amdgcn.global.atomic.fmin.f64.p1.f64(ptr addrspace(1) %ptr, double %data)
ret double %ret
@@ -646,6 +1064,31 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_agent_safe(ptr addrs
; GFX90A-NEXT: s_cbranch_execnz .LBB34_1
; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX90A-NEXT: s_endpgm
+;
+; GFX940-LABEL: global_atomic_fadd_f64_noret_pat_agent_safe:
+; GFX940: ; %bb.0: ; %main_body
+; GFX940-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GFX940-NEXT: s_mov_b64 s[2:3], 0
+; GFX940-NEXT: v_mov_b32_e32 v4, 0
+; GFX940-NEXT: s_waitcnt lgkmcnt(0)
+; GFX940-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
+; GFX940-NEXT: s_waitcnt lgkmcnt(0)
+; GFX940-NEXT: v_mov_b64_e32 v[2:3], s[4:5]
+; GFX940-NEXT: .LBB34_1: ; %atomicrmw.start
+; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX940-NEXT: v_add_f64 v[0:1], v[2:3], 4.0
+; GFX940-NEXT: buffer_wbl2 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX940-NEXT: global_atomic_cmpswap_x2 v[0:1], v4, v[0:3], s[0:1] sc0
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: buffer_inv sc1
+; GFX940-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
+; GFX940-NEXT: s_or_b64 s[2:3], vcc, s[2:3]
+; GFX940-NEXT: v_mov_b64_e32 v[2:3], v[0:1]
+; GFX940-NEXT: s_andn2_b64 exec, exec, s[2:3]
+; GFX940-NEXT: s_cbranch_execnz .LBB34_1
+; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end
+; GFX940-NEXT: s_endpgm
main_body:
%ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 syncscope("agent") seq_cst
ret void
@@ -677,6 +1120,31 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat(ptr %ptr) #1 {
; GFX90A-NEXT: s_cbranch_execnz .LBB35_1
; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX90A-NEXT: s_endpgm
+;
+; GFX940-LABEL: flat_atomic_fadd_f64_noret_pat:
+; GFX940: ; %bb.0: ; %main_body
+; GFX940-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GFX940-NEXT: s_mov_b64 s[2:3], 0
+; GFX940-NEXT: s_waitcnt lgkmcnt(0)
+; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
+; GFX940-NEXT: flat_load_dwordx2 v[2:3], v[0:1]
+; GFX940-NEXT: .LBB35_1: ; %atomicrmw.start
+; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX940-NEXT: v_add_f64 v[0:1], v[2:3], 4.0
+; GFX940-NEXT: v_mov_b64_e32 v[4:5], s[0:1]
+; GFX940-NEXT: buffer_wbl2 sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX940-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX940-NEXT: buffer_inv sc0 sc1
+; GFX940-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
+; GFX940-NEXT: s_or_b64 s[2:3], vcc, s[2:3]
+; GFX940-NEXT: v_mov_b64_e32 v[2:3], v[0:1]
+; GFX940-NEXT: s_andn2_b64 exec, exec, s[2:3]
+; GFX940-NEXT: s_cbranch_execnz .LBB35_1
+; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end
+; GFX940-NEXT: s_endpgm
main_body:
%ret = atomicrmw fadd ptr %ptr, double 4.0 seq_cst
ret void
@@ -695,6 +1163,19 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_agent(ptr %ptr) #1 {
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX90A-NEXT: buffer_wbinvl1_vol
; GFX90A-NEXT: s_endpgm
+;
+; GFX940-LABEL: flat_atomic_fadd_f64_noret_pat_agent:
+; GFX940: ; %bb.0: ; %main_body
+; GFX940-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GFX940-NEXT: v_mov_b64_e32 v[2:3], 4.0
+; GFX940-NEXT: s_waitcnt lgkmcnt(0)
+; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
+; GFX940-NEXT: buffer_wbl2 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX940-NEXT: flat_atomic_add_f64 v[0:1], v[2:3]
+; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX940-NEXT: buffer_inv sc1
+; GFX940-NEXT: s_endpgm
main_body:
%ret = atomicrmw fadd ptr %ptr, double 4.0 syncscope("agent") seq_cst
ret void
@@ -727,6 +1208,32 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_system(ptr %ptr) #1 {
; GFX90A-NEXT: s_cbranch_execnz .LBB37_1
; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX90A-NEXT: s_endpgm
+;
+; GFX940-LABEL: flat_atomic_fadd_f64_noret_pat_system:
+; GFX940: ; %bb.0: ; %main_body
+; GFX940-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GFX940-NEXT: s_mov_b64 s[2:3], 0
+; GFX940-NEXT: s_waitcnt lgkmcnt(0)
+; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
+; GFX940-NEXT: flat_load_dwordx2 v[2:3], v[0:1]
+; GFX940-NEXT: .LBB37_1: ; %atomicrmw.start
+; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX940-NEXT: v_add_f64 v[0:1], v[2:3], 4.0
+; GFX940-NEXT: v_mov_b64_e32 v[4:5], s[0:1]
+; GFX940-NEXT: buffer_wbl2 sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: buffer_inv sc0 sc1
+; GFX940-NEXT: s_waitcnt lgkmcnt(0)
+; GFX940-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
+; GFX940-NEXT: s_or_b64 s[2:3], vcc, s[2:3]
+; GFX940-NEXT: v_mov_b64_e32 v[2:3], v[0:1]
+; GFX940-NEXT: s_andn2_b64 exec, exec, s[2:3]
+; GFX940-NEXT: s_cbranch_execnz .LBB37_1
+; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end
+; GFX940-NEXT: s_endpgm
main_body:
%ret = atomicrmw fadd ptr %ptr, double 4.0 syncscope("one-as") seq_cst
ret void
@@ -758,6 +1265,31 @@ define double @flat_atomic_fadd_f64_rtn_pat(ptr %ptr) #1 {
; GFX90A-NEXT: v_mov_b32_e32 v0, v2
; GFX90A-NEXT: v_mov_b32_e32 v1, v3
; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX940-LABEL: flat_atomic_fadd_f64_rtn_pat:
+; GFX940: ; %bb.0: ; %main_body
+; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT: flat_load_dwordx2 v[2:3], v[0:1]
+; GFX940-NEXT: s_mov_b64 s[0:1], 0
+; GFX940-NEXT: .LBB38_1: ; %atomicrmw.start
+; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX940-NEXT: v_mov_b64_e32 v[4:5], v[2:3]
+; GFX940-NEXT: v_add_f64 v[2:3], v[4:5], 4.0
+; GFX940-NEXT: buffer_wbl2 sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX940-NEXT: flat_atomic_cmpswap_x2 v[2:3], v[0:1], v[2:5] sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX940-NEXT: buffer_inv sc0 sc1
+; GFX940-NEXT: v_cmp_eq_u64_e32 vcc, v[2:3], v[4:5]
+; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
+; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1]
+; GFX940-NEXT: s_cbranch_execnz .LBB38_1
+; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end
+; GFX940-NEXT: s_or_b64 exec, exec, s[0:1]
+; GFX940-NEXT: v_mov_b32_e32 v0, v2
+; GFX940-NEXT: v_mov_b32_e32 v1, v3
+; GFX940-NEXT: s_setpc_b64 s[30:31]
main_body:
%ret = atomicrmw fadd ptr %ptr, double 4.0 seq_cst
ret double %ret
@@ -774,6 +1306,17 @@ define double @flat_atomic_fadd_f64_rtn_pat_agent(ptr %ptr) #1 {
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX90A-NEXT: buffer_wbinvl1_vol
; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX940-LABEL: flat_atomic_fadd_f64_rtn_pat_agent:
+; GFX940: ; %bb.0: ; %main_body
+; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT: v_mov_b64_e32 v[2:3], 4.0
+; GFX940-NEXT: buffer_wbl2 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX940-NEXT: flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] sc0
+; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX940-NEXT: buffer_inv sc1
+; GFX940-NEXT: s_setpc_b64 s[30:31]
main_body:
%ret = atomicrmw fadd ptr %ptr, double 4.0 syncscope("agent") seq_cst
ret double %ret
@@ -806,6 +1349,32 @@ define double @flat_atomic_fadd_f64_rtn_pat_system(ptr %ptr) #1 {
; GFX90A-NEXT: v_mov_b32_e32 v0, v2
; GFX90A-NEXT: v_mov_b32_e32 v1, v3
; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX940-LABEL: flat_atomic_fadd_f64_rtn_pat_system:
+; GFX940: ; %bb.0: ; %main_body
+; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT: flat_load_dwordx2 v[2:3], v[0:1]
+; GFX940-NEXT: s_mov_b64 s[0:1], 0
+; GFX940-NEXT: .LBB40_1: ; %atomicrmw.start
+; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX940-NEXT: v_mov_b64_e32 v[4:5], v[2:3]
+; GFX940-NEXT: v_add_f64 v[2:3], v[4:5], 4.0
+; GFX940-NEXT: buffer_wbl2 sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: flat_atomic_cmpswap_x2 v[2:3], v[0:1], v[2:5] sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: buffer_inv sc0 sc1
+; GFX940-NEXT: s_waitcnt lgkmcnt(0)
+; GFX940-NEXT: v_cmp_eq_u64_e32 vcc, v[2:3], v[4:5]
+; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
+; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1]
+; GFX940-NEXT: s_cbranch_execnz .LBB40_1
+; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end
+; GFX940-NEXT: s_or_b64 exec, exec, s[0:1]
+; GFX940-NEXT: v_mov_b32_e32 v0, v2
+; GFX940-NEXT: v_mov_b32_e32 v1, v3
+; GFX940-NEXT: s_setpc_b64 s[30:31]
main_body:
%ret = atomicrmw fadd ptr %ptr, double 4.0 syncscope("one-as") seq_cst
ret double %ret
@@ -822,6 +1391,17 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret(ptr %ptr, double %data) {
; GFX90A-NEXT: v_mov_b32_e32 v3, s3
; GFX90A-NEXT: flat_atomic_add_f64 v[0:1], v[2:3]
; GFX90A-NEXT: s_endpgm
+;
+; GFX940-LABEL: flat_atomic_fadd_f64_noret:
+; GFX940: ; %bb.0: ; %main_body
+; GFX940-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GFX940-NEXT: s_waitcnt lgkmcnt(0)
+; GFX940-NEXT: v_mov_b32_e32 v0, s0
+; GFX940-NEXT: v_mov_b32_e32 v1, s1
+; GFX940-NEXT: v_mov_b32_e32 v2, s2
+; GFX940-NEXT: v_mov_b32_e32 v3, s3
+; GFX940-NEXT: flat_atomic_add_f64 v[0:1], v[2:3]
+; GFX940-NEXT: s_endpgm
main_body:
%ret = call double @llvm.amdgcn.flat.atomic.fadd.f64.p0.f64(ptr %ptr, double %data)
ret void
@@ -834,6 +1414,13 @@ define double @flat_atomic_fadd_f64_rtn(ptr %ptr, double %data) {
; GFX90A-NEXT: flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] glc
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX940-LABEL: flat_atomic_fadd_f64_rtn:
+; GFX940: ; %bb.0: ; %main_body
+; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT: flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] sc0
+; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX940-NEXT: s_setpc_b64 s[30:31]
main_body:
%ret = call double @llvm.amdgcn.flat.atomic.fadd.f64.p0.f64(ptr %ptr, double %data)
ret double %ret
@@ -863,6 +1450,31 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_agent_safe(ptr %ptr) {
; GFX90A-NEXT: s_cbranch_execnz .LBB43_1
; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX90A-NEXT: s_endpgm
+;
+; GFX940-LABEL: flat_atomic_fadd_f64_noret_pat_agent_safe:
+; GFX940: ; %bb.0: ; %main_body
+; GFX940-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GFX940-NEXT: s_mov_b64 s[2:3], 0
+; GFX940-NEXT: s_waitcnt lgkmcnt(0)
+; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
+; GFX940-NEXT: flat_load_dwordx2 v[2:3], v[0:1]
+; GFX940-NEXT: .LBB43_1: ; %atomicrmw.start
+; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX940-NEXT: v_add_f64 v[0:1], v[2:3], 4.0
+; GFX940-NEXT: v_mov_b64_e32 v[4:5], s[0:1]
+; GFX940-NEXT: buffer_wbl2 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX940-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] sc0
+; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX940-NEXT: buffer_inv sc1
+; GFX940-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
+; GFX940-NEXT: s_or_b64 s[2:3], vcc, s[2:3]
+; GFX940-NEXT: v_mov_b64_e32 v[2:3], v[0:1]
+; GFX940-NEXT: s_andn2_b64 exec, exec, s[2:3]
+; GFX940-NEXT: s_cbranch_execnz .LBB43_1
+; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end
+; GFX940-NEXT: s_endpgm
main_body:
%ret = atomicrmw fadd ptr %ptr, double 4.0 syncscope("agent") seq_cst
ret void
@@ -879,6 +1491,17 @@ define amdgpu_kernel void @flat_atomic_fmin_f64_noret(ptr %ptr, double %data) {
; GFX90A-NEXT: v_mov_b32_e32 v3, s3
; GFX90A-NEXT: flat_atomic_min_f64 v[0:1], v[2:3]
; GFX90A-NEXT: s_endpgm
+;
+; GFX940-LABEL: flat_atomic_fmin_f64_noret:
+; GFX940: ; %bb.0: ; %main_body
+; GFX940-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GFX940-NEXT: s_waitcnt lgkmcnt(0)
+; GFX940-NEXT: v_mov_b32_e32 v0, s0
+; GFX940-NEXT: v_mov_b32_e32 v1, s1
+; GFX940-NEXT: v_mov_b32_e32 v2, s2
+; GFX940-NEXT: v_mov_b32_e32 v3, s3
+; GFX940-NEXT: flat_atomic_min_f64 v[0:1], v[2:3]
+; GFX940-NEXT: s_endpgm
main_body:
%ret = call double @llvm.amdgcn.flat.atomic.fmin.f64.p0.f64(ptr %ptr, double %data)
ret void
@@ -891,6 +1514,13 @@ define double @flat_atomic_fmin_f64_rtn(ptr %ptr, double %data) {
; GFX90A-NEXT: flat_atomic_min_f64 v[0:1], v[0:1], v[2:3] glc
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX940-LABEL: flat_atomic_fmin_f64_rtn:
+; GFX940: ; %bb.0: ; %main_body
+; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT: flat_atomic_min_f64 v[0:1], v[0:1], v[2:3] sc0
+; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX940-NEXT: s_setpc_b64 s[30:31]
main_body:
%ret = call double @llvm.amdgcn.flat.atomic.fmin.f64.p0.f64(ptr %ptr, double %data)
ret double %ret
@@ -907,6 +1537,17 @@ define amdgpu_kernel void @flat_atomic_fmax_f64_noret(ptr %ptr, double %data) {
; GFX90A-NEXT: v_mov_b32_e32 v3, s3
; GFX90A-NEXT: flat_atomic_max_f64 v[0:1], v[2:3]
; GFX90A-NEXT: s_endpgm
+;
+; GFX940-LABEL: flat_atomic_fmax_f64_noret:
+; GFX940: ; %bb.0: ; %main_body
+; GFX940-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GFX940-NEXT: s_waitcnt lgkmcnt(0)
+; GFX940-NEXT: v_mov_b32_e32 v0, s0
+; GFX940-NEXT: v_mov_b32_e32 v1, s1
+; GFX940-NEXT: v_mov_b32_e32 v2, s2
+; GFX940-NEXT: v_mov_b32_e32 v3, s3
+; GFX940-NEXT: flat_atomic_max_f64 v[0:1], v[2:3]
+; GFX940-NEXT: s_endpgm
main_body:
%ret = call double @llvm.amdgcn.flat.atomic.fmax.f64.p0.f64(ptr %ptr, double %data)
ret void
@@ -919,6 +1560,13 @@ define double @flat_atomic_fmax_f64_rtn(ptr %ptr, double %data) {
; GFX90A-NEXT: flat_atomic_max_f64 v[0:1], v[0:1], v[2:3] glc
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX940-LABEL: flat_atomic_fmax_f64_rtn:
+; GFX940: ; %bb.0: ; %main_body
+; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT: flat_atomic_max_f64 v[0:1], v[0:1], v[2:3] sc0
+; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX940-NEXT: s_setpc_b64 s[30:31]
main_body:
%ret = call double @llvm.amdgcn.flat.atomic.fmax.f64.p0.f64(ptr %ptr, double %data)
ret double %ret
@@ -934,6 +1582,16 @@ define amdgpu_kernel void @local_atomic_fadd_f64_noret(ptr addrspace(3) %ptr, do
; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1]
; GFX90A-NEXT: ds_add_f64 v2, v[0:1]
; GFX90A-NEXT: s_endpgm
+;
+; GFX940-LABEL: local_atomic_fadd_f64_noret:
+; GFX940: ; %bb.0: ; %main_body
+; GFX940-NEXT: s_load_dword s4, s[0:1], 0x24
+; GFX940-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2c
+; GFX940-NEXT: s_waitcnt lgkmcnt(0)
+; GFX940-NEXT: v_mov_b32_e32 v2, s4
+; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[2:3]
+; GFX940-NEXT: ds_add_f64 v2, v[0:1]
+; GFX940-NEXT: s_endpgm
main_body:
%ret = call double @llvm.amdgcn.ds.fadd.f64(ptr addrspace(3) %ptr, double %data, i32 0, i32 0, i1 0)
ret void
@@ -948,6 +1606,15 @@ define double @local_atomic_fadd_f64_rtn(ptr addrspace(3) %ptr, double %data) {
; GFX90A-NEXT: ds_add_rtn_f64 v[0:1], v0, v[2:3]
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX940-LABEL: local_atomic_fadd_f64_rtn:
+; GFX940: ; %bb.0: ; %main_body
+; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT: v_mov_b32_e32 v3, v2
+; GFX940-NEXT: v_mov_b32_e32 v2, v1
+; GFX940-NEXT: ds_add_rtn_f64 v[0:1], v0, v[2:3]
+; GFX940-NEXT: s_waitcnt lgkmcnt(0)
+; GFX940-NEXT: s_setpc_b64 s[30:31]
main_body:
%ret = call double @llvm.amdgcn.ds.fadd.f64(ptr addrspace(3) %ptr, double %data, i32 0, i32 0, i1 0)
ret double %ret
@@ -963,6 +1630,16 @@ define amdgpu_kernel void @local_atomic_fadd_f64_noret_from_flat_intrinsic(ptr a
; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1]
; GFX90A-NEXT: ds_add_f64 v2, v[0:1]
; GFX90A-NEXT: s_endpgm
+;
+; GFX940-LABEL: local_atomic_fadd_f64_noret_from_flat_intrinsic:
+; GFX940: ; %bb.0: ; %main_body
+; GFX940-NEXT: s_load_dword s4, s[0:1], 0x24
+; GFX940-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2c
+; GFX940-NEXT: s_waitcnt lgkmcnt(0)
+; GFX940-NEXT: v_mov_b32_e32 v2, s4
+; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[2:3]
+; GFX940-NEXT: ds_add_f64 v2, v[0:1]
+; GFX940-NEXT: s_endpgm
main_body:
%ret = call double @llvm.amdgcn.flat.atomic.fadd.f64.p3.f64(ptr addrspace(3) %ptr, double %data)
ret void
@@ -977,6 +1654,15 @@ define double @local_atomic_fadd_f64_rtn_from_flat_intrinsic(ptr addrspace(3) %p
; GFX90A-NEXT: ds_add_rtn_f64 v[0:1], v0, v[2:3]
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX940-LABEL: local_atomic_fadd_f64_rtn_from_flat_intrinsic:
+; GFX940: ; %bb.0: ; %main_body
+; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT: v_mov_b32_e32 v3, v2
+; GFX940-NEXT: v_mov_b32_e32 v2, v1
+; GFX940-NEXT: ds_add_rtn_f64 v[0:1], v0, v[2:3]
+; GFX940-NEXT: s_waitcnt lgkmcnt(0)
+; GFX940-NEXT: s_setpc_b64 s[30:31]
main_body:
%ret = call double @llvm.amdgcn.flat.atomic.fadd.f64.p3.f64(ptr addrspace(3) %ptr, double %data)
ret double %ret
@@ -994,6 +1680,17 @@ define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat(ptr addrspace(3) %ptr
; GFX90A-NEXT: ds_add_f64 v2, v[0:1]
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NEXT: s_endpgm
+;
+; GFX940-LABEL: local_atomic_fadd_f64_noret_pat:
+; GFX940: ; %bb.0: ; %main_body
+; GFX940-NEXT: s_load_dword s0, s[0:1], 0x24
+; GFX940-NEXT: v_mov_b64_e32 v[0:1], 4.0
+; GFX940-NEXT: s_waitcnt lgkmcnt(0)
+; GFX940-NEXT: v_mov_b32_e32 v2, s0
+; GFX940-NEXT: s_waitcnt lgkmcnt(0)
+; GFX940-NEXT: ds_add_f64 v2, v[0:1]
+; GFX940-NEXT: s_waitcnt lgkmcnt(0)
+; GFX940-NEXT: s_endpgm
main_body:
%ret = atomicrmw fadd ptr addrspace(3) %ptr, double 4.0 seq_cst
ret void
@@ -1011,6 +1708,17 @@ define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat_flush(ptr addrspace(3
; GFX90A-NEXT: ds_add_f64 v2, v[0:1]
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NEXT: s_endpgm
+;
+; GFX940-LABEL: local_atomic_fadd_f64_noret_pat_flush:
+; GFX940: ; %bb.0: ; %main_body
+; GFX940-NEXT: s_load_dword s0, s[0:1], 0x24
+; GFX940-NEXT: v_mov_b64_e32 v[0:1], 4.0
+; GFX940-NEXT: s_waitcnt lgkmcnt(0)
+; GFX940-NEXT: v_mov_b32_e32 v2, s0
+; GFX940-NEXT: s_waitcnt lgkmcnt(0)
+; GFX940-NEXT: ds_add_f64 v2, v[0:1]
+; GFX940-NEXT: s_waitcnt lgkmcnt(0)
+; GFX940-NEXT: s_endpgm
main_body:
%ret = atomicrmw fadd ptr addrspace(3) %ptr, double 4.0 seq_cst
ret void
@@ -1039,6 +1747,29 @@ define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat_flush_safe(ptr addrsp
; GFX90A-NEXT: s_cbranch_execnz .LBB54_1
; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX90A-NEXT: s_endpgm
+;
+; GFX940-LABEL: local_atomic_fadd_f64_noret_pat_flush_safe:
+; GFX940: ; %bb.0: ; %main_body
+; GFX940-NEXT: s_load_dword s2, s[0:1], 0x24
+; GFX940-NEXT: s_mov_b64 s[0:1], 0
+; GFX940-NEXT: s_waitcnt lgkmcnt(0)
+; GFX940-NEXT: v_mov_b32_e32 v0, s2
+; GFX940-NEXT: ds_read_b64 v[0:1], v0
+; GFX940-NEXT: .LBB54_1: ; %atomicrmw.start
+; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX940-NEXT: s_waitcnt lgkmcnt(0)
+; GFX940-NEXT: v_add_f64 v[2:3], v[0:1], 4.0
+; GFX940-NEXT: v_mov_b32_e32 v4, s2
+; GFX940-NEXT: s_waitcnt lgkmcnt(0)
+; GFX940-NEXT: ds_cmpst_rtn_b64 v[2:3], v4, v[0:1], v[2:3]
+; GFX940-NEXT: s_waitcnt lgkmcnt(0)
+; GFX940-NEXT: v_cmp_eq_u64_e32 vcc, v[2:3], v[0:1]
+; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
+; GFX940-NEXT: v_mov_b64_e32 v[0:1], v[2:3]
+; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1]
+; GFX940-NEXT: s_cbranch_execnz .LBB54_1
+; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end
+; GFX940-NEXT: s_endpgm
main_body:
%ret = atomicrmw fadd ptr addrspace(3) %ptr, double 4.0 seq_cst
ret void
@@ -1054,6 +1785,15 @@ define double @local_atomic_fadd_f64_rtn_pat(ptr addrspace(3) %ptr, double %data
; GFX90A-NEXT: ds_add_rtn_f64 v[0:1], v0, v[2:3]
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX940-LABEL: local_atomic_fadd_f64_rtn_pat:
+; GFX940: ; %bb.0: ; %main_body
+; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT: v_mov_b64_e32 v[2:3], 4.0
+; GFX940-NEXT: s_waitcnt lgkmcnt(0)
+; GFX940-NEXT: ds_add_rtn_f64 v[0:1], v0, v[2:3]
+; GFX940-NEXT: s_waitcnt lgkmcnt(0)
+; GFX940-NEXT: s_setpc_b64 s[30:31]
main_body:
%ret = atomicrmw fadd ptr addrspace(3) %ptr, double 4.0 seq_cst
ret double %ret
@@ -1068,6 +1808,15 @@ define double @local_atomic_fadd_f64_rtn_ieee_unsafe(ptr addrspace(3) %ptr, doub
; GFX90A-NEXT: ds_add_rtn_f64 v[0:1], v0, v[2:3]
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX940-LABEL: local_atomic_fadd_f64_rtn_ieee_unsafe:
+; GFX940: ; %bb.0: ; %main_body
+; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT: v_mov_b32_e32 v3, v2
+; GFX940-NEXT: v_mov_b32_e32 v2, v1
+; GFX940-NEXT: ds_add_rtn_f64 v[0:1], v0, v[2:3]
+; GFX940-NEXT: s_waitcnt lgkmcnt(0)
+; GFX940-NEXT: s_setpc_b64 s[30:31]
main_body:
%ret = call double @llvm.amdgcn.ds.fadd.f64(ptr addrspace(3) %ptr, double %data, i32 0, i32 0, i1 0)
ret double %ret
@@ -1082,6 +1831,15 @@ define double @local_atomic_fadd_f64_rtn_ieee_safe(ptr addrspace(3) %ptr, double
; GFX90A-NEXT: ds_add_rtn_f64 v[0:1], v0, v[2:3]
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX940-LABEL: local_atomic_fadd_f64_rtn_ieee_safe:
+; GFX940: ; %bb.0: ; %main_body
+; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT: v_mov_b32_e32 v3, v2
+; GFX940-NEXT: v_mov_b32_e32 v2, v1
+; GFX940-NEXT: ds_add_rtn_f64 v[0:1], v0, v[2:3]
+; GFX940-NEXT: s_waitcnt lgkmcnt(0)
+; GFX940-NEXT: s_setpc_b64 s[30:31]
main_body:
%ret = call double @llvm.amdgcn.ds.fadd.f64(ptr addrspace(3) %ptr, double %data, i32 0, i32 0, i1 0)
ret double %ret
diff --git a/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-rmw-fadd-flat-specialization.ll b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-rmw-fadd-flat-specialization.ll
index 542e4a3761190..22eec1d98072f 100644
--- a/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-rmw-fadd-flat-specialization.ll
+++ b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-rmw-fadd-flat-specialization.ll
@@ -9,65 +9,65 @@ define float @syncscope_system(ptr %addr, float %val) #0 {
; GFX908-NEXT: [[TMP1:%.*]] = load float, ptr [[ADDR:%.*]], align 4
; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]]
; GFX908: atomicrmw.start:
-; GFX908-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX908-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; GFX908-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VAL:%.*]]
-; GFX908-NEXT: [[TMP3:%.*]] = bitcast float [[NEW]] to i32
-; GFX908-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
-; GFX908-NEXT: [[TMP5:%.*]] = cmpxchg ptr [[ADDR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4
-; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
-; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
-; GFX908-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
+; GFX908-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
+; GFX908-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
+; GFX908-NEXT: [[TMP4:%.*]] = cmpxchg ptr [[ADDR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4
+; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
+; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
+; GFX908-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX908: atomicrmw.end:
-; GFX908-NEXT: ret float [[TMP6]]
+; GFX908-NEXT: ret float [[TMP5]]
;
; GFX90A-LABEL: @syncscope_system(
; GFX90A-NEXT: [[TMP1:%.*]] = load float, ptr [[ADDR:%.*]], align 4
; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]]
; GFX90A: atomicrmw.start:
-; GFX90A-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX90A-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; GFX90A-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VAL:%.*]]
-; GFX90A-NEXT: [[TMP3:%.*]] = bitcast float [[NEW]] to i32
-; GFX90A-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
-; GFX90A-NEXT: [[TMP5:%.*]] = cmpxchg ptr [[ADDR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4
-; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
-; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
-; GFX90A-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
+; GFX90A-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
+; GFX90A-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
+; GFX90A-NEXT: [[TMP4:%.*]] = cmpxchg ptr [[ADDR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4
+; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
+; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
+; GFX90A-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX90A: atomicrmw.end:
-; GFX90A-NEXT: ret float [[TMP6]]
+; GFX90A-NEXT: ret float [[TMP5]]
;
; GFX940-LABEL: @syncscope_system(
; GFX940-NEXT: [[TMP1:%.*]] = load float, ptr [[ADDR:%.*]], align 4
; GFX940-NEXT: br label [[ATOMICRMW_START:%.*]]
; GFX940: atomicrmw.start:
-; GFX940-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX940-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; GFX940-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VAL:%.*]]
-; GFX940-NEXT: [[TMP3:%.*]] = bitcast float [[NEW]] to i32
-; GFX940-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
-; GFX940-NEXT: [[TMP5:%.*]] = cmpxchg ptr [[ADDR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4
-; GFX940-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
-; GFX940-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
-; GFX940-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
+; GFX940-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
+; GFX940-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
+; GFX940-NEXT: [[TMP4:%.*]] = cmpxchg ptr [[ADDR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4
+; GFX940-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
+; GFX940-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
+; GFX940-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
; GFX940-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX940: atomicrmw.end:
-; GFX940-NEXT: ret float [[TMP6]]
+; GFX940-NEXT: ret float [[TMP5]]
;
; GFX1100-LABEL: @syncscope_system(
; GFX1100-NEXT: [[TMP1:%.*]] = load float, ptr [[ADDR:%.*]], align 4
; GFX1100-NEXT: br label [[ATOMICRMW_START:%.*]]
; GFX1100: atomicrmw.start:
-; GFX1100-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX1100-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; GFX1100-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VAL:%.*]]
-; GFX1100-NEXT: [[TMP3:%.*]] = bitcast float [[NEW]] to i32
-; GFX1100-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
-; GFX1100-NEXT: [[TMP5:%.*]] = cmpxchg ptr [[ADDR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4
-; GFX1100-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
-; GFX1100-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
-; GFX1100-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
+; GFX1100-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
+; GFX1100-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
+; GFX1100-NEXT: [[TMP4:%.*]] = cmpxchg ptr [[ADDR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4
+; GFX1100-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
+; GFX1100-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
+; GFX1100-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
; GFX1100-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX1100: atomicrmw.end:
-; GFX1100-NEXT: ret float [[TMP6]]
+; GFX1100-NEXT: ret float [[TMP5]]
;
; GFX11-LABEL: @syncscope_system(
; GFX11-NEXT: [[TMP1:%.*]] = load float, ptr [[ADDR:%.*]], align 4
@@ -93,17 +93,17 @@ define float @syncscope_workgroup_rtn(ptr %addr, float %val) #0 {
; GFX908-NEXT: [[TMP1:%.*]] = load float, ptr [[ADDR:%.*]], align 4
; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]]
; GFX908: atomicrmw.start:
-; GFX908-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX908-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; GFX908-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VAL:%.*]]
-; GFX908-NEXT: [[TMP3:%.*]] = bitcast float [[NEW]] to i32
-; GFX908-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
-; GFX908-NEXT: [[TMP5:%.*]] = cmpxchg ptr [[ADDR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("workgroup") seq_cst seq_cst, align 4
-; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
-; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
-; GFX908-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
+; GFX908-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
+; GFX908-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
+; GFX908-NEXT: [[TMP4:%.*]] = cmpxchg ptr [[ADDR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("workgroup") seq_cst seq_cst, align 4
+; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
+; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
+; GFX908-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX908: atomicrmw.end:
-; GFX908-NEXT: ret float [[TMP6]]
+; GFX908-NEXT: ret float [[TMP5]]
;
; GFX90A-LABEL: @syncscope_workgroup_rtn(
; GFX90A-NEXT: br label [[ATOMICRMW_CHECK_SHARED:%.*]]
@@ -111,24 +111,24 @@ define float @syncscope_workgroup_rtn(ptr %addr, float %val) #0 {
; GFX90A-NEXT: [[IS_SHARED:%.*]] = call i1 @llvm.amdgcn.is.shared(ptr [[ADDR:%.*]])
; GFX90A-NEXT: br i1 [[IS_SHARED]], label [[ATOMICRMW_SHARED:%.*]], label [[ATOMICRMW_CHECK_PRIVATE:%.*]]
; GFX90A: atomicrmw.shared:
-; GFX90A-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[ADDR]] to ptr addrspace(3)
-; GFX90A-NEXT: [[TMP3:%.*]] = atomicrmw fadd ptr addrspace(3) [[TMP2]], float [[VAL:%.*]] syncscope("workgroup") seq_cst, align 4
+; GFX90A-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[ADDR]] to ptr addrspace(3)
+; GFX90A-NEXT: [[TMP2:%.*]] = atomicrmw fadd ptr addrspace(3) [[TMP1]], float [[VAL:%.*]] syncscope("workgroup") seq_cst, align 4
; GFX90A-NEXT: br label [[ATOMICRMW_PHI:%.*]]
; GFX90A: atomicrmw.check.private:
; GFX90A-NEXT: [[IS_PRIVATE:%.*]] = call i1 @llvm.amdgcn.is.private(ptr [[ADDR]])
; GFX90A-NEXT: br i1 [[IS_PRIVATE]], label [[ATOMICRMW_PRIVATE:%.*]], label [[ATOMICRMW_GLOBAL:%.*]]
; GFX90A: atomicrmw.private:
-; GFX90A-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[ADDR]] to ptr addrspace(5)
-; GFX90A-NEXT: [[LOADED_PRIVATE:%.*]] = load float, ptr addrspace(5) [[TMP4]], align 4
+; GFX90A-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[ADDR]] to ptr addrspace(5)
+; GFX90A-NEXT: [[LOADED_PRIVATE:%.*]] = load float, ptr addrspace(5) [[TMP3]], align 4
; GFX90A-NEXT: [[VAL_NEW:%.*]] = fadd float [[LOADED_PRIVATE]], [[VAL]]
-; GFX90A-NEXT: store float [[VAL_NEW]], ptr addrspace(5) [[TMP4]], align 4
+; GFX90A-NEXT: store float [[VAL_NEW]], ptr addrspace(5) [[TMP3]], align 4
; GFX90A-NEXT: br label [[ATOMICRMW_PHI]]
; GFX90A: atomicrmw.global:
-; GFX90A-NEXT: [[TMP5:%.*]] = addrspacecast ptr [[ADDR]] to ptr addrspace(1)
-; GFX90A-NEXT: [[TMP6:%.*]] = atomicrmw fadd ptr addrspace(1) [[TMP5]], float [[VAL]] syncscope("workgroup") seq_cst, align 4
+; GFX90A-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[ADDR]] to ptr addrspace(1)
+; GFX90A-NEXT: [[TMP5:%.*]] = atomicrmw fadd ptr addrspace(1) [[TMP4]], float [[VAL]] syncscope("workgroup") seq_cst, align 4
; GFX90A-NEXT: br label [[ATOMICRMW_PHI]]
; GFX90A: atomicrmw.phi:
-; GFX90A-NEXT: [[LOADED_PHI:%.*]] = phi float [ [[TMP3]], [[ATOMICRMW_SHARED]] ], [ [[LOADED_PRIVATE]], [[ATOMICRMW_PRIVATE]] ], [ [[TMP6]], [[ATOMICRMW_GLOBAL]] ]
+; GFX90A-NEXT: [[LOADED_PHI:%.*]] = phi float [ [[TMP2]], [[ATOMICRMW_SHARED]] ], [ [[LOADED_PRIVATE]], [[ATOMICRMW_PRIVATE]] ], [ [[TMP5]], [[ATOMICRMW_GLOBAL]] ]
; GFX90A-NEXT: br label [[ATOMICRMW_END:%.*]]
; GFX90A: atomicrmw.end:
; GFX90A-NEXT: ret float [[LOADED_PHI]]
@@ -167,24 +167,24 @@ define void @syncscope_workgroup_nortn(ptr %addr, float %val) #0 {
; GFX908-NEXT: [[IS_SHARED:%.*]] = call i1 @llvm.amdgcn.is.shared(ptr [[ADDR:%.*]])
; GFX908-NEXT: br i1 [[IS_SHARED]], label [[ATOMICRMW_SHARED:%.*]], label [[ATOMICRMW_CHECK_PRIVATE:%.*]]
; GFX908: atomicrmw.shared:
-; GFX908-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[ADDR]] to ptr addrspace(3)
-; GFX908-NEXT: [[TMP3:%.*]] = atomicrmw fadd ptr addrspace(3) [[TMP2]], float [[VAL:%.*]] syncscope("workgroup") seq_cst, align 4
+; GFX908-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[ADDR]] to ptr addrspace(3)
+; GFX908-NEXT: [[TMP2:%.*]] = atomicrmw fadd ptr addrspace(3) [[TMP1]], float [[VAL:%.*]] syncscope("workgroup") seq_cst, align 4
; GFX908-NEXT: br label [[ATOMICRMW_PHI:%.*]]
; GFX908: atomicrmw.check.private:
; GFX908-NEXT: [[IS_PRIVATE:%.*]] = call i1 @llvm.amdgcn.is.private(ptr [[ADDR]])
; GFX908-NEXT: br i1 [[IS_PRIVATE]], label [[ATOMICRMW_PRIVATE:%.*]], label [[ATOMICRMW_GLOBAL:%.*]]
; GFX908: atomicrmw.private:
-; GFX908-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[ADDR]] to ptr addrspace(5)
-; GFX908-NEXT: [[LOADED_PRIVATE:%.*]] = load float, ptr addrspace(5) [[TMP4]], align 4
+; GFX908-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[ADDR]] to ptr addrspace(5)
+; GFX908-NEXT: [[LOADED_PRIVATE:%.*]] = load float, ptr addrspace(5) [[TMP3]], align 4
; GFX908-NEXT: [[VAL_NEW:%.*]] = fadd float [[LOADED_PRIVATE]], [[VAL]]
-; GFX908-NEXT: store float [[VAL_NEW]], ptr addrspace(5) [[TMP4]], align 4
+; GFX908-NEXT: store float [[VAL_NEW]], ptr addrspace(5) [[TMP3]], align 4
; GFX908-NEXT: br label [[ATOMICRMW_PHI]]
; GFX908: atomicrmw.global:
-; GFX908-NEXT: [[TMP5:%.*]] = addrspacecast ptr [[ADDR]] to ptr addrspace(1)
-; GFX908-NEXT: [[TMP6:%.*]] = atomicrmw fadd ptr addrspace(1) [[TMP5]], float [[VAL]] syncscope("workgroup") seq_cst, align 4
+; GFX908-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[ADDR]] to ptr addrspace(1)
+; GFX908-NEXT: [[TMP5:%.*]] = atomicrmw fadd ptr addrspace(1) [[TMP4]], float [[VAL]] syncscope("workgroup") seq_cst, align 4
; GFX908-NEXT: br label [[ATOMICRMW_PHI]]
; GFX908: atomicrmw.phi:
-; GFX908-NEXT: [[LOADED_PHI:%.*]] = phi float [ [[TMP3]], [[ATOMICRMW_SHARED]] ], [ [[LOADED_PRIVATE]], [[ATOMICRMW_PRIVATE]] ], [ [[TMP6]], [[ATOMICRMW_GLOBAL]] ]
+; GFX908-NEXT: [[LOADED_PHI:%.*]] = phi float [ [[TMP2]], [[ATOMICRMW_SHARED]] ], [ [[LOADED_PRIVATE]], [[ATOMICRMW_PRIVATE]] ], [ [[TMP5]], [[ATOMICRMW_GLOBAL]] ]
; GFX908-NEXT: br label [[ATOMICRMW_END:%.*]]
; GFX908: atomicrmw.end:
; GFX908-NEXT: ret void
@@ -195,24 +195,24 @@ define void @syncscope_workgroup_nortn(ptr %addr, float %val) #0 {
; GFX90A-NEXT: [[IS_SHARED:%.*]] = call i1 @llvm.amdgcn.is.shared(ptr [[ADDR:%.*]])
; GFX90A-NEXT: br i1 [[IS_SHARED]], label [[ATOMICRMW_SHARED:%.*]], label [[ATOMICRMW_CHECK_PRIVATE:%.*]]
; GFX90A: atomicrmw.shared:
-; GFX90A-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[ADDR]] to ptr addrspace(3)
-; GFX90A-NEXT: [[TMP3:%.*]] = atomicrmw fadd ptr addrspace(3) [[TMP2]], float [[VAL:%.*]] syncscope("workgroup") seq_cst, align 4
+; GFX90A-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[ADDR]] to ptr addrspace(3)
+; GFX90A-NEXT: [[TMP2:%.*]] = atomicrmw fadd ptr addrspace(3) [[TMP1]], float [[VAL:%.*]] syncscope("workgroup") seq_cst, align 4
; GFX90A-NEXT: br label [[ATOMICRMW_PHI:%.*]]
; GFX90A: atomicrmw.check.private:
; GFX90A-NEXT: [[IS_PRIVATE:%.*]] = call i1 @llvm.amdgcn.is.private(ptr [[ADDR]])
; GFX90A-NEXT: br i1 [[IS_PRIVATE]], label [[ATOMICRMW_PRIVATE:%.*]], label [[ATOMICRMW_GLOBAL:%.*]]
; GFX90A: atomicrmw.private:
-; GFX90A-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[ADDR]] to ptr addrspace(5)
-; GFX90A-NEXT: [[LOADED_PRIVATE:%.*]] = load float, ptr addrspace(5) [[TMP4]], align 4
+; GFX90A-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[ADDR]] to ptr addrspace(5)
+; GFX90A-NEXT: [[LOADED_PRIVATE:%.*]] = load float, ptr addrspace(5) [[TMP3]], align 4
; GFX90A-NEXT: [[VAL_NEW:%.*]] = fadd float [[LOADED_PRIVATE]], [[VAL]]
-; GFX90A-NEXT: store float [[VAL_NEW]], ptr addrspace(5) [[TMP4]], align 4
+; GFX90A-NEXT: store float [[VAL_NEW]], ptr addrspace(5) [[TMP3]], align 4
; GFX90A-NEXT: br label [[ATOMICRMW_PHI]]
; GFX90A: atomicrmw.global:
-; GFX90A-NEXT: [[TMP5:%.*]] = addrspacecast ptr [[ADDR]] to ptr addrspace(1)
-; GFX90A-NEXT: [[TMP6:%.*]] = atomicrmw fadd ptr addrspace(1) [[TMP5]], float [[VAL]] syncscope("workgroup") seq_cst, align 4
+; GFX90A-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[ADDR]] to ptr addrspace(1)
+; GFX90A-NEXT: [[TMP5:%.*]] = atomicrmw fadd ptr addrspace(1) [[TMP4]], float [[VAL]] syncscope("workgroup") seq_cst, align 4
; GFX90A-NEXT: br label [[ATOMICRMW_PHI]]
; GFX90A: atomicrmw.phi:
-; GFX90A-NEXT: [[LOADED_PHI:%.*]] = phi float [ [[TMP3]], [[ATOMICRMW_SHARED]] ], [ [[LOADED_PRIVATE]], [[ATOMICRMW_PRIVATE]] ], [ [[TMP6]], [[ATOMICRMW_GLOBAL]] ]
+; GFX90A-NEXT: [[LOADED_PHI:%.*]] = phi float [ [[TMP2]], [[ATOMICRMW_SHARED]] ], [ [[LOADED_PRIVATE]], [[ATOMICRMW_PRIVATE]] ], [ [[TMP5]], [[ATOMICRMW_GLOBAL]] ]
; GFX90A-NEXT: br label [[ATOMICRMW_END:%.*]]
; GFX90A: atomicrmw.end:
; GFX90A-NEXT: ret void
@@ -249,65 +249,65 @@ define float @no_unsafe(ptr %addr, float %val) {
; GFX908-NEXT: [[TMP1:%.*]] = load float, ptr [[ADDR:%.*]], align 4
; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]]
; GFX908: atomicrmw.start:
-; GFX908-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX908-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; GFX908-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VAL:%.*]]
-; GFX908-NEXT: [[TMP3:%.*]] = bitcast float [[NEW]] to i32
-; GFX908-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
-; GFX908-NEXT: [[TMP5:%.*]] = cmpxchg ptr [[ADDR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("workgroup") seq_cst seq_cst, align 4
-; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
-; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
-; GFX908-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
+; GFX908-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
+; GFX908-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
+; GFX908-NEXT: [[TMP4:%.*]] = cmpxchg ptr [[ADDR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("workgroup") seq_cst seq_cst, align 4
+; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
+; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
+; GFX908-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX908: atomicrmw.end:
-; GFX908-NEXT: ret float [[TMP6]]
+; GFX908-NEXT: ret float [[TMP5]]
;
; GFX90A-LABEL: @no_unsafe(
; GFX90A-NEXT: [[TMP1:%.*]] = load float, ptr [[ADDR:%.*]], align 4
; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]]
; GFX90A: atomicrmw.start:
-; GFX90A-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX90A-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; GFX90A-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VAL:%.*]]
-; GFX90A-NEXT: [[TMP3:%.*]] = bitcast float [[NEW]] to i32
-; GFX90A-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
-; GFX90A-NEXT: [[TMP5:%.*]] = cmpxchg ptr [[ADDR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("workgroup") seq_cst seq_cst, align 4
-; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
-; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
-; GFX90A-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
+; GFX90A-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
+; GFX90A-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
+; GFX90A-NEXT: [[TMP4:%.*]] = cmpxchg ptr [[ADDR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("workgroup") seq_cst seq_cst, align 4
+; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
+; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
+; GFX90A-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX90A: atomicrmw.end:
-; GFX90A-NEXT: ret float [[TMP6]]
+; GFX90A-NEXT: ret float [[TMP5]]
;
; GFX940-LABEL: @no_unsafe(
; GFX940-NEXT: [[TMP1:%.*]] = load float, ptr [[ADDR:%.*]], align 4
; GFX940-NEXT: br label [[ATOMICRMW_START:%.*]]
; GFX940: atomicrmw.start:
-; GFX940-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX940-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; GFX940-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VAL:%.*]]
-; GFX940-NEXT: [[TMP3:%.*]] = bitcast float [[NEW]] to i32
-; GFX940-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
-; GFX940-NEXT: [[TMP5:%.*]] = cmpxchg ptr [[ADDR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("workgroup") seq_cst seq_cst, align 4
-; GFX940-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
-; GFX940-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
-; GFX940-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
+; GFX940-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
+; GFX940-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
+; GFX940-NEXT: [[TMP4:%.*]] = cmpxchg ptr [[ADDR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("workgroup") seq_cst seq_cst, align 4
+; GFX940-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
+; GFX940-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
+; GFX940-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
; GFX940-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX940: atomicrmw.end:
-; GFX940-NEXT: ret float [[TMP6]]
+; GFX940-NEXT: ret float [[TMP5]]
;
; GFX1100-LABEL: @no_unsafe(
; GFX1100-NEXT: [[TMP1:%.*]] = load float, ptr [[ADDR:%.*]], align 4
; GFX1100-NEXT: br label [[ATOMICRMW_START:%.*]]
; GFX1100: atomicrmw.start:
-; GFX1100-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX1100-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; GFX1100-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VAL:%.*]]
-; GFX1100-NEXT: [[TMP3:%.*]] = bitcast float [[NEW]] to i32
-; GFX1100-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
-; GFX1100-NEXT: [[TMP5:%.*]] = cmpxchg ptr [[ADDR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("workgroup") seq_cst seq_cst, align 4
-; GFX1100-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
-; GFX1100-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
-; GFX1100-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
+; GFX1100-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
+; GFX1100-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
+; GFX1100-NEXT: [[TMP4:%.*]] = cmpxchg ptr [[ADDR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("workgroup") seq_cst seq_cst, align 4
+; GFX1100-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
+; GFX1100-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
+; GFX1100-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
; GFX1100-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX1100: atomicrmw.end:
-; GFX1100-NEXT: ret float [[TMP6]]
+; GFX1100-NEXT: ret float [[TMP5]]
;
; GFX11-LABEL: @no_unsafe(
; GFX11-NEXT: [[TMP1:%.*]] = load float, ptr [[ADDR:%.*]], align 4
diff --git a/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-rmw-fadd.ll b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-rmw-fadd.ll
index 35e589f958695..87c4e92684200 100644
--- a/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-rmw-fadd.ll
+++ b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-rmw-fadd.ll
@@ -11,14 +11,14 @@ define void @test_atomicrmw_fadd_f32_global_no_use_unsafe(ptr addrspace(1) %ptr,
; CI-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR:%.*]], align 4
; CI-NEXT: br label [[ATOMICRMW_START:%.*]]
; CI: atomicrmw.start:
-; CI-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; CI-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; CI-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE:%.*]]
-; CI-NEXT: [[TMP3:%.*]] = bitcast float [[NEW]] to i32
-; CI-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
-; CI-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("wavefront") monotonic monotonic, align 4
-; CI-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
-; CI-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
-; CI-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
+; CI-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
+; CI-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
+; CI-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("wavefront") monotonic monotonic, align 4
+; CI-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
+; CI-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
+; CI-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
; CI-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CI: atomicrmw.end:
; CI-NEXT: ret void
@@ -27,14 +27,14 @@ define void @test_atomicrmw_fadd_f32_global_no_use_unsafe(ptr addrspace(1) %ptr,
; GFX9-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR:%.*]], align 4
; GFX9-NEXT: br label [[ATOMICRMW_START:%.*]]
; GFX9: atomicrmw.start:
-; GFX9-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX9-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; GFX9-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE:%.*]]
-; GFX9-NEXT: [[TMP3:%.*]] = bitcast float [[NEW]] to i32
-; GFX9-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
-; GFX9-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("wavefront") monotonic monotonic, align 4
-; GFX9-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
-; GFX9-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
-; GFX9-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
+; GFX9-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
+; GFX9-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
+; GFX9-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("wavefront") monotonic monotonic, align 4
+; GFX9-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
+; GFX9-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
+; GFX9-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
; GFX9-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX9: atomicrmw.end:
; GFX9-NEXT: ret void
@@ -64,49 +64,49 @@ define float @test_atomicrmw_fadd_f32_global_unsafe(ptr addrspace(1) %ptr, float
; CI-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR:%.*]], align 4
; CI-NEXT: br label [[ATOMICRMW_START:%.*]]
; CI: atomicrmw.start:
-; CI-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; CI-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; CI-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE:%.*]]
-; CI-NEXT: [[TMP3:%.*]] = bitcast float [[NEW]] to i32
-; CI-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
-; CI-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("wavefront") monotonic monotonic, align 4
-; CI-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
-; CI-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
-; CI-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
+; CI-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
+; CI-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
+; CI-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("wavefront") monotonic monotonic, align 4
+; CI-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
+; CI-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
+; CI-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
; CI-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CI: atomicrmw.end:
-; CI-NEXT: ret float [[TMP6]]
+; CI-NEXT: ret float [[TMP5]]
;
; GFX9-LABEL: @test_atomicrmw_fadd_f32_global_unsafe(
; GFX9-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR:%.*]], align 4
; GFX9-NEXT: br label [[ATOMICRMW_START:%.*]]
; GFX9: atomicrmw.start:
-; GFX9-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX9-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; GFX9-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE:%.*]]
-; GFX9-NEXT: [[TMP3:%.*]] = bitcast float [[NEW]] to i32
-; GFX9-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
-; GFX9-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("wavefront") monotonic monotonic, align 4
-; GFX9-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
-; GFX9-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
-; GFX9-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
+; GFX9-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
+; GFX9-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
+; GFX9-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("wavefront") monotonic monotonic, align 4
+; GFX9-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
+; GFX9-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
+; GFX9-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
; GFX9-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX9: atomicrmw.end:
-; GFX9-NEXT: ret float [[TMP6]]
+; GFX9-NEXT: ret float [[TMP5]]
;
; GFX908-LABEL: @test_atomicrmw_fadd_f32_global_unsafe(
; GFX908-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR:%.*]], align 4
; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]]
; GFX908: atomicrmw.start:
-; GFX908-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX908-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; GFX908-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE:%.*]]
-; GFX908-NEXT: [[TMP3:%.*]] = bitcast float [[NEW]] to i32
-; GFX908-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
-; GFX908-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("wavefront") monotonic monotonic, align 4
-; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
-; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
-; GFX908-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
+; GFX908-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
+; GFX908-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
+; GFX908-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("wavefront") monotonic monotonic, align 4
+; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
+; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
+; GFX908-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX908: atomicrmw.end:
-; GFX908-NEXT: ret float [[TMP6]]
+; GFX908-NEXT: ret float [[TMP5]]
;
; GFX90A-LABEL: @test_atomicrmw_fadd_f32_global_unsafe(
; GFX90A-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]] syncscope("wavefront") monotonic, align 4
@@ -129,49 +129,49 @@ define double @test_atomicrmw_fadd_f64_global_unsafe(ptr addrspace(1) %ptr, doub
; CI-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR:%.*]], align 8
; CI-NEXT: br label [[ATOMICRMW_START:%.*]]
; CI: atomicrmw.start:
-; CI-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; CI-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; CI-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE:%.*]]
-; CI-NEXT: [[TMP3:%.*]] = bitcast double [[NEW]] to i64
-; CI-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
-; CI-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("wavefront") monotonic monotonic, align 8
-; CI-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
-; CI-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
-; CI-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
+; CI-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
+; CI-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
+; CI-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("wavefront") monotonic monotonic, align 8
+; CI-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
+; CI-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
+; CI-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
; CI-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CI: atomicrmw.end:
-; CI-NEXT: ret double [[TMP6]]
+; CI-NEXT: ret double [[TMP5]]
;
; GFX9-LABEL: @test_atomicrmw_fadd_f64_global_unsafe(
; GFX9-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR:%.*]], align 8
; GFX9-NEXT: br label [[ATOMICRMW_START:%.*]]
; GFX9: atomicrmw.start:
-; GFX9-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX9-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; GFX9-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE:%.*]]
-; GFX9-NEXT: [[TMP3:%.*]] = bitcast double [[NEW]] to i64
-; GFX9-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
-; GFX9-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("wavefront") monotonic monotonic, align 8
-; GFX9-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
-; GFX9-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
-; GFX9-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
+; GFX9-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
+; GFX9-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
+; GFX9-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("wavefront") monotonic monotonic, align 8
+; GFX9-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
+; GFX9-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
+; GFX9-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
; GFX9-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX9: atomicrmw.end:
-; GFX9-NEXT: ret double [[TMP6]]
+; GFX9-NEXT: ret double [[TMP5]]
;
; GFX908-LABEL: @test_atomicrmw_fadd_f64_global_unsafe(
; GFX908-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR:%.*]], align 8
; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]]
; GFX908: atomicrmw.start:
-; GFX908-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX908-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; GFX908-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE:%.*]]
-; GFX908-NEXT: [[TMP3:%.*]] = bitcast double [[NEW]] to i64
-; GFX908-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
-; GFX908-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("wavefront") monotonic monotonic, align 8
-; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
-; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
-; GFX908-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
+; GFX908-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
+; GFX908-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
+; GFX908-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("wavefront") monotonic monotonic, align 8
+; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
+; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
+; GFX908-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX908: atomicrmw.end:
-; GFX908-NEXT: ret double [[TMP6]]
+; GFX908-NEXT: ret double [[TMP5]]
;
; GFX90A-LABEL: @test_atomicrmw_fadd_f64_global_unsafe(
; GFX90A-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]] syncscope("wavefront") monotonic, align 8
@@ -185,17 +185,17 @@ define double @test_atomicrmw_fadd_f64_global_unsafe(ptr addrspace(1) %ptr, doub
; GFX11-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR:%.*]], align 8
; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]]
; GFX11: atomicrmw.start:
-; GFX11-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX11-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; GFX11-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE:%.*]]
-; GFX11-NEXT: [[TMP3:%.*]] = bitcast double [[NEW]] to i64
-; GFX11-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
-; GFX11-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("wavefront") monotonic monotonic, align 8
-; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
-; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
-; GFX11-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
+; GFX11-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
+; GFX11-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
+; GFX11-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("wavefront") monotonic monotonic, align 8
+; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
+; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
+; GFX11-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
; GFX11-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX11: atomicrmw.end:
-; GFX11-NEXT: ret double [[TMP6]]
+; GFX11-NEXT: ret double [[TMP5]]
;
%res = atomicrmw fadd ptr addrspace(1) %ptr, double %value syncscope("wavefront") monotonic
ret double %res
@@ -206,49 +206,49 @@ define float @test_atomicrmw_fadd_f32_flat_unsafe(ptr %ptr, float %value) #0 {
; CI-NEXT: [[TMP1:%.*]] = load float, ptr [[PTR:%.*]], align 4
; CI-NEXT: br label [[ATOMICRMW_START:%.*]]
; CI: atomicrmw.start:
-; CI-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; CI-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; CI-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE:%.*]]
-; CI-NEXT: [[TMP3:%.*]] = bitcast float [[NEW]] to i32
-; CI-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
-; CI-NEXT: [[TMP5:%.*]] = cmpxchg ptr [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("wavefront") monotonic monotonic, align 4
-; CI-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
-; CI-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
-; CI-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
+; CI-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
+; CI-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
+; CI-NEXT: [[TMP4:%.*]] = cmpxchg ptr [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("wavefront") monotonic monotonic, align 4
+; CI-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
+; CI-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
+; CI-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
; CI-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CI: atomicrmw.end:
-; CI-NEXT: ret float [[TMP6]]
+; CI-NEXT: ret float [[TMP5]]
;
; GFX9-LABEL: @test_atomicrmw_fadd_f32_flat_unsafe(
; GFX9-NEXT: [[TMP1:%.*]] = load float, ptr [[PTR:%.*]], align 4
; GFX9-NEXT: br label [[ATOMICRMW_START:%.*]]
; GFX9: atomicrmw.start:
-; GFX9-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX9-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; GFX9-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE:%.*]]
-; GFX9-NEXT: [[TMP3:%.*]] = bitcast float [[NEW]] to i32
-; GFX9-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
-; GFX9-NEXT: [[TMP5:%.*]] = cmpxchg ptr [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("wavefront") monotonic monotonic, align 4
-; GFX9-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
-; GFX9-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
-; GFX9-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
+; GFX9-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
+; GFX9-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
+; GFX9-NEXT: [[TMP4:%.*]] = cmpxchg ptr [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("wavefront") monotonic monotonic, align 4
+; GFX9-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
+; GFX9-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
+; GFX9-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
; GFX9-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX9: atomicrmw.end:
-; GFX9-NEXT: ret float [[TMP6]]
+; GFX9-NEXT: ret float [[TMP5]]
;
; GFX908-LABEL: @test_atomicrmw_fadd_f32_flat_unsafe(
; GFX908-NEXT: [[TMP1:%.*]] = load float, ptr [[PTR:%.*]], align 4
; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]]
; GFX908: atomicrmw.start:
-; GFX908-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX908-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; GFX908-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE:%.*]]
-; GFX908-NEXT: [[TMP3:%.*]] = bitcast float [[NEW]] to i32
-; GFX908-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
-; GFX908-NEXT: [[TMP5:%.*]] = cmpxchg ptr [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("wavefront") monotonic monotonic, align 4
-; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
-; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
-; GFX908-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
+; GFX908-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
+; GFX908-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
+; GFX908-NEXT: [[TMP4:%.*]] = cmpxchg ptr [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("wavefront") monotonic monotonic, align 4
+; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
+; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
+; GFX908-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX908: atomicrmw.end:
-; GFX908-NEXT: ret float [[TMP6]]
+; GFX908-NEXT: ret float [[TMP5]]
;
; GFX90A-LABEL: @test_atomicrmw_fadd_f32_flat_unsafe(
; GFX90A-NEXT: br label [[ATOMICRMW_CHECK_SHARED:%.*]]
@@ -256,24 +256,24 @@ define float @test_atomicrmw_fadd_f32_flat_unsafe(ptr %ptr, float %value) #0 {
; GFX90A-NEXT: [[IS_SHARED:%.*]] = call i1 @llvm.amdgcn.is.shared(ptr [[PTR:%.*]])
; GFX90A-NEXT: br i1 [[IS_SHARED]], label [[ATOMICRMW_SHARED:%.*]], label [[ATOMICRMW_CHECK_PRIVATE:%.*]]
; GFX90A: atomicrmw.shared:
-; GFX90A-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[PTR]] to ptr addrspace(3)
-; GFX90A-NEXT: [[TMP3:%.*]] = atomicrmw fadd ptr addrspace(3) [[TMP2]], float [[VALUE:%.*]] syncscope("wavefront") monotonic, align 4
+; GFX90A-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[PTR]] to ptr addrspace(3)
+; GFX90A-NEXT: [[TMP2:%.*]] = atomicrmw fadd ptr addrspace(3) [[TMP1]], float [[VALUE:%.*]] syncscope("wavefront") monotonic, align 4
; GFX90A-NEXT: br label [[ATOMICRMW_PHI:%.*]]
; GFX90A: atomicrmw.check.private:
; GFX90A-NEXT: [[IS_PRIVATE:%.*]] = call i1 @llvm.amdgcn.is.private(ptr [[PTR]])
; GFX90A-NEXT: br i1 [[IS_PRIVATE]], label [[ATOMICRMW_PRIVATE:%.*]], label [[ATOMICRMW_GLOBAL:%.*]]
; GFX90A: atomicrmw.private:
-; GFX90A-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[PTR]] to ptr addrspace(5)
-; GFX90A-NEXT: [[LOADED_PRIVATE:%.*]] = load float, ptr addrspace(5) [[TMP4]], align 4
+; GFX90A-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[PTR]] to ptr addrspace(5)
+; GFX90A-NEXT: [[LOADED_PRIVATE:%.*]] = load float, ptr addrspace(5) [[TMP3]], align 4
; GFX90A-NEXT: [[VAL_NEW:%.*]] = fadd float [[LOADED_PRIVATE]], [[VALUE]]
-; GFX90A-NEXT: store float [[VAL_NEW]], ptr addrspace(5) [[TMP4]], align 4
+; GFX90A-NEXT: store float [[VAL_NEW]], ptr addrspace(5) [[TMP3]], align 4
; GFX90A-NEXT: br label [[ATOMICRMW_PHI]]
; GFX90A: atomicrmw.global:
-; GFX90A-NEXT: [[TMP5:%.*]] = addrspacecast ptr [[PTR]] to ptr addrspace(1)
-; GFX90A-NEXT: [[TMP6:%.*]] = atomicrmw fadd ptr addrspace(1) [[TMP5]], float [[VALUE]] syncscope("wavefront") monotonic, align 4
+; GFX90A-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[PTR]] to ptr addrspace(1)
+; GFX90A-NEXT: [[TMP5:%.*]] = atomicrmw fadd ptr addrspace(1) [[TMP4]], float [[VALUE]] syncscope("wavefront") monotonic, align 4
; GFX90A-NEXT: br label [[ATOMICRMW_PHI]]
; GFX90A: atomicrmw.phi:
-; GFX90A-NEXT: [[LOADED_PHI:%.*]] = phi float [ [[TMP3]], [[ATOMICRMW_SHARED]] ], [ [[LOADED_PRIVATE]], [[ATOMICRMW_PRIVATE]] ], [ [[TMP6]], [[ATOMICRMW_GLOBAL]] ]
+; GFX90A-NEXT: [[LOADED_PHI:%.*]] = phi float [ [[TMP2]], [[ATOMICRMW_SHARED]] ], [ [[LOADED_PRIVATE]], [[ATOMICRMW_PRIVATE]] ], [ [[TMP5]], [[ATOMICRMW_GLOBAL]] ]
; GFX90A-NEXT: br label [[ATOMICRMW_END:%.*]]
; GFX90A: atomicrmw.end:
; GFX90A-NEXT: ret float [[LOADED_PHI]]
@@ -295,49 +295,49 @@ define double @test_atomicrmw_fadd_f64_flat_unsafe(ptr %ptr, double %value) #0 {
; CI-NEXT: [[TMP1:%.*]] = load double, ptr [[PTR:%.*]], align 8
; CI-NEXT: br label [[ATOMICRMW_START:%.*]]
; CI: atomicrmw.start:
-; CI-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; CI-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; CI-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE:%.*]]
-; CI-NEXT: [[TMP3:%.*]] = bitcast double [[NEW]] to i64
-; CI-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
-; CI-NEXT: [[TMP5:%.*]] = cmpxchg ptr [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("wavefront") monotonic monotonic, align 8
-; CI-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
-; CI-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
-; CI-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
+; CI-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
+; CI-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
+; CI-NEXT: [[TMP4:%.*]] = cmpxchg ptr [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("wavefront") monotonic monotonic, align 8
+; CI-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
+; CI-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
+; CI-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
; CI-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CI: atomicrmw.end:
-; CI-NEXT: ret double [[TMP6]]
+; CI-NEXT: ret double [[TMP5]]
;
; GFX9-LABEL: @test_atomicrmw_fadd_f64_flat_unsafe(
; GFX9-NEXT: [[TMP1:%.*]] = load double, ptr [[PTR:%.*]], align 8
; GFX9-NEXT: br label [[ATOMICRMW_START:%.*]]
; GFX9: atomicrmw.start:
-; GFX9-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX9-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; GFX9-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE:%.*]]
-; GFX9-NEXT: [[TMP3:%.*]] = bitcast double [[NEW]] to i64
-; GFX9-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
-; GFX9-NEXT: [[TMP5:%.*]] = cmpxchg ptr [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("wavefront") monotonic monotonic, align 8
-; GFX9-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
-; GFX9-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
-; GFX9-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
+; GFX9-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
+; GFX9-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
+; GFX9-NEXT: [[TMP4:%.*]] = cmpxchg ptr [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("wavefront") monotonic monotonic, align 8
+; GFX9-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
+; GFX9-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
+; GFX9-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
; GFX9-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX9: atomicrmw.end:
-; GFX9-NEXT: ret double [[TMP6]]
+; GFX9-NEXT: ret double [[TMP5]]
;
; GFX908-LABEL: @test_atomicrmw_fadd_f64_flat_unsafe(
; GFX908-NEXT: [[TMP1:%.*]] = load double, ptr [[PTR:%.*]], align 8
; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]]
; GFX908: atomicrmw.start:
-; GFX908-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX908-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; GFX908-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE:%.*]]
-; GFX908-NEXT: [[TMP3:%.*]] = bitcast double [[NEW]] to i64
-; GFX908-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
-; GFX908-NEXT: [[TMP5:%.*]] = cmpxchg ptr [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("wavefront") monotonic monotonic, align 8
-; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
-; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
-; GFX908-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
+; GFX908-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
+; GFX908-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
+; GFX908-NEXT: [[TMP4:%.*]] = cmpxchg ptr [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("wavefront") monotonic monotonic, align 8
+; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
+; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
+; GFX908-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX908: atomicrmw.end:
-; GFX908-NEXT: ret double [[TMP6]]
+; GFX908-NEXT: ret double [[TMP5]]
;
; GFX90A-LABEL: @test_atomicrmw_fadd_f64_flat_unsafe(
; GFX90A-NEXT: [[RES:%.*]] = atomicrmw fadd ptr [[PTR:%.*]], double [[VALUE:%.*]] syncscope("wavefront") monotonic, align 8
@@ -351,17 +351,17 @@ define double @test_atomicrmw_fadd_f64_flat_unsafe(ptr %ptr, double %value) #0 {
; GFX11-NEXT: [[TMP1:%.*]] = load double, ptr [[PTR:%.*]], align 8
; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]]
; GFX11: atomicrmw.start:
-; GFX11-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX11-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; GFX11-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE:%.*]]
-; GFX11-NEXT: [[TMP3:%.*]] = bitcast double [[NEW]] to i64
-; GFX11-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
-; GFX11-NEXT: [[TMP5:%.*]] = cmpxchg ptr [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("wavefront") monotonic monotonic, align 8
-; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
-; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
-; GFX11-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
+; GFX11-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
+; GFX11-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
+; GFX11-NEXT: [[TMP4:%.*]] = cmpxchg ptr [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("wavefront") monotonic monotonic, align 8
+; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
+; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
+; GFX11-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
; GFX11-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX11: atomicrmw.end:
-; GFX11-NEXT: ret double [[TMP6]]
+; GFX11-NEXT: ret double [[TMP5]]
;
%res = atomicrmw fadd ptr %ptr, double %value syncscope("wavefront") monotonic
ret double %res
@@ -372,97 +372,97 @@ define float @test_atomicrmw_fadd_f32_flat(ptr %ptr, float %value) {
; CI-NEXT: [[TMP1:%.*]] = load float, ptr [[PTR:%.*]], align 4
; CI-NEXT: br label [[ATOMICRMW_START:%.*]]
; CI: atomicrmw.start:
-; CI-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; CI-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; CI-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE:%.*]]
-; CI-NEXT: [[TMP3:%.*]] = bitcast float [[NEW]] to i32
-; CI-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
-; CI-NEXT: [[TMP5:%.*]] = cmpxchg ptr [[PTR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4
-; CI-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
-; CI-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
-; CI-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
+; CI-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
+; CI-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
+; CI-NEXT: [[TMP4:%.*]] = cmpxchg ptr [[PTR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4
+; CI-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
+; CI-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
+; CI-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
; CI-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CI: atomicrmw.end:
-; CI-NEXT: ret float [[TMP6]]
+; CI-NEXT: ret float [[TMP5]]
;
; GFX9-LABEL: @test_atomicrmw_fadd_f32_flat(
; GFX9-NEXT: [[TMP1:%.*]] = load float, ptr [[PTR:%.*]], align 4
; GFX9-NEXT: br label [[ATOMICRMW_START:%.*]]
; GFX9: atomicrmw.start:
-; GFX9-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX9-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; GFX9-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE:%.*]]
-; GFX9-NEXT: [[TMP3:%.*]] = bitcast float [[NEW]] to i32
-; GFX9-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
-; GFX9-NEXT: [[TMP5:%.*]] = cmpxchg ptr [[PTR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4
-; GFX9-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
-; GFX9-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
-; GFX9-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
+; GFX9-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
+; GFX9-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
+; GFX9-NEXT: [[TMP4:%.*]] = cmpxchg ptr [[PTR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4
+; GFX9-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
+; GFX9-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
+; GFX9-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
; GFX9-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX9: atomicrmw.end:
-; GFX9-NEXT: ret float [[TMP6]]
+; GFX9-NEXT: ret float [[TMP5]]
;
; GFX908-LABEL: @test_atomicrmw_fadd_f32_flat(
; GFX908-NEXT: [[TMP1:%.*]] = load float, ptr [[PTR:%.*]], align 4
; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]]
; GFX908: atomicrmw.start:
-; GFX908-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX908-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; GFX908-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE:%.*]]
-; GFX908-NEXT: [[TMP3:%.*]] = bitcast float [[NEW]] to i32
-; GFX908-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
-; GFX908-NEXT: [[TMP5:%.*]] = cmpxchg ptr [[PTR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4
-; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
-; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
-; GFX908-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
+; GFX908-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
+; GFX908-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
+; GFX908-NEXT: [[TMP4:%.*]] = cmpxchg ptr [[PTR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4
+; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
+; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
+; GFX908-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX908: atomicrmw.end:
-; GFX908-NEXT: ret float [[TMP6]]
+; GFX908-NEXT: ret float [[TMP5]]
;
; GFX90A-LABEL: @test_atomicrmw_fadd_f32_flat(
; GFX90A-NEXT: [[TMP1:%.*]] = load float, ptr [[PTR:%.*]], align 4
; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]]
; GFX90A: atomicrmw.start:
-; GFX90A-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX90A-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; GFX90A-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE:%.*]]
-; GFX90A-NEXT: [[TMP3:%.*]] = bitcast float [[NEW]] to i32
-; GFX90A-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
-; GFX90A-NEXT: [[TMP5:%.*]] = cmpxchg ptr [[PTR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4
-; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
-; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
-; GFX90A-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
+; GFX90A-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
+; GFX90A-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
+; GFX90A-NEXT: [[TMP4:%.*]] = cmpxchg ptr [[PTR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4
+; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
+; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
+; GFX90A-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX90A: atomicrmw.end:
-; GFX90A-NEXT: ret float [[TMP6]]
+; GFX90A-NEXT: ret float [[TMP5]]
;
; GFX940-LABEL: @test_atomicrmw_fadd_f32_flat(
; GFX940-NEXT: [[TMP1:%.*]] = load float, ptr [[PTR:%.*]], align 4
; GFX940-NEXT: br label [[ATOMICRMW_START:%.*]]
; GFX940: atomicrmw.start:
-; GFX940-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX940-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; GFX940-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE:%.*]]
-; GFX940-NEXT: [[TMP3:%.*]] = bitcast float [[NEW]] to i32
-; GFX940-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
-; GFX940-NEXT: [[TMP5:%.*]] = cmpxchg ptr [[PTR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4
-; GFX940-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
-; GFX940-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
-; GFX940-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
+; GFX940-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
+; GFX940-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
+; GFX940-NEXT: [[TMP4:%.*]] = cmpxchg ptr [[PTR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4
+; GFX940-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
+; GFX940-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
+; GFX940-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
; GFX940-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX940: atomicrmw.end:
-; GFX940-NEXT: ret float [[TMP6]]
+; GFX940-NEXT: ret float [[TMP5]]
;
; GFX11-LABEL: @test_atomicrmw_fadd_f32_flat(
; GFX11-NEXT: [[TMP1:%.*]] = load float, ptr [[PTR:%.*]], align 4
; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]]
; GFX11: atomicrmw.start:
-; GFX11-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX11-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; GFX11-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE:%.*]]
-; GFX11-NEXT: [[TMP3:%.*]] = bitcast float [[NEW]] to i32
-; GFX11-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
-; GFX11-NEXT: [[TMP5:%.*]] = cmpxchg ptr [[PTR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4
-; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
-; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
-; GFX11-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
+; GFX11-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
+; GFX11-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
+; GFX11-NEXT: [[TMP4:%.*]] = cmpxchg ptr [[PTR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4
+; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
+; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
+; GFX11-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
; GFX11-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX11: atomicrmw.end:
-; GFX11-NEXT: ret float [[TMP6]]
+; GFX11-NEXT: ret float [[TMP5]]
;
%res = atomicrmw fadd ptr %ptr, float %value seq_cst
ret float %res
@@ -473,97 +473,97 @@ define float @test_atomicrmw_fadd_f32_global(ptr addrspace(1) %ptr, float %value
; CI-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR:%.*]], align 4
; CI-NEXT: br label [[ATOMICRMW_START:%.*]]
; CI: atomicrmw.start:
-; CI-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; CI-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; CI-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE:%.*]]
-; CI-NEXT: [[TMP3:%.*]] = bitcast float [[NEW]] to i32
-; CI-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
-; CI-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4
-; CI-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
-; CI-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
-; CI-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
+; CI-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
+; CI-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
+; CI-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4
+; CI-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
+; CI-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
+; CI-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
; CI-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CI: atomicrmw.end:
-; CI-NEXT: ret float [[TMP6]]
+; CI-NEXT: ret float [[TMP5]]
;
; GFX9-LABEL: @test_atomicrmw_fadd_f32_global(
; GFX9-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR:%.*]], align 4
; GFX9-NEXT: br label [[ATOMICRMW_START:%.*]]
; GFX9: atomicrmw.start:
-; GFX9-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX9-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; GFX9-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE:%.*]]
-; GFX9-NEXT: [[TMP3:%.*]] = bitcast float [[NEW]] to i32
-; GFX9-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
-; GFX9-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4
-; GFX9-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
-; GFX9-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
-; GFX9-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
+; GFX9-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
+; GFX9-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
+; GFX9-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4
+; GFX9-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
+; GFX9-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
+; GFX9-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
; GFX9-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX9: atomicrmw.end:
-; GFX9-NEXT: ret float [[TMP6]]
+; GFX9-NEXT: ret float [[TMP5]]
;
; GFX908-LABEL: @test_atomicrmw_fadd_f32_global(
; GFX908-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR:%.*]], align 4
; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]]
; GFX908: atomicrmw.start:
-; GFX908-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX908-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; GFX908-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE:%.*]]
-; GFX908-NEXT: [[TMP3:%.*]] = bitcast float [[NEW]] to i32
-; GFX908-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
-; GFX908-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4
-; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
-; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
-; GFX908-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
+; GFX908-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
+; GFX908-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
+; GFX908-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4
+; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
+; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
+; GFX908-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX908: atomicrmw.end:
-; GFX908-NEXT: ret float [[TMP6]]
+; GFX908-NEXT: ret float [[TMP5]]
;
; GFX90A-LABEL: @test_atomicrmw_fadd_f32_global(
; GFX90A-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR:%.*]], align 4
; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]]
; GFX90A: atomicrmw.start:
-; GFX90A-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX90A-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; GFX90A-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE:%.*]]
-; GFX90A-NEXT: [[TMP3:%.*]] = bitcast float [[NEW]] to i32
-; GFX90A-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
-; GFX90A-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4
-; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
-; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
-; GFX90A-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
+; GFX90A-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
+; GFX90A-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
+; GFX90A-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4
+; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
+; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
+; GFX90A-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX90A: atomicrmw.end:
-; GFX90A-NEXT: ret float [[TMP6]]
+; GFX90A-NEXT: ret float [[TMP5]]
;
; GFX940-LABEL: @test_atomicrmw_fadd_f32_global(
; GFX940-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR:%.*]], align 4
; GFX940-NEXT: br label [[ATOMICRMW_START:%.*]]
; GFX940: atomicrmw.start:
-; GFX940-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX940-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; GFX940-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE:%.*]]
-; GFX940-NEXT: [[TMP3:%.*]] = bitcast float [[NEW]] to i32
-; GFX940-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
-; GFX940-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4
-; GFX940-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
-; GFX940-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
-; GFX940-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
+; GFX940-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
+; GFX940-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
+; GFX940-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4
+; GFX940-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
+; GFX940-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
+; GFX940-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
; GFX940-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX940: atomicrmw.end:
-; GFX940-NEXT: ret float [[TMP6]]
+; GFX940-NEXT: ret float [[TMP5]]
;
; GFX11-LABEL: @test_atomicrmw_fadd_f32_global(
; GFX11-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR:%.*]], align 4
; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]]
; GFX11: atomicrmw.start:
-; GFX11-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX11-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; GFX11-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE:%.*]]
-; GFX11-NEXT: [[TMP3:%.*]] = bitcast float [[NEW]] to i32
-; GFX11-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
-; GFX11-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4
-; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
-; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
-; GFX11-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
+; GFX11-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
+; GFX11-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
+; GFX11-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4
+; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
+; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
+; GFX11-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
; GFX11-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX11: atomicrmw.end:
-; GFX11-NEXT: ret float [[TMP6]]
+; GFX11-NEXT: ret float [[TMP5]]
;
%res = atomicrmw fadd ptr addrspace(1) %ptr, float %value seq_cst
ret float %res
@@ -574,14 +574,14 @@ define void @test_atomicrmw_fadd_f32_global_no_use_ieee(ptr addrspace(1) %ptr, f
; CI-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR:%.*]], align 4
; CI-NEXT: br label [[ATOMICRMW_START:%.*]]
; CI: atomicrmw.start:
-; CI-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; CI-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; CI-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE:%.*]]
-; CI-NEXT: [[TMP3:%.*]] = bitcast float [[NEW]] to i32
-; CI-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
-; CI-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4
-; CI-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
-; CI-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
-; CI-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
+; CI-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
+; CI-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
+; CI-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4
+; CI-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
+; CI-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
+; CI-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
; CI-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CI: atomicrmw.end:
; CI-NEXT: ret void
@@ -590,14 +590,14 @@ define void @test_atomicrmw_fadd_f32_global_no_use_ieee(ptr addrspace(1) %ptr, f
; GFX9-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR:%.*]], align 4
; GFX9-NEXT: br label [[ATOMICRMW_START:%.*]]
; GFX9: atomicrmw.start:
-; GFX9-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX9-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; GFX9-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE:%.*]]
-; GFX9-NEXT: [[TMP3:%.*]] = bitcast float [[NEW]] to i32
-; GFX9-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
-; GFX9-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4
-; GFX9-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
-; GFX9-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
-; GFX9-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
+; GFX9-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
+; GFX9-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
+; GFX9-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4
+; GFX9-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
+; GFX9-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
+; GFX9-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
; GFX9-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX9: atomicrmw.end:
; GFX9-NEXT: ret void
@@ -606,14 +606,14 @@ define void @test_atomicrmw_fadd_f32_global_no_use_ieee(ptr addrspace(1) %ptr, f
; GFX908-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR:%.*]], align 4
; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]]
; GFX908: atomicrmw.start:
-; GFX908-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX908-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; GFX908-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE:%.*]]
-; GFX908-NEXT: [[TMP3:%.*]] = bitcast float [[NEW]] to i32
-; GFX908-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
-; GFX908-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4
-; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
-; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
-; GFX908-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
+; GFX908-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
+; GFX908-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
+; GFX908-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4
+; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
+; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
+; GFX908-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX908: atomicrmw.end:
; GFX908-NEXT: ret void
@@ -622,14 +622,14 @@ define void @test_atomicrmw_fadd_f32_global_no_use_ieee(ptr addrspace(1) %ptr, f
; GFX90A-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR:%.*]], align 4
; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]]
; GFX90A: atomicrmw.start:
-; GFX90A-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX90A-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; GFX90A-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE:%.*]]
-; GFX90A-NEXT: [[TMP3:%.*]] = bitcast float [[NEW]] to i32
-; GFX90A-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
-; GFX90A-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4
-; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
-; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
-; GFX90A-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
+; GFX90A-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
+; GFX90A-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
+; GFX90A-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4
+; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
+; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
+; GFX90A-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX90A: atomicrmw.end:
; GFX90A-NEXT: ret void
@@ -638,14 +638,14 @@ define void @test_atomicrmw_fadd_f32_global_no_use_ieee(ptr addrspace(1) %ptr, f
; GFX940-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR:%.*]], align 4
; GFX940-NEXT: br label [[ATOMICRMW_START:%.*]]
; GFX940: atomicrmw.start:
-; GFX940-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX940-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; GFX940-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE:%.*]]
-; GFX940-NEXT: [[TMP3:%.*]] = bitcast float [[NEW]] to i32
-; GFX940-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
-; GFX940-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4
-; GFX940-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
-; GFX940-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
-; GFX940-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
+; GFX940-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
+; GFX940-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
+; GFX940-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4
+; GFX940-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
+; GFX940-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
+; GFX940-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
; GFX940-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX940: atomicrmw.end:
; GFX940-NEXT: ret void
@@ -654,14 +654,14 @@ define void @test_atomicrmw_fadd_f32_global_no_use_ieee(ptr addrspace(1) %ptr, f
; GFX11-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR:%.*]], align 4
; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]]
; GFX11: atomicrmw.start:
-; GFX11-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX11-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; GFX11-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE:%.*]]
-; GFX11-NEXT: [[TMP3:%.*]] = bitcast float [[NEW]] to i32
-; GFX11-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
-; GFX11-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4
-; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
-; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
-; GFX11-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
+; GFX11-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
+; GFX11-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
+; GFX11-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4
+; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
+; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
+; GFX11-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
; GFX11-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX11: atomicrmw.end:
; GFX11-NEXT: ret void
@@ -675,14 +675,14 @@ define void @test_atomicrmw_fadd_f32_global_no_use_denorm_flush(ptr addrspace(1)
; CI-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR:%.*]], align 4
; CI-NEXT: br label [[ATOMICRMW_START:%.*]]
; CI: atomicrmw.start:
-; CI-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; CI-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; CI-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE:%.*]]
-; CI-NEXT: [[TMP3:%.*]] = bitcast float [[NEW]] to i32
-; CI-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
-; CI-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4
-; CI-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
-; CI-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
-; CI-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
+; CI-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
+; CI-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
+; CI-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4
+; CI-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
+; CI-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
+; CI-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
; CI-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CI: atomicrmw.end:
; CI-NEXT: ret void
@@ -691,14 +691,14 @@ define void @test_atomicrmw_fadd_f32_global_no_use_denorm_flush(ptr addrspace(1)
; GFX9-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR:%.*]], align 4
; GFX9-NEXT: br label [[ATOMICRMW_START:%.*]]
; GFX9: atomicrmw.start:
-; GFX9-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX9-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; GFX9-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE:%.*]]
-; GFX9-NEXT: [[TMP3:%.*]] = bitcast float [[NEW]] to i32
-; GFX9-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
-; GFX9-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4
-; GFX9-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
-; GFX9-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
-; GFX9-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
+; GFX9-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
+; GFX9-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
+; GFX9-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4
+; GFX9-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
+; GFX9-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
+; GFX9-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
; GFX9-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX9: atomicrmw.end:
; GFX9-NEXT: ret void
@@ -707,14 +707,14 @@ define void @test_atomicrmw_fadd_f32_global_no_use_denorm_flush(ptr addrspace(1)
; GFX908-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR:%.*]], align 4
; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]]
; GFX908: atomicrmw.start:
-; GFX908-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX908-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; GFX908-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE:%.*]]
-; GFX908-NEXT: [[TMP3:%.*]] = bitcast float [[NEW]] to i32
-; GFX908-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
-; GFX908-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4
-; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
-; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
-; GFX908-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
+; GFX908-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
+; GFX908-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
+; GFX908-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4
+; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
+; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
+; GFX908-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX908: atomicrmw.end:
; GFX908-NEXT: ret void
@@ -723,14 +723,14 @@ define void @test_atomicrmw_fadd_f32_global_no_use_denorm_flush(ptr addrspace(1)
; GFX90A-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR:%.*]], align 4
; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]]
; GFX90A: atomicrmw.start:
-; GFX90A-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX90A-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; GFX90A-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE:%.*]]
-; GFX90A-NEXT: [[TMP3:%.*]] = bitcast float [[NEW]] to i32
-; GFX90A-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
-; GFX90A-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4
-; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
-; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
-; GFX90A-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
+; GFX90A-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
+; GFX90A-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
+; GFX90A-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4
+; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
+; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
+; GFX90A-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX90A: atomicrmw.end:
; GFX90A-NEXT: ret void
@@ -739,14 +739,14 @@ define void @test_atomicrmw_fadd_f32_global_no_use_denorm_flush(ptr addrspace(1)
; GFX940-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR:%.*]], align 4
; GFX940-NEXT: br label [[ATOMICRMW_START:%.*]]
; GFX940: atomicrmw.start:
-; GFX940-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX940-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; GFX940-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE:%.*]]
-; GFX940-NEXT: [[TMP3:%.*]] = bitcast float [[NEW]] to i32
-; GFX940-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
-; GFX940-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4
-; GFX940-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
-; GFX940-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
-; GFX940-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
+; GFX940-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
+; GFX940-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
+; GFX940-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4
+; GFX940-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
+; GFX940-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
+; GFX940-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
; GFX940-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX940: atomicrmw.end:
; GFX940-NEXT: ret void
@@ -755,14 +755,14 @@ define void @test_atomicrmw_fadd_f32_global_no_use_denorm_flush(ptr addrspace(1)
; GFX11-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR:%.*]], align 4
; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]]
; GFX11: atomicrmw.start:
-; GFX11-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX11-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; GFX11-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE:%.*]]
-; GFX11-NEXT: [[TMP3:%.*]] = bitcast float [[NEW]] to i32
-; GFX11-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
-; GFX11-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4
-; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
-; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
-; GFX11-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
+; GFX11-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
+; GFX11-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
+; GFX11-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4
+; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
+; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
+; GFX11-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
; GFX11-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX11: atomicrmw.end:
; GFX11-NEXT: ret void
@@ -776,17 +776,17 @@ define float @test_atomicrmw_fadd_f32_local(ptr addrspace(3) %ptr, float %value)
; CI-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(3) [[PTR:%.*]], align 4
; CI-NEXT: br label [[ATOMICRMW_START:%.*]]
; CI: atomicrmw.start:
-; CI-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; CI-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; CI-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE:%.*]]
-; CI-NEXT: [[TMP3:%.*]] = bitcast float [[NEW]] to i32
-; CI-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
-; CI-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(3) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4
-; CI-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
-; CI-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
-; CI-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
+; CI-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
+; CI-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
+; CI-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(3) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4
+; CI-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
+; CI-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
+; CI-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
; CI-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CI: atomicrmw.end:
-; CI-NEXT: ret float [[TMP6]]
+; CI-NEXT: ret float [[TMP5]]
;
; GFX9-LABEL: @test_atomicrmw_fadd_f32_local(
; GFX9-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(3) [[PTR:%.*]], float [[VALUE:%.*]] seq_cst, align 4
@@ -831,17 +831,17 @@ define half @test_atomicrmw_fadd_f16_flat(ptr %ptr, half %value) {
; CI-NEXT: [[NEW:%.*]] = fadd half [[TMP4]], [[VALUE:%.*]]
; CI-NEXT: [[TMP5:%.*]] = bitcast half [[NEW]] to i16
; CI-NEXT: [[EXTENDED:%.*]] = zext i16 [[TMP5]] to i32
-; CI-NEXT: [[SHIFTED2:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]]
+; CI-NEXT: [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]]
; CI-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
-; CI-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED2]]
+; CI-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED1]]
; CI-NEXT: [[TMP6:%.*]] = cmpxchg ptr [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4
; CI-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1
; CI-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0
; CI-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CI: atomicrmw.end:
-; CI-NEXT: [[SHIFTED3:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
-; CI-NEXT: [[EXTRACTED4:%.*]] = trunc i32 [[SHIFTED3]] to i16
-; CI-NEXT: [[TMP7:%.*]] = bitcast i16 [[EXTRACTED4]] to half
+; CI-NEXT: [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
+; CI-NEXT: [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i16
+; CI-NEXT: [[TMP7:%.*]] = bitcast i16 [[EXTRACTED3]] to half
; CI-NEXT: ret half [[TMP7]]
;
; GFX9-LABEL: @test_atomicrmw_fadd_f16_flat(
@@ -862,17 +862,17 @@ define half @test_atomicrmw_fadd_f16_flat(ptr %ptr, half %value) {
; GFX9-NEXT: [[NEW:%.*]] = fadd half [[TMP4]], [[VALUE:%.*]]
; GFX9-NEXT: [[TMP5:%.*]] = bitcast half [[NEW]] to i16
; GFX9-NEXT: [[EXTENDED:%.*]] = zext i16 [[TMP5]] to i32
-; GFX9-NEXT: [[SHIFTED2:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]]
+; GFX9-NEXT: [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]]
; GFX9-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
-; GFX9-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED2]]
+; GFX9-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED1]]
; GFX9-NEXT: [[TMP6:%.*]] = cmpxchg ptr [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4
; GFX9-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1
; GFX9-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0
; GFX9-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX9: atomicrmw.end:
-; GFX9-NEXT: [[SHIFTED3:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
-; GFX9-NEXT: [[EXTRACTED4:%.*]] = trunc i32 [[SHIFTED3]] to i16
-; GFX9-NEXT: [[TMP7:%.*]] = bitcast i16 [[EXTRACTED4]] to half
+; GFX9-NEXT: [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
+; GFX9-NEXT: [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i16
+; GFX9-NEXT: [[TMP7:%.*]] = bitcast i16 [[EXTRACTED3]] to half
; GFX9-NEXT: ret half [[TMP7]]
;
; GFX908-LABEL: @test_atomicrmw_fadd_f16_flat(
@@ -893,17 +893,17 @@ define half @test_atomicrmw_fadd_f16_flat(ptr %ptr, half %value) {
; GFX908-NEXT: [[NEW:%.*]] = fadd half [[TMP4]], [[VALUE:%.*]]
; GFX908-NEXT: [[TMP5:%.*]] = bitcast half [[NEW]] to i16
; GFX908-NEXT: [[EXTENDED:%.*]] = zext i16 [[TMP5]] to i32
-; GFX908-NEXT: [[SHIFTED2:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]]
+; GFX908-NEXT: [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]]
; GFX908-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
-; GFX908-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED2]]
+; GFX908-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED1]]
; GFX908-NEXT: [[TMP6:%.*]] = cmpxchg ptr [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4
; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1
; GFX908-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0
; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX908: atomicrmw.end:
-; GFX908-NEXT: [[SHIFTED3:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
-; GFX908-NEXT: [[EXTRACTED4:%.*]] = trunc i32 [[SHIFTED3]] to i16
-; GFX908-NEXT: [[TMP7:%.*]] = bitcast i16 [[EXTRACTED4]] to half
+; GFX908-NEXT: [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
+; GFX908-NEXT: [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i16
+; GFX908-NEXT: [[TMP7:%.*]] = bitcast i16 [[EXTRACTED3]] to half
; GFX908-NEXT: ret half [[TMP7]]
;
; GFX90A-LABEL: @test_atomicrmw_fadd_f16_flat(
@@ -924,17 +924,17 @@ define half @test_atomicrmw_fadd_f16_flat(ptr %ptr, half %value) {
; GFX90A-NEXT: [[NEW:%.*]] = fadd half [[TMP4]], [[VALUE:%.*]]
; GFX90A-NEXT: [[TMP5:%.*]] = bitcast half [[NEW]] to i16
; GFX90A-NEXT: [[EXTENDED:%.*]] = zext i16 [[TMP5]] to i32
-; GFX90A-NEXT: [[SHIFTED2:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]]
+; GFX90A-NEXT: [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]]
; GFX90A-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
-; GFX90A-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED2]]
+; GFX90A-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED1]]
; GFX90A-NEXT: [[TMP6:%.*]] = cmpxchg ptr [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4
; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1
; GFX90A-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0
; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX90A: atomicrmw.end:
-; GFX90A-NEXT: [[SHIFTED3:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
-; GFX90A-NEXT: [[EXTRACTED4:%.*]] = trunc i32 [[SHIFTED3]] to i16
-; GFX90A-NEXT: [[TMP7:%.*]] = bitcast i16 [[EXTRACTED4]] to half
+; GFX90A-NEXT: [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
+; GFX90A-NEXT: [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i16
+; GFX90A-NEXT: [[TMP7:%.*]] = bitcast i16 [[EXTRACTED3]] to half
; GFX90A-NEXT: ret half [[TMP7]]
;
; GFX940-LABEL: @test_atomicrmw_fadd_f16_flat(
@@ -955,17 +955,17 @@ define half @test_atomicrmw_fadd_f16_flat(ptr %ptr, half %value) {
; GFX940-NEXT: [[NEW:%.*]] = fadd half [[TMP4]], [[VALUE:%.*]]
; GFX940-NEXT: [[TMP5:%.*]] = bitcast half [[NEW]] to i16
; GFX940-NEXT: [[EXTENDED:%.*]] = zext i16 [[TMP5]] to i32
-; GFX940-NEXT: [[SHIFTED2:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]]
+; GFX940-NEXT: [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]]
; GFX940-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
-; GFX940-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED2]]
+; GFX940-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED1]]
; GFX940-NEXT: [[TMP6:%.*]] = cmpxchg ptr [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4
; GFX940-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1
; GFX940-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0
; GFX940-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX940: atomicrmw.end:
-; GFX940-NEXT: [[SHIFTED3:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
-; GFX940-NEXT: [[EXTRACTED4:%.*]] = trunc i32 [[SHIFTED3]] to i16
-; GFX940-NEXT: [[TMP7:%.*]] = bitcast i16 [[EXTRACTED4]] to half
+; GFX940-NEXT: [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
+; GFX940-NEXT: [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i16
+; GFX940-NEXT: [[TMP7:%.*]] = bitcast i16 [[EXTRACTED3]] to half
; GFX940-NEXT: ret half [[TMP7]]
;
; GFX11-LABEL: @test_atomicrmw_fadd_f16_flat(
@@ -986,17 +986,17 @@ define half @test_atomicrmw_fadd_f16_flat(ptr %ptr, half %value) {
; GFX11-NEXT: [[NEW:%.*]] = fadd half [[TMP4]], [[VALUE:%.*]]
; GFX11-NEXT: [[TMP5:%.*]] = bitcast half [[NEW]] to i16
; GFX11-NEXT: [[EXTENDED:%.*]] = zext i16 [[TMP5]] to i32
-; GFX11-NEXT: [[SHIFTED2:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]]
+; GFX11-NEXT: [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]]
; GFX11-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
-; GFX11-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED2]]
+; GFX11-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED1]]
; GFX11-NEXT: [[TMP6:%.*]] = cmpxchg ptr [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4
; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1
; GFX11-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0
; GFX11-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX11: atomicrmw.end:
-; GFX11-NEXT: [[SHIFTED3:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
-; GFX11-NEXT: [[EXTRACTED4:%.*]] = trunc i32 [[SHIFTED3]] to i16
-; GFX11-NEXT: [[TMP7:%.*]] = bitcast i16 [[EXTRACTED4]] to half
+; GFX11-NEXT: [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
+; GFX11-NEXT: [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i16
+; GFX11-NEXT: [[TMP7:%.*]] = bitcast i16 [[EXTRACTED3]] to half
; GFX11-NEXT: ret half [[TMP7]]
;
%res = atomicrmw fadd ptr %ptr, half %value seq_cst
@@ -1022,17 +1022,17 @@ define half @test_atomicrmw_fadd_f16_global(ptr addrspace(1) %ptr, half %value)
; CI-NEXT: [[NEW:%.*]] = fadd half [[TMP4]], [[VALUE:%.*]]
; CI-NEXT: [[TMP5:%.*]] = bitcast half [[NEW]] to i16
; CI-NEXT: [[EXTENDED:%.*]] = zext i16 [[TMP5]] to i32
-; CI-NEXT: [[SHIFTED2:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]]
+; CI-NEXT: [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]]
; CI-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
-; CI-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED2]]
+; CI-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED1]]
; CI-NEXT: [[TMP6:%.*]] = cmpxchg ptr addrspace(1) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4
; CI-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1
; CI-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0
; CI-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CI: atomicrmw.end:
-; CI-NEXT: [[SHIFTED3:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
-; CI-NEXT: [[EXTRACTED4:%.*]] = trunc i32 [[SHIFTED3]] to i16
-; CI-NEXT: [[TMP7:%.*]] = bitcast i16 [[EXTRACTED4]] to half
+; CI-NEXT: [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
+; CI-NEXT: [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i16
+; CI-NEXT: [[TMP7:%.*]] = bitcast i16 [[EXTRACTED3]] to half
; CI-NEXT: ret half [[TMP7]]
;
; GFX9-LABEL: @test_atomicrmw_fadd_f16_global(
@@ -1053,17 +1053,17 @@ define half @test_atomicrmw_fadd_f16_global(ptr addrspace(1) %ptr, half %value)
; GFX9-NEXT: [[NEW:%.*]] = fadd half [[TMP4]], [[VALUE:%.*]]
; GFX9-NEXT: [[TMP5:%.*]] = bitcast half [[NEW]] to i16
; GFX9-NEXT: [[EXTENDED:%.*]] = zext i16 [[TMP5]] to i32
-; GFX9-NEXT: [[SHIFTED2:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]]
+; GFX9-NEXT: [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]]
; GFX9-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
-; GFX9-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED2]]
+; GFX9-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED1]]
; GFX9-NEXT: [[TMP6:%.*]] = cmpxchg ptr addrspace(1) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4
; GFX9-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1
; GFX9-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0
; GFX9-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX9: atomicrmw.end:
-; GFX9-NEXT: [[SHIFTED3:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
-; GFX9-NEXT: [[EXTRACTED4:%.*]] = trunc i32 [[SHIFTED3]] to i16
-; GFX9-NEXT: [[TMP7:%.*]] = bitcast i16 [[EXTRACTED4]] to half
+; GFX9-NEXT: [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
+; GFX9-NEXT: [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i16
+; GFX9-NEXT: [[TMP7:%.*]] = bitcast i16 [[EXTRACTED3]] to half
; GFX9-NEXT: ret half [[TMP7]]
;
; GFX908-LABEL: @test_atomicrmw_fadd_f16_global(
@@ -1084,17 +1084,17 @@ define half @test_atomicrmw_fadd_f16_global(ptr addrspace(1) %ptr, half %value)
; GFX908-NEXT: [[NEW:%.*]] = fadd half [[TMP4]], [[VALUE:%.*]]
; GFX908-NEXT: [[TMP5:%.*]] = bitcast half [[NEW]] to i16
; GFX908-NEXT: [[EXTENDED:%.*]] = zext i16 [[TMP5]] to i32
-; GFX908-NEXT: [[SHIFTED2:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]]
+; GFX908-NEXT: [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]]
; GFX908-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
-; GFX908-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED2]]
+; GFX908-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED1]]
; GFX908-NEXT: [[TMP6:%.*]] = cmpxchg ptr addrspace(1) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4
; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1
; GFX908-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0
; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX908: atomicrmw.end:
-; GFX908-NEXT: [[SHIFTED3:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
-; GFX908-NEXT: [[EXTRACTED4:%.*]] = trunc i32 [[SHIFTED3]] to i16
-; GFX908-NEXT: [[TMP7:%.*]] = bitcast i16 [[EXTRACTED4]] to half
+; GFX908-NEXT: [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
+; GFX908-NEXT: [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i16
+; GFX908-NEXT: [[TMP7:%.*]] = bitcast i16 [[EXTRACTED3]] to half
; GFX908-NEXT: ret half [[TMP7]]
;
; GFX90A-LABEL: @test_atomicrmw_fadd_f16_global(
@@ -1115,17 +1115,17 @@ define half @test_atomicrmw_fadd_f16_global(ptr addrspace(1) %ptr, half %value)
; GFX90A-NEXT: [[NEW:%.*]] = fadd half [[TMP4]], [[VALUE:%.*]]
; GFX90A-NEXT: [[TMP5:%.*]] = bitcast half [[NEW]] to i16
; GFX90A-NEXT: [[EXTENDED:%.*]] = zext i16 [[TMP5]] to i32
-; GFX90A-NEXT: [[SHIFTED2:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]]
+; GFX90A-NEXT: [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]]
; GFX90A-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
-; GFX90A-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED2]]
+; GFX90A-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED1]]
; GFX90A-NEXT: [[TMP6:%.*]] = cmpxchg ptr addrspace(1) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4
; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1
; GFX90A-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0
; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX90A: atomicrmw.end:
-; GFX90A-NEXT: [[SHIFTED3:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
-; GFX90A-NEXT: [[EXTRACTED4:%.*]] = trunc i32 [[SHIFTED3]] to i16
-; GFX90A-NEXT: [[TMP7:%.*]] = bitcast i16 [[EXTRACTED4]] to half
+; GFX90A-NEXT: [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
+; GFX90A-NEXT: [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i16
+; GFX90A-NEXT: [[TMP7:%.*]] = bitcast i16 [[EXTRACTED3]] to half
; GFX90A-NEXT: ret half [[TMP7]]
;
; GFX940-LABEL: @test_atomicrmw_fadd_f16_global(
@@ -1146,17 +1146,17 @@ define half @test_atomicrmw_fadd_f16_global(ptr addrspace(1) %ptr, half %value)
; GFX940-NEXT: [[NEW:%.*]] = fadd half [[TMP4]], [[VALUE:%.*]]
; GFX940-NEXT: [[TMP5:%.*]] = bitcast half [[NEW]] to i16
; GFX940-NEXT: [[EXTENDED:%.*]] = zext i16 [[TMP5]] to i32
-; GFX940-NEXT: [[SHIFTED2:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]]
+; GFX940-NEXT: [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]]
; GFX940-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
-; GFX940-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED2]]
+; GFX940-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED1]]
; GFX940-NEXT: [[TMP6:%.*]] = cmpxchg ptr addrspace(1) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4
; GFX940-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1
; GFX940-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0
; GFX940-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX940: atomicrmw.end:
-; GFX940-NEXT: [[SHIFTED3:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
-; GFX940-NEXT: [[EXTRACTED4:%.*]] = trunc i32 [[SHIFTED3]] to i16
-; GFX940-NEXT: [[TMP7:%.*]] = bitcast i16 [[EXTRACTED4]] to half
+; GFX940-NEXT: [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
+; GFX940-NEXT: [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i16
+; GFX940-NEXT: [[TMP7:%.*]] = bitcast i16 [[EXTRACTED3]] to half
; GFX940-NEXT: ret half [[TMP7]]
;
; GFX11-LABEL: @test_atomicrmw_fadd_f16_global(
@@ -1177,17 +1177,17 @@ define half @test_atomicrmw_fadd_f16_global(ptr addrspace(1) %ptr, half %value)
; GFX11-NEXT: [[NEW:%.*]] = fadd half [[TMP4]], [[VALUE:%.*]]
; GFX11-NEXT: [[TMP5:%.*]] = bitcast half [[NEW]] to i16
; GFX11-NEXT: [[EXTENDED:%.*]] = zext i16 [[TMP5]] to i32
-; GFX11-NEXT: [[SHIFTED2:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]]
+; GFX11-NEXT: [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]]
; GFX11-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
-; GFX11-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED2]]
+; GFX11-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED1]]
; GFX11-NEXT: [[TMP6:%.*]] = cmpxchg ptr addrspace(1) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4
; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1
; GFX11-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0
; GFX11-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX11: atomicrmw.end:
-; GFX11-NEXT: [[SHIFTED3:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
-; GFX11-NEXT: [[EXTRACTED4:%.*]] = trunc i32 [[SHIFTED3]] to i16
-; GFX11-NEXT: [[TMP7:%.*]] = bitcast i16 [[EXTRACTED4]] to half
+; GFX11-NEXT: [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
+; GFX11-NEXT: [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i16
+; GFX11-NEXT: [[TMP7:%.*]] = bitcast i16 [[EXTRACTED3]] to half
; GFX11-NEXT: ret half [[TMP7]]
;
%res = atomicrmw fadd ptr addrspace(1) %ptr, half %value seq_cst
@@ -1344,17 +1344,17 @@ define half @test_atomicrmw_fadd_f16_local(ptr addrspace(3) %ptr, half %value) {
; CI-NEXT: [[NEW:%.*]] = fadd half [[TMP4]], [[VALUE:%.*]]
; CI-NEXT: [[TMP5:%.*]] = bitcast half [[NEW]] to i16
; CI-NEXT: [[EXTENDED:%.*]] = zext i16 [[TMP5]] to i32
-; CI-NEXT: [[SHIFTED2:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]]
+; CI-NEXT: [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]]
; CI-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
-; CI-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED2]]
+; CI-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED1]]
; CI-NEXT: [[TMP6:%.*]] = cmpxchg ptr addrspace(3) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4
; CI-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1
; CI-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0
; CI-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CI: atomicrmw.end:
-; CI-NEXT: [[SHIFTED3:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
-; CI-NEXT: [[EXTRACTED4:%.*]] = trunc i32 [[SHIFTED3]] to i16
-; CI-NEXT: [[TMP7:%.*]] = bitcast i16 [[EXTRACTED4]] to half
+; CI-NEXT: [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
+; CI-NEXT: [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i16
+; CI-NEXT: [[TMP7:%.*]] = bitcast i16 [[EXTRACTED3]] to half
; CI-NEXT: ret half [[TMP7]]
;
; GFX9-LABEL: @test_atomicrmw_fadd_f16_local(
@@ -1375,17 +1375,17 @@ define half @test_atomicrmw_fadd_f16_local(ptr addrspace(3) %ptr, half %value) {
; GFX9-NEXT: [[NEW:%.*]] = fadd half [[TMP4]], [[VALUE:%.*]]
; GFX9-NEXT: [[TMP5:%.*]] = bitcast half [[NEW]] to i16
; GFX9-NEXT: [[EXTENDED:%.*]] = zext i16 [[TMP5]] to i32
-; GFX9-NEXT: [[SHIFTED2:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]]
+; GFX9-NEXT: [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]]
; GFX9-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
-; GFX9-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED2]]
+; GFX9-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED1]]
; GFX9-NEXT: [[TMP6:%.*]] = cmpxchg ptr addrspace(3) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4
; GFX9-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1
; GFX9-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0
; GFX9-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX9: atomicrmw.end:
-; GFX9-NEXT: [[SHIFTED3:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
-; GFX9-NEXT: [[EXTRACTED4:%.*]] = trunc i32 [[SHIFTED3]] to i16
-; GFX9-NEXT: [[TMP7:%.*]] = bitcast i16 [[EXTRACTED4]] to half
+; GFX9-NEXT: [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
+; GFX9-NEXT: [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i16
+; GFX9-NEXT: [[TMP7:%.*]] = bitcast i16 [[EXTRACTED3]] to half
; GFX9-NEXT: ret half [[TMP7]]
;
; GFX908-LABEL: @test_atomicrmw_fadd_f16_local(
@@ -1406,17 +1406,17 @@ define half @test_atomicrmw_fadd_f16_local(ptr addrspace(3) %ptr, half %value) {
; GFX908-NEXT: [[NEW:%.*]] = fadd half [[TMP4]], [[VALUE:%.*]]
; GFX908-NEXT: [[TMP5:%.*]] = bitcast half [[NEW]] to i16
; GFX908-NEXT: [[EXTENDED:%.*]] = zext i16 [[TMP5]] to i32
-; GFX908-NEXT: [[SHIFTED2:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]]
+; GFX908-NEXT: [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]]
; GFX908-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
-; GFX908-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED2]]
+; GFX908-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED1]]
; GFX908-NEXT: [[TMP6:%.*]] = cmpxchg ptr addrspace(3) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4
; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1
; GFX908-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0
; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX908: atomicrmw.end:
-; GFX908-NEXT: [[SHIFTED3:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
-; GFX908-NEXT: [[EXTRACTED4:%.*]] = trunc i32 [[SHIFTED3]] to i16
-; GFX908-NEXT: [[TMP7:%.*]] = bitcast i16 [[EXTRACTED4]] to half
+; GFX908-NEXT: [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
+; GFX908-NEXT: [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i16
+; GFX908-NEXT: [[TMP7:%.*]] = bitcast i16 [[EXTRACTED3]] to half
; GFX908-NEXT: ret half [[TMP7]]
;
; GFX90A-LABEL: @test_atomicrmw_fadd_f16_local(
@@ -1437,17 +1437,17 @@ define half @test_atomicrmw_fadd_f16_local(ptr addrspace(3) %ptr, half %value) {
; GFX90A-NEXT: [[NEW:%.*]] = fadd half [[TMP4]], [[VALUE:%.*]]
; GFX90A-NEXT: [[TMP5:%.*]] = bitcast half [[NEW]] to i16
; GFX90A-NEXT: [[EXTENDED:%.*]] = zext i16 [[TMP5]] to i32
-; GFX90A-NEXT: [[SHIFTED2:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]]
+; GFX90A-NEXT: [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]]
; GFX90A-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
-; GFX90A-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED2]]
+; GFX90A-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED1]]
; GFX90A-NEXT: [[TMP6:%.*]] = cmpxchg ptr addrspace(3) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4
; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1
; GFX90A-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0
; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX90A: atomicrmw.end:
-; GFX90A-NEXT: [[SHIFTED3:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
-; GFX90A-NEXT: [[EXTRACTED4:%.*]] = trunc i32 [[SHIFTED3]] to i16
-; GFX90A-NEXT: [[TMP7:%.*]] = bitcast i16 [[EXTRACTED4]] to half
+; GFX90A-NEXT: [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
+; GFX90A-NEXT: [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i16
+; GFX90A-NEXT: [[TMP7:%.*]] = bitcast i16 [[EXTRACTED3]] to half
; GFX90A-NEXT: ret half [[TMP7]]
;
; GFX940-LABEL: @test_atomicrmw_fadd_f16_local(
@@ -1468,17 +1468,17 @@ define half @test_atomicrmw_fadd_f16_local(ptr addrspace(3) %ptr, half %value) {
; GFX940-NEXT: [[NEW:%.*]] = fadd half [[TMP4]], [[VALUE:%.*]]
; GFX940-NEXT: [[TMP5:%.*]] = bitcast half [[NEW]] to i16
; GFX940-NEXT: [[EXTENDED:%.*]] = zext i16 [[TMP5]] to i32
-; GFX940-NEXT: [[SHIFTED2:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]]
+; GFX940-NEXT: [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]]
; GFX940-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
-; GFX940-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED2]]
+; GFX940-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED1]]
; GFX940-NEXT: [[TMP6:%.*]] = cmpxchg ptr addrspace(3) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4
; GFX940-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1
; GFX940-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0
; GFX940-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX940: atomicrmw.end:
-; GFX940-NEXT: [[SHIFTED3:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
-; GFX940-NEXT: [[EXTRACTED4:%.*]] = trunc i32 [[SHIFTED3]] to i16
-; GFX940-NEXT: [[TMP7:%.*]] = bitcast i16 [[EXTRACTED4]] to half
+; GFX940-NEXT: [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
+; GFX940-NEXT: [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i16
+; GFX940-NEXT: [[TMP7:%.*]] = bitcast i16 [[EXTRACTED3]] to half
; GFX940-NEXT: ret half [[TMP7]]
;
; GFX11-LABEL: @test_atomicrmw_fadd_f16_local(
@@ -1499,17 +1499,17 @@ define half @test_atomicrmw_fadd_f16_local(ptr addrspace(3) %ptr, half %value) {
; GFX11-NEXT: [[NEW:%.*]] = fadd half [[TMP4]], [[VALUE:%.*]]
; GFX11-NEXT: [[TMP5:%.*]] = bitcast half [[NEW]] to i16
; GFX11-NEXT: [[EXTENDED:%.*]] = zext i16 [[TMP5]] to i32
-; GFX11-NEXT: [[SHIFTED2:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]]
+; GFX11-NEXT: [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]]
; GFX11-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
-; GFX11-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED2]]
+; GFX11-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED1]]
; GFX11-NEXT: [[TMP6:%.*]] = cmpxchg ptr addrspace(3) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4
; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1
; GFX11-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0
; GFX11-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX11: atomicrmw.end:
-; GFX11-NEXT: [[SHIFTED3:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
-; GFX11-NEXT: [[EXTRACTED4:%.*]] = trunc i32 [[SHIFTED3]] to i16
-; GFX11-NEXT: [[TMP7:%.*]] = bitcast i16 [[EXTRACTED4]] to half
+; GFX11-NEXT: [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
+; GFX11-NEXT: [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i16
+; GFX11-NEXT: [[TMP7:%.*]] = bitcast i16 [[EXTRACTED3]] to half
; GFX11-NEXT: ret half [[TMP7]]
;
%res = atomicrmw fadd ptr addrspace(3) %ptr, half %value seq_cst
@@ -1521,97 +1521,97 @@ define double @test_atomicrmw_fadd_f64_flat(ptr %ptr, double %value) {
; CI-NEXT: [[TMP1:%.*]] = load double, ptr [[PTR:%.*]], align 8
; CI-NEXT: br label [[ATOMICRMW_START:%.*]]
; CI: atomicrmw.start:
-; CI-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; CI-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; CI-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE:%.*]]
-; CI-NEXT: [[TMP3:%.*]] = bitcast double [[NEW]] to i64
-; CI-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
-; CI-NEXT: [[TMP5:%.*]] = cmpxchg ptr [[PTR]], i64 [[TMP4]], i64 [[TMP3]] seq_cst seq_cst, align 8
-; CI-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
-; CI-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
-; CI-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
+; CI-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
+; CI-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
+; CI-NEXT: [[TMP4:%.*]] = cmpxchg ptr [[PTR]], i64 [[TMP3]], i64 [[TMP2]] seq_cst seq_cst, align 8
+; CI-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
+; CI-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
+; CI-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
; CI-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CI: atomicrmw.end:
-; CI-NEXT: ret double [[TMP6]]
+; CI-NEXT: ret double [[TMP5]]
;
; GFX9-LABEL: @test_atomicrmw_fadd_f64_flat(
; GFX9-NEXT: [[TMP1:%.*]] = load double, ptr [[PTR:%.*]], align 8
; GFX9-NEXT: br label [[ATOMICRMW_START:%.*]]
; GFX9: atomicrmw.start:
-; GFX9-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX9-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; GFX9-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE:%.*]]
-; GFX9-NEXT: [[TMP3:%.*]] = bitcast double [[NEW]] to i64
-; GFX9-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
-; GFX9-NEXT: [[TMP5:%.*]] = cmpxchg ptr [[PTR]], i64 [[TMP4]], i64 [[TMP3]] seq_cst seq_cst, align 8
-; GFX9-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
-; GFX9-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
-; GFX9-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
+; GFX9-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
+; GFX9-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
+; GFX9-NEXT: [[TMP4:%.*]] = cmpxchg ptr [[PTR]], i64 [[TMP3]], i64 [[TMP2]] seq_cst seq_cst, align 8
+; GFX9-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
+; GFX9-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
+; GFX9-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
; GFX9-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX9: atomicrmw.end:
-; GFX9-NEXT: ret double [[TMP6]]
+; GFX9-NEXT: ret double [[TMP5]]
;
; GFX908-LABEL: @test_atomicrmw_fadd_f64_flat(
; GFX908-NEXT: [[TMP1:%.*]] = load double, ptr [[PTR:%.*]], align 8
; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]]
; GFX908: atomicrmw.start:
-; GFX908-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX908-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; GFX908-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE:%.*]]
-; GFX908-NEXT: [[TMP3:%.*]] = bitcast double [[NEW]] to i64
-; GFX908-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
-; GFX908-NEXT: [[TMP5:%.*]] = cmpxchg ptr [[PTR]], i64 [[TMP4]], i64 [[TMP3]] seq_cst seq_cst, align 8
-; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
-; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
-; GFX908-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
+; GFX908-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
+; GFX908-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
+; GFX908-NEXT: [[TMP4:%.*]] = cmpxchg ptr [[PTR]], i64 [[TMP3]], i64 [[TMP2]] seq_cst seq_cst, align 8
+; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
+; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
+; GFX908-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX908: atomicrmw.end:
-; GFX908-NEXT: ret double [[TMP6]]
+; GFX908-NEXT: ret double [[TMP5]]
;
; GFX90A-LABEL: @test_atomicrmw_fadd_f64_flat(
; GFX90A-NEXT: [[TMP1:%.*]] = load double, ptr [[PTR:%.*]], align 8
; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]]
; GFX90A: atomicrmw.start:
-; GFX90A-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX90A-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; GFX90A-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE:%.*]]
-; GFX90A-NEXT: [[TMP3:%.*]] = bitcast double [[NEW]] to i64
-; GFX90A-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
-; GFX90A-NEXT: [[TMP5:%.*]] = cmpxchg ptr [[PTR]], i64 [[TMP4]], i64 [[TMP3]] seq_cst seq_cst, align 8
-; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
-; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
-; GFX90A-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
+; GFX90A-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
+; GFX90A-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
+; GFX90A-NEXT: [[TMP4:%.*]] = cmpxchg ptr [[PTR]], i64 [[TMP3]], i64 [[TMP2]] seq_cst seq_cst, align 8
+; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
+; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
+; GFX90A-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX90A: atomicrmw.end:
-; GFX90A-NEXT: ret double [[TMP6]]
+; GFX90A-NEXT: ret double [[TMP5]]
;
; GFX940-LABEL: @test_atomicrmw_fadd_f64_flat(
; GFX940-NEXT: [[TMP1:%.*]] = load double, ptr [[PTR:%.*]], align 8
; GFX940-NEXT: br label [[ATOMICRMW_START:%.*]]
; GFX940: atomicrmw.start:
-; GFX940-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX940-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; GFX940-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE:%.*]]
-; GFX940-NEXT: [[TMP3:%.*]] = bitcast double [[NEW]] to i64
-; GFX940-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
-; GFX940-NEXT: [[TMP5:%.*]] = cmpxchg ptr [[PTR]], i64 [[TMP4]], i64 [[TMP3]] seq_cst seq_cst, align 8
-; GFX940-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
-; GFX940-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
-; GFX940-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
+; GFX940-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
+; GFX940-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
+; GFX940-NEXT: [[TMP4:%.*]] = cmpxchg ptr [[PTR]], i64 [[TMP3]], i64 [[TMP2]] seq_cst seq_cst, align 8
+; GFX940-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
+; GFX940-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
+; GFX940-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
; GFX940-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX940: atomicrmw.end:
-; GFX940-NEXT: ret double [[TMP6]]
+; GFX940-NEXT: ret double [[TMP5]]
;
; GFX11-LABEL: @test_atomicrmw_fadd_f64_flat(
; GFX11-NEXT: [[TMP1:%.*]] = load double, ptr [[PTR:%.*]], align 8
; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]]
; GFX11: atomicrmw.start:
-; GFX11-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX11-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; GFX11-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE:%.*]]
-; GFX11-NEXT: [[TMP3:%.*]] = bitcast double [[NEW]] to i64
-; GFX11-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
-; GFX11-NEXT: [[TMP5:%.*]] = cmpxchg ptr [[PTR]], i64 [[TMP4]], i64 [[TMP3]] seq_cst seq_cst, align 8
-; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
-; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
-; GFX11-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
+; GFX11-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
+; GFX11-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
+; GFX11-NEXT: [[TMP4:%.*]] = cmpxchg ptr [[PTR]], i64 [[TMP3]], i64 [[TMP2]] seq_cst seq_cst, align 8
+; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
+; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
+; GFX11-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
; GFX11-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX11: atomicrmw.end:
-; GFX11-NEXT: ret double [[TMP6]]
+; GFX11-NEXT: ret double [[TMP5]]
;
%res = atomicrmw fadd ptr %ptr, double %value seq_cst
ret double %res
@@ -1622,97 +1622,97 @@ define double @test_atomicrmw_fadd_f64_global(ptr addrspace(1) %ptr, double %val
; CI-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR:%.*]], align 8
; CI-NEXT: br label [[ATOMICRMW_START:%.*]]
; CI: atomicrmw.start:
-; CI-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; CI-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; CI-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE:%.*]]
-; CI-NEXT: [[TMP3:%.*]] = bitcast double [[NEW]] to i64
-; CI-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
-; CI-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] seq_cst seq_cst, align 8
-; CI-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
-; CI-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
-; CI-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
+; CI-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
+; CI-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
+; CI-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] seq_cst seq_cst, align 8
+; CI-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
+; CI-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
+; CI-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
; CI-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CI: atomicrmw.end:
-; CI-NEXT: ret double [[TMP6]]
+; CI-NEXT: ret double [[TMP5]]
;
; GFX9-LABEL: @test_atomicrmw_fadd_f64_global(
; GFX9-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR:%.*]], align 8
; GFX9-NEXT: br label [[ATOMICRMW_START:%.*]]
; GFX9: atomicrmw.start:
-; GFX9-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX9-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; GFX9-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE:%.*]]
-; GFX9-NEXT: [[TMP3:%.*]] = bitcast double [[NEW]] to i64
-; GFX9-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
-; GFX9-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] seq_cst seq_cst, align 8
-; GFX9-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
-; GFX9-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
-; GFX9-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
+; GFX9-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
+; GFX9-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
+; GFX9-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] seq_cst seq_cst, align 8
+; GFX9-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
+; GFX9-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
+; GFX9-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
; GFX9-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX9: atomicrmw.end:
-; GFX9-NEXT: ret double [[TMP6]]
+; GFX9-NEXT: ret double [[TMP5]]
;
; GFX908-LABEL: @test_atomicrmw_fadd_f64_global(
; GFX908-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR:%.*]], align 8
; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]]
; GFX908: atomicrmw.start:
-; GFX908-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX908-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; GFX908-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE:%.*]]
-; GFX908-NEXT: [[TMP3:%.*]] = bitcast double [[NEW]] to i64
-; GFX908-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
-; GFX908-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] seq_cst seq_cst, align 8
-; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
-; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
-; GFX908-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
+; GFX908-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
+; GFX908-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
+; GFX908-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] seq_cst seq_cst, align 8
+; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
+; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
+; GFX908-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX908: atomicrmw.end:
-; GFX908-NEXT: ret double [[TMP6]]
+; GFX908-NEXT: ret double [[TMP5]]
;
; GFX90A-LABEL: @test_atomicrmw_fadd_f64_global(
; GFX90A-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR:%.*]], align 8
; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]]
; GFX90A: atomicrmw.start:
-; GFX90A-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX90A-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; GFX90A-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE:%.*]]
-; GFX90A-NEXT: [[TMP3:%.*]] = bitcast double [[NEW]] to i64
-; GFX90A-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
-; GFX90A-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] seq_cst seq_cst, align 8
-; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
-; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
-; GFX90A-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
+; GFX90A-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
+; GFX90A-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
+; GFX90A-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] seq_cst seq_cst, align 8
+; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
+; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
+; GFX90A-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX90A: atomicrmw.end:
-; GFX90A-NEXT: ret double [[TMP6]]
+; GFX90A-NEXT: ret double [[TMP5]]
;
; GFX940-LABEL: @test_atomicrmw_fadd_f64_global(
; GFX940-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR:%.*]], align 8
; GFX940-NEXT: br label [[ATOMICRMW_START:%.*]]
; GFX940: atomicrmw.start:
-; GFX940-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX940-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; GFX940-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE:%.*]]
-; GFX940-NEXT: [[TMP3:%.*]] = bitcast double [[NEW]] to i64
-; GFX940-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
-; GFX940-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] seq_cst seq_cst, align 8
-; GFX940-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
-; GFX940-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
-; GFX940-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
+; GFX940-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
+; GFX940-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
+; GFX940-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] seq_cst seq_cst, align 8
+; GFX940-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
+; GFX940-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
+; GFX940-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
; GFX940-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX940: atomicrmw.end:
-; GFX940-NEXT: ret double [[TMP6]]
+; GFX940-NEXT: ret double [[TMP5]]
;
; GFX11-LABEL: @test_atomicrmw_fadd_f64_global(
; GFX11-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR:%.*]], align 8
; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]]
; GFX11: atomicrmw.start:
-; GFX11-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX11-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; GFX11-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE:%.*]]
-; GFX11-NEXT: [[TMP3:%.*]] = bitcast double [[NEW]] to i64
-; GFX11-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
-; GFX11-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] seq_cst seq_cst, align 8
-; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
-; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
-; GFX11-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
+; GFX11-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
+; GFX11-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
+; GFX11-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] seq_cst seq_cst, align 8
+; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
+; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
+; GFX11-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
; GFX11-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX11: atomicrmw.end:
-; GFX11-NEXT: ret double [[TMP6]]
+; GFX11-NEXT: ret double [[TMP5]]
;
%res = atomicrmw fadd ptr addrspace(1) %ptr, double %value seq_cst
ret double %res
@@ -1723,49 +1723,49 @@ define double @test_atomicrmw_fadd_f64_local(ptr addrspace(3) %ptr, double %valu
; CI-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(3) [[PTR:%.*]], align 8
; CI-NEXT: br label [[ATOMICRMW_START:%.*]]
; CI: atomicrmw.start:
-; CI-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; CI-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; CI-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE:%.*]]
-; CI-NEXT: [[TMP3:%.*]] = bitcast double [[NEW]] to i64
-; CI-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
-; CI-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(3) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] seq_cst seq_cst, align 8
-; CI-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
-; CI-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
-; CI-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
+; CI-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
+; CI-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
+; CI-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(3) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] seq_cst seq_cst, align 8
+; CI-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
+; CI-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
+; CI-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
; CI-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CI: atomicrmw.end:
-; CI-NEXT: ret double [[TMP6]]
+; CI-NEXT: ret double [[TMP5]]
;
; GFX9-LABEL: @test_atomicrmw_fadd_f64_local(
; GFX9-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(3) [[PTR:%.*]], align 8
; GFX9-NEXT: br label [[ATOMICRMW_START:%.*]]
; GFX9: atomicrmw.start:
-; GFX9-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX9-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; GFX9-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE:%.*]]
-; GFX9-NEXT: [[TMP3:%.*]] = bitcast double [[NEW]] to i64
-; GFX9-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
-; GFX9-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(3) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] seq_cst seq_cst, align 8
-; GFX9-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
-; GFX9-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
-; GFX9-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
+; GFX9-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
+; GFX9-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
+; GFX9-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(3) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] seq_cst seq_cst, align 8
+; GFX9-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
+; GFX9-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
+; GFX9-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
; GFX9-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX9: atomicrmw.end:
-; GFX9-NEXT: ret double [[TMP6]]
+; GFX9-NEXT: ret double [[TMP5]]
;
; GFX908-LABEL: @test_atomicrmw_fadd_f64_local(
; GFX908-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(3) [[PTR:%.*]], align 8
; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]]
; GFX908: atomicrmw.start:
-; GFX908-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX908-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; GFX908-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE:%.*]]
-; GFX908-NEXT: [[TMP3:%.*]] = bitcast double [[NEW]] to i64
-; GFX908-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
-; GFX908-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(3) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] seq_cst seq_cst, align 8
-; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
-; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
-; GFX908-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
+; GFX908-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
+; GFX908-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
+; GFX908-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(3) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] seq_cst seq_cst, align 8
+; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
+; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
+; GFX908-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX908: atomicrmw.end:
-; GFX908-NEXT: ret double [[TMP6]]
+; GFX908-NEXT: ret double [[TMP5]]
;
; GFX90A-LABEL: @test_atomicrmw_fadd_f64_local(
; GFX90A-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(3) [[PTR:%.*]], double [[VALUE:%.*]] seq_cst, align 8
@@ -1779,17 +1779,17 @@ define double @test_atomicrmw_fadd_f64_local(ptr addrspace(3) %ptr, double %valu
; GFX11-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(3) [[PTR:%.*]], align 8
; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]]
; GFX11: atomicrmw.start:
-; GFX11-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX11-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; GFX11-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE:%.*]]
-; GFX11-NEXT: [[TMP3:%.*]] = bitcast double [[NEW]] to i64
-; GFX11-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
-; GFX11-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(3) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] seq_cst seq_cst, align 8
-; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
-; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
-; GFX11-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
+; GFX11-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
+; GFX11-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
+; GFX11-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(3) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] seq_cst seq_cst, align 8
+; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
+; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
+; GFX11-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
; GFX11-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX11: atomicrmw.end:
-; GFX11-NEXT: ret double [[TMP6]]
+; GFX11-NEXT: ret double [[TMP5]]
;
%res = atomicrmw fadd ptr addrspace(3) %ptr, double %value seq_cst
ret double %res
@@ -1800,97 +1800,97 @@ define float @test_atomicrmw_fadd_f32_global_agent(ptr addrspace(1) %ptr, float
; CI-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR:%.*]], align 4
; CI-NEXT: br label [[ATOMICRMW_START:%.*]]
; CI: atomicrmw.start:
-; CI-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; CI-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; CI-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE:%.*]]
-; CI-NEXT: [[TMP3:%.*]] = bitcast float [[NEW]] to i32
-; CI-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
-; CI-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") monotonic monotonic, align 4
-; CI-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
-; CI-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
-; CI-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
+; CI-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
+; CI-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
+; CI-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") monotonic monotonic, align 4
+; CI-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
+; CI-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
+; CI-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
; CI-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CI: atomicrmw.end:
-; CI-NEXT: ret float [[TMP6]]
+; CI-NEXT: ret float [[TMP5]]
;
; GFX9-LABEL: @test_atomicrmw_fadd_f32_global_agent(
; GFX9-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR:%.*]], align 4
; GFX9-NEXT: br label [[ATOMICRMW_START:%.*]]
; GFX9: atomicrmw.start:
-; GFX9-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX9-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; GFX9-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE:%.*]]
-; GFX9-NEXT: [[TMP3:%.*]] = bitcast float [[NEW]] to i32
-; GFX9-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
-; GFX9-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") monotonic monotonic, align 4
-; GFX9-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
-; GFX9-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
-; GFX9-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
+; GFX9-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
+; GFX9-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
+; GFX9-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") monotonic monotonic, align 4
+; GFX9-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
+; GFX9-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
+; GFX9-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
; GFX9-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX9: atomicrmw.end:
-; GFX9-NEXT: ret float [[TMP6]]
+; GFX9-NEXT: ret float [[TMP5]]
;
; GFX908-LABEL: @test_atomicrmw_fadd_f32_global_agent(
; GFX908-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR:%.*]], align 4
; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]]
; GFX908: atomicrmw.start:
-; GFX908-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX908-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; GFX908-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE:%.*]]
-; GFX908-NEXT: [[TMP3:%.*]] = bitcast float [[NEW]] to i32
-; GFX908-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
-; GFX908-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") monotonic monotonic, align 4
-; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
-; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
-; GFX908-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
+; GFX908-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
+; GFX908-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
+; GFX908-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") monotonic monotonic, align 4
+; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
+; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
+; GFX908-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX908: atomicrmw.end:
-; GFX908-NEXT: ret float [[TMP6]]
+; GFX908-NEXT: ret float [[TMP5]]
;
; GFX90A-LABEL: @test_atomicrmw_fadd_f32_global_agent(
; GFX90A-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR:%.*]], align 4
; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]]
; GFX90A: atomicrmw.start:
-; GFX90A-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX90A-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; GFX90A-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE:%.*]]
-; GFX90A-NEXT: [[TMP3:%.*]] = bitcast float [[NEW]] to i32
-; GFX90A-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
-; GFX90A-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") monotonic monotonic, align 4
-; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
-; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
-; GFX90A-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
+; GFX90A-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
+; GFX90A-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
+; GFX90A-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") monotonic monotonic, align 4
+; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
+; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
+; GFX90A-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX90A: atomicrmw.end:
-; GFX90A-NEXT: ret float [[TMP6]]
+; GFX90A-NEXT: ret float [[TMP5]]
;
; GFX940-LABEL: @test_atomicrmw_fadd_f32_global_agent(
; GFX940-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR:%.*]], align 4
; GFX940-NEXT: br label [[ATOMICRMW_START:%.*]]
; GFX940: atomicrmw.start:
-; GFX940-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX940-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; GFX940-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE:%.*]]
-; GFX940-NEXT: [[TMP3:%.*]] = bitcast float [[NEW]] to i32
-; GFX940-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
-; GFX940-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") monotonic monotonic, align 4
-; GFX940-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
-; GFX940-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
-; GFX940-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
+; GFX940-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
+; GFX940-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
+; GFX940-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") monotonic monotonic, align 4
+; GFX940-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
+; GFX940-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
+; GFX940-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
; GFX940-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX940: atomicrmw.end:
-; GFX940-NEXT: ret float [[TMP6]]
+; GFX940-NEXT: ret float [[TMP5]]
;
; GFX11-LABEL: @test_atomicrmw_fadd_f32_global_agent(
; GFX11-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR:%.*]], align 4
; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]]
; GFX11: atomicrmw.start:
-; GFX11-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX11-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; GFX11-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE:%.*]]
-; GFX11-NEXT: [[TMP3:%.*]] = bitcast float [[NEW]] to i32
-; GFX11-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
-; GFX11-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") monotonic monotonic, align 4
-; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
-; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
-; GFX11-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
+; GFX11-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
+; GFX11-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
+; GFX11-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") monotonic monotonic, align 4
+; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
+; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
+; GFX11-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
; GFX11-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX11: atomicrmw.end:
-; GFX11-NEXT: ret float [[TMP6]]
+; GFX11-NEXT: ret float [[TMP5]]
;
%res = atomicrmw fadd ptr addrspace(1) %ptr, float %value syncscope("agent") monotonic
ret float %res
@@ -1901,97 +1901,97 @@ define float @test_atomicrmw_fadd_f32_global_one_as(ptr addrspace(1) %ptr, float
; CI-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR:%.*]], align 4
; CI-NEXT: br label [[ATOMICRMW_START:%.*]]
; CI: atomicrmw.start:
-; CI-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; CI-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; CI-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE:%.*]]
-; CI-NEXT: [[TMP3:%.*]] = bitcast float [[NEW]] to i32
-; CI-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
-; CI-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("one-as") monotonic monotonic, align 4
-; CI-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
-; CI-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
-; CI-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
+; CI-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
+; CI-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
+; CI-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("one-as") monotonic monotonic, align 4
+; CI-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
+; CI-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
+; CI-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
; CI-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CI: atomicrmw.end:
-; CI-NEXT: ret float [[TMP6]]
+; CI-NEXT: ret float [[TMP5]]
;
; GFX9-LABEL: @test_atomicrmw_fadd_f32_global_one_as(
; GFX9-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR:%.*]], align 4
; GFX9-NEXT: br label [[ATOMICRMW_START:%.*]]
; GFX9: atomicrmw.start:
-; GFX9-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX9-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; GFX9-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE:%.*]]
-; GFX9-NEXT: [[TMP3:%.*]] = bitcast float [[NEW]] to i32
-; GFX9-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
-; GFX9-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("one-as") monotonic monotonic, align 4
-; GFX9-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
-; GFX9-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
-; GFX9-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
+; GFX9-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
+; GFX9-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
+; GFX9-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("one-as") monotonic monotonic, align 4
+; GFX9-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
+; GFX9-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
+; GFX9-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
; GFX9-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX9: atomicrmw.end:
-; GFX9-NEXT: ret float [[TMP6]]
+; GFX9-NEXT: ret float [[TMP5]]
;
; GFX908-LABEL: @test_atomicrmw_fadd_f32_global_one_as(
; GFX908-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR:%.*]], align 4
; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]]
; GFX908: atomicrmw.start:
-; GFX908-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX908-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; GFX908-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE:%.*]]
-; GFX908-NEXT: [[TMP3:%.*]] = bitcast float [[NEW]] to i32
-; GFX908-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
-; GFX908-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("one-as") monotonic monotonic, align 4
-; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
-; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
-; GFX908-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
+; GFX908-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
+; GFX908-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
+; GFX908-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("one-as") monotonic monotonic, align 4
+; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
+; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
+; GFX908-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX908: atomicrmw.end:
-; GFX908-NEXT: ret float [[TMP6]]
+; GFX908-NEXT: ret float [[TMP5]]
;
; GFX90A-LABEL: @test_atomicrmw_fadd_f32_global_one_as(
; GFX90A-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR:%.*]], align 4
; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]]
; GFX90A: atomicrmw.start:
-; GFX90A-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX90A-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; GFX90A-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE:%.*]]
-; GFX90A-NEXT: [[TMP3:%.*]] = bitcast float [[NEW]] to i32
-; GFX90A-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
-; GFX90A-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("one-as") monotonic monotonic, align 4
-; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
-; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
-; GFX90A-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
+; GFX90A-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
+; GFX90A-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
+; GFX90A-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("one-as") monotonic monotonic, align 4
+; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
+; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
+; GFX90A-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX90A: atomicrmw.end:
-; GFX90A-NEXT: ret float [[TMP6]]
+; GFX90A-NEXT: ret float [[TMP5]]
;
; GFX940-LABEL: @test_atomicrmw_fadd_f32_global_one_as(
; GFX940-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR:%.*]], align 4
; GFX940-NEXT: br label [[ATOMICRMW_START:%.*]]
; GFX940: atomicrmw.start:
-; GFX940-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX940-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; GFX940-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE:%.*]]
-; GFX940-NEXT: [[TMP3:%.*]] = bitcast float [[NEW]] to i32
-; GFX940-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
-; GFX940-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("one-as") monotonic monotonic, align 4
-; GFX940-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
-; GFX940-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
-; GFX940-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
+; GFX940-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
+; GFX940-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
+; GFX940-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("one-as") monotonic monotonic, align 4
+; GFX940-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
+; GFX940-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
+; GFX940-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
; GFX940-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX940: atomicrmw.end:
-; GFX940-NEXT: ret float [[TMP6]]
+; GFX940-NEXT: ret float [[TMP5]]
;
; GFX11-LABEL: @test_atomicrmw_fadd_f32_global_one_as(
; GFX11-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR:%.*]], align 4
; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]]
; GFX11: atomicrmw.start:
-; GFX11-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX11-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; GFX11-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE:%.*]]
-; GFX11-NEXT: [[TMP3:%.*]] = bitcast float [[NEW]] to i32
-; GFX11-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
-; GFX11-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("one-as") monotonic monotonic, align 4
-; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
-; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
-; GFX11-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
+; GFX11-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
+; GFX11-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
+; GFX11-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("one-as") monotonic monotonic, align 4
+; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
+; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
+; GFX11-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
; GFX11-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX11: atomicrmw.end:
-; GFX11-NEXT: ret float [[TMP6]]
+; GFX11-NEXT: ret float [[TMP5]]
;
%res = atomicrmw fadd ptr addrspace(1) %ptr, float %value syncscope("one-as") monotonic
ret float %res
More information about the llvm-commits
mailing list