[llvm] AMDGPU: Fix atomic expand tests accidentally underaligning (PR #147299)
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Mon Jul 7 06:40:26 PDT 2025
https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/147299
None
>From d067e1e014bb70c8abc0db114bcd215cd1c0de01 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Mon, 7 Jul 2025 21:59:46 +0900
Subject: [PATCH] AMDGPU: Fix atomic expand tests accidentally underaligning
---
.../AMDGPU/expand-atomic-f64-agent.ll | 2692 ++++++++++++-----
.../AMDGPU/expand-atomic-f64-system.ll | 2301 +++++++++-----
2 files changed, 3528 insertions(+), 1465 deletions(-)
diff --git a/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-f64-agent.ll b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-f64-agent.ll
index 1c2ae608711cc..668e7ba9b1f0f 100644
--- a/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-f64-agent.ll
+++ b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-f64-agent.ll
@@ -787,419 +787,1863 @@ define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_no_fine_grained_memo
}
define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode(ptr addrspace(1) %ptr, double %value) {
-; COMMON-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode(
-; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
-; COMMON-NEXT: [[TMP1:%.*]] = alloca double, align 8, addrspace(5)
-; COMMON-NEXT: [[TMP2:%.*]] = alloca double, align 8, addrspace(5)
-; COMMON-NEXT: [[TMP3:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
-; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
-; COMMON: atomicrmw.start:
-; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
-; COMMON-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
-; COMMON-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]])
-; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
-; COMMON-NEXT: store double [[NEW]], ptr addrspace(5) [[TMP2]], align 8
-; COMMON-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP4]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
-; COMMON-NEXT: [[TMP6:%.*]] = load double, ptr addrspace(5) [[TMP1]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]])
-; COMMON-NEXT: [[TMP7:%.*]] = insertvalue { double, i1 } poison, double [[TMP6]], 0
-; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } [[TMP7]], i1 [[TMP5]], 1
-; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP8]], 1
-; COMMON-NEXT: [[NEWLOADED]] = extractvalue { double, i1 } [[TMP8]], 0
-; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
-; COMMON: atomicrmw.end:
-; COMMON-NEXT: ret double [[NEWLOADED]]
+; GFX803-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode(
+; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX803-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX803: atomicrmw.start:
+; GFX803-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
+; GFX803-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
+; GFX803-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
+; GFX803-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
+; GFX803-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8
+; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
+; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
+; GFX803-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
+; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX803: atomicrmw.end:
+; GFX803-NEXT: ret double [[TMP5]]
+;
+; GFX906-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode(
+; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX906-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX906: atomicrmw.start:
+; GFX906-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
+; GFX906-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
+; GFX906-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
+; GFX906-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
+; GFX906-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8
+; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
+; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
+; GFX906-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
+; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX906: atomicrmw.end:
+; GFX906-NEXT: ret double [[TMP5]]
+;
+; GFX908-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode(
+; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX908-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX908: atomicrmw.start:
+; GFX908-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
+; GFX908-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
+; GFX908-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
+; GFX908-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
+; GFX908-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8
+; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
+; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
+; GFX908-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
+; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX908: atomicrmw.end:
+; GFX908-NEXT: ret double [[TMP5]]
+;
+; GFX90A-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode(
+; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX90A-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX90A: atomicrmw.start:
+; GFX90A-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
+; GFX90A-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
+; GFX90A-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
+; GFX90A-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
+; GFX90A-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8
+; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
+; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
+; GFX90A-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
+; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX90A: atomicrmw.end:
+; GFX90A-NEXT: ret double [[TMP5]]
+;
+; GFX942-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode(
+; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.ignore.denormal.mode [[META0]]
+; GFX942-NEXT: ret double [[RES]]
+;
+; GFX10-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode(
+; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX10-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX10: atomicrmw.start:
+; GFX10-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
+; GFX10-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
+; GFX10-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
+; GFX10-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
+; GFX10-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8
+; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
+; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
+; GFX10-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
+; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX10: atomicrmw.end:
+; GFX10-NEXT: ret double [[TMP5]]
+;
+; GFX11-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode(
+; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX11-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX11: atomicrmw.start:
+; GFX11-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
+; GFX11-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
+; GFX11-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
+; GFX11-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
+; GFX11-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8
+; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
+; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
+; GFX11-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
+; GFX11-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX11: atomicrmw.end:
+; GFX11-NEXT: ret double [[TMP5]]
+;
+; GFX12-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode(
+; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX12-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX12-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX12: atomicrmw.start:
+; GFX12-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
+; GFX12-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
+; GFX12-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
+; GFX12-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
+; GFX12-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8
+; GFX12-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
+; GFX12-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
+; GFX12-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
+; GFX12-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX12: atomicrmw.end:
+; GFX12-NEXT: ret double [[TMP5]]
;
- %res = atomicrmw fadd ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst, align 4, !amdgpu.ignore.denormal.mode !0
+ %res = atomicrmw fadd ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst, align 8, !amdgpu.ignore.denormal.mode !0
ret double %res
}
define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, double %value) {
-; COMMON-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(
-; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
-; COMMON-NEXT: [[TMP1:%.*]] = alloca double, align 8, addrspace(5)
-; COMMON-NEXT: [[TMP2:%.*]] = alloca double, align 8, addrspace(5)
-; COMMON-NEXT: [[TMP3:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
-; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
-; COMMON: atomicrmw.start:
-; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
-; COMMON-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
-; COMMON-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]])
-; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
-; COMMON-NEXT: store double [[NEW]], ptr addrspace(5) [[TMP2]], align 8
-; COMMON-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP4]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
-; COMMON-NEXT: [[TMP6:%.*]] = load double, ptr addrspace(5) [[TMP1]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]])
-; COMMON-NEXT: [[TMP7:%.*]] = insertvalue { double, i1 } poison, double [[TMP6]], 0
-; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } [[TMP7]], i1 [[TMP5]], 1
-; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP8]], 1
-; COMMON-NEXT: [[NEWLOADED]] = extractvalue { double, i1 } [[TMP8]], 0
-; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
-; COMMON: atomicrmw.end:
-; COMMON-NEXT: ret double [[NEWLOADED]]
+; GFX803-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(
+; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX803-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX803: atomicrmw.start:
+; GFX803-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
+; GFX803-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
+; GFX803-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
+; GFX803-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
+; GFX803-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]]
+; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
+; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
+; GFX803-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
+; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX803: atomicrmw.end:
+; GFX803-NEXT: ret double [[TMP5]]
;
- %res = atomicrmw fadd ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0, !amdgpu.ignore.denormal.mode !0
- ret double %res
-}
-
-define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, double %value) {
-; COMMON-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(
-; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
-; COMMON-NEXT: [[TMP1:%.*]] = alloca double, align 8, addrspace(5)
-; COMMON-NEXT: [[TMP2:%.*]] = alloca double, align 8, addrspace(5)
-; COMMON-NEXT: [[TMP3:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
-; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
-; COMMON: atomicrmw.start:
-; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
-; COMMON-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
-; COMMON-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]])
-; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
-; COMMON-NEXT: store double [[NEW]], ptr addrspace(5) [[TMP2]], align 8
-; COMMON-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP4]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
-; COMMON-NEXT: [[TMP6:%.*]] = load double, ptr addrspace(5) [[TMP1]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]])
-; COMMON-NEXT: [[TMP7:%.*]] = insertvalue { double, i1 } poison, double [[TMP6]], 0
-; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } [[TMP7]], i1 [[TMP5]], 1
-; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP8]], 1
-; COMMON-NEXT: [[NEWLOADED]] = extractvalue { double, i1 } [[TMP8]], 0
-; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
-; COMMON: atomicrmw.end:
-; COMMON-NEXT: ret double [[NEWLOADED]]
+; GFX906-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(
+; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX906-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX906: atomicrmw.start:
+; GFX906-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
+; GFX906-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
+; GFX906-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
+; GFX906-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
+; GFX906-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]]
+; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
+; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
+; GFX906-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
+; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX906: atomicrmw.end:
+; GFX906-NEXT: ret double [[TMP5]]
;
- %res = atomicrmw fadd ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory !0, !amdgpu.ignore.denormal.mode !0
- ret double %res
-}
-
-define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, double %value) {
-; COMMON-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
-; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
-; COMMON-NEXT: [[TMP1:%.*]] = alloca double, align 8, addrspace(5)
-; COMMON-NEXT: [[TMP2:%.*]] = alloca double, align 8, addrspace(5)
-; COMMON-NEXT: [[TMP3:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
-; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
-; COMMON: atomicrmw.start:
-; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
-; COMMON-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
-; COMMON-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]])
-; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
-; COMMON-NEXT: store double [[NEW]], ptr addrspace(5) [[TMP2]], align 8
-; COMMON-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP4]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
-; COMMON-NEXT: [[TMP6:%.*]] = load double, ptr addrspace(5) [[TMP1]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]])
-; COMMON-NEXT: [[TMP7:%.*]] = insertvalue { double, i1 } poison, double [[TMP6]], 0
-; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } [[TMP7]], i1 [[TMP5]], 1
-; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP8]], 1
-; COMMON-NEXT: [[NEWLOADED]] = extractvalue { double, i1 } [[TMP8]], 0
-; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
-; COMMON: atomicrmw.end:
-; COMMON-NEXT: ret double [[NEWLOADED]]
+; GFX908-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(
+; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX908-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX908: atomicrmw.start:
+; GFX908-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
+; GFX908-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
+; GFX908-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
+; GFX908-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
+; GFX908-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]]
+; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
+; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
+; GFX908-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
+; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX908: atomicrmw.end:
+; GFX908-NEXT: ret double [[TMP5]]
;
- %res = atomicrmw fadd ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0, !amdgpu.ignore.denormal.mode !0
- ret double %res
-}
-
-define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_daz(ptr addrspace(1) %ptr, double %value) #0 {
-; COMMON-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_daz(
-; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR1:[0-9]+]] {
-; COMMON-NEXT: [[TMP1:%.*]] = alloca double, align 8, addrspace(5)
-; COMMON-NEXT: [[TMP2:%.*]] = alloca double, align 8, addrspace(5)
-; COMMON-NEXT: [[TMP3:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
-; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
-; COMMON: atomicrmw.start:
-; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
-; COMMON-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
-; COMMON-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]])
-; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
-; COMMON-NEXT: store double [[NEW]], ptr addrspace(5) [[TMP2]], align 8
-; COMMON-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP4]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
-; COMMON-NEXT: [[TMP6:%.*]] = load double, ptr addrspace(5) [[TMP1]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]])
-; COMMON-NEXT: [[TMP7:%.*]] = insertvalue { double, i1 } poison, double [[TMP6]], 0
-; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } [[TMP7]], i1 [[TMP5]], 1
-; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP8]], 1
-; COMMON-NEXT: [[NEWLOADED]] = extractvalue { double, i1 } [[TMP8]], 0
-; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
-; COMMON: atomicrmw.end:
-; COMMON-NEXT: ret double [[NEWLOADED]]
+; GFX90A-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(
+; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX90A-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
+; GFX90A-NEXT: ret double [[RES]]
;
- %res = atomicrmw fadd ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0, !amdgpu.ignore.denormal.mode !0
- ret double %res
-}
-
-define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_dynamic(ptr addrspace(1) %ptr, double %value) #1 {
-; COMMON-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_dynamic(
-; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR2:[0-9]+]] {
-; COMMON-NEXT: [[TMP1:%.*]] = alloca double, align 8, addrspace(5)
-; COMMON-NEXT: [[TMP2:%.*]] = alloca double, align 8, addrspace(5)
-; COMMON-NEXT: [[TMP3:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
-; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
-; COMMON: atomicrmw.start:
-; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
-; COMMON-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
-; COMMON-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]])
-; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
-; COMMON-NEXT: store double [[NEW]], ptr addrspace(5) [[TMP2]], align 8
-; COMMON-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP4]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
-; COMMON-NEXT: [[TMP6:%.*]] = load double, ptr addrspace(5) [[TMP1]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]])
-; COMMON-NEXT: [[TMP7:%.*]] = insertvalue { double, i1 } poison, double [[TMP6]], 0
-; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } [[TMP7]], i1 [[TMP5]], 1
-; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP8]], 1
-; COMMON-NEXT: [[NEWLOADED]] = extractvalue { double, i1 } [[TMP8]], 0
-; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
-; COMMON: atomicrmw.end:
-; COMMON-NEXT: ret double [[NEWLOADED]]
+; GFX942-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(
+; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
+; GFX942-NEXT: ret double [[RES]]
+;
+; GFX10-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(
+; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX10-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX10: atomicrmw.start:
+; GFX10-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
+; GFX10-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
+; GFX10-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
+; GFX10-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
+; GFX10-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]]
+; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
+; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
+; GFX10-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
+; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX10: atomicrmw.end:
+; GFX10-NEXT: ret double [[TMP5]]
+;
+; GFX11-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(
+; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX11-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX11: atomicrmw.start:
+; GFX11-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
+; GFX11-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
+; GFX11-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
+; GFX11-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
+; GFX11-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]]
+; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
+; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
+; GFX11-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
+; GFX11-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX11: atomicrmw.end:
+; GFX11-NEXT: ret double [[TMP5]]
+;
+; GFX12-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(
+; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX12-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX12-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX12: atomicrmw.start:
+; GFX12-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
+; GFX12-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
+; GFX12-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
+; GFX12-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
+; GFX12-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]]
+; GFX12-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
+; GFX12-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
+; GFX12-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
+; GFX12-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX12: atomicrmw.end:
+; GFX12-NEXT: ret double [[TMP5]]
;
- %res = atomicrmw fadd ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0, !amdgpu.ignore.denormal.mode !0
+ %res = atomicrmw fadd ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory !0, !amdgpu.ignore.denormal.mode !0
ret double %res
}
-;---------------------------------------------------------------------
+define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, double %value) {
+; GFX803-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(
+; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX803-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX803: atomicrmw.start:
+; GFX803-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
+; GFX803-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
+; GFX803-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
+; GFX803-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
+; GFX803-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.remote.memory [[META0]]
+; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
+; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
+; GFX803-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
+; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX803: atomicrmw.end:
+; GFX803-NEXT: ret double [[TMP5]]
+;
+; GFX906-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(
+; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX906-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX906: atomicrmw.start:
+; GFX906-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
+; GFX906-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
+; GFX906-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
+; GFX906-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
+; GFX906-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.remote.memory [[META0]]
+; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
+; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
+; GFX906-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
+; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX906: atomicrmw.end:
+; GFX906-NEXT: ret double [[TMP5]]
+;
+; GFX908-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(
+; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX908-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX908: atomicrmw.start:
+; GFX908-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
+; GFX908-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
+; GFX908-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
+; GFX908-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
+; GFX908-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.remote.memory [[META0]]
+; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
+; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
+; GFX908-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
+; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX908: atomicrmw.end:
+; GFX908-NEXT: ret double [[TMP5]]
+;
+; GFX90A-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(
+; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX90A-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX90A: atomicrmw.start:
+; GFX90A-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
+; GFX90A-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
+; GFX90A-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
+; GFX90A-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
+; GFX90A-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.remote.memory [[META0]]
+; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
+; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
+; GFX90A-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
+; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX90A: atomicrmw.end:
+; GFX90A-NEXT: ret double [[TMP5]]
+;
+; GFX942-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(
+; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
+; GFX942-NEXT: ret double [[RES]]
+;
+; GFX10-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(
+; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX10-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX10: atomicrmw.start:
+; GFX10-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
+; GFX10-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
+; GFX10-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
+; GFX10-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
+; GFX10-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.remote.memory [[META0]]
+; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
+; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
+; GFX10-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
+; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX10: atomicrmw.end:
+; GFX10-NEXT: ret double [[TMP5]]
+;
+; GFX11-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(
+; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX11-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX11: atomicrmw.start:
+; GFX11-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
+; GFX11-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
+; GFX11-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
+; GFX11-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
+; GFX11-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.remote.memory [[META0]]
+; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
+; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
+; GFX11-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
+; GFX11-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX11: atomicrmw.end:
+; GFX11-NEXT: ret double [[TMP5]]
+;
+; GFX12-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(
+; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX12-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX12-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX12: atomicrmw.start:
+; GFX12-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
+; GFX12-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
+; GFX12-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
+; GFX12-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
+; GFX12-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.remote.memory [[META0]]
+; GFX12-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
+; GFX12-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
+; GFX12-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
+; GFX12-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX12: atomicrmw.end:
+; GFX12-NEXT: ret double [[TMP5]]
+;
+ %res = atomicrmw fadd ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst, align 8, !amdgpu.no.remote.memory !0, !amdgpu.ignore.denormal.mode !0
+ ret double %res
+}
+
+define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, double %value) {
+; GFX803-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
+; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX803-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX803: atomicrmw.start:
+; GFX803-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
+; GFX803-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
+; GFX803-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
+; GFX803-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
+; GFX803-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
+; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
+; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
+; GFX803-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
+; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX803: atomicrmw.end:
+; GFX803-NEXT: ret double [[TMP5]]
+;
+; GFX906-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
+; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX906-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX906: atomicrmw.start:
+; GFX906-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
+; GFX906-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
+; GFX906-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
+; GFX906-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
+; GFX906-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
+; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
+; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
+; GFX906-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
+; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX906: atomicrmw.end:
+; GFX906-NEXT: ret double [[TMP5]]
+;
+; GFX908-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
+; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX908-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX908: atomicrmw.start:
+; GFX908-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
+; GFX908-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
+; GFX908-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
+; GFX908-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
+; GFX908-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
+; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
+; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
+; GFX908-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
+; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX908: atomicrmw.end:
+; GFX908-NEXT: ret double [[TMP5]]
+;
+; GFX90A-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
+; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX90A-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
+; GFX90A-NEXT: ret double [[RES]]
+;
+; GFX942-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
+; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
+; GFX942-NEXT: ret double [[RES]]
+;
+; GFX10-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
+; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX10-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX10: atomicrmw.start:
+; GFX10-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
+; GFX10-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
+; GFX10-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
+; GFX10-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
+; GFX10-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
+; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
+; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
+; GFX10-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
+; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX10: atomicrmw.end:
+; GFX10-NEXT: ret double [[TMP5]]
+;
+; GFX11-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
+; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX11-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX11: atomicrmw.start:
+; GFX11-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
+; GFX11-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
+; GFX11-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
+; GFX11-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
+; GFX11-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
+; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
+; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
+; GFX11-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
+; GFX11-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX11: atomicrmw.end:
+; GFX11-NEXT: ret double [[TMP5]]
+;
+; GFX12-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
+; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX12-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX12-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX12: atomicrmw.start:
+; GFX12-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
+; GFX12-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
+; GFX12-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
+; GFX12-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
+; GFX12-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
+; GFX12-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
+; GFX12-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
+; GFX12-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
+; GFX12-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX12: atomicrmw.end:
+; GFX12-NEXT: ret double [[TMP5]]
+;
+ %res = atomicrmw fadd ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0, !amdgpu.ignore.denormal.mode !0
+ ret double %res
+}
+
+define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_daz(ptr addrspace(1) %ptr, double %value) #0 {
+; GFX803-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_daz(
+; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR1]] {
+; GFX803-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX803: atomicrmw.start:
+; GFX803-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
+; GFX803-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
+; GFX803-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
+; GFX803-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
+; GFX803-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
+; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
+; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
+; GFX803-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
+; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX803: atomicrmw.end:
+; GFX803-NEXT: ret double [[TMP5]]
+;
+; GFX906-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_daz(
+; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR1]] {
+; GFX906-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX906: atomicrmw.start:
+; GFX906-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
+; GFX906-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
+; GFX906-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
+; GFX906-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
+; GFX906-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
+; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
+; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
+; GFX906-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
+; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX906: atomicrmw.end:
+; GFX906-NEXT: ret double [[TMP5]]
+;
+; GFX908-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_daz(
+; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR1]] {
+; GFX908-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX908: atomicrmw.start:
+; GFX908-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
+; GFX908-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
+; GFX908-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
+; GFX908-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
+; GFX908-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
+; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
+; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
+; GFX908-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
+; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX908: atomicrmw.end:
+; GFX908-NEXT: ret double [[TMP5]]
+;
+; GFX90A-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_daz(
+; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR1]] {
+; GFX90A-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
+; GFX90A-NEXT: ret double [[RES]]
+;
+; GFX942-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_daz(
+; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR1]] {
+; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
+; GFX942-NEXT: ret double [[RES]]
+;
+; GFX10-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_daz(
+; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR1]] {
+; GFX10-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX10: atomicrmw.start:
+; GFX10-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
+; GFX10-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
+; GFX10-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
+; GFX10-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
+; GFX10-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
+; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
+; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
+; GFX10-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
+; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX10: atomicrmw.end:
+; GFX10-NEXT: ret double [[TMP5]]
+;
+; GFX11-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_daz(
+; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR1]] {
+; GFX11-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX11: atomicrmw.start:
+; GFX11-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
+; GFX11-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
+; GFX11-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
+; GFX11-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
+; GFX11-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
+; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
+; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
+; GFX11-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
+; GFX11-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX11: atomicrmw.end:
+; GFX11-NEXT: ret double [[TMP5]]
+;
+; GFX12-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_daz(
+; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR1]] {
+; GFX12-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX12-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX12: atomicrmw.start:
+; GFX12-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
+; GFX12-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
+; GFX12-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
+; GFX12-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
+; GFX12-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
+; GFX12-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
+; GFX12-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
+; GFX12-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
+; GFX12-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX12: atomicrmw.end:
+; GFX12-NEXT: ret double [[TMP5]]
+;
+ %res = atomicrmw fadd ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0, !amdgpu.ignore.denormal.mode !0
+ ret double %res
+}
+
+define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_dynamic(ptr addrspace(1) %ptr, double %value) #1 {
+; GFX803-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_dynamic(
+; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR2]] {
+; GFX803-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX803: atomicrmw.start:
+; GFX803-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
+; GFX803-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
+; GFX803-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
+; GFX803-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
+; GFX803-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
+; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
+; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
+; GFX803-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
+; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX803: atomicrmw.end:
+; GFX803-NEXT: ret double [[TMP5]]
+;
+; GFX906-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_dynamic(
+; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR2]] {
+; GFX906-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX906: atomicrmw.start:
+; GFX906-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
+; GFX906-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
+; GFX906-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
+; GFX906-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
+; GFX906-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
+; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
+; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
+; GFX906-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
+; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX906: atomicrmw.end:
+; GFX906-NEXT: ret double [[TMP5]]
+;
+; GFX908-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_dynamic(
+; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR2]] {
+; GFX908-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX908: atomicrmw.start:
+; GFX908-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
+; GFX908-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
+; GFX908-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
+; GFX908-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
+; GFX908-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
+; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
+; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
+; GFX908-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
+; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX908: atomicrmw.end:
+; GFX908-NEXT: ret double [[TMP5]]
+;
+; GFX90A-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_dynamic(
+; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR2]] {
+; GFX90A-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
+; GFX90A-NEXT: ret double [[RES]]
+;
+; GFX942-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_dynamic(
+; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR2]] {
+; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
+; GFX942-NEXT: ret double [[RES]]
+;
+; GFX10-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_dynamic(
+; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR2]] {
+; GFX10-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX10: atomicrmw.start:
+; GFX10-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
+; GFX10-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
+; GFX10-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
+; GFX10-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
+; GFX10-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
+; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
+; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
+; GFX10-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
+; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX10: atomicrmw.end:
+; GFX10-NEXT: ret double [[TMP5]]
+;
+; GFX11-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_dynamic(
+; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR2]] {
+; GFX11-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX11: atomicrmw.start:
+; GFX11-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
+; GFX11-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
+; GFX11-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
+; GFX11-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
+; GFX11-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
+; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
+; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
+; GFX11-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
+; GFX11-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX11: atomicrmw.end:
+; GFX11-NEXT: ret double [[TMP5]]
+;
+; GFX12-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_dynamic(
+; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR2]] {
+; GFX12-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX12-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX12: atomicrmw.start:
+; GFX12-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
+; GFX12-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
+; GFX12-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
+; GFX12-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
+; GFX12-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
+; GFX12-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
+; GFX12-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
+; GFX12-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
+; GFX12-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX12: atomicrmw.end:
+; GFX12-NEXT: ret double [[TMP5]]
+;
+ %res = atomicrmw fadd ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0, !amdgpu.ignore.denormal.mode !0
+ ret double %res
+}
+
+;---------------------------------------------------------------------
; atomicrmw fsub
;---------------------------------------------------------------------
-define double @test_atomicrmw_fsub_f64_global_agent(ptr addrspace(1) %ptr, double %value) {
-; COMMON-LABEL: define double @test_atomicrmw_fsub_f64_global_agent(
-; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
-; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
-; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
-; COMMON: atomicrmw.start:
-; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[RES:%.*]], [[ATOMICRMW_START]] ]
-; COMMON-NEXT: [[NEW:%.*]] = fsub double [[LOADED]], [[VALUE]]
-; COMMON-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
-; COMMON-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
-; COMMON-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8
-; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
-; COMMON-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
-; COMMON-NEXT: [[RES]] = bitcast i64 [[NEWLOADED]] to double
-; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
-; COMMON: atomicrmw.end:
-; COMMON-NEXT: ret double [[RES]]
+define double @test_atomicrmw_fsub_f64_global_agent(ptr addrspace(1) %ptr, double %value) {
+; COMMON-LABEL: define double @test_atomicrmw_fsub_f64_global_agent(
+; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
+; COMMON: atomicrmw.start:
+; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[RES:%.*]], [[ATOMICRMW_START]] ]
+; COMMON-NEXT: [[NEW:%.*]] = fsub double [[LOADED]], [[VALUE]]
+; COMMON-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
+; COMMON-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
+; COMMON-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8
+; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
+; COMMON-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
+; COMMON-NEXT: [[RES]] = bitcast i64 [[NEWLOADED]] to double
+; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; COMMON: atomicrmw.end:
+; COMMON-NEXT: ret double [[RES]]
+;
+ %res = atomicrmw fsub ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst
+ ret double %res
+}
+
+define double @test_atomicrmw_fsub_f64_global_agent__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, double %value) {
+; COMMON-LABEL: define double @test_atomicrmw_fsub_f64_global_agent__amdgpu_no_fine_grained_memory(
+; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
+; COMMON: atomicrmw.start:
+; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[RES:%.*]], [[ATOMICRMW_START]] ]
+; COMMON-NEXT: [[NEW:%.*]] = fsub double [[LOADED]], [[VALUE]]
+; COMMON-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
+; COMMON-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
+; COMMON-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]]
+; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
+; COMMON-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
+; COMMON-NEXT: [[RES]] = bitcast i64 [[NEWLOADED]] to double
+; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; COMMON: atomicrmw.end:
+; COMMON-NEXT: ret double [[RES]]
+;
+ %res = atomicrmw fsub ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
+ ret double %res
+}
+
+define double @test_atomicrmw_fsub_f64_global_agent__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, double %value) {
+; COMMON-LABEL: define double @test_atomicrmw_fsub_f64_global_agent__amdgpu_no_remote_memory(
+; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
+; COMMON: atomicrmw.start:
+; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[RES:%.*]], [[ATOMICRMW_START]] ]
+; COMMON-NEXT: [[NEW:%.*]] = fsub double [[LOADED]], [[VALUE]]
+; COMMON-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
+; COMMON-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
+; COMMON-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.remote.memory [[META0]]
+; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
+; COMMON-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
+; COMMON-NEXT: [[RES]] = bitcast i64 [[NEWLOADED]] to double
+; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; COMMON: atomicrmw.end:
+; COMMON-NEXT: ret double [[RES]]
+;
+ %res = atomicrmw fsub ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst, !amdgpu.no.remote.memory !0
+ ret double %res
+}
+
+define double @test_atomicrmw_fsub_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, double %value) {
+; COMMON-LABEL: define double @test_atomicrmw_fsub_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
+; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
+; COMMON: atomicrmw.start:
+; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[RES:%.*]], [[ATOMICRMW_START]] ]
+; COMMON-NEXT: [[NEW:%.*]] = fsub double [[LOADED]], [[VALUE]]
+; COMMON-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
+; COMMON-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
+; COMMON-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
+; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
+; COMMON-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
+; COMMON-NEXT: [[RES]] = bitcast i64 [[NEWLOADED]] to double
+; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; COMMON: atomicrmw.end:
+; COMMON-NEXT: ret double [[RES]]
+;
+ %res = atomicrmw fsub ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
+ ret double %res
+}
+
+define double @test_atomicrmw_fsub_f64_global_agent__amdgpu_ignore_denormal_mode(ptr addrspace(1) %ptr, double %value) {
+; COMMON-LABEL: define double @test_atomicrmw_fsub_f64_global_agent__amdgpu_ignore_denormal_mode(
+; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
+; COMMON: atomicrmw.start:
+; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
+; COMMON-NEXT: [[NEW:%.*]] = fsub double [[LOADED]], [[VALUE]]
+; COMMON-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
+; COMMON-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
+; COMMON-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8
+; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
+; COMMON-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
+; COMMON-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
+; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; COMMON: atomicrmw.end:
+; COMMON-NEXT: ret double [[TMP5]]
+;
+ %res = atomicrmw fsub ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst, align 8, !amdgpu.ignore.denormal.mode !0
+ ret double %res
+}
+
+define double @test_atomicrmw_fsub_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, double %value) {
+; COMMON-LABEL: define double @test_atomicrmw_fsub_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(
+; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
+; COMMON: atomicrmw.start:
+; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
+; COMMON-NEXT: [[NEW:%.*]] = fsub double [[LOADED]], [[VALUE]]
+; COMMON-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
+; COMMON-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
+; COMMON-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]]
+; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
+; COMMON-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
+; COMMON-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
+; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; COMMON: atomicrmw.end:
+; COMMON-NEXT: ret double [[TMP5]]
+;
+ %res = atomicrmw fsub ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory !0, !amdgpu.ignore.denormal.mode !0
+ ret double %res
+}
+
+define double @test_atomicrmw_fsub_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, double %value) {
+; COMMON-LABEL: define double @test_atomicrmw_fsub_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(
+; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
+; COMMON: atomicrmw.start:
+; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
+; COMMON-NEXT: [[NEW:%.*]] = fsub double [[LOADED]], [[VALUE]]
+; COMMON-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
+; COMMON-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
+; COMMON-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.remote.memory [[META0]]
+; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
+; COMMON-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
+; COMMON-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
+; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; COMMON: atomicrmw.end:
+; COMMON-NEXT: ret double [[TMP5]]
+;
+ %res = atomicrmw fsub ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst, align 8, !amdgpu.no.remote.memory !0, !amdgpu.ignore.denormal.mode !0
+ ret double %res
+}
+
+define double @test_atomicrmw_fsub_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, double %value) {
+; COMMON-LABEL: define double @test_atomicrmw_fsub_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
+; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
+; COMMON: atomicrmw.start:
+; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
+; COMMON-NEXT: [[NEW:%.*]] = fsub double [[LOADED]], [[VALUE]]
+; COMMON-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
+; COMMON-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
+; COMMON-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
+; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
+; COMMON-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
+; COMMON-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
+; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; COMMON: atomicrmw.end:
+; COMMON-NEXT: ret double [[TMP5]]
+;
+ %res = atomicrmw fsub ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0, !amdgpu.ignore.denormal.mode !0
+ ret double %res
+}
+
+;---------------------------------------------------------------------
+; atomicrmw fmax
+;---------------------------------------------------------------------
+
+define double @test_atomicrmw_fmax_f64_global_agent(ptr addrspace(1) %ptr, double %value) {
+; GFX803-LABEL: define double @test_atomicrmw_fmax_f64_global_agent(
+; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX803-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX803: atomicrmw.start:
+; GFX803-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX803-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
+; GFX803-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
+; GFX803-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
+; GFX803-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8
+; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
+; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
+; GFX803-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
+; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX803: atomicrmw.end:
+; GFX803-NEXT: ret double [[TMP6]]
+;
+; GFX906-LABEL: define double @test_atomicrmw_fmax_f64_global_agent(
+; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX906-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX906: atomicrmw.start:
+; GFX906-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX906-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
+; GFX906-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
+; GFX906-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
+; GFX906-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8
+; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
+; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
+; GFX906-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
+; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX906: atomicrmw.end:
+; GFX906-NEXT: ret double [[TMP6]]
+;
+; GFX908-LABEL: define double @test_atomicrmw_fmax_f64_global_agent(
+; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX908-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX908: atomicrmw.start:
+; GFX908-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX908-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
+; GFX908-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
+; GFX908-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
+; GFX908-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8
+; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
+; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
+; GFX908-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
+; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX908: atomicrmw.end:
+; GFX908-NEXT: ret double [[TMP6]]
+;
+; GFX90A-LABEL: define double @test_atomicrmw_fmax_f64_global_agent(
+; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX90A-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX90A: atomicrmw.start:
+; GFX90A-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX90A-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
+; GFX90A-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
+; GFX90A-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
+; GFX90A-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8
+; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
+; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
+; GFX90A-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
+; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX90A: atomicrmw.end:
+; GFX90A-NEXT: ret double [[TMP6]]
+;
+; GFX942-LABEL: define double @test_atomicrmw_fmax_f64_global_agent(
+; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX942-NEXT: [[RES:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8
+; GFX942-NEXT: ret double [[RES]]
+;
+; GFX10-LABEL: define double @test_atomicrmw_fmax_f64_global_agent(
+; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX10-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX10: atomicrmw.start:
+; GFX10-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX10-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
+; GFX10-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
+; GFX10-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
+; GFX10-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8
+; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
+; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
+; GFX10-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
+; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX10: atomicrmw.end:
+; GFX10-NEXT: ret double [[TMP6]]
+;
+; GFX11-LABEL: define double @test_atomicrmw_fmax_f64_global_agent(
+; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX11-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX11: atomicrmw.start:
+; GFX11-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX11-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
+; GFX11-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
+; GFX11-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
+; GFX11-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8
+; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
+; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
+; GFX11-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
+; GFX11-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX11: atomicrmw.end:
+; GFX11-NEXT: ret double [[TMP6]]
+;
+; GFX12-LABEL: define double @test_atomicrmw_fmax_f64_global_agent(
+; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX12-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX12-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX12: atomicrmw.start:
+; GFX12-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX12-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
+; GFX12-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
+; GFX12-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
+; GFX12-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8
+; GFX12-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
+; GFX12-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
+; GFX12-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
+; GFX12-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX12: atomicrmw.end:
+; GFX12-NEXT: ret double [[TMP6]]
+;
+ %res = atomicrmw fmax ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst
+ ret double %res
+}
+
+define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, double %value) {
+; GFX803-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_no_fine_grained_memory(
+; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX803-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX803: atomicrmw.start:
+; GFX803-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX803-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
+; GFX803-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
+; GFX803-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
+; GFX803-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]]
+; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
+; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
+; GFX803-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
+; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX803: atomicrmw.end:
+; GFX803-NEXT: ret double [[TMP6]]
+;
+; GFX906-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_no_fine_grained_memory(
+; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX906-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX906: atomicrmw.start:
+; GFX906-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX906-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
+; GFX906-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
+; GFX906-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
+; GFX906-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]]
+; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
+; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
+; GFX906-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
+; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX906: atomicrmw.end:
+; GFX906-NEXT: ret double [[TMP6]]
+;
+; GFX908-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_no_fine_grained_memory(
+; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX908-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX908: atomicrmw.start:
+; GFX908-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX908-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
+; GFX908-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
+; GFX908-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
+; GFX908-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]]
+; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
+; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
+; GFX908-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
+; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX908: atomicrmw.end:
+; GFX908-NEXT: ret double [[TMP6]]
+;
+; GFX90A-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_no_fine_grained_memory(
+; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX90A-NEXT: [[RES:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]]
+; GFX90A-NEXT: ret double [[RES]]
+;
+; GFX942-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_no_fine_grained_memory(
+; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX942-NEXT: [[RES:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]]
+; GFX942-NEXT: ret double [[RES]]
+;
+; GFX10-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_no_fine_grained_memory(
+; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX10-NEXT: [[RES:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]]
+; GFX10-NEXT: ret double [[RES]]
+;
+; GFX11-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_no_fine_grained_memory(
+; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX11-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX11: atomicrmw.start:
+; GFX11-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX11-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
+; GFX11-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
+; GFX11-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
+; GFX11-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]]
+; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
+; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
+; GFX11-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
+; GFX11-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX11: atomicrmw.end:
+; GFX11-NEXT: ret double [[TMP6]]
+;
+; GFX12-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_no_fine_grained_memory(
+; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX12-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX12-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX12: atomicrmw.start:
+; GFX12-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX12-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
+; GFX12-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
+; GFX12-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
+; GFX12-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]]
+; GFX12-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
+; GFX12-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
+; GFX12-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
+; GFX12-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX12: atomicrmw.end:
+; GFX12-NEXT: ret double [[TMP6]]
+;
+ %res = atomicrmw fmax ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
+ ret double %res
+}
+
+define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, double %value) {
+; GFX803-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_no_remote_memory(
+; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX803-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX803: atomicrmw.start:
+; GFX803-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX803-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
+; GFX803-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
+; GFX803-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
+; GFX803-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.remote.memory [[META0]]
+; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
+; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
+; GFX803-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
+; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX803: atomicrmw.end:
+; GFX803-NEXT: ret double [[TMP6]]
+;
+; GFX906-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_no_remote_memory(
+; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX906-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX906: atomicrmw.start:
+; GFX906-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX906-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
+; GFX906-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
+; GFX906-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
+; GFX906-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.remote.memory [[META0]]
+; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
+; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
+; GFX906-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
+; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX906: atomicrmw.end:
+; GFX906-NEXT: ret double [[TMP6]]
+;
+; GFX908-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_no_remote_memory(
+; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX908-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX908: atomicrmw.start:
+; GFX908-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX908-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
+; GFX908-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
+; GFX908-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
+; GFX908-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.remote.memory [[META0]]
+; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
+; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
+; GFX908-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
+; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX908: atomicrmw.end:
+; GFX908-NEXT: ret double [[TMP6]]
+;
+; GFX90A-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_no_remote_memory(
+; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX90A-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX90A: atomicrmw.start:
+; GFX90A-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[RES:%.*]], [[ATOMICRMW_START]] ]
+; GFX90A-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
+; GFX90A-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
+; GFX90A-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
+; GFX90A-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.remote.memory [[META0]]
+; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
+; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
+; GFX90A-NEXT: [[RES]] = bitcast i64 [[NEWLOADED]] to double
+; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX90A: atomicrmw.end:
+; GFX90A-NEXT: ret double [[RES]]
+;
+; GFX942-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_no_remote_memory(
+; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX942-NEXT: [[RES:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.remote.memory [[META0]]
+; GFX942-NEXT: ret double [[RES]]
+;
+; GFX10-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_no_remote_memory(
+; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX10-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX10: atomicrmw.start:
+; GFX10-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[RES:%.*]], [[ATOMICRMW_START]] ]
+; GFX10-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
+; GFX10-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
+; GFX10-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
+; GFX10-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.remote.memory [[META0]]
+; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
+; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
+; GFX10-NEXT: [[RES]] = bitcast i64 [[NEWLOADED]] to double
+; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX10: atomicrmw.end:
+; GFX10-NEXT: ret double [[RES]]
+;
+; GFX11-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_no_remote_memory(
+; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX11-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX11: atomicrmw.start:
+; GFX11-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX11-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
+; GFX11-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
+; GFX11-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
+; GFX11-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.remote.memory [[META0]]
+; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
+; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
+; GFX11-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
+; GFX11-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX11: atomicrmw.end:
+; GFX11-NEXT: ret double [[TMP6]]
+;
+; GFX12-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_no_remote_memory(
+; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX12-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX12-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX12: atomicrmw.start:
+; GFX12-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX12-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
+; GFX12-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
+; GFX12-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
+; GFX12-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.remote.memory [[META0]]
+; GFX12-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
+; GFX12-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
+; GFX12-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
+; GFX12-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX12: atomicrmw.end:
+; GFX12-NEXT: ret double [[TMP6]]
+;
+ %res = atomicrmw fmax ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst, !amdgpu.no.remote.memory !0
+ ret double %res
+}
+
+define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, double %value) {
+; GFX803-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
+; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX803-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX803: atomicrmw.start:
+; GFX803-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX803-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
+; GFX803-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
+; GFX803-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
+; GFX803-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
+; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
+; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
+; GFX803-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
+; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX803: atomicrmw.end:
+; GFX803-NEXT: ret double [[TMP6]]
+;
+; GFX906-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
+; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX906-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX906: atomicrmw.start:
+; GFX906-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX906-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
+; GFX906-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
+; GFX906-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
+; GFX906-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
+; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
+; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
+; GFX906-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
+; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX906: atomicrmw.end:
+; GFX906-NEXT: ret double [[TMP6]]
+;
+; GFX908-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
+; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX908-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX908: atomicrmw.start:
+; GFX908-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX908-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
+; GFX908-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
+; GFX908-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
+; GFX908-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
+; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
+; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
+; GFX908-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
+; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX908: atomicrmw.end:
+; GFX908-NEXT: ret double [[TMP6]]
+;
+; GFX90A-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
+; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX90A-NEXT: [[RES:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
+; GFX90A-NEXT: ret double [[RES]]
+;
+; GFX942-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
+; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX942-NEXT: [[RES:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
+; GFX942-NEXT: ret double [[RES]]
+;
+; GFX10-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
+; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX10-NEXT: [[RES:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
+; GFX10-NEXT: ret double [[RES]]
+;
+; GFX11-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
+; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX11-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX11: atomicrmw.start:
+; GFX11-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX11-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
+; GFX11-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
+; GFX11-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
+; GFX11-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
+; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
+; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
+; GFX11-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
+; GFX11-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX11: atomicrmw.end:
+; GFX11-NEXT: ret double [[TMP6]]
+;
+; GFX12-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
+; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX12-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX12-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX12: atomicrmw.start:
+; GFX12-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX12-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
+; GFX12-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
+; GFX12-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
+; GFX12-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
+; GFX12-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
+; GFX12-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
+; GFX12-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
+; GFX12-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX12: atomicrmw.end:
+; GFX12-NEXT: ret double [[TMP6]]
+;
+ %res = atomicrmw fmax ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
+ ret double %res
+}
+
+define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_ignore_denormal_mode(ptr addrspace(1) %ptr, double %value) {
+; GFX803-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_ignore_denormal_mode(
+; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX803-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX803: atomicrmw.start:
+; GFX803-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX803-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
+; GFX803-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
+; GFX803-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
+; GFX803-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8
+; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
+; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
+; GFX803-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
+; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX803: atomicrmw.end:
+; GFX803-NEXT: ret double [[TMP6]]
+;
+; GFX906-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_ignore_denormal_mode(
+; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX906-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX906: atomicrmw.start:
+; GFX906-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX906-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
+; GFX906-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
+; GFX906-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
+; GFX906-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8
+; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
+; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
+; GFX906-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
+; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX906: atomicrmw.end:
+; GFX906-NEXT: ret double [[TMP6]]
+;
+; GFX908-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_ignore_denormal_mode(
+; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX908-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX908: atomicrmw.start:
+; GFX908-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX908-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
+; GFX908-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
+; GFX908-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
+; GFX908-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8
+; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
+; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
+; GFX908-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
+; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX908: atomicrmw.end:
+; GFX908-NEXT: ret double [[TMP6]]
+;
+; GFX90A-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_ignore_denormal_mode(
+; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX90A-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX90A: atomicrmw.start:
+; GFX90A-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX90A-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
+; GFX90A-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
+; GFX90A-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
+; GFX90A-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8
+; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
+; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
+; GFX90A-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
+; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX90A: atomicrmw.end:
+; GFX90A-NEXT: ret double [[TMP6]]
+;
+; GFX942-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_ignore_denormal_mode(
+; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX942-NEXT: [[RES:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.ignore.denormal.mode [[META0]]
+; GFX942-NEXT: ret double [[RES]]
+;
+; GFX10-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_ignore_denormal_mode(
+; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX10-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX10: atomicrmw.start:
+; GFX10-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX10-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
+; GFX10-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
+; GFX10-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
+; GFX10-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8
+; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
+; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
+; GFX10-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
+; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX10: atomicrmw.end:
+; GFX10-NEXT: ret double [[TMP6]]
+;
+; GFX11-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_ignore_denormal_mode(
+; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX11-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX11: atomicrmw.start:
+; GFX11-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX11-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
+; GFX11-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
+; GFX11-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
+; GFX11-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8
+; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
+; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
+; GFX11-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
+; GFX11-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX11: atomicrmw.end:
+; GFX11-NEXT: ret double [[TMP6]]
+;
+; GFX12-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_ignore_denormal_mode(
+; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX12-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX12-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX12: atomicrmw.start:
+; GFX12-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX12-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
+; GFX12-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
+; GFX12-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
+; GFX12-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8
+; GFX12-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
+; GFX12-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
+; GFX12-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
+; GFX12-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX12: atomicrmw.end:
+; GFX12-NEXT: ret double [[TMP6]]
+;
+ %res = atomicrmw fmax ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst, align 8, !amdgpu.ignore.denormal.mode !0
+ ret double %res
+}
+
+define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, double %value) {
+; GFX803-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(
+; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX803-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX803: atomicrmw.start:
+; GFX803-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX803-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
+; GFX803-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
+; GFX803-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
+; GFX803-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]]
+; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
+; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
+; GFX803-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
+; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX803: atomicrmw.end:
+; GFX803-NEXT: ret double [[TMP6]]
+;
+; GFX906-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(
+; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX906-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX906: atomicrmw.start:
+; GFX906-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX906-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
+; GFX906-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
+; GFX906-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
+; GFX906-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]]
+; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
+; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
+; GFX906-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
+; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX906: atomicrmw.end:
+; GFX906-NEXT: ret double [[TMP6]]
+;
+; GFX908-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(
+; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX908-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX908: atomicrmw.start:
+; GFX908-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX908-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
+; GFX908-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
+; GFX908-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
+; GFX908-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]]
+; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
+; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
+; GFX908-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
+; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX908: atomicrmw.end:
+; GFX908-NEXT: ret double [[TMP6]]
+;
+; GFX90A-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(
+; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX90A-NEXT: [[RES:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
+; GFX90A-NEXT: ret double [[RES]]
+;
+; GFX942-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(
+; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX942-NEXT: [[RES:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
+; GFX942-NEXT: ret double [[RES]]
+;
+; GFX10-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(
+; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX10-NEXT: [[RES:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
+; GFX10-NEXT: ret double [[RES]]
+;
+; GFX11-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(
+; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX11-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX11: atomicrmw.start:
+; GFX11-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX11-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
+; GFX11-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
+; GFX11-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
+; GFX11-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]]
+; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
+; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
+; GFX11-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
+; GFX11-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX11: atomicrmw.end:
+; GFX11-NEXT: ret double [[TMP6]]
+;
+; GFX12-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(
+; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX12-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX12-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX12: atomicrmw.start:
+; GFX12-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX12-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
+; GFX12-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
+; GFX12-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
+; GFX12-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]]
+; GFX12-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
+; GFX12-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
+; GFX12-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
+; GFX12-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX12: atomicrmw.end:
+; GFX12-NEXT: ret double [[TMP6]]
+;
+ %res = atomicrmw fmax ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory !0, !amdgpu.ignore.denormal.mode !0
+ ret double %res
+}
+
+define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, double %value) {
+; GFX803-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(
+; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX803-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX803: atomicrmw.start:
+; GFX803-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX803-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
+; GFX803-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
+; GFX803-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
+; GFX803-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.remote.memory [[META0]]
+; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
+; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
+; GFX803-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
+; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX803: atomicrmw.end:
+; GFX803-NEXT: ret double [[TMP6]]
+;
+; GFX906-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(
+; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX906-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX906: atomicrmw.start:
+; GFX906-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX906-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
+; GFX906-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
+; GFX906-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
+; GFX906-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.remote.memory [[META0]]
+; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
+; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
+; GFX906-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
+; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX906: atomicrmw.end:
+; GFX906-NEXT: ret double [[TMP6]]
+;
+; GFX908-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(
+; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX908-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX908: atomicrmw.start:
+; GFX908-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX908-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
+; GFX908-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
+; GFX908-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
+; GFX908-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.remote.memory [[META0]]
+; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
+; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
+; GFX908-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
+; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX908: atomicrmw.end:
+; GFX908-NEXT: ret double [[TMP6]]
+;
+; GFX90A-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(
+; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX90A-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX90A: atomicrmw.start:
+; GFX90A-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX90A-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
+; GFX90A-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
+; GFX90A-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
+; GFX90A-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.remote.memory [[META0]]
+; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
+; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
+; GFX90A-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
+; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX90A: atomicrmw.end:
+; GFX90A-NEXT: ret double [[TMP6]]
+;
+; GFX942-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(
+; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX942-NEXT: [[RES:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
+; GFX942-NEXT: ret double [[RES]]
+;
+; GFX10-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(
+; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX10-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX10: atomicrmw.start:
+; GFX10-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX10-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
+; GFX10-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
+; GFX10-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
+; GFX10-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.remote.memory [[META0]]
+; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
+; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
+; GFX10-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
+; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX10: atomicrmw.end:
+; GFX10-NEXT: ret double [[TMP6]]
+;
+; GFX11-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(
+; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX11-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX11: atomicrmw.start:
+; GFX11-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX11-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
+; GFX11-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
+; GFX11-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
+; GFX11-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.remote.memory [[META0]]
+; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
+; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
+; GFX11-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
+; GFX11-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX11: atomicrmw.end:
+; GFX11-NEXT: ret double [[TMP6]]
;
- %res = atomicrmw fsub ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst
- ret double %res
-}
-
-define double @test_atomicrmw_fsub_f64_global_agent__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, double %value) {
-; COMMON-LABEL: define double @test_atomicrmw_fsub_f64_global_agent__amdgpu_no_fine_grained_memory(
-; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
-; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
-; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
-; COMMON: atomicrmw.start:
-; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[RES:%.*]], [[ATOMICRMW_START]] ]
-; COMMON-NEXT: [[NEW:%.*]] = fsub double [[LOADED]], [[VALUE]]
-; COMMON-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
-; COMMON-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
-; COMMON-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]]
-; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
-; COMMON-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
-; COMMON-NEXT: [[RES]] = bitcast i64 [[NEWLOADED]] to double
-; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
-; COMMON: atomicrmw.end:
-; COMMON-NEXT: ret double [[RES]]
+; GFX12-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(
+; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX12-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX12-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX12: atomicrmw.start:
+; GFX12-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX12-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
+; GFX12-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
+; GFX12-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
+; GFX12-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.remote.memory [[META0]]
+; GFX12-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
+; GFX12-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
+; GFX12-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
+; GFX12-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX12: atomicrmw.end:
+; GFX12-NEXT: ret double [[TMP6]]
;
- %res = atomicrmw fsub ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
+ %res = atomicrmw fmax ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst, align 8, !amdgpu.no.remote.memory !0, !amdgpu.ignore.denormal.mode !0
ret double %res
}
-define double @test_atomicrmw_fsub_f64_global_agent__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, double %value) {
-; COMMON-LABEL: define double @test_atomicrmw_fsub_f64_global_agent__amdgpu_no_remote_memory(
-; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
-; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
-; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
-; COMMON: atomicrmw.start:
-; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[RES:%.*]], [[ATOMICRMW_START]] ]
-; COMMON-NEXT: [[NEW:%.*]] = fsub double [[LOADED]], [[VALUE]]
-; COMMON-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
-; COMMON-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
-; COMMON-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.remote.memory [[META0]]
-; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
-; COMMON-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
-; COMMON-NEXT: [[RES]] = bitcast i64 [[NEWLOADED]] to double
-; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
-; COMMON: atomicrmw.end:
-; COMMON-NEXT: ret double [[RES]]
+define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, double %value) {
+; GFX803-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
+; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX803-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX803: atomicrmw.start:
+; GFX803-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX803-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
+; GFX803-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
+; GFX803-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
+; GFX803-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
+; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
+; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
+; GFX803-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
+; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX803: atomicrmw.end:
+; GFX803-NEXT: ret double [[TMP6]]
;
- %res = atomicrmw fsub ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst, !amdgpu.no.remote.memory !0
- ret double %res
-}
-
-define double @test_atomicrmw_fsub_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, double %value) {
-; COMMON-LABEL: define double @test_atomicrmw_fsub_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
-; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
-; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
-; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
-; COMMON: atomicrmw.start:
-; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[RES:%.*]], [[ATOMICRMW_START]] ]
-; COMMON-NEXT: [[NEW:%.*]] = fsub double [[LOADED]], [[VALUE]]
-; COMMON-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
-; COMMON-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
-; COMMON-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
-; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
-; COMMON-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
-; COMMON-NEXT: [[RES]] = bitcast i64 [[NEWLOADED]] to double
-; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
-; COMMON: atomicrmw.end:
-; COMMON-NEXT: ret double [[RES]]
+; GFX906-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
+; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX906-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX906: atomicrmw.start:
+; GFX906-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX906-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
+; GFX906-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
+; GFX906-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
+; GFX906-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
+; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
+; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
+; GFX906-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
+; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX906: atomicrmw.end:
+; GFX906-NEXT: ret double [[TMP6]]
;
- %res = atomicrmw fsub ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
- ret double %res
-}
-
-define double @test_atomicrmw_fsub_f64_global_agent__amdgpu_ignore_denormal_mode(ptr addrspace(1) %ptr, double %value) {
-; COMMON-LABEL: define double @test_atomicrmw_fsub_f64_global_agent__amdgpu_ignore_denormal_mode(
-; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
-; COMMON-NEXT: [[TMP3:%.*]] = alloca double, align 8, addrspace(5)
-; COMMON-NEXT: [[TMP2:%.*]] = alloca double, align 8, addrspace(5)
-; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
-; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
-; COMMON: atomicrmw.start:
-; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
-; COMMON-NEXT: [[NEW:%.*]] = fsub double [[LOADED]], [[VALUE]]
-; COMMON-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP3]])
-; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP3]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
-; COMMON-NEXT: store double [[NEW]], ptr addrspace(5) [[TMP2]], align 8
-; COMMON-NEXT: [[TMP9:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP4]], ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
-; COMMON-NEXT: [[TMP6:%.*]] = load double, ptr addrspace(5) [[TMP3]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP3]])
-; COMMON-NEXT: [[TMP7:%.*]] = insertvalue { double, i1 } poison, double [[TMP6]], 0
-; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } [[TMP7]], i1 [[TMP9]], 1
-; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP8]], 1
-; COMMON-NEXT: [[TMP5]] = extractvalue { double, i1 } [[TMP8]], 0
-; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
-; COMMON: atomicrmw.end:
-; COMMON-NEXT: ret double [[TMP5]]
+; GFX908-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
+; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX908-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX908: atomicrmw.start:
+; GFX908-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX908-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
+; GFX908-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
+; GFX908-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
+; GFX908-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
+; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
+; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
+; GFX908-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
+; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX908: atomicrmw.end:
+; GFX908-NEXT: ret double [[TMP6]]
;
- %res = atomicrmw fsub ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst, align 4, !amdgpu.ignore.denormal.mode !0
- ret double %res
-}
-
-define double @test_atomicrmw_fsub_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, double %value) {
-; COMMON-LABEL: define double @test_atomicrmw_fsub_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(
-; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
-; COMMON-NEXT: [[TMP3:%.*]] = alloca double, align 8, addrspace(5)
-; COMMON-NEXT: [[TMP2:%.*]] = alloca double, align 8, addrspace(5)
-; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
-; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
-; COMMON: atomicrmw.start:
-; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
-; COMMON-NEXT: [[NEW:%.*]] = fsub double [[LOADED]], [[VALUE]]
-; COMMON-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP3]])
-; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP3]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
-; COMMON-NEXT: store double [[NEW]], ptr addrspace(5) [[TMP2]], align 8
-; COMMON-NEXT: [[TMP9:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP4]], ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
-; COMMON-NEXT: [[TMP6:%.*]] = load double, ptr addrspace(5) [[TMP3]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP3]])
-; COMMON-NEXT: [[TMP7:%.*]] = insertvalue { double, i1 } poison, double [[TMP6]], 0
-; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } [[TMP7]], i1 [[TMP9]], 1
-; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP8]], 1
-; COMMON-NEXT: [[TMP5]] = extractvalue { double, i1 } [[TMP8]], 0
-; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
-; COMMON: atomicrmw.end:
-; COMMON-NEXT: ret double [[TMP5]]
+; GFX90A-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
+; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX90A-NEXT: [[RES:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
+; GFX90A-NEXT: ret double [[RES]]
;
- %res = atomicrmw fsub ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0, !amdgpu.ignore.denormal.mode !0
- ret double %res
-}
-
-define double @test_atomicrmw_fsub_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, double %value) {
-; COMMON-LABEL: define double @test_atomicrmw_fsub_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(
-; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
-; COMMON-NEXT: [[TMP3:%.*]] = alloca double, align 8, addrspace(5)
-; COMMON-NEXT: [[TMP2:%.*]] = alloca double, align 8, addrspace(5)
-; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
-; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
-; COMMON: atomicrmw.start:
-; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
-; COMMON-NEXT: [[NEW:%.*]] = fsub double [[LOADED]], [[VALUE]]
-; COMMON-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP3]])
-; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP3]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
-; COMMON-NEXT: store double [[NEW]], ptr addrspace(5) [[TMP2]], align 8
-; COMMON-NEXT: [[TMP9:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP4]], ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
-; COMMON-NEXT: [[TMP6:%.*]] = load double, ptr addrspace(5) [[TMP3]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP3]])
-; COMMON-NEXT: [[TMP7:%.*]] = insertvalue { double, i1 } poison, double [[TMP6]], 0
-; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } [[TMP7]], i1 [[TMP9]], 1
-; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP8]], 1
-; COMMON-NEXT: [[TMP5]] = extractvalue { double, i1 } [[TMP8]], 0
-; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
-; COMMON: atomicrmw.end:
-; COMMON-NEXT: ret double [[TMP5]]
+; GFX942-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
+; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX942-NEXT: [[RES:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
+; GFX942-NEXT: ret double [[RES]]
;
- %res = atomicrmw fsub ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory !0, !amdgpu.ignore.denormal.mode !0
- ret double %res
-}
-
-define double @test_atomicrmw_fsub_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, double %value) {
-; COMMON-LABEL: define double @test_atomicrmw_fsub_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
-; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
-; COMMON-NEXT: [[TMP3:%.*]] = alloca double, align 8, addrspace(5)
-; COMMON-NEXT: [[TMP2:%.*]] = alloca double, align 8, addrspace(5)
-; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
-; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
-; COMMON: atomicrmw.start:
-; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
-; COMMON-NEXT: [[NEW:%.*]] = fsub double [[LOADED]], [[VALUE]]
-; COMMON-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP3]])
-; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP3]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
-; COMMON-NEXT: store double [[NEW]], ptr addrspace(5) [[TMP2]], align 8
-; COMMON-NEXT: [[TMP9:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP4]], ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
-; COMMON-NEXT: [[TMP6:%.*]] = load double, ptr addrspace(5) [[TMP3]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP3]])
-; COMMON-NEXT: [[TMP7:%.*]] = insertvalue { double, i1 } poison, double [[TMP6]], 0
-; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } [[TMP7]], i1 [[TMP9]], 1
-; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP8]], 1
-; COMMON-NEXT: [[TMP5]] = extractvalue { double, i1 } [[TMP8]], 0
-; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
-; COMMON: atomicrmw.end:
-; COMMON-NEXT: ret double [[TMP5]]
+; GFX10-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
+; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX10-NEXT: [[RES:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
+; GFX10-NEXT: ret double [[RES]]
+;
+; GFX11-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
+; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX11-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX11: atomicrmw.start:
+; GFX11-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX11-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
+; GFX11-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
+; GFX11-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
+; GFX11-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
+; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
+; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
+; GFX11-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
+; GFX11-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX11: atomicrmw.end:
+; GFX11-NEXT: ret double [[TMP6]]
+;
+; GFX12-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
+; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX12-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX12-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX12: atomicrmw.start:
+; GFX12-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX12-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
+; GFX12-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
+; GFX12-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
+; GFX12-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
+; GFX12-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
+; GFX12-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
+; GFX12-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
+; GFX12-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX12: atomicrmw.end:
+; GFX12-NEXT: ret double [[TMP6]]
;
- %res = atomicrmw fsub ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0, !amdgpu.ignore.denormal.mode !0
+ %res = atomicrmw fmax ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0, !amdgpu.ignore.denormal.mode !0
ret double %res
}
;---------------------------------------------------------------------
-; atomicrmw fmax
+; atomicrmw fmin
;---------------------------------------------------------------------
-define double @test_atomicrmw_fmax_f64_global_agent(ptr addrspace(1) %ptr, double %value) {
-; GFX803-LABEL: define double @test_atomicrmw_fmax_f64_global_agent(
+define double @test_atomicrmw_fmin_f64_global_agent(ptr addrspace(1) %ptr, double %value) {
+; GFX803-LABEL: define double @test_atomicrmw_fmin_f64_global_agent(
; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; GFX803-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]]
; GFX803: atomicrmw.start:
; GFX803-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
-; GFX803-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
+; GFX803-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]])
; GFX803-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
; GFX803-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
; GFX803-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8
@@ -1210,13 +2654,13 @@ define double @test_atomicrmw_fmax_f64_global_agent(ptr addrspace(1) %ptr, doubl
; GFX803: atomicrmw.end:
; GFX803-NEXT: ret double [[TMP6]]
;
-; GFX906-LABEL: define double @test_atomicrmw_fmax_f64_global_agent(
+; GFX906-LABEL: define double @test_atomicrmw_fmin_f64_global_agent(
; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; GFX906-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]]
; GFX906: atomicrmw.start:
; GFX906-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
-; GFX906-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
+; GFX906-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]])
; GFX906-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
; GFX906-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
; GFX906-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8
@@ -1227,13 +2671,13 @@ define double @test_atomicrmw_fmax_f64_global_agent(ptr addrspace(1) %ptr, doubl
; GFX906: atomicrmw.end:
; GFX906-NEXT: ret double [[TMP6]]
;
-; GFX908-LABEL: define double @test_atomicrmw_fmax_f64_global_agent(
+; GFX908-LABEL: define double @test_atomicrmw_fmin_f64_global_agent(
; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; GFX908-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]]
; GFX908: atomicrmw.start:
; GFX908-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
-; GFX908-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
+; GFX908-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]])
; GFX908-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
; GFX908-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
; GFX908-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8
@@ -1244,13 +2688,13 @@ define double @test_atomicrmw_fmax_f64_global_agent(ptr addrspace(1) %ptr, doubl
; GFX908: atomicrmw.end:
; GFX908-NEXT: ret double [[TMP6]]
;
-; GFX90A-LABEL: define double @test_atomicrmw_fmax_f64_global_agent(
+; GFX90A-LABEL: define double @test_atomicrmw_fmin_f64_global_agent(
; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; GFX90A-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]]
; GFX90A: atomicrmw.start:
; GFX90A-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
-; GFX90A-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
+; GFX90A-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]])
; GFX90A-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
; GFX90A-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
; GFX90A-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8
@@ -1261,18 +2705,18 @@ define double @test_atomicrmw_fmax_f64_global_agent(ptr addrspace(1) %ptr, doubl
; GFX90A: atomicrmw.end:
; GFX90A-NEXT: ret double [[TMP6]]
;
-; GFX942-LABEL: define double @test_atomicrmw_fmax_f64_global_agent(
+; GFX942-LABEL: define double @test_atomicrmw_fmin_f64_global_agent(
; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
-; GFX942-NEXT: [[RES:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8
+; GFX942-NEXT: [[RES:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8
; GFX942-NEXT: ret double [[RES]]
;
-; GFX10-LABEL: define double @test_atomicrmw_fmax_f64_global_agent(
+; GFX10-LABEL: define double @test_atomicrmw_fmin_f64_global_agent(
; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; GFX10-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]]
; GFX10: atomicrmw.start:
; GFX10-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
-; GFX10-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
+; GFX10-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]])
; GFX10-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
; GFX10-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
; GFX10-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8
@@ -1283,13 +2727,13 @@ define double @test_atomicrmw_fmax_f64_global_agent(ptr addrspace(1) %ptr, doubl
; GFX10: atomicrmw.end:
; GFX10-NEXT: ret double [[TMP6]]
;
-; GFX11-LABEL: define double @test_atomicrmw_fmax_f64_global_agent(
+; GFX11-LABEL: define double @test_atomicrmw_fmin_f64_global_agent(
; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; GFX11-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]]
; GFX11: atomicrmw.start:
; GFX11-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
-; GFX11-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
+; GFX11-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]])
; GFX11-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
; GFX11-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
; GFX11-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8
@@ -1300,13 +2744,13 @@ define double @test_atomicrmw_fmax_f64_global_agent(ptr addrspace(1) %ptr, doubl
; GFX11: atomicrmw.end:
; GFX11-NEXT: ret double [[TMP6]]
;
-; GFX12-LABEL: define double @test_atomicrmw_fmax_f64_global_agent(
+; GFX12-LABEL: define double @test_atomicrmw_fmin_f64_global_agent(
; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; GFX12-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
; GFX12-NEXT: br label [[ATOMICRMW_START:%.*]]
; GFX12: atomicrmw.start:
; GFX12-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
-; GFX12-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
+; GFX12-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]])
; GFX12-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
; GFX12-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
; GFX12-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8
@@ -1317,18 +2761,18 @@ define double @test_atomicrmw_fmax_f64_global_agent(ptr addrspace(1) %ptr, doubl
; GFX12: atomicrmw.end:
; GFX12-NEXT: ret double [[TMP6]]
;
- %res = atomicrmw fmax ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst
+ %res = atomicrmw fmin ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst
ret double %res
}
-define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, double %value) {
-; GFX803-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_no_fine_grained_memory(
+define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, double %value) {
+; GFX803-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_no_fine_grained_memory(
; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; GFX803-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]]
; GFX803: atomicrmw.start:
; GFX803-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
-; GFX803-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
+; GFX803-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]])
; GFX803-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
; GFX803-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
; GFX803-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]]
@@ -1339,13 +2783,13 @@ define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_no_fine_grained_memo
; GFX803: atomicrmw.end:
; GFX803-NEXT: ret double [[TMP6]]
;
-; GFX906-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_no_fine_grained_memory(
+; GFX906-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_no_fine_grained_memory(
; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; GFX906-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]]
; GFX906: atomicrmw.start:
; GFX906-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
-; GFX906-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
+; GFX906-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]])
; GFX906-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
; GFX906-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
; GFX906-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]]
@@ -1356,13 +2800,13 @@ define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_no_fine_grained_memo
; GFX906: atomicrmw.end:
; GFX906-NEXT: ret double [[TMP6]]
;
-; GFX908-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_no_fine_grained_memory(
+; GFX908-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_no_fine_grained_memory(
; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; GFX908-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]]
; GFX908: atomicrmw.start:
; GFX908-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
-; GFX908-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
+; GFX908-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]])
; GFX908-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
; GFX908-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
; GFX908-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]]
@@ -1373,28 +2817,28 @@ define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_no_fine_grained_memo
; GFX908: atomicrmw.end:
; GFX908-NEXT: ret double [[TMP6]]
;
-; GFX90A-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_no_fine_grained_memory(
+; GFX90A-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_no_fine_grained_memory(
; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
-; GFX90A-NEXT: [[RES:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]]
+; GFX90A-NEXT: [[RES:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]]
; GFX90A-NEXT: ret double [[RES]]
;
-; GFX942-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_no_fine_grained_memory(
+; GFX942-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_no_fine_grained_memory(
; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
-; GFX942-NEXT: [[RES:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]]
+; GFX942-NEXT: [[RES:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]]
; GFX942-NEXT: ret double [[RES]]
;
-; GFX10-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_no_fine_grained_memory(
+; GFX10-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_no_fine_grained_memory(
; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
-; GFX10-NEXT: [[RES:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]]
+; GFX10-NEXT: [[RES:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]]
; GFX10-NEXT: ret double [[RES]]
;
-; GFX11-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_no_fine_grained_memory(
+; GFX11-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_no_fine_grained_memory(
; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; GFX11-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]]
; GFX11: atomicrmw.start:
; GFX11-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
-; GFX11-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
+; GFX11-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]])
; GFX11-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
; GFX11-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
; GFX11-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]]
@@ -1405,13 +2849,13 @@ define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_no_fine_grained_memo
; GFX11: atomicrmw.end:
; GFX11-NEXT: ret double [[TMP6]]
;
-; GFX12-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_no_fine_grained_memory(
+; GFX12-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_no_fine_grained_memory(
; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; GFX12-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
; GFX12-NEXT: br label [[ATOMICRMW_START:%.*]]
; GFX12: atomicrmw.start:
; GFX12-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
-; GFX12-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
+; GFX12-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]])
; GFX12-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
; GFX12-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
; GFX12-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]]
@@ -1422,18 +2866,18 @@ define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_no_fine_grained_memo
; GFX12: atomicrmw.end:
; GFX12-NEXT: ret double [[TMP6]]
;
- %res = atomicrmw fmax ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
+ %res = atomicrmw fmin ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
ret double %res
}
-define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, double %value) {
-; GFX803-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_no_remote_memory(
+define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, double %value) {
+; GFX803-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_no_remote_memory(
; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; GFX803-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]]
; GFX803: atomicrmw.start:
; GFX803-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
-; GFX803-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
+; GFX803-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]])
; GFX803-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
; GFX803-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
; GFX803-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.remote.memory [[META0]]
@@ -1444,13 +2888,13 @@ define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_no_remote_memory(ptr
; GFX803: atomicrmw.end:
; GFX803-NEXT: ret double [[TMP6]]
;
-; GFX906-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_no_remote_memory(
+; GFX906-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_no_remote_memory(
; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; GFX906-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]]
; GFX906: atomicrmw.start:
; GFX906-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
-; GFX906-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
+; GFX906-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]])
; GFX906-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
; GFX906-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
; GFX906-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.remote.memory [[META0]]
@@ -1461,13 +2905,13 @@ define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_no_remote_memory(ptr
; GFX906: atomicrmw.end:
; GFX906-NEXT: ret double [[TMP6]]
;
-; GFX908-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_no_remote_memory(
+; GFX908-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_no_remote_memory(
; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; GFX908-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]]
; GFX908: atomicrmw.start:
; GFX908-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
-; GFX908-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
+; GFX908-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]])
; GFX908-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
; GFX908-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
; GFX908-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.remote.memory [[META0]]
@@ -1478,13 +2922,13 @@ define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_no_remote_memory(ptr
; GFX908: atomicrmw.end:
; GFX908-NEXT: ret double [[TMP6]]
;
-; GFX90A-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_no_remote_memory(
+; GFX90A-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_no_remote_memory(
; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; GFX90A-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]]
; GFX90A: atomicrmw.start:
; GFX90A-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[RES:%.*]], [[ATOMICRMW_START]] ]
-; GFX90A-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
+; GFX90A-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]])
; GFX90A-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
; GFX90A-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
; GFX90A-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.remote.memory [[META0]]
@@ -1495,18 +2939,18 @@ define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_no_remote_memory(ptr
; GFX90A: atomicrmw.end:
; GFX90A-NEXT: ret double [[RES]]
;
-; GFX942-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_no_remote_memory(
+; GFX942-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_no_remote_memory(
; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
-; GFX942-NEXT: [[RES:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.remote.memory [[META0]]
+; GFX942-NEXT: [[RES:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.remote.memory [[META0]]
; GFX942-NEXT: ret double [[RES]]
;
-; GFX10-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_no_remote_memory(
+; GFX10-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_no_remote_memory(
; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; GFX10-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]]
; GFX10: atomicrmw.start:
; GFX10-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[RES:%.*]], [[ATOMICRMW_START]] ]
-; GFX10-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
+; GFX10-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]])
; GFX10-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
; GFX10-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
; GFX10-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.remote.memory [[META0]]
@@ -1517,13 +2961,13 @@ define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_no_remote_memory(ptr
; GFX10: atomicrmw.end:
; GFX10-NEXT: ret double [[RES]]
;
-; GFX11-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_no_remote_memory(
+; GFX11-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_no_remote_memory(
; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; GFX11-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]]
; GFX11: atomicrmw.start:
; GFX11-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
-; GFX11-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
+; GFX11-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]])
; GFX11-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
; GFX11-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
; GFX11-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.remote.memory [[META0]]
@@ -1534,13 +2978,13 @@ define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_no_remote_memory(ptr
; GFX11: atomicrmw.end:
; GFX11-NEXT: ret double [[TMP6]]
;
-; GFX12-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_no_remote_memory(
+; GFX12-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_no_remote_memory(
; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; GFX12-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
; GFX12-NEXT: br label [[ATOMICRMW_START:%.*]]
; GFX12: atomicrmw.start:
; GFX12-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
-; GFX12-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
+; GFX12-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]])
; GFX12-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
; GFX12-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
; GFX12-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.remote.memory [[META0]]
@@ -1551,18 +2995,18 @@ define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_no_remote_memory(ptr
; GFX12: atomicrmw.end:
; GFX12-NEXT: ret double [[TMP6]]
;
- %res = atomicrmw fmax ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst, !amdgpu.no.remote.memory !0
+ %res = atomicrmw fmin ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst, !amdgpu.no.remote.memory !0
ret double %res
}
-define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, double %value) {
-; GFX803-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
+define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, double %value) {
+; GFX803-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; GFX803-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]]
; GFX803: atomicrmw.start:
; GFX803-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
-; GFX803-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
+; GFX803-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]])
; GFX803-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
; GFX803-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
; GFX803-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
@@ -1573,13 +3017,13 @@ define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_no_fine_grained_memo
; GFX803: atomicrmw.end:
; GFX803-NEXT: ret double [[TMP6]]
;
-; GFX906-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
+; GFX906-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; GFX906-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]]
; GFX906: atomicrmw.start:
; GFX906-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
-; GFX906-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
+; GFX906-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]])
; GFX906-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
; GFX906-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
; GFX906-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
@@ -1590,13 +3034,13 @@ define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_no_fine_grained_memo
; GFX906: atomicrmw.end:
; GFX906-NEXT: ret double [[TMP6]]
;
-; GFX908-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
+; GFX908-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; GFX908-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]]
; GFX908: atomicrmw.start:
; GFX908-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
-; GFX908-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
+; GFX908-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]])
; GFX908-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
; GFX908-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
; GFX908-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
@@ -1607,28 +3051,28 @@ define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_no_fine_grained_memo
; GFX908: atomicrmw.end:
; GFX908-NEXT: ret double [[TMP6]]
;
-; GFX90A-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
+; GFX90A-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
-; GFX90A-NEXT: [[RES:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
+; GFX90A-NEXT: [[RES:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
; GFX90A-NEXT: ret double [[RES]]
;
-; GFX942-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
+; GFX942-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
-; GFX942-NEXT: [[RES:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
+; GFX942-NEXT: [[RES:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
; GFX942-NEXT: ret double [[RES]]
;
-; GFX10-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
+; GFX10-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
-; GFX10-NEXT: [[RES:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
+; GFX10-NEXT: [[RES:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
; GFX10-NEXT: ret double [[RES]]
;
-; GFX11-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
+; GFX11-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; GFX11-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]]
; GFX11: atomicrmw.start:
; GFX11-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
-; GFX11-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
+; GFX11-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]])
; GFX11-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
; GFX11-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
; GFX11-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
@@ -1639,13 +3083,13 @@ define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_no_fine_grained_memo
; GFX11: atomicrmw.end:
; GFX11-NEXT: ret double [[TMP6]]
;
-; GFX12-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
+; GFX12-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; GFX12-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
; GFX12-NEXT: br label [[ATOMICRMW_START:%.*]]
; GFX12: atomicrmw.start:
; GFX12-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
-; GFX12-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
+; GFX12-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]])
; GFX12-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
; GFX12-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
; GFX12-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
@@ -1656,140 +3100,12 @@ define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_no_fine_grained_memo
; GFX12: atomicrmw.end:
; GFX12-NEXT: ret double [[TMP6]]
;
- %res = atomicrmw fmax ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
- ret double %res
-}
-
-define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_ignore_denormal_mode(ptr addrspace(1) %ptr, double %value) {
-; COMMON-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_ignore_denormal_mode(
-; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
-; COMMON-NEXT: [[TMP3:%.*]] = alloca double, align 8, addrspace(5)
-; COMMON-NEXT: [[TMP4:%.*]] = alloca double, align 8, addrspace(5)
-; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
-; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
-; COMMON: atomicrmw.start:
-; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
-; COMMON-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
-; COMMON-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP3]])
-; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP3]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP4]])
-; COMMON-NEXT: store double [[TMP2]], ptr addrspace(5) [[TMP4]], align 8
-; COMMON-NEXT: [[TMP10:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP5]], ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[TMP4]], i32 5, i32 5)
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP4]])
-; COMMON-NEXT: [[TMP7:%.*]] = load double, ptr addrspace(5) [[TMP3]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP3]])
-; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } poison, double [[TMP7]], 0
-; COMMON-NEXT: [[TMP9:%.*]] = insertvalue { double, i1 } [[TMP8]], i1 [[TMP10]], 1
-; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP9]], 1
-; COMMON-NEXT: [[TMP6]] = extractvalue { double, i1 } [[TMP9]], 0
-; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
-; COMMON: atomicrmw.end:
-; COMMON-NEXT: ret double [[TMP6]]
-;
- %res = atomicrmw fmax ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst, align 4, !amdgpu.ignore.denormal.mode !0
- ret double %res
-}
-
-define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, double %value) {
-; COMMON-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(
-; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
-; COMMON-NEXT: [[TMP3:%.*]] = alloca double, align 8, addrspace(5)
-; COMMON-NEXT: [[TMP4:%.*]] = alloca double, align 8, addrspace(5)
-; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
-; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
-; COMMON: atomicrmw.start:
-; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
-; COMMON-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
-; COMMON-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP3]])
-; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP3]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP4]])
-; COMMON-NEXT: store double [[TMP2]], ptr addrspace(5) [[TMP4]], align 8
-; COMMON-NEXT: [[TMP10:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP5]], ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[TMP4]], i32 5, i32 5)
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP4]])
-; COMMON-NEXT: [[TMP7:%.*]] = load double, ptr addrspace(5) [[TMP3]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP3]])
-; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } poison, double [[TMP7]], 0
-; COMMON-NEXT: [[TMP9:%.*]] = insertvalue { double, i1 } [[TMP8]], i1 [[TMP10]], 1
-; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP9]], 1
-; COMMON-NEXT: [[TMP6]] = extractvalue { double, i1 } [[TMP9]], 0
-; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
-; COMMON: atomicrmw.end:
-; COMMON-NEXT: ret double [[TMP6]]
-;
- %res = atomicrmw fmax ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0, !amdgpu.ignore.denormal.mode !0
- ret double %res
-}
-
-define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, double %value) {
-; COMMON-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(
-; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
-; COMMON-NEXT: [[TMP3:%.*]] = alloca double, align 8, addrspace(5)
-; COMMON-NEXT: [[TMP4:%.*]] = alloca double, align 8, addrspace(5)
-; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
-; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
-; COMMON: atomicrmw.start:
-; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
-; COMMON-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
-; COMMON-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP3]])
-; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP3]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP4]])
-; COMMON-NEXT: store double [[TMP2]], ptr addrspace(5) [[TMP4]], align 8
-; COMMON-NEXT: [[TMP10:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP5]], ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[TMP4]], i32 5, i32 5)
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP4]])
-; COMMON-NEXT: [[TMP7:%.*]] = load double, ptr addrspace(5) [[TMP3]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP3]])
-; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } poison, double [[TMP7]], 0
-; COMMON-NEXT: [[TMP9:%.*]] = insertvalue { double, i1 } [[TMP8]], i1 [[TMP10]], 1
-; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP9]], 1
-; COMMON-NEXT: [[TMP6]] = extractvalue { double, i1 } [[TMP9]], 0
-; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
-; COMMON: atomicrmw.end:
-; COMMON-NEXT: ret double [[TMP6]]
-;
- %res = atomicrmw fmax ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory !0, !amdgpu.ignore.denormal.mode !0
- ret double %res
-}
-
-define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, double %value) {
-; COMMON-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
-; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
-; COMMON-NEXT: [[TMP3:%.*]] = alloca double, align 8, addrspace(5)
-; COMMON-NEXT: [[TMP4:%.*]] = alloca double, align 8, addrspace(5)
-; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
-; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
-; COMMON: atomicrmw.start:
-; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
-; COMMON-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
-; COMMON-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP3]])
-; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP3]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP4]])
-; COMMON-NEXT: store double [[TMP2]], ptr addrspace(5) [[TMP4]], align 8
-; COMMON-NEXT: [[TMP10:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP5]], ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[TMP4]], i32 5, i32 5)
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP4]])
-; COMMON-NEXT: [[TMP7:%.*]] = load double, ptr addrspace(5) [[TMP3]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP3]])
-; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } poison, double [[TMP7]], 0
-; COMMON-NEXT: [[TMP9:%.*]] = insertvalue { double, i1 } [[TMP8]], i1 [[TMP10]], 1
-; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP9]], 1
-; COMMON-NEXT: [[TMP6]] = extractvalue { double, i1 } [[TMP9]], 0
-; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
-; COMMON: atomicrmw.end:
-; COMMON-NEXT: ret double [[TMP6]]
-;
- %res = atomicrmw fmax ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0, !amdgpu.ignore.denormal.mode !0
+ %res = atomicrmw fmin ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
ret double %res
}
-;---------------------------------------------------------------------
-; atomicrmw fmin
-;---------------------------------------------------------------------
-
-define double @test_atomicrmw_fmin_f64_global_agent(ptr addrspace(1) %ptr, double %value) {
-; GFX803-LABEL: define double @test_atomicrmw_fmin_f64_global_agent(
+define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_ignore_denormal_mode(ptr addrspace(1) %ptr, double %value) {
+; GFX803-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_ignore_denormal_mode(
; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; GFX803-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]]
@@ -1806,7 +3122,7 @@ define double @test_atomicrmw_fmin_f64_global_agent(ptr addrspace(1) %ptr, doubl
; GFX803: atomicrmw.end:
; GFX803-NEXT: ret double [[TMP6]]
;
-; GFX906-LABEL: define double @test_atomicrmw_fmin_f64_global_agent(
+; GFX906-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_ignore_denormal_mode(
; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; GFX906-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]]
@@ -1823,7 +3139,7 @@ define double @test_atomicrmw_fmin_f64_global_agent(ptr addrspace(1) %ptr, doubl
; GFX906: atomicrmw.end:
; GFX906-NEXT: ret double [[TMP6]]
;
-; GFX908-LABEL: define double @test_atomicrmw_fmin_f64_global_agent(
+; GFX908-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_ignore_denormal_mode(
; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; GFX908-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]]
@@ -1840,7 +3156,7 @@ define double @test_atomicrmw_fmin_f64_global_agent(ptr addrspace(1) %ptr, doubl
; GFX908: atomicrmw.end:
; GFX908-NEXT: ret double [[TMP6]]
;
-; GFX90A-LABEL: define double @test_atomicrmw_fmin_f64_global_agent(
+; GFX90A-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_ignore_denormal_mode(
; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; GFX90A-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]]
@@ -1857,12 +3173,12 @@ define double @test_atomicrmw_fmin_f64_global_agent(ptr addrspace(1) %ptr, doubl
; GFX90A: atomicrmw.end:
; GFX90A-NEXT: ret double [[TMP6]]
;
-; GFX942-LABEL: define double @test_atomicrmw_fmin_f64_global_agent(
+; GFX942-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_ignore_denormal_mode(
; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
-; GFX942-NEXT: [[RES:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8
+; GFX942-NEXT: [[RES:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.ignore.denormal.mode [[META0]]
; GFX942-NEXT: ret double [[RES]]
;
-; GFX10-LABEL: define double @test_atomicrmw_fmin_f64_global_agent(
+; GFX10-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_ignore_denormal_mode(
; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; GFX10-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]]
@@ -1879,7 +3195,7 @@ define double @test_atomicrmw_fmin_f64_global_agent(ptr addrspace(1) %ptr, doubl
; GFX10: atomicrmw.end:
; GFX10-NEXT: ret double [[TMP6]]
;
-; GFX11-LABEL: define double @test_atomicrmw_fmin_f64_global_agent(
+; GFX11-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_ignore_denormal_mode(
; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; GFX11-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]]
@@ -1896,7 +3212,7 @@ define double @test_atomicrmw_fmin_f64_global_agent(ptr addrspace(1) %ptr, doubl
; GFX11: atomicrmw.end:
; GFX11-NEXT: ret double [[TMP6]]
;
-; GFX12-LABEL: define double @test_atomicrmw_fmin_f64_global_agent(
+; GFX12-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_ignore_denormal_mode(
; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; GFX12-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
; GFX12-NEXT: br label [[ATOMICRMW_START:%.*]]
@@ -1913,12 +3229,12 @@ define double @test_atomicrmw_fmin_f64_global_agent(ptr addrspace(1) %ptr, doubl
; GFX12: atomicrmw.end:
; GFX12-NEXT: ret double [[TMP6]]
;
- %res = atomicrmw fmin ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst
+ %res = atomicrmw fmin ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst, align 8, !amdgpu.ignore.denormal.mode !0
ret double %res
}
-define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, double %value) {
-; GFX803-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_no_fine_grained_memory(
+define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, double %value) {
+; GFX803-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(
; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; GFX803-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]]
@@ -1935,7 +3251,7 @@ define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_no_fine_grained_memo
; GFX803: atomicrmw.end:
; GFX803-NEXT: ret double [[TMP6]]
;
-; GFX906-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_no_fine_grained_memory(
+; GFX906-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(
; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; GFX906-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]]
@@ -1952,7 +3268,7 @@ define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_no_fine_grained_memo
; GFX906: atomicrmw.end:
; GFX906-NEXT: ret double [[TMP6]]
;
-; GFX908-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_no_fine_grained_memory(
+; GFX908-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(
; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; GFX908-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]]
@@ -1969,22 +3285,22 @@ define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_no_fine_grained_memo
; GFX908: atomicrmw.end:
; GFX908-NEXT: ret double [[TMP6]]
;
-; GFX90A-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_no_fine_grained_memory(
+; GFX90A-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(
; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
-; GFX90A-NEXT: [[RES:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]]
+; GFX90A-NEXT: [[RES:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
; GFX90A-NEXT: ret double [[RES]]
;
-; GFX942-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_no_fine_grained_memory(
+; GFX942-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(
; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
-; GFX942-NEXT: [[RES:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]]
+; GFX942-NEXT: [[RES:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
; GFX942-NEXT: ret double [[RES]]
;
-; GFX10-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_no_fine_grained_memory(
+; GFX10-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(
; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
-; GFX10-NEXT: [[RES:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]]
+; GFX10-NEXT: [[RES:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
; GFX10-NEXT: ret double [[RES]]
;
-; GFX11-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_no_fine_grained_memory(
+; GFX11-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(
; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; GFX11-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]]
@@ -2001,7 +3317,7 @@ define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_no_fine_grained_memo
; GFX11: atomicrmw.end:
; GFX11-NEXT: ret double [[TMP6]]
;
-; GFX12-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_no_fine_grained_memory(
+; GFX12-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(
; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; GFX12-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
; GFX12-NEXT: br label [[ATOMICRMW_START:%.*]]
@@ -2018,12 +3334,12 @@ define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_no_fine_grained_memo
; GFX12: atomicrmw.end:
; GFX12-NEXT: ret double [[TMP6]]
;
- %res = atomicrmw fmin ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
+ %res = atomicrmw fmin ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory !0, !amdgpu.ignore.denormal.mode !0
ret double %res
}
-define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, double %value) {
-; GFX803-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_no_remote_memory(
+define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, double %value) {
+; GFX803-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(
; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; GFX803-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]]
@@ -2040,7 +3356,7 @@ define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_no_remote_memory(ptr
; GFX803: atomicrmw.end:
; GFX803-NEXT: ret double [[TMP6]]
;
-; GFX906-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_no_remote_memory(
+; GFX906-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(
; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; GFX906-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]]
@@ -2057,7 +3373,7 @@ define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_no_remote_memory(ptr
; GFX906: atomicrmw.end:
; GFX906-NEXT: ret double [[TMP6]]
;
-; GFX908-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_no_remote_memory(
+; GFX908-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(
; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; GFX908-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]]
@@ -2074,46 +3390,46 @@ define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_no_remote_memory(ptr
; GFX908: atomicrmw.end:
; GFX908-NEXT: ret double [[TMP6]]
;
-; GFX90A-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_no_remote_memory(
+; GFX90A-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(
; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; GFX90A-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]]
; GFX90A: atomicrmw.start:
-; GFX90A-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[RES:%.*]], [[ATOMICRMW_START]] ]
+; GFX90A-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
; GFX90A-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]])
; GFX90A-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
; GFX90A-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
; GFX90A-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.remote.memory [[META0]]
; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
-; GFX90A-NEXT: [[RES]] = bitcast i64 [[NEWLOADED]] to double
+; GFX90A-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX90A: atomicrmw.end:
-; GFX90A-NEXT: ret double [[RES]]
+; GFX90A-NEXT: ret double [[TMP6]]
;
-; GFX942-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_no_remote_memory(
+; GFX942-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(
; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
-; GFX942-NEXT: [[RES:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.remote.memory [[META0]]
+; GFX942-NEXT: [[RES:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
; GFX942-NEXT: ret double [[RES]]
;
-; GFX10-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_no_remote_memory(
+; GFX10-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(
; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; GFX10-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]]
; GFX10: atomicrmw.start:
-; GFX10-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[RES:%.*]], [[ATOMICRMW_START]] ]
+; GFX10-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
; GFX10-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]])
; GFX10-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
; GFX10-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
; GFX10-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.remote.memory [[META0]]
; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
-; GFX10-NEXT: [[RES]] = bitcast i64 [[NEWLOADED]] to double
+; GFX10-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX10: atomicrmw.end:
-; GFX10-NEXT: ret double [[RES]]
+; GFX10-NEXT: ret double [[TMP6]]
;
-; GFX11-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_no_remote_memory(
+; GFX11-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(
; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; GFX11-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]]
@@ -2130,7 +3446,7 @@ define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_no_remote_memory(ptr
; GFX11: atomicrmw.end:
; GFX11-NEXT: ret double [[TMP6]]
;
-; GFX12-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_no_remote_memory(
+; GFX12-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(
; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; GFX12-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
; GFX12-NEXT: br label [[ATOMICRMW_START:%.*]]
@@ -2147,12 +3463,12 @@ define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_no_remote_memory(ptr
; GFX12: atomicrmw.end:
; GFX12-NEXT: ret double [[TMP6]]
;
- %res = atomicrmw fmin ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst, !amdgpu.no.remote.memory !0
+ %res = atomicrmw fmin ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst, align 8, !amdgpu.no.remote.memory !0, !amdgpu.ignore.denormal.mode !0
ret double %res
}
-define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, double %value) {
-; GFX803-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
+define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, double %value) {
+; GFX803-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; GFX803-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]]
@@ -2169,7 +3485,7 @@ define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_no_fine_grained_memo
; GFX803: atomicrmw.end:
; GFX803-NEXT: ret double [[TMP6]]
;
-; GFX906-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
+; GFX906-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; GFX906-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]]
@@ -2186,7 +3502,7 @@ define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_no_fine_grained_memo
; GFX906: atomicrmw.end:
; GFX906-NEXT: ret double [[TMP6]]
;
-; GFX908-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
+; GFX908-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; GFX908-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]]
@@ -2203,22 +3519,22 @@ define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_no_fine_grained_memo
; GFX908: atomicrmw.end:
; GFX908-NEXT: ret double [[TMP6]]
;
-; GFX90A-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
+; GFX90A-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
-; GFX90A-NEXT: [[RES:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
+; GFX90A-NEXT: [[RES:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
; GFX90A-NEXT: ret double [[RES]]
;
-; GFX942-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
+; GFX942-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
-; GFX942-NEXT: [[RES:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
+; GFX942-NEXT: [[RES:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
; GFX942-NEXT: ret double [[RES]]
;
-; GFX10-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
+; GFX10-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
-; GFX10-NEXT: [[RES:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
+; GFX10-NEXT: [[RES:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
; GFX10-NEXT: ret double [[RES]]
;
-; GFX11-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
+; GFX11-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; GFX11-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]]
@@ -2235,7 +3551,7 @@ define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_no_fine_grained_memo
; GFX11: atomicrmw.end:
; GFX11-NEXT: ret double [[TMP6]]
;
-; GFX12-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
+; GFX12-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; GFX12-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
; GFX12-NEXT: br label [[ATOMICRMW_START:%.*]]
@@ -2252,131 +3568,7 @@ define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_no_fine_grained_memo
; GFX12: atomicrmw.end:
; GFX12-NEXT: ret double [[TMP6]]
;
- %res = atomicrmw fmin ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
- ret double %res
-}
-
-define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_ignore_denormal_mode(ptr addrspace(1) %ptr, double %value) {
-; COMMON-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_ignore_denormal_mode(
-; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
-; COMMON-NEXT: [[TMP3:%.*]] = alloca double, align 8, addrspace(5)
-; COMMON-NEXT: [[TMP4:%.*]] = alloca double, align 8, addrspace(5)
-; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
-; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
-; COMMON: atomicrmw.start:
-; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
-; COMMON-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]])
-; COMMON-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP3]])
-; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP3]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP4]])
-; COMMON-NEXT: store double [[TMP2]], ptr addrspace(5) [[TMP4]], align 8
-; COMMON-NEXT: [[TMP10:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP5]], ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[TMP4]], i32 5, i32 5)
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP4]])
-; COMMON-NEXT: [[TMP7:%.*]] = load double, ptr addrspace(5) [[TMP3]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP3]])
-; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } poison, double [[TMP7]], 0
-; COMMON-NEXT: [[TMP9:%.*]] = insertvalue { double, i1 } [[TMP8]], i1 [[TMP10]], 1
-; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP9]], 1
-; COMMON-NEXT: [[TMP6]] = extractvalue { double, i1 } [[TMP9]], 0
-; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
-; COMMON: atomicrmw.end:
-; COMMON-NEXT: ret double [[TMP6]]
-;
- %res = atomicrmw fmin ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst, align 4, !amdgpu.ignore.denormal.mode !0
- ret double %res
-}
-
-define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, double %value) {
-; COMMON-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(
-; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
-; COMMON-NEXT: [[TMP3:%.*]] = alloca double, align 8, addrspace(5)
-; COMMON-NEXT: [[TMP4:%.*]] = alloca double, align 8, addrspace(5)
-; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
-; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
-; COMMON: atomicrmw.start:
-; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
-; COMMON-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]])
-; COMMON-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP3]])
-; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP3]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP4]])
-; COMMON-NEXT: store double [[TMP2]], ptr addrspace(5) [[TMP4]], align 8
-; COMMON-NEXT: [[TMP10:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP5]], ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[TMP4]], i32 5, i32 5)
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP4]])
-; COMMON-NEXT: [[TMP7:%.*]] = load double, ptr addrspace(5) [[TMP3]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP3]])
-; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } poison, double [[TMP7]], 0
-; COMMON-NEXT: [[TMP9:%.*]] = insertvalue { double, i1 } [[TMP8]], i1 [[TMP10]], 1
-; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP9]], 1
-; COMMON-NEXT: [[TMP6]] = extractvalue { double, i1 } [[TMP9]], 0
-; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
-; COMMON: atomicrmw.end:
-; COMMON-NEXT: ret double [[TMP6]]
-;
- %res = atomicrmw fmin ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0, !amdgpu.ignore.denormal.mode !0
- ret double %res
-}
-
-define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, double %value) {
-; COMMON-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(
-; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
-; COMMON-NEXT: [[TMP3:%.*]] = alloca double, align 8, addrspace(5)
-; COMMON-NEXT: [[TMP4:%.*]] = alloca double, align 8, addrspace(5)
-; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
-; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
-; COMMON: atomicrmw.start:
-; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
-; COMMON-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]])
-; COMMON-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP3]])
-; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP3]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP4]])
-; COMMON-NEXT: store double [[TMP2]], ptr addrspace(5) [[TMP4]], align 8
-; COMMON-NEXT: [[TMP10:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP5]], ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[TMP4]], i32 5, i32 5)
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP4]])
-; COMMON-NEXT: [[TMP7:%.*]] = load double, ptr addrspace(5) [[TMP3]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP3]])
-; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } poison, double [[TMP7]], 0
-; COMMON-NEXT: [[TMP9:%.*]] = insertvalue { double, i1 } [[TMP8]], i1 [[TMP10]], 1
-; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP9]], 1
-; COMMON-NEXT: [[TMP6]] = extractvalue { double, i1 } [[TMP9]], 0
-; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
-; COMMON: atomicrmw.end:
-; COMMON-NEXT: ret double [[TMP6]]
-;
- %res = atomicrmw fmin ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory !0, !amdgpu.ignore.denormal.mode !0
- ret double %res
-}
-
-define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, double %value) {
-; COMMON-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
-; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
-; COMMON-NEXT: [[TMP3:%.*]] = alloca double, align 8, addrspace(5)
-; COMMON-NEXT: [[TMP4:%.*]] = alloca double, align 8, addrspace(5)
-; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
-; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
-; COMMON: atomicrmw.start:
-; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
-; COMMON-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]])
-; COMMON-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP3]])
-; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP3]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP4]])
-; COMMON-NEXT: store double [[TMP2]], ptr addrspace(5) [[TMP4]], align 8
-; COMMON-NEXT: [[TMP10:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP5]], ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[TMP4]], i32 5, i32 5)
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP4]])
-; COMMON-NEXT: [[TMP7:%.*]] = load double, ptr addrspace(5) [[TMP3]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP3]])
-; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } poison, double [[TMP7]], 0
-; COMMON-NEXT: [[TMP9:%.*]] = insertvalue { double, i1 } [[TMP8]], i1 [[TMP10]], 1
-; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP9]], 1
-; COMMON-NEXT: [[TMP6]] = extractvalue { double, i1 } [[TMP9]], 0
-; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
-; COMMON: atomicrmw.end:
-; COMMON-NEXT: ret double [[TMP6]]
-;
- %res = atomicrmw fmin ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0, !amdgpu.ignore.denormal.mode !0
+ %res = atomicrmw fmin ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0, !amdgpu.ignore.denormal.mode !0
ret double %res
}
diff --git a/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-f64-system.ll b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-f64-system.ll
index 72ecbf7708d86..f9c123f5b9427 100644
--- a/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-f64-system.ll
+++ b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-f64-system.ll
@@ -682,417 +682,1540 @@ define double @test_atomicrmw_fadd_f64_global_system__amdgpu_no_fine_grained_mem
define double @test_atomicrmw_fadd_f64_global_system__amdgpu_ignore_denormal_mode(ptr addrspace(1) %ptr, double %value) {
; COMMON-LABEL: define double @test_atomicrmw_fadd_f64_global_system__amdgpu_ignore_denormal_mode(
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
-; COMMON-NEXT: [[TMP1:%.*]] = alloca double, align 8, addrspace(5)
-; COMMON-NEXT: [[TMP2:%.*]] = alloca double, align 8, addrspace(5)
-; COMMON-NEXT: [[TMP3:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
+; COMMON-NEXT: [[TMP3:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
; COMMON: atomicrmw.start:
; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
-; COMMON-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]])
-; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
-; COMMON-NEXT: store double [[NEW]], ptr addrspace(5) [[TMP2]], align 8
-; COMMON-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP4]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
-; COMMON-NEXT: [[TMP6:%.*]] = load double, ptr addrspace(5) [[TMP1]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]])
-; COMMON-NEXT: [[TMP7:%.*]] = insertvalue { double, i1 } poison, double [[TMP6]], 0
-; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } [[TMP7]], i1 [[TMP5]], 1
-; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP8]], 1
-; COMMON-NEXT: [[NEWLOADED]] = extractvalue { double, i1 } [[TMP8]], 0
+; COMMON-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
+; COMMON-NEXT: [[TMP5:%.*]] = bitcast double [[LOADED]] to i64
+; COMMON-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP5]], i64 [[TMP2]] seq_cst seq_cst, align 8
+; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
+; COMMON-NEXT: [[NEWLOADED1:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
+; COMMON-NEXT: [[NEWLOADED]] = bitcast i64 [[NEWLOADED1]] to double
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; COMMON: atomicrmw.end:
; COMMON-NEXT: ret double [[NEWLOADED]]
;
- %res = atomicrmw fadd ptr addrspace(1) %ptr, double %value seq_cst, align 4, !amdgpu.ignore.denormal.mode !0
- ret double %res
-}
-
-define double @test_atomicrmw_fadd_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, double %value) {
-; COMMON-LABEL: define double @test_atomicrmw_fadd_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(
-; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
-; COMMON-NEXT: [[TMP1:%.*]] = alloca double, align 8, addrspace(5)
-; COMMON-NEXT: [[TMP2:%.*]] = alloca double, align 8, addrspace(5)
-; COMMON-NEXT: [[TMP3:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
-; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
-; COMMON: atomicrmw.start:
-; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
-; COMMON-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
-; COMMON-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]])
-; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
-; COMMON-NEXT: store double [[NEW]], ptr addrspace(5) [[TMP2]], align 8
-; COMMON-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP4]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
-; COMMON-NEXT: [[TMP6:%.*]] = load double, ptr addrspace(5) [[TMP1]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]])
-; COMMON-NEXT: [[TMP7:%.*]] = insertvalue { double, i1 } poison, double [[TMP6]], 0
-; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } [[TMP7]], i1 [[TMP5]], 1
-; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP8]], 1
-; COMMON-NEXT: [[NEWLOADED]] = extractvalue { double, i1 } [[TMP8]], 0
-; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
-; COMMON: atomicrmw.end:
-; COMMON-NEXT: ret double [[NEWLOADED]]
+ %res = atomicrmw fadd ptr addrspace(1) %ptr, double %value seq_cst, align 8, !amdgpu.ignore.denormal.mode !0
+ ret double %res
+}
+
+define double @test_atomicrmw_fadd_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, double %value) {
+; GFX803-LABEL: define double @test_atomicrmw_fadd_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(
+; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX803-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX803: atomicrmw.start:
+; GFX803-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
+; GFX803-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
+; GFX803-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
+; GFX803-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
+; GFX803-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]]
+; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
+; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
+; GFX803-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
+; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX803: atomicrmw.end:
+; GFX803-NEXT: ret double [[TMP5]]
+;
+; GFX906-LABEL: define double @test_atomicrmw_fadd_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(
+; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX906-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX906: atomicrmw.start:
+; GFX906-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
+; GFX906-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
+; GFX906-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
+; GFX906-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
+; GFX906-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]]
+; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
+; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
+; GFX906-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
+; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX906: atomicrmw.end:
+; GFX906-NEXT: ret double [[TMP5]]
+;
+; GFX908-LABEL: define double @test_atomicrmw_fadd_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(
+; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX908-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX908: atomicrmw.start:
+; GFX908-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
+; GFX908-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
+; GFX908-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
+; GFX908-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
+; GFX908-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]]
+; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
+; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
+; GFX908-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
+; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX908: atomicrmw.end:
+; GFX908-NEXT: ret double [[TMP5]]
+;
+; GFX90A-LABEL: define double @test_atomicrmw_fadd_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(
+; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX90A-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], double [[VALUE]] seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
+; GFX90A-NEXT: ret double [[RES]]
+;
+; GFX942-LABEL: define double @test_atomicrmw_fadd_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(
+; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], double [[VALUE]] seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
+; GFX942-NEXT: ret double [[RES]]
+;
+; GFX10-LABEL: define double @test_atomicrmw_fadd_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(
+; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX10-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX10: atomicrmw.start:
+; GFX10-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
+; GFX10-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
+; GFX10-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
+; GFX10-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
+; GFX10-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]]
+; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
+; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
+; GFX10-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
+; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX10: atomicrmw.end:
+; GFX10-NEXT: ret double [[TMP5]]
+;
+; GFX11-LABEL: define double @test_atomicrmw_fadd_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(
+; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX11-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX11: atomicrmw.start:
+; GFX11-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
+; GFX11-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
+; GFX11-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
+; GFX11-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
+; GFX11-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]]
+; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
+; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
+; GFX11-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
+; GFX11-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX11: atomicrmw.end:
+; GFX11-NEXT: ret double [[TMP5]]
+;
+; GFX12-LABEL: define double @test_atomicrmw_fadd_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(
+; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX12-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX12-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX12: atomicrmw.start:
+; GFX12-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
+; GFX12-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
+; GFX12-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
+; GFX12-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
+; GFX12-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]]
+; GFX12-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
+; GFX12-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
+; GFX12-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
+; GFX12-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX12: atomicrmw.end:
+; GFX12-NEXT: ret double [[TMP5]]
+;
+ %res = atomicrmw fadd ptr addrspace(1) %ptr, double %value seq_cst, align 8, !amdgpu.no.fine.grained.memory !0, !amdgpu.ignore.denormal.mode !0
+ ret double %res
+}
+
+define double @test_atomicrmw_fadd_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, double %value) {
+; GFX803-LABEL: define double @test_atomicrmw_fadd_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(
+; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX803-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX803: atomicrmw.start:
+; GFX803-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
+; GFX803-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
+; GFX803-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
+; GFX803-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
+; GFX803-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] seq_cst seq_cst, align 8, !amdgpu.no.remote.memory [[META0]]
+; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
+; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
+; GFX803-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
+; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX803: atomicrmw.end:
+; GFX803-NEXT: ret double [[TMP5]]
+;
+; GFX906-LABEL: define double @test_atomicrmw_fadd_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(
+; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX906-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX906: atomicrmw.start:
+; GFX906-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
+; GFX906-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
+; GFX906-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
+; GFX906-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
+; GFX906-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] seq_cst seq_cst, align 8, !amdgpu.no.remote.memory [[META0]]
+; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
+; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
+; GFX906-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
+; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX906: atomicrmw.end:
+; GFX906-NEXT: ret double [[TMP5]]
+;
+; GFX908-LABEL: define double @test_atomicrmw_fadd_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(
+; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX908-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX908: atomicrmw.start:
+; GFX908-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
+; GFX908-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
+; GFX908-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
+; GFX908-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
+; GFX908-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] seq_cst seq_cst, align 8, !amdgpu.no.remote.memory [[META0]]
+; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
+; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
+; GFX908-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
+; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX908: atomicrmw.end:
+; GFX908-NEXT: ret double [[TMP5]]
+;
+; GFX90A-LABEL: define double @test_atomicrmw_fadd_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(
+; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX90A-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX90A: atomicrmw.start:
+; GFX90A-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
+; GFX90A-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
+; GFX90A-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
+; GFX90A-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
+; GFX90A-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] seq_cst seq_cst, align 8, !amdgpu.no.remote.memory [[META0]]
+; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
+; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
+; GFX90A-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
+; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX90A: atomicrmw.end:
+; GFX90A-NEXT: ret double [[TMP5]]
+;
+; GFX942-LABEL: define double @test_atomicrmw_fadd_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(
+; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], double [[VALUE]] seq_cst, align 8, !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
+; GFX942-NEXT: ret double [[RES]]
+;
+; GFX10-LABEL: define double @test_atomicrmw_fadd_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(
+; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX10-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX10: atomicrmw.start:
+; GFX10-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
+; GFX10-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
+; GFX10-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
+; GFX10-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
+; GFX10-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] seq_cst seq_cst, align 8, !amdgpu.no.remote.memory [[META0]]
+; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
+; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
+; GFX10-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
+; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX10: atomicrmw.end:
+; GFX10-NEXT: ret double [[TMP5]]
+;
+; GFX11-LABEL: define double @test_atomicrmw_fadd_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(
+; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX11-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX11: atomicrmw.start:
+; GFX11-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
+; GFX11-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
+; GFX11-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
+; GFX11-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
+; GFX11-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] seq_cst seq_cst, align 8, !amdgpu.no.remote.memory [[META0]]
+; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
+; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
+; GFX11-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
+; GFX11-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX11: atomicrmw.end:
+; GFX11-NEXT: ret double [[TMP5]]
+;
+; GFX12-LABEL: define double @test_atomicrmw_fadd_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(
+; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX12-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX12-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX12: atomicrmw.start:
+; GFX12-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
+; GFX12-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
+; GFX12-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
+; GFX12-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
+; GFX12-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] seq_cst seq_cst, align 8, !amdgpu.no.remote.memory [[META0]]
+; GFX12-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
+; GFX12-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
+; GFX12-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
+; GFX12-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX12: atomicrmw.end:
+; GFX12-NEXT: ret double [[TMP5]]
+;
+ %res = atomicrmw fadd ptr addrspace(1) %ptr, double %value seq_cst, align 8, !amdgpu.no.remote.memory !0, !amdgpu.ignore.denormal.mode !0
+ ret double %res
+}
+
+define double @test_atomicrmw_fadd_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, double %value) {
+; GFX803-LABEL: define double @test_atomicrmw_fadd_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
+; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX803-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX803: atomicrmw.start:
+; GFX803-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
+; GFX803-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
+; GFX803-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
+; GFX803-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
+; GFX803-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
+; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
+; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
+; GFX803-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
+; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX803: atomicrmw.end:
+; GFX803-NEXT: ret double [[TMP5]]
+;
+; GFX906-LABEL: define double @test_atomicrmw_fadd_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
+; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX906-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX906: atomicrmw.start:
+; GFX906-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
+; GFX906-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
+; GFX906-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
+; GFX906-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
+; GFX906-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
+; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
+; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
+; GFX906-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
+; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX906: atomicrmw.end:
+; GFX906-NEXT: ret double [[TMP5]]
+;
+; GFX908-LABEL: define double @test_atomicrmw_fadd_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
+; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX908-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX908: atomicrmw.start:
+; GFX908-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
+; GFX908-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
+; GFX908-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
+; GFX908-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
+; GFX908-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
+; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
+; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
+; GFX908-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
+; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX908: atomicrmw.end:
+; GFX908-NEXT: ret double [[TMP5]]
+;
+; GFX90A-LABEL: define double @test_atomicrmw_fadd_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
+; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX90A-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], double [[VALUE]] seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
+; GFX90A-NEXT: ret double [[RES]]
+;
+; GFX942-LABEL: define double @test_atomicrmw_fadd_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
+; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], double [[VALUE]] seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
+; GFX942-NEXT: ret double [[RES]]
+;
+; GFX10-LABEL: define double @test_atomicrmw_fadd_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
+; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX10-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX10: atomicrmw.start:
+; GFX10-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
+; GFX10-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
+; GFX10-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
+; GFX10-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
+; GFX10-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
+; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
+; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
+; GFX10-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
+; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX10: atomicrmw.end:
+; GFX10-NEXT: ret double [[TMP5]]
+;
+; GFX11-LABEL: define double @test_atomicrmw_fadd_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
+; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX11-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX11: atomicrmw.start:
+; GFX11-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
+; GFX11-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
+; GFX11-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
+; GFX11-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
+; GFX11-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
+; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
+; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
+; GFX11-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
+; GFX11-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX11: atomicrmw.end:
+; GFX11-NEXT: ret double [[TMP5]]
+;
+; GFX12-LABEL: define double @test_atomicrmw_fadd_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
+; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX12-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX12-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX12: atomicrmw.start:
+; GFX12-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
+; GFX12-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
+; GFX12-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
+; GFX12-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
+; GFX12-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
+; GFX12-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
+; GFX12-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
+; GFX12-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
+; GFX12-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX12: atomicrmw.end:
+; GFX12-NEXT: ret double [[TMP5]]
+;
+ %res = atomicrmw fadd ptr addrspace(1) %ptr, double %value seq_cst, align 8, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0, !amdgpu.ignore.denormal.mode !0
+ ret double %res
+}
+
+define double @test_atomicrmw_fadd_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_daz(ptr addrspace(1) %ptr, double %value) #0 {
+; GFX803-LABEL: define double @test_atomicrmw_fadd_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_daz(
+; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR1]] {
+; GFX803-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX803: atomicrmw.start:
+; GFX803-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
+; GFX803-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
+; GFX803-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
+; GFX803-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
+; GFX803-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
+; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
+; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
+; GFX803-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
+; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX803: atomicrmw.end:
+; GFX803-NEXT: ret double [[TMP5]]
+;
+; GFX906-LABEL: define double @test_atomicrmw_fadd_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_daz(
+; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR1]] {
+; GFX906-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX906: atomicrmw.start:
+; GFX906-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
+; GFX906-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
+; GFX906-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
+; GFX906-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
+; GFX906-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
+; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
+; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
+; GFX906-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
+; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX906: atomicrmw.end:
+; GFX906-NEXT: ret double [[TMP5]]
+;
+; GFX908-LABEL: define double @test_atomicrmw_fadd_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_daz(
+; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR1]] {
+; GFX908-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX908: atomicrmw.start:
+; GFX908-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
+; GFX908-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
+; GFX908-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
+; GFX908-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
+; GFX908-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
+; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
+; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
+; GFX908-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
+; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX908: atomicrmw.end:
+; GFX908-NEXT: ret double [[TMP5]]
+;
+; GFX90A-LABEL: define double @test_atomicrmw_fadd_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_daz(
+; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR1]] {
+; GFX90A-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], double [[VALUE]] seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
+; GFX90A-NEXT: ret double [[RES]]
+;
+; GFX942-LABEL: define double @test_atomicrmw_fadd_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_daz(
+; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR1]] {
+; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], double [[VALUE]] seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
+; GFX942-NEXT: ret double [[RES]]
+;
+; GFX10-LABEL: define double @test_atomicrmw_fadd_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_daz(
+; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR1]] {
+; GFX10-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX10: atomicrmw.start:
+; GFX10-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
+; GFX10-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
+; GFX10-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
+; GFX10-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
+; GFX10-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
+; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
+; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
+; GFX10-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
+; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX10: atomicrmw.end:
+; GFX10-NEXT: ret double [[TMP5]]
+;
+; GFX11-LABEL: define double @test_atomicrmw_fadd_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_daz(
+; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR1]] {
+; GFX11-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX11: atomicrmw.start:
+; GFX11-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
+; GFX11-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
+; GFX11-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
+; GFX11-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
+; GFX11-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
+; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
+; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
+; GFX11-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
+; GFX11-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX11: atomicrmw.end:
+; GFX11-NEXT: ret double [[TMP5]]
+;
+; GFX12-LABEL: define double @test_atomicrmw_fadd_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_daz(
+; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR1]] {
+; GFX12-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX12-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX12: atomicrmw.start:
+; GFX12-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
+; GFX12-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
+; GFX12-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
+; GFX12-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
+; GFX12-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
+; GFX12-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
+; GFX12-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
+; GFX12-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
+; GFX12-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX12: atomicrmw.end:
+; GFX12-NEXT: ret double [[TMP5]]
+;
+ %res = atomicrmw fadd ptr addrspace(1) %ptr, double %value seq_cst, align 8, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0, !amdgpu.ignore.denormal.mode !0
+ ret double %res
+}
+
+define double @test_atomicrmw_fadd_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_dynamic(ptr addrspace(1) %ptr, double %value) #1 {
+; GFX803-LABEL: define double @test_atomicrmw_fadd_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_dynamic(
+; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR2]] {
+; GFX803-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX803: atomicrmw.start:
+; GFX803-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
+; GFX803-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
+; GFX803-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
+; GFX803-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
+; GFX803-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
+; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
+; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
+; GFX803-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
+; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX803: atomicrmw.end:
+; GFX803-NEXT: ret double [[TMP5]]
+;
+; GFX906-LABEL: define double @test_atomicrmw_fadd_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_dynamic(
+; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR2]] {
+; GFX906-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX906: atomicrmw.start:
+; GFX906-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
+; GFX906-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
+; GFX906-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
+; GFX906-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
+; GFX906-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
+; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
+; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
+; GFX906-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
+; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX906: atomicrmw.end:
+; GFX906-NEXT: ret double [[TMP5]]
+;
+; GFX908-LABEL: define double @test_atomicrmw_fadd_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_dynamic(
+; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR2]] {
+; GFX908-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX908: atomicrmw.start:
+; GFX908-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
+; GFX908-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
+; GFX908-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
+; GFX908-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
+; GFX908-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
+; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
+; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
+; GFX908-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
+; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX908: atomicrmw.end:
+; GFX908-NEXT: ret double [[TMP5]]
+;
+; GFX90A-LABEL: define double @test_atomicrmw_fadd_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_dynamic(
+; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR2]] {
+; GFX90A-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], double [[VALUE]] seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
+; GFX90A-NEXT: ret double [[RES]]
+;
+; GFX942-LABEL: define double @test_atomicrmw_fadd_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_dynamic(
+; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR2]] {
+; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], double [[VALUE]] seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
+; GFX942-NEXT: ret double [[RES]]
+;
+; GFX10-LABEL: define double @test_atomicrmw_fadd_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_dynamic(
+; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR2]] {
+; GFX10-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX10: atomicrmw.start:
+; GFX10-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
+; GFX10-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
+; GFX10-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
+; GFX10-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
+; GFX10-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
+; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
+; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
+; GFX10-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
+; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX10: atomicrmw.end:
+; GFX10-NEXT: ret double [[TMP5]]
+;
+; GFX11-LABEL: define double @test_atomicrmw_fadd_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_dynamic(
+; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR2]] {
+; GFX11-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX11: atomicrmw.start:
+; GFX11-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
+; GFX11-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
+; GFX11-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
+; GFX11-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
+; GFX11-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
+; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
+; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
+; GFX11-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
+; GFX11-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX11: atomicrmw.end:
+; GFX11-NEXT: ret double [[TMP5]]
+;
+; GFX12-LABEL: define double @test_atomicrmw_fadd_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_dynamic(
+; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR2]] {
+; GFX12-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX12-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX12: atomicrmw.start:
+; GFX12-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
+; GFX12-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
+; GFX12-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
+; GFX12-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
+; GFX12-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
+; GFX12-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
+; GFX12-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
+; GFX12-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
+; GFX12-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX12: atomicrmw.end:
+; GFX12-NEXT: ret double [[TMP5]]
+;
+ %res = atomicrmw fadd ptr addrspace(1) %ptr, double %value seq_cst, align 8, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0, !amdgpu.ignore.denormal.mode !0
+ ret double %res
+}
+
+;---------------------------------------------------------------------
+; atomicrmw fsub
+;---------------------------------------------------------------------
+
+define double @test_atomicrmw_fsub_f64_global_system(ptr addrspace(1) %ptr, double %value) {
+; COMMON-LABEL: define double @test_atomicrmw_fsub_f64_global_system(
+; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
+; COMMON: atomicrmw.start:
+; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[RES:%.*]], [[ATOMICRMW_START]] ]
+; COMMON-NEXT: [[NEW:%.*]] = fsub double [[LOADED]], [[VALUE]]
+; COMMON-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
+; COMMON-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
+; COMMON-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] seq_cst seq_cst, align 8
+; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
+; COMMON-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
+; COMMON-NEXT: [[RES]] = bitcast i64 [[NEWLOADED]] to double
+; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; COMMON: atomicrmw.end:
+; COMMON-NEXT: ret double [[RES]]
+;
+ %res = atomicrmw fsub ptr addrspace(1) %ptr, double %value seq_cst
+ ret double %res
+}
+
+define double @test_atomicrmw_fsub_f64_global_system__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, double %value) {
+; COMMON-LABEL: define double @test_atomicrmw_fsub_f64_global_system__amdgpu_no_fine_grained_memory(
+; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
+; COMMON: atomicrmw.start:
+; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[RES:%.*]], [[ATOMICRMW_START]] ]
+; COMMON-NEXT: [[NEW:%.*]] = fsub double [[LOADED]], [[VALUE]]
+; COMMON-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
+; COMMON-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
+; COMMON-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]]
+; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
+; COMMON-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
+; COMMON-NEXT: [[RES]] = bitcast i64 [[NEWLOADED]] to double
+; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; COMMON: atomicrmw.end:
+; COMMON-NEXT: ret double [[RES]]
+;
+ %res = atomicrmw fsub ptr addrspace(1) %ptr, double %value seq_cst, !amdgpu.no.fine.grained.memory !0
+ ret double %res
+}
+
+define double @test_atomicrmw_fsub_f64_global_system__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, double %value) {
+; COMMON-LABEL: define double @test_atomicrmw_fsub_f64_global_system__amdgpu_no_remote_memory(
+; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
+; COMMON: atomicrmw.start:
+; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[RES:%.*]], [[ATOMICRMW_START]] ]
+; COMMON-NEXT: [[NEW:%.*]] = fsub double [[LOADED]], [[VALUE]]
+; COMMON-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
+; COMMON-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
+; COMMON-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] seq_cst seq_cst, align 8, !amdgpu.no.remote.memory [[META0]]
+; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
+; COMMON-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
+; COMMON-NEXT: [[RES]] = bitcast i64 [[NEWLOADED]] to double
+; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; COMMON: atomicrmw.end:
+; COMMON-NEXT: ret double [[RES]]
+;
+ %res = atomicrmw fsub ptr addrspace(1) %ptr, double %value seq_cst, !amdgpu.no.remote.memory !0
+ ret double %res
+}
+
+define double @test_atomicrmw_fsub_f64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, double %value) {
+; COMMON-LABEL: define double @test_atomicrmw_fsub_f64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
+; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
+; COMMON: atomicrmw.start:
+; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[RES:%.*]], [[ATOMICRMW_START]] ]
+; COMMON-NEXT: [[NEW:%.*]] = fsub double [[LOADED]], [[VALUE]]
+; COMMON-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
+; COMMON-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
+; COMMON-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
+; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
+; COMMON-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
+; COMMON-NEXT: [[RES]] = bitcast i64 [[NEWLOADED]] to double
+; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; COMMON: atomicrmw.end:
+; COMMON-NEXT: ret double [[RES]]
+;
+ %res = atomicrmw fsub ptr addrspace(1) %ptr, double %value seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
+ ret double %res
+}
+
+define double @test_atomicrmw_fsub_f64_global_system__amdgpu_ignore_denormal_mode(ptr addrspace(1) %ptr, double %value) {
+; COMMON-LABEL: define double @test_atomicrmw_fsub_f64_global_system__amdgpu_ignore_denormal_mode(
+; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
+; COMMON: atomicrmw.start:
+; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
+; COMMON-NEXT: [[NEW:%.*]] = fsub double [[LOADED]], [[VALUE]]
+; COMMON-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
+; COMMON-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
+; COMMON-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] seq_cst seq_cst, align 8
+; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
+; COMMON-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
+; COMMON-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
+; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; COMMON: atomicrmw.end:
+; COMMON-NEXT: ret double [[TMP5]]
+;
+ %res = atomicrmw fsub ptr addrspace(1) %ptr, double %value seq_cst, align 8, !amdgpu.ignore.denormal.mode !0
+ ret double %res
+}
+
+define double @test_atomicrmw_fsub_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, double %value) {
+; COMMON-LABEL: define double @test_atomicrmw_fsub_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(
+; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
+; COMMON: atomicrmw.start:
+; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
+; COMMON-NEXT: [[NEW:%.*]] = fsub double [[LOADED]], [[VALUE]]
+; COMMON-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
+; COMMON-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
+; COMMON-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]]
+; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
+; COMMON-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
+; COMMON-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
+; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; COMMON: atomicrmw.end:
+; COMMON-NEXT: ret double [[TMP5]]
+;
+ %res = atomicrmw fsub ptr addrspace(1) %ptr, double %value seq_cst, align 8, !amdgpu.no.fine.grained.memory !0, !amdgpu.ignore.denormal.mode !0
+ ret double %res
+}
+
+define double @test_atomicrmw_fsub_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, double %value) {
+; COMMON-LABEL: define double @test_atomicrmw_fsub_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(
+; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
+; COMMON: atomicrmw.start:
+; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
+; COMMON-NEXT: [[NEW:%.*]] = fsub double [[LOADED]], [[VALUE]]
+; COMMON-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
+; COMMON-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
+; COMMON-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] seq_cst seq_cst, align 8, !amdgpu.no.remote.memory [[META0]]
+; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
+; COMMON-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
+; COMMON-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
+; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; COMMON: atomicrmw.end:
+; COMMON-NEXT: ret double [[TMP5]]
+;
+ %res = atomicrmw fsub ptr addrspace(1) %ptr, double %value seq_cst, align 8, !amdgpu.no.remote.memory !0, !amdgpu.ignore.denormal.mode !0
+ ret double %res
+}
+
+define double @test_atomicrmw_fsub_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, double %value) {
+; COMMON-LABEL: define double @test_atomicrmw_fsub_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
+; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
+; COMMON: atomicrmw.start:
+; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
+; COMMON-NEXT: [[NEW:%.*]] = fsub double [[LOADED]], [[VALUE]]
+; COMMON-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
+; COMMON-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
+; COMMON-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
+; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
+; COMMON-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
+; COMMON-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
+; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; COMMON: atomicrmw.end:
+; COMMON-NEXT: ret double [[TMP5]]
+;
+ %res = atomicrmw fsub ptr addrspace(1) %ptr, double %value seq_cst, align 8, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0, !amdgpu.ignore.denormal.mode !0
+ ret double %res
+}
+
+;---------------------------------------------------------------------
+; atomicrmw fmax
+;---------------------------------------------------------------------
+
+define double @test_atomicrmw_fmax_f64_global_system(ptr addrspace(1) %ptr, double %value) {
+; COMMON-LABEL: define double @test_atomicrmw_fmax_f64_global_system(
+; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
+; COMMON: atomicrmw.start:
+; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; COMMON-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
+; COMMON-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
+; COMMON-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
+; COMMON-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] seq_cst seq_cst, align 8
+; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
+; COMMON-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
+; COMMON-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
+; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; COMMON: atomicrmw.end:
+; COMMON-NEXT: ret double [[TMP6]]
+;
+ %res = atomicrmw fmax ptr addrspace(1) %ptr, double %value seq_cst
+ ret double %res
+}
+
+define double @test_atomicrmw_fmax_f64_global_system__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, double %value) {
+; GFX803-LABEL: define double @test_atomicrmw_fmax_f64_global_system__amdgpu_no_fine_grained_memory(
+; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX803-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX803: atomicrmw.start:
+; GFX803-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX803-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
+; GFX803-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
+; GFX803-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
+; GFX803-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]]
+; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
+; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
+; GFX803-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
+; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX803: atomicrmw.end:
+; GFX803-NEXT: ret double [[TMP6]]
+;
+; GFX906-LABEL: define double @test_atomicrmw_fmax_f64_global_system__amdgpu_no_fine_grained_memory(
+; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX906-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX906: atomicrmw.start:
+; GFX906-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX906-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
+; GFX906-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
+; GFX906-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
+; GFX906-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]]
+; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
+; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
+; GFX906-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
+; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX906: atomicrmw.end:
+; GFX906-NEXT: ret double [[TMP6]]
+;
+; GFX908-LABEL: define double @test_atomicrmw_fmax_f64_global_system__amdgpu_no_fine_grained_memory(
+; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX908-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX908: atomicrmw.start:
+; GFX908-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX908-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
+; GFX908-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
+; GFX908-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
+; GFX908-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]]
+; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
+; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
+; GFX908-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
+; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX908: atomicrmw.end:
+; GFX908-NEXT: ret double [[TMP6]]
+;
+; GFX90A-LABEL: define double @test_atomicrmw_fmax_f64_global_system__amdgpu_no_fine_grained_memory(
+; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX90A-NEXT: [[RES:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], double [[VALUE]] seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]]
+; GFX90A-NEXT: ret double [[RES]]
+;
+; GFX942-LABEL: define double @test_atomicrmw_fmax_f64_global_system__amdgpu_no_fine_grained_memory(
+; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX942-NEXT: [[RES:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], double [[VALUE]] seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]]
+; GFX942-NEXT: ret double [[RES]]
+;
+; GFX10-LABEL: define double @test_atomicrmw_fmax_f64_global_system__amdgpu_no_fine_grained_memory(
+; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX10-NEXT: [[RES:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], double [[VALUE]] seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]]
+; GFX10-NEXT: ret double [[RES]]
+;
+; GFX11-LABEL: define double @test_atomicrmw_fmax_f64_global_system__amdgpu_no_fine_grained_memory(
+; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX11-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX11: atomicrmw.start:
+; GFX11-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX11-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
+; GFX11-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
+; GFX11-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
+; GFX11-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]]
+; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
+; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
+; GFX11-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
+; GFX11-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX11: atomicrmw.end:
+; GFX11-NEXT: ret double [[TMP6]]
+;
+; GFX12-LABEL: define double @test_atomicrmw_fmax_f64_global_system__amdgpu_no_fine_grained_memory(
+; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX12-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX12-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX12: atomicrmw.start:
+; GFX12-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX12-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
+; GFX12-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
+; GFX12-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
+; GFX12-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]]
+; GFX12-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
+; GFX12-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
+; GFX12-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
+; GFX12-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX12: atomicrmw.end:
+; GFX12-NEXT: ret double [[TMP6]]
+;
+ %res = atomicrmw fmax ptr addrspace(1) %ptr, double %value seq_cst, !amdgpu.no.fine.grained.memory !0
+ ret double %res
+}
+
+define double @test_atomicrmw_fmax_f64_global_system__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, double %value) {
+; GFX803-LABEL: define double @test_atomicrmw_fmax_f64_global_system__amdgpu_no_remote_memory(
+; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX803-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX803: atomicrmw.start:
+; GFX803-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX803-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
+; GFX803-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
+; GFX803-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
+; GFX803-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] seq_cst seq_cst, align 8, !amdgpu.no.remote.memory [[META0]]
+; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
+; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
+; GFX803-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
+; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX803: atomicrmw.end:
+; GFX803-NEXT: ret double [[TMP6]]
+;
+; GFX906-LABEL: define double @test_atomicrmw_fmax_f64_global_system__amdgpu_no_remote_memory(
+; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX906-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX906: atomicrmw.start:
+; GFX906-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX906-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
+; GFX906-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
+; GFX906-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
+; GFX906-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] seq_cst seq_cst, align 8, !amdgpu.no.remote.memory [[META0]]
+; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
+; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
+; GFX906-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
+; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX906: atomicrmw.end:
+; GFX906-NEXT: ret double [[TMP6]]
+;
+; GFX908-LABEL: define double @test_atomicrmw_fmax_f64_global_system__amdgpu_no_remote_memory(
+; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX908-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX908: atomicrmw.start:
+; GFX908-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX908-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
+; GFX908-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
+; GFX908-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
+; GFX908-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] seq_cst seq_cst, align 8, !amdgpu.no.remote.memory [[META0]]
+; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
+; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
+; GFX908-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
+; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX908: atomicrmw.end:
+; GFX908-NEXT: ret double [[TMP6]]
+;
+; GFX90A-LABEL: define double @test_atomicrmw_fmax_f64_global_system__amdgpu_no_remote_memory(
+; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX90A-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX90A: atomicrmw.start:
+; GFX90A-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[RES:%.*]], [[ATOMICRMW_START]] ]
+; GFX90A-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
+; GFX90A-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
+; GFX90A-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
+; GFX90A-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] seq_cst seq_cst, align 8, !amdgpu.no.remote.memory [[META0]]
+; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
+; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
+; GFX90A-NEXT: [[RES]] = bitcast i64 [[NEWLOADED]] to double
+; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX90A: atomicrmw.end:
+; GFX90A-NEXT: ret double [[RES]]
+;
+; GFX942-LABEL: define double @test_atomicrmw_fmax_f64_global_system__amdgpu_no_remote_memory(
+; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX942-NEXT: [[RES:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], double [[VALUE]] seq_cst, align 8, !amdgpu.no.remote.memory [[META0]]
+; GFX942-NEXT: ret double [[RES]]
+;
+; GFX10-LABEL: define double @test_atomicrmw_fmax_f64_global_system__amdgpu_no_remote_memory(
+; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX10-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX10: atomicrmw.start:
+; GFX10-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[RES:%.*]], [[ATOMICRMW_START]] ]
+; GFX10-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
+; GFX10-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
+; GFX10-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
+; GFX10-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] seq_cst seq_cst, align 8, !amdgpu.no.remote.memory [[META0]]
+; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
+; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
+; GFX10-NEXT: [[RES]] = bitcast i64 [[NEWLOADED]] to double
+; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX10: atomicrmw.end:
+; GFX10-NEXT: ret double [[RES]]
+;
+; GFX11-LABEL: define double @test_atomicrmw_fmax_f64_global_system__amdgpu_no_remote_memory(
+; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX11-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX11: atomicrmw.start:
+; GFX11-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX11-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
+; GFX11-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
+; GFX11-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
+; GFX11-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] seq_cst seq_cst, align 8, !amdgpu.no.remote.memory [[META0]]
+; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
+; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
+; GFX11-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
+; GFX11-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX11: atomicrmw.end:
+; GFX11-NEXT: ret double [[TMP6]]
+;
+; GFX12-LABEL: define double @test_atomicrmw_fmax_f64_global_system__amdgpu_no_remote_memory(
+; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX12-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX12-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX12: atomicrmw.start:
+; GFX12-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX12-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
+; GFX12-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
+; GFX12-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
+; GFX12-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] seq_cst seq_cst, align 8, !amdgpu.no.remote.memory [[META0]]
+; GFX12-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
+; GFX12-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
+; GFX12-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
+; GFX12-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX12: atomicrmw.end:
+; GFX12-NEXT: ret double [[TMP6]]
+;
+ %res = atomicrmw fmax ptr addrspace(1) %ptr, double %value seq_cst, !amdgpu.no.remote.memory !0
+ ret double %res
+}
+
+define double @test_atomicrmw_fmax_f64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, double %value) {
+; GFX803-LABEL: define double @test_atomicrmw_fmax_f64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
+; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX803-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX803: atomicrmw.start:
+; GFX803-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX803-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
+; GFX803-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
+; GFX803-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
+; GFX803-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
+; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
+; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
+; GFX803-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
+; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX803: atomicrmw.end:
+; GFX803-NEXT: ret double [[TMP6]]
+;
+; GFX906-LABEL: define double @test_atomicrmw_fmax_f64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
+; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX906-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX906: atomicrmw.start:
+; GFX906-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX906-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
+; GFX906-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
+; GFX906-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
+; GFX906-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
+; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
+; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
+; GFX906-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
+; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX906: atomicrmw.end:
+; GFX906-NEXT: ret double [[TMP6]]
+;
+; GFX908-LABEL: define double @test_atomicrmw_fmax_f64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
+; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX908-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX908: atomicrmw.start:
+; GFX908-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX908-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
+; GFX908-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
+; GFX908-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
+; GFX908-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
+; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
+; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
+; GFX908-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
+; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX908: atomicrmw.end:
+; GFX908-NEXT: ret double [[TMP6]]
+;
+; GFX90A-LABEL: define double @test_atomicrmw_fmax_f64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
+; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX90A-NEXT: [[RES:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], double [[VALUE]] seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
+; GFX90A-NEXT: ret double [[RES]]
+;
+; GFX942-LABEL: define double @test_atomicrmw_fmax_f64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
+; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX942-NEXT: [[RES:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], double [[VALUE]] seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
+; GFX942-NEXT: ret double [[RES]]
+;
+; GFX10-LABEL: define double @test_atomicrmw_fmax_f64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
+; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX10-NEXT: [[RES:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], double [[VALUE]] seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
+; GFX10-NEXT: ret double [[RES]]
+;
+; GFX11-LABEL: define double @test_atomicrmw_fmax_f64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
+; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX11-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX11: atomicrmw.start:
+; GFX11-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX11-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
+; GFX11-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
+; GFX11-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
+; GFX11-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
+; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
+; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
+; GFX11-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
+; GFX11-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX11: atomicrmw.end:
+; GFX11-NEXT: ret double [[TMP6]]
+;
+; GFX12-LABEL: define double @test_atomicrmw_fmax_f64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
+; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX12-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX12-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX12: atomicrmw.start:
+; GFX12-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX12-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
+; GFX12-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
+; GFX12-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
+; GFX12-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
+; GFX12-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
+; GFX12-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
+; GFX12-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
+; GFX12-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX12: atomicrmw.end:
+; GFX12-NEXT: ret double [[TMP6]]
;
- %res = atomicrmw fadd ptr addrspace(1) %ptr, double %value seq_cst, align 4, !amdgpu.no.fine.grained.memory !0, !amdgpu.ignore.denormal.mode !0
+ %res = atomicrmw fmax ptr addrspace(1) %ptr, double %value seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
ret double %res
}
-define double @test_atomicrmw_fadd_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, double %value) {
-; COMMON-LABEL: define double @test_atomicrmw_fadd_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(
+define double @test_atomicrmw_fmax_f64_global_system__amdgpu_ignore_denormal_mode(ptr addrspace(1) %ptr, double %value) {
+; COMMON-LABEL: define double @test_atomicrmw_fmax_f64_global_system__amdgpu_ignore_denormal_mode(
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
-; COMMON-NEXT: [[TMP1:%.*]] = alloca double, align 8, addrspace(5)
-; COMMON-NEXT: [[TMP2:%.*]] = alloca double, align 8, addrspace(5)
-; COMMON-NEXT: [[TMP3:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
+; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
; COMMON: atomicrmw.start:
-; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
-; COMMON-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
-; COMMON-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]])
-; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
-; COMMON-NEXT: store double [[NEW]], ptr addrspace(5) [[TMP2]], align 8
-; COMMON-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP4]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
-; COMMON-NEXT: [[TMP6:%.*]] = load double, ptr addrspace(5) [[TMP1]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]])
-; COMMON-NEXT: [[TMP7:%.*]] = insertvalue { double, i1 } poison, double [[TMP6]], 0
-; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } [[TMP7]], i1 [[TMP5]], 1
-; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP8]], 1
-; COMMON-NEXT: [[NEWLOADED]] = extractvalue { double, i1 } [[TMP8]], 0
+; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; COMMON-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
+; COMMON-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
+; COMMON-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
+; COMMON-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] seq_cst seq_cst, align 8
+; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
+; COMMON-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
+; COMMON-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; COMMON: atomicrmw.end:
-; COMMON-NEXT: ret double [[NEWLOADED]]
+; COMMON-NEXT: ret double [[TMP6]]
;
- %res = atomicrmw fadd ptr addrspace(1) %ptr, double %value seq_cst, align 4, !amdgpu.no.remote.memory !0, !amdgpu.ignore.denormal.mode !0
+ %res = atomicrmw fmax ptr addrspace(1) %ptr, double %value seq_cst, align 8, !amdgpu.ignore.denormal.mode !0
ret double %res
}
-define double @test_atomicrmw_fadd_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, double %value) {
-; COMMON-LABEL: define double @test_atomicrmw_fadd_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
-; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
-; COMMON-NEXT: [[TMP1:%.*]] = alloca double, align 8, addrspace(5)
-; COMMON-NEXT: [[TMP2:%.*]] = alloca double, align 8, addrspace(5)
-; COMMON-NEXT: [[TMP3:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
-; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
-; COMMON: atomicrmw.start:
-; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
-; COMMON-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
-; COMMON-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]])
-; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
-; COMMON-NEXT: store double [[NEW]], ptr addrspace(5) [[TMP2]], align 8
-; COMMON-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP4]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
-; COMMON-NEXT: [[TMP6:%.*]] = load double, ptr addrspace(5) [[TMP1]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]])
-; COMMON-NEXT: [[TMP7:%.*]] = insertvalue { double, i1 } poison, double [[TMP6]], 0
-; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } [[TMP7]], i1 [[TMP5]], 1
-; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP8]], 1
-; COMMON-NEXT: [[NEWLOADED]] = extractvalue { double, i1 } [[TMP8]], 0
-; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
-; COMMON: atomicrmw.end:
-; COMMON-NEXT: ret double [[NEWLOADED]]
+define double @test_atomicrmw_fmax_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, double %value) {
+; GFX803-LABEL: define double @test_atomicrmw_fmax_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(
+; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX803-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX803: atomicrmw.start:
+; GFX803-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX803-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
+; GFX803-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
+; GFX803-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
+; GFX803-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]]
+; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
+; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
+; GFX803-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
+; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX803: atomicrmw.end:
+; GFX803-NEXT: ret double [[TMP6]]
;
- %res = atomicrmw fadd ptr addrspace(1) %ptr, double %value seq_cst, align 4, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0, !amdgpu.ignore.denormal.mode !0
- ret double %res
-}
-
-define double @test_atomicrmw_fadd_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_daz(ptr addrspace(1) %ptr, double %value) #0 {
-; COMMON-LABEL: define double @test_atomicrmw_fadd_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_daz(
-; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR1:[0-9]+]] {
-; COMMON-NEXT: [[TMP1:%.*]] = alloca double, align 8, addrspace(5)
-; COMMON-NEXT: [[TMP2:%.*]] = alloca double, align 8, addrspace(5)
-; COMMON-NEXT: [[TMP3:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
-; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
-; COMMON: atomicrmw.start:
-; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
-; COMMON-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
-; COMMON-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]])
-; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
-; COMMON-NEXT: store double [[NEW]], ptr addrspace(5) [[TMP2]], align 8
-; COMMON-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP4]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
-; COMMON-NEXT: [[TMP6:%.*]] = load double, ptr addrspace(5) [[TMP1]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]])
-; COMMON-NEXT: [[TMP7:%.*]] = insertvalue { double, i1 } poison, double [[TMP6]], 0
-; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } [[TMP7]], i1 [[TMP5]], 1
-; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP8]], 1
-; COMMON-NEXT: [[NEWLOADED]] = extractvalue { double, i1 } [[TMP8]], 0
-; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
-; COMMON: atomicrmw.end:
-; COMMON-NEXT: ret double [[NEWLOADED]]
+; GFX906-LABEL: define double @test_atomicrmw_fmax_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(
+; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX906-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX906: atomicrmw.start:
+; GFX906-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX906-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
+; GFX906-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
+; GFX906-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
+; GFX906-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]]
+; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
+; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
+; GFX906-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
+; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX906: atomicrmw.end:
+; GFX906-NEXT: ret double [[TMP6]]
+;
+; GFX908-LABEL: define double @test_atomicrmw_fmax_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(
+; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX908-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX908: atomicrmw.start:
+; GFX908-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX908-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
+; GFX908-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
+; GFX908-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
+; GFX908-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]]
+; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
+; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
+; GFX908-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
+; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX908: atomicrmw.end:
+; GFX908-NEXT: ret double [[TMP6]]
+;
+; GFX90A-LABEL: define double @test_atomicrmw_fmax_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(
+; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX90A-NEXT: [[RES:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], double [[VALUE]] seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
+; GFX90A-NEXT: ret double [[RES]]
+;
+; GFX942-LABEL: define double @test_atomicrmw_fmax_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(
+; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX942-NEXT: [[RES:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], double [[VALUE]] seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
+; GFX942-NEXT: ret double [[RES]]
+;
+; GFX10-LABEL: define double @test_atomicrmw_fmax_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(
+; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX10-NEXT: [[RES:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], double [[VALUE]] seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
+; GFX10-NEXT: ret double [[RES]]
+;
+; GFX11-LABEL: define double @test_atomicrmw_fmax_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(
+; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX11-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX11: atomicrmw.start:
+; GFX11-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX11-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
+; GFX11-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
+; GFX11-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
+; GFX11-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]]
+; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
+; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
+; GFX11-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
+; GFX11-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX11: atomicrmw.end:
+; GFX11-NEXT: ret double [[TMP6]]
+;
+; GFX12-LABEL: define double @test_atomicrmw_fmax_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(
+; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX12-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX12-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX12: atomicrmw.start:
+; GFX12-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX12-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
+; GFX12-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
+; GFX12-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
+; GFX12-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]]
+; GFX12-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
+; GFX12-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
+; GFX12-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
+; GFX12-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX12: atomicrmw.end:
+; GFX12-NEXT: ret double [[TMP6]]
;
- %res = atomicrmw fadd ptr addrspace(1) %ptr, double %value seq_cst, align 4, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0, !amdgpu.ignore.denormal.mode !0
+ %res = atomicrmw fmax ptr addrspace(1) %ptr, double %value seq_cst, align 8, !amdgpu.no.fine.grained.memory !0, !amdgpu.ignore.denormal.mode !0
ret double %res
}
-define double @test_atomicrmw_fadd_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_dynamic(ptr addrspace(1) %ptr, double %value) #1 {
-; COMMON-LABEL: define double @test_atomicrmw_fadd_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_dynamic(
-; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR2:[0-9]+]] {
-; COMMON-NEXT: [[TMP1:%.*]] = alloca double, align 8, addrspace(5)
-; COMMON-NEXT: [[TMP2:%.*]] = alloca double, align 8, addrspace(5)
-; COMMON-NEXT: [[TMP3:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
-; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
-; COMMON: atomicrmw.start:
-; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
-; COMMON-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
-; COMMON-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]])
-; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
-; COMMON-NEXT: store double [[NEW]], ptr addrspace(5) [[TMP2]], align 8
-; COMMON-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP4]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
-; COMMON-NEXT: [[TMP6:%.*]] = load double, ptr addrspace(5) [[TMP1]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]])
-; COMMON-NEXT: [[TMP7:%.*]] = insertvalue { double, i1 } poison, double [[TMP6]], 0
-; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } [[TMP7]], i1 [[TMP5]], 1
-; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP8]], 1
-; COMMON-NEXT: [[NEWLOADED]] = extractvalue { double, i1 } [[TMP8]], 0
-; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
-; COMMON: atomicrmw.end:
-; COMMON-NEXT: ret double [[NEWLOADED]]
+define double @test_atomicrmw_fmax_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, double %value) {
+; GFX803-LABEL: define double @test_atomicrmw_fmax_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(
+; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX803-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX803: atomicrmw.start:
+; GFX803-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX803-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
+; GFX803-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
+; GFX803-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
+; GFX803-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] seq_cst seq_cst, align 8, !amdgpu.no.remote.memory [[META0]]
+; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
+; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
+; GFX803-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
+; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX803: atomicrmw.end:
+; GFX803-NEXT: ret double [[TMP6]]
+;
+; GFX906-LABEL: define double @test_atomicrmw_fmax_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(
+; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX906-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX906: atomicrmw.start:
+; GFX906-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX906-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
+; GFX906-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
+; GFX906-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
+; GFX906-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] seq_cst seq_cst, align 8, !amdgpu.no.remote.memory [[META0]]
+; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
+; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
+; GFX906-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
+; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX906: atomicrmw.end:
+; GFX906-NEXT: ret double [[TMP6]]
+;
+; GFX908-LABEL: define double @test_atomicrmw_fmax_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(
+; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX908-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX908: atomicrmw.start:
+; GFX908-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX908-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
+; GFX908-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
+; GFX908-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
+; GFX908-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] seq_cst seq_cst, align 8, !amdgpu.no.remote.memory [[META0]]
+; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
+; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
+; GFX908-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
+; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX908: atomicrmw.end:
+; GFX908-NEXT: ret double [[TMP6]]
+;
+; GFX90A-LABEL: define double @test_atomicrmw_fmax_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(
+; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX90A-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX90A: atomicrmw.start:
+; GFX90A-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX90A-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
+; GFX90A-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
+; GFX90A-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
+; GFX90A-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] seq_cst seq_cst, align 8, !amdgpu.no.remote.memory [[META0]]
+; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
+; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
+; GFX90A-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
+; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX90A: atomicrmw.end:
+; GFX90A-NEXT: ret double [[TMP6]]
+;
+; GFX942-LABEL: define double @test_atomicrmw_fmax_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(
+; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX942-NEXT: [[RES:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], double [[VALUE]] seq_cst, align 8, !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
+; GFX942-NEXT: ret double [[RES]]
+;
+; GFX10-LABEL: define double @test_atomicrmw_fmax_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(
+; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX10-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX10: atomicrmw.start:
+; GFX10-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX10-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
+; GFX10-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
+; GFX10-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
+; GFX10-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] seq_cst seq_cst, align 8, !amdgpu.no.remote.memory [[META0]]
+; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
+; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
+; GFX10-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
+; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX10: atomicrmw.end:
+; GFX10-NEXT: ret double [[TMP6]]
+;
+; GFX11-LABEL: define double @test_atomicrmw_fmax_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(
+; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX11-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX11: atomicrmw.start:
+; GFX11-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX11-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
+; GFX11-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
+; GFX11-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
+; GFX11-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] seq_cst seq_cst, align 8, !amdgpu.no.remote.memory [[META0]]
+; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
+; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
+; GFX11-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
+; GFX11-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX11: atomicrmw.end:
+; GFX11-NEXT: ret double [[TMP6]]
+;
+; GFX12-LABEL: define double @test_atomicrmw_fmax_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(
+; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX12-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX12-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX12: atomicrmw.start:
+; GFX12-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX12-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
+; GFX12-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
+; GFX12-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
+; GFX12-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] seq_cst seq_cst, align 8, !amdgpu.no.remote.memory [[META0]]
+; GFX12-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
+; GFX12-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
+; GFX12-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
+; GFX12-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX12: atomicrmw.end:
+; GFX12-NEXT: ret double [[TMP6]]
;
- %res = atomicrmw fadd ptr addrspace(1) %ptr, double %value seq_cst, align 4, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0, !amdgpu.ignore.denormal.mode !0
+ %res = atomicrmw fmax ptr addrspace(1) %ptr, double %value seq_cst, align 8, !amdgpu.no.remote.memory !0, !amdgpu.ignore.denormal.mode !0
ret double %res
}
-;---------------------------------------------------------------------
-; atomicrmw fsub
-;---------------------------------------------------------------------
-
-define double @test_atomicrmw_fsub_f64_global_system(ptr addrspace(1) %ptr, double %value) {
-; COMMON-LABEL: define double @test_atomicrmw_fsub_f64_global_system(
-; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
-; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
-; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
-; COMMON: atomicrmw.start:
-; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[RES:%.*]], [[ATOMICRMW_START]] ]
-; COMMON-NEXT: [[NEW:%.*]] = fsub double [[LOADED]], [[VALUE]]
-; COMMON-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
-; COMMON-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
-; COMMON-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] seq_cst seq_cst, align 8
-; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
-; COMMON-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
-; COMMON-NEXT: [[RES]] = bitcast i64 [[NEWLOADED]] to double
-; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
-; COMMON: atomicrmw.end:
-; COMMON-NEXT: ret double [[RES]]
+define double @test_atomicrmw_fmax_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, double %value) {
+; GFX803-LABEL: define double @test_atomicrmw_fmax_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
+; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX803-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX803: atomicrmw.start:
+; GFX803-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX803-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
+; GFX803-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
+; GFX803-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
+; GFX803-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
+; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
+; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
+; GFX803-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
+; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX803: atomicrmw.end:
+; GFX803-NEXT: ret double [[TMP6]]
;
- %res = atomicrmw fsub ptr addrspace(1) %ptr, double %value seq_cst
- ret double %res
-}
-
-define double @test_atomicrmw_fsub_f64_global_system__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, double %value) {
-; COMMON-LABEL: define double @test_atomicrmw_fsub_f64_global_system__amdgpu_no_fine_grained_memory(
-; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
-; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
-; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
-; COMMON: atomicrmw.start:
-; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[RES:%.*]], [[ATOMICRMW_START]] ]
-; COMMON-NEXT: [[NEW:%.*]] = fsub double [[LOADED]], [[VALUE]]
-; COMMON-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
-; COMMON-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
-; COMMON-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]]
-; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
-; COMMON-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
-; COMMON-NEXT: [[RES]] = bitcast i64 [[NEWLOADED]] to double
-; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
-; COMMON: atomicrmw.end:
-; COMMON-NEXT: ret double [[RES]]
+; GFX906-LABEL: define double @test_atomicrmw_fmax_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
+; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX906-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX906: atomicrmw.start:
+; GFX906-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX906-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
+; GFX906-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
+; GFX906-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
+; GFX906-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
+; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
+; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
+; GFX906-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
+; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX906: atomicrmw.end:
+; GFX906-NEXT: ret double [[TMP6]]
;
- %res = atomicrmw fsub ptr addrspace(1) %ptr, double %value seq_cst, !amdgpu.no.fine.grained.memory !0
- ret double %res
-}
-
-define double @test_atomicrmw_fsub_f64_global_system__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, double %value) {
-; COMMON-LABEL: define double @test_atomicrmw_fsub_f64_global_system__amdgpu_no_remote_memory(
-; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
-; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
-; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
-; COMMON: atomicrmw.start:
-; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[RES:%.*]], [[ATOMICRMW_START]] ]
-; COMMON-NEXT: [[NEW:%.*]] = fsub double [[LOADED]], [[VALUE]]
-; COMMON-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
-; COMMON-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
-; COMMON-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] seq_cst seq_cst, align 8, !amdgpu.no.remote.memory [[META0]]
-; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
-; COMMON-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
-; COMMON-NEXT: [[RES]] = bitcast i64 [[NEWLOADED]] to double
-; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
-; COMMON: atomicrmw.end:
-; COMMON-NEXT: ret double [[RES]]
+; GFX908-LABEL: define double @test_atomicrmw_fmax_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
+; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX908-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX908: atomicrmw.start:
+; GFX908-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX908-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
+; GFX908-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
+; GFX908-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
+; GFX908-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
+; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
+; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
+; GFX908-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
+; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX908: atomicrmw.end:
+; GFX908-NEXT: ret double [[TMP6]]
;
- %res = atomicrmw fsub ptr addrspace(1) %ptr, double %value seq_cst, !amdgpu.no.remote.memory !0
- ret double %res
-}
-
-define double @test_atomicrmw_fsub_f64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, double %value) {
-; COMMON-LABEL: define double @test_atomicrmw_fsub_f64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
-; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
-; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
-; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
-; COMMON: atomicrmw.start:
-; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[RES:%.*]], [[ATOMICRMW_START]] ]
-; COMMON-NEXT: [[NEW:%.*]] = fsub double [[LOADED]], [[VALUE]]
-; COMMON-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
-; COMMON-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
-; COMMON-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
-; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
-; COMMON-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
-; COMMON-NEXT: [[RES]] = bitcast i64 [[NEWLOADED]] to double
-; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
-; COMMON: atomicrmw.end:
-; COMMON-NEXT: ret double [[RES]]
+; GFX90A-LABEL: define double @test_atomicrmw_fmax_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
+; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX90A-NEXT: [[RES:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], double [[VALUE]] seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
+; GFX90A-NEXT: ret double [[RES]]
;
- %res = atomicrmw fsub ptr addrspace(1) %ptr, double %value seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
- ret double %res
-}
-
-define double @test_atomicrmw_fsub_f64_global_system__amdgpu_ignore_denormal_mode(ptr addrspace(1) %ptr, double %value) {
-; COMMON-LABEL: define double @test_atomicrmw_fsub_f64_global_system__amdgpu_ignore_denormal_mode(
-; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
-; COMMON-NEXT: [[TMP3:%.*]] = alloca double, align 8, addrspace(5)
-; COMMON-NEXT: [[TMP2:%.*]] = alloca double, align 8, addrspace(5)
-; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
-; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
-; COMMON: atomicrmw.start:
-; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
-; COMMON-NEXT: [[NEW:%.*]] = fsub double [[LOADED]], [[VALUE]]
-; COMMON-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP3]])
-; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP3]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
-; COMMON-NEXT: store double [[NEW]], ptr addrspace(5) [[TMP2]], align 8
-; COMMON-NEXT: [[TMP9:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP4]], ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
-; COMMON-NEXT: [[TMP6:%.*]] = load double, ptr addrspace(5) [[TMP3]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP3]])
-; COMMON-NEXT: [[TMP7:%.*]] = insertvalue { double, i1 } poison, double [[TMP6]], 0
-; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } [[TMP7]], i1 [[TMP9]], 1
-; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP8]], 1
-; COMMON-NEXT: [[TMP5]] = extractvalue { double, i1 } [[TMP8]], 0
-; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
-; COMMON: atomicrmw.end:
-; COMMON-NEXT: ret double [[TMP5]]
+; GFX942-LABEL: define double @test_atomicrmw_fmax_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
+; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX942-NEXT: [[RES:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], double [[VALUE]] seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
+; GFX942-NEXT: ret double [[RES]]
;
- %res = atomicrmw fsub ptr addrspace(1) %ptr, double %value seq_cst, align 4, !amdgpu.ignore.denormal.mode !0
- ret double %res
-}
-
-define double @test_atomicrmw_fsub_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, double %value) {
-; COMMON-LABEL: define double @test_atomicrmw_fsub_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(
-; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
-; COMMON-NEXT: [[TMP3:%.*]] = alloca double, align 8, addrspace(5)
-; COMMON-NEXT: [[TMP2:%.*]] = alloca double, align 8, addrspace(5)
-; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
-; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
-; COMMON: atomicrmw.start:
-; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
-; COMMON-NEXT: [[NEW:%.*]] = fsub double [[LOADED]], [[VALUE]]
-; COMMON-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP3]])
-; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP3]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
-; COMMON-NEXT: store double [[NEW]], ptr addrspace(5) [[TMP2]], align 8
-; COMMON-NEXT: [[TMP9:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP4]], ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
-; COMMON-NEXT: [[TMP6:%.*]] = load double, ptr addrspace(5) [[TMP3]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP3]])
-; COMMON-NEXT: [[TMP7:%.*]] = insertvalue { double, i1 } poison, double [[TMP6]], 0
-; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } [[TMP7]], i1 [[TMP9]], 1
-; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP8]], 1
-; COMMON-NEXT: [[TMP5]] = extractvalue { double, i1 } [[TMP8]], 0
-; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
-; COMMON: atomicrmw.end:
-; COMMON-NEXT: ret double [[TMP5]]
+; GFX10-LABEL: define double @test_atomicrmw_fmax_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
+; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX10-NEXT: [[RES:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], double [[VALUE]] seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
+; GFX10-NEXT: ret double [[RES]]
;
- %res = atomicrmw fsub ptr addrspace(1) %ptr, double %value seq_cst, align 4, !amdgpu.no.fine.grained.memory !0, !amdgpu.ignore.denormal.mode !0
- ret double %res
-}
-
-define double @test_atomicrmw_fsub_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, double %value) {
-; COMMON-LABEL: define double @test_atomicrmw_fsub_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(
-; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
-; COMMON-NEXT: [[TMP3:%.*]] = alloca double, align 8, addrspace(5)
-; COMMON-NEXT: [[TMP2:%.*]] = alloca double, align 8, addrspace(5)
-; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
-; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
-; COMMON: atomicrmw.start:
-; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
-; COMMON-NEXT: [[NEW:%.*]] = fsub double [[LOADED]], [[VALUE]]
-; COMMON-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP3]])
-; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP3]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
-; COMMON-NEXT: store double [[NEW]], ptr addrspace(5) [[TMP2]], align 8
-; COMMON-NEXT: [[TMP9:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP4]], ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
-; COMMON-NEXT: [[TMP6:%.*]] = load double, ptr addrspace(5) [[TMP3]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP3]])
-; COMMON-NEXT: [[TMP7:%.*]] = insertvalue { double, i1 } poison, double [[TMP6]], 0
-; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } [[TMP7]], i1 [[TMP9]], 1
-; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP8]], 1
-; COMMON-NEXT: [[TMP5]] = extractvalue { double, i1 } [[TMP8]], 0
-; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
-; COMMON: atomicrmw.end:
-; COMMON-NEXT: ret double [[TMP5]]
+; GFX11-LABEL: define double @test_atomicrmw_fmax_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
+; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX11-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX11: atomicrmw.start:
+; GFX11-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX11-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
+; GFX11-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
+; GFX11-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
+; GFX11-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
+; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
+; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
+; GFX11-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
+; GFX11-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX11: atomicrmw.end:
+; GFX11-NEXT: ret double [[TMP6]]
;
- %res = atomicrmw fsub ptr addrspace(1) %ptr, double %value seq_cst, align 4, !amdgpu.no.remote.memory !0, !amdgpu.ignore.denormal.mode !0
- ret double %res
-}
-
-define double @test_atomicrmw_fsub_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, double %value) {
-; COMMON-LABEL: define double @test_atomicrmw_fsub_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
-; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
-; COMMON-NEXT: [[TMP3:%.*]] = alloca double, align 8, addrspace(5)
-; COMMON-NEXT: [[TMP2:%.*]] = alloca double, align 8, addrspace(5)
-; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
-; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
-; COMMON: atomicrmw.start:
-; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
-; COMMON-NEXT: [[NEW:%.*]] = fsub double [[LOADED]], [[VALUE]]
-; COMMON-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP3]])
-; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP3]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
-; COMMON-NEXT: store double [[NEW]], ptr addrspace(5) [[TMP2]], align 8
-; COMMON-NEXT: [[TMP9:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP4]], ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
-; COMMON-NEXT: [[TMP6:%.*]] = load double, ptr addrspace(5) [[TMP3]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP3]])
-; COMMON-NEXT: [[TMP7:%.*]] = insertvalue { double, i1 } poison, double [[TMP6]], 0
-; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } [[TMP7]], i1 [[TMP9]], 1
-; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP8]], 1
-; COMMON-NEXT: [[TMP5]] = extractvalue { double, i1 } [[TMP8]], 0
-; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
-; COMMON: atomicrmw.end:
-; COMMON-NEXT: ret double [[TMP5]]
+; GFX12-LABEL: define double @test_atomicrmw_fmax_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
+; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX12-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX12-NEXT: br label [[ATOMICRMW_START:%.*]]
+; GFX12: atomicrmw.start:
+; GFX12-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
+; GFX12-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
+; GFX12-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
+; GFX12-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
+; GFX12-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
+; GFX12-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
+; GFX12-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
+; GFX12-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
+; GFX12-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX12: atomicrmw.end:
+; GFX12-NEXT: ret double [[TMP6]]
;
- %res = atomicrmw fsub ptr addrspace(1) %ptr, double %value seq_cst, align 4, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0, !amdgpu.ignore.denormal.mode !0
+ %res = atomicrmw fmax ptr addrspace(1) %ptr, double %value seq_cst, align 8, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0, !amdgpu.ignore.denormal.mode !0
ret double %res
}
;---------------------------------------------------------------------
-; atomicrmw fmax
+; atomicrmw fmin
;---------------------------------------------------------------------
-define double @test_atomicrmw_fmax_f64_global_system(ptr addrspace(1) %ptr, double %value) {
-; COMMON-LABEL: define double @test_atomicrmw_fmax_f64_global_system(
+define double @test_atomicrmw_fmin_f64_global_system(ptr addrspace(1) %ptr, double %value) {
+; COMMON-LABEL: define double @test_atomicrmw_fmin_f64_global_system(
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
; COMMON: atomicrmw.start:
; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
-; COMMON-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
+; COMMON-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]])
; COMMON-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
; COMMON-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
; COMMON-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] seq_cst seq_cst, align 8
@@ -1103,18 +2226,18 @@ define double @test_atomicrmw_fmax_f64_global_system(ptr addrspace(1) %ptr, doub
; COMMON: atomicrmw.end:
; COMMON-NEXT: ret double [[TMP6]]
;
- %res = atomicrmw fmax ptr addrspace(1) %ptr, double %value seq_cst
+ %res = atomicrmw fmin ptr addrspace(1) %ptr, double %value seq_cst
ret double %res
}
-define double @test_atomicrmw_fmax_f64_global_system__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, double %value) {
-; GFX803-LABEL: define double @test_atomicrmw_fmax_f64_global_system__amdgpu_no_fine_grained_memory(
+define double @test_atomicrmw_fmin_f64_global_system__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, double %value) {
+; GFX803-LABEL: define double @test_atomicrmw_fmin_f64_global_system__amdgpu_no_fine_grained_memory(
; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; GFX803-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]]
; GFX803: atomicrmw.start:
; GFX803-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
-; GFX803-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
+; GFX803-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]])
; GFX803-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
; GFX803-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
; GFX803-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]]
@@ -1125,13 +2248,13 @@ define double @test_atomicrmw_fmax_f64_global_system__amdgpu_no_fine_grained_mem
; GFX803: atomicrmw.end:
; GFX803-NEXT: ret double [[TMP6]]
;
-; GFX906-LABEL: define double @test_atomicrmw_fmax_f64_global_system__amdgpu_no_fine_grained_memory(
+; GFX906-LABEL: define double @test_atomicrmw_fmin_f64_global_system__amdgpu_no_fine_grained_memory(
; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; GFX906-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]]
; GFX906: atomicrmw.start:
; GFX906-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
-; GFX906-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
+; GFX906-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]])
; GFX906-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
; GFX906-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
; GFX906-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]]
@@ -1142,13 +2265,13 @@ define double @test_atomicrmw_fmax_f64_global_system__amdgpu_no_fine_grained_mem
; GFX906: atomicrmw.end:
; GFX906-NEXT: ret double [[TMP6]]
;
-; GFX908-LABEL: define double @test_atomicrmw_fmax_f64_global_system__amdgpu_no_fine_grained_memory(
+; GFX908-LABEL: define double @test_atomicrmw_fmin_f64_global_system__amdgpu_no_fine_grained_memory(
; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; GFX908-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]]
; GFX908: atomicrmw.start:
; GFX908-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
-; GFX908-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
+; GFX908-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]])
; GFX908-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
; GFX908-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
; GFX908-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]]
@@ -1159,28 +2282,28 @@ define double @test_atomicrmw_fmax_f64_global_system__amdgpu_no_fine_grained_mem
; GFX908: atomicrmw.end:
; GFX908-NEXT: ret double [[TMP6]]
;
-; GFX90A-LABEL: define double @test_atomicrmw_fmax_f64_global_system__amdgpu_no_fine_grained_memory(
+; GFX90A-LABEL: define double @test_atomicrmw_fmin_f64_global_system__amdgpu_no_fine_grained_memory(
; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
-; GFX90A-NEXT: [[RES:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], double [[VALUE]] seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]]
+; GFX90A-NEXT: [[RES:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], double [[VALUE]] seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]]
; GFX90A-NEXT: ret double [[RES]]
;
-; GFX942-LABEL: define double @test_atomicrmw_fmax_f64_global_system__amdgpu_no_fine_grained_memory(
+; GFX942-LABEL: define double @test_atomicrmw_fmin_f64_global_system__amdgpu_no_fine_grained_memory(
; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
-; GFX942-NEXT: [[RES:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], double [[VALUE]] seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]]
+; GFX942-NEXT: [[RES:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], double [[VALUE]] seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]]
; GFX942-NEXT: ret double [[RES]]
;
-; GFX10-LABEL: define double @test_atomicrmw_fmax_f64_global_system__amdgpu_no_fine_grained_memory(
+; GFX10-LABEL: define double @test_atomicrmw_fmin_f64_global_system__amdgpu_no_fine_grained_memory(
; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
-; GFX10-NEXT: [[RES:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], double [[VALUE]] seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]]
+; GFX10-NEXT: [[RES:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], double [[VALUE]] seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]]
; GFX10-NEXT: ret double [[RES]]
;
-; GFX11-LABEL: define double @test_atomicrmw_fmax_f64_global_system__amdgpu_no_fine_grained_memory(
+; GFX11-LABEL: define double @test_atomicrmw_fmin_f64_global_system__amdgpu_no_fine_grained_memory(
; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; GFX11-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]]
; GFX11: atomicrmw.start:
; GFX11-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
-; GFX11-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
+; GFX11-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]])
; GFX11-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
; GFX11-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
; GFX11-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]]
@@ -1191,13 +2314,13 @@ define double @test_atomicrmw_fmax_f64_global_system__amdgpu_no_fine_grained_mem
; GFX11: atomicrmw.end:
; GFX11-NEXT: ret double [[TMP6]]
;
-; GFX12-LABEL: define double @test_atomicrmw_fmax_f64_global_system__amdgpu_no_fine_grained_memory(
+; GFX12-LABEL: define double @test_atomicrmw_fmin_f64_global_system__amdgpu_no_fine_grained_memory(
; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; GFX12-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
; GFX12-NEXT: br label [[ATOMICRMW_START:%.*]]
; GFX12: atomicrmw.start:
; GFX12-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
-; GFX12-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
+; GFX12-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]])
; GFX12-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
; GFX12-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
; GFX12-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]]
@@ -1208,18 +2331,18 @@ define double @test_atomicrmw_fmax_f64_global_system__amdgpu_no_fine_grained_mem
; GFX12: atomicrmw.end:
; GFX12-NEXT: ret double [[TMP6]]
;
- %res = atomicrmw fmax ptr addrspace(1) %ptr, double %value seq_cst, !amdgpu.no.fine.grained.memory !0
+ %res = atomicrmw fmin ptr addrspace(1) %ptr, double %value seq_cst, !amdgpu.no.fine.grained.memory !0
ret double %res
}
-define double @test_atomicrmw_fmax_f64_global_system__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, double %value) {
-; GFX803-LABEL: define double @test_atomicrmw_fmax_f64_global_system__amdgpu_no_remote_memory(
+define double @test_atomicrmw_fmin_f64_global_system__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, double %value) {
+; GFX803-LABEL: define double @test_atomicrmw_fmin_f64_global_system__amdgpu_no_remote_memory(
; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; GFX803-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]]
; GFX803: atomicrmw.start:
; GFX803-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
-; GFX803-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
+; GFX803-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]])
; GFX803-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
; GFX803-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
; GFX803-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] seq_cst seq_cst, align 8, !amdgpu.no.remote.memory [[META0]]
@@ -1230,13 +2353,13 @@ define double @test_atomicrmw_fmax_f64_global_system__amdgpu_no_remote_memory(pt
; GFX803: atomicrmw.end:
; GFX803-NEXT: ret double [[TMP6]]
;
-; GFX906-LABEL: define double @test_atomicrmw_fmax_f64_global_system__amdgpu_no_remote_memory(
+; GFX906-LABEL: define double @test_atomicrmw_fmin_f64_global_system__amdgpu_no_remote_memory(
; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; GFX906-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]]
; GFX906: atomicrmw.start:
; GFX906-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
-; GFX906-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
+; GFX906-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]])
; GFX906-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
; GFX906-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
; GFX906-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] seq_cst seq_cst, align 8, !amdgpu.no.remote.memory [[META0]]
@@ -1247,13 +2370,13 @@ define double @test_atomicrmw_fmax_f64_global_system__amdgpu_no_remote_memory(pt
; GFX906: atomicrmw.end:
; GFX906-NEXT: ret double [[TMP6]]
;
-; GFX908-LABEL: define double @test_atomicrmw_fmax_f64_global_system__amdgpu_no_remote_memory(
+; GFX908-LABEL: define double @test_atomicrmw_fmin_f64_global_system__amdgpu_no_remote_memory(
; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; GFX908-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]]
; GFX908: atomicrmw.start:
; GFX908-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
-; GFX908-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
+; GFX908-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]])
; GFX908-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
; GFX908-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
; GFX908-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] seq_cst seq_cst, align 8, !amdgpu.no.remote.memory [[META0]]
@@ -1264,13 +2387,13 @@ define double @test_atomicrmw_fmax_f64_global_system__amdgpu_no_remote_memory(pt
; GFX908: atomicrmw.end:
; GFX908-NEXT: ret double [[TMP6]]
;
-; GFX90A-LABEL: define double @test_atomicrmw_fmax_f64_global_system__amdgpu_no_remote_memory(
+; GFX90A-LABEL: define double @test_atomicrmw_fmin_f64_global_system__amdgpu_no_remote_memory(
; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; GFX90A-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]]
; GFX90A: atomicrmw.start:
; GFX90A-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[RES:%.*]], [[ATOMICRMW_START]] ]
-; GFX90A-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
+; GFX90A-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]])
; GFX90A-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
; GFX90A-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
; GFX90A-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] seq_cst seq_cst, align 8, !amdgpu.no.remote.memory [[META0]]
@@ -1281,18 +2404,18 @@ define double @test_atomicrmw_fmax_f64_global_system__amdgpu_no_remote_memory(pt
; GFX90A: atomicrmw.end:
; GFX90A-NEXT: ret double [[RES]]
;
-; GFX942-LABEL: define double @test_atomicrmw_fmax_f64_global_system__amdgpu_no_remote_memory(
+; GFX942-LABEL: define double @test_atomicrmw_fmin_f64_global_system__amdgpu_no_remote_memory(
; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
-; GFX942-NEXT: [[RES:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], double [[VALUE]] seq_cst, align 8, !amdgpu.no.remote.memory [[META0]]
+; GFX942-NEXT: [[RES:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], double [[VALUE]] seq_cst, align 8, !amdgpu.no.remote.memory [[META0]]
; GFX942-NEXT: ret double [[RES]]
;
-; GFX10-LABEL: define double @test_atomicrmw_fmax_f64_global_system__amdgpu_no_remote_memory(
+; GFX10-LABEL: define double @test_atomicrmw_fmin_f64_global_system__amdgpu_no_remote_memory(
; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; GFX10-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]]
; GFX10: atomicrmw.start:
; GFX10-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[RES:%.*]], [[ATOMICRMW_START]] ]
-; GFX10-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
+; GFX10-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]])
; GFX10-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
; GFX10-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
; GFX10-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] seq_cst seq_cst, align 8, !amdgpu.no.remote.memory [[META0]]
@@ -1303,13 +2426,13 @@ define double @test_atomicrmw_fmax_f64_global_system__amdgpu_no_remote_memory(pt
; GFX10: atomicrmw.end:
; GFX10-NEXT: ret double [[RES]]
;
-; GFX11-LABEL: define double @test_atomicrmw_fmax_f64_global_system__amdgpu_no_remote_memory(
+; GFX11-LABEL: define double @test_atomicrmw_fmin_f64_global_system__amdgpu_no_remote_memory(
; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; GFX11-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]]
; GFX11: atomicrmw.start:
; GFX11-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
-; GFX11-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
+; GFX11-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]])
; GFX11-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
; GFX11-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
; GFX11-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] seq_cst seq_cst, align 8, !amdgpu.no.remote.memory [[META0]]
@@ -1320,13 +2443,13 @@ define double @test_atomicrmw_fmax_f64_global_system__amdgpu_no_remote_memory(pt
; GFX11: atomicrmw.end:
; GFX11-NEXT: ret double [[TMP6]]
;
-; GFX12-LABEL: define double @test_atomicrmw_fmax_f64_global_system__amdgpu_no_remote_memory(
+; GFX12-LABEL: define double @test_atomicrmw_fmin_f64_global_system__amdgpu_no_remote_memory(
; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; GFX12-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
; GFX12-NEXT: br label [[ATOMICRMW_START:%.*]]
; GFX12: atomicrmw.start:
; GFX12-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
-; GFX12-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
+; GFX12-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]])
; GFX12-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
; GFX12-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
; GFX12-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] seq_cst seq_cst, align 8, !amdgpu.no.remote.memory [[META0]]
@@ -1337,18 +2460,18 @@ define double @test_atomicrmw_fmax_f64_global_system__amdgpu_no_remote_memory(pt
; GFX12: atomicrmw.end:
; GFX12-NEXT: ret double [[TMP6]]
;
- %res = atomicrmw fmax ptr addrspace(1) %ptr, double %value seq_cst, !amdgpu.no.remote.memory !0
+ %res = atomicrmw fmin ptr addrspace(1) %ptr, double %value seq_cst, !amdgpu.no.remote.memory !0
ret double %res
}
-define double @test_atomicrmw_fmax_f64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, double %value) {
-; GFX803-LABEL: define double @test_atomicrmw_fmax_f64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
+define double @test_atomicrmw_fmin_f64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, double %value) {
+; GFX803-LABEL: define double @test_atomicrmw_fmin_f64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; GFX803-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]]
; GFX803: atomicrmw.start:
; GFX803-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
-; GFX803-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
+; GFX803-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]])
; GFX803-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
; GFX803-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
; GFX803-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
@@ -1359,13 +2482,13 @@ define double @test_atomicrmw_fmax_f64_global_system__amdgpu_no_fine_grained_mem
; GFX803: atomicrmw.end:
; GFX803-NEXT: ret double [[TMP6]]
;
-; GFX906-LABEL: define double @test_atomicrmw_fmax_f64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
+; GFX906-LABEL: define double @test_atomicrmw_fmin_f64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; GFX906-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]]
; GFX906: atomicrmw.start:
; GFX906-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
-; GFX906-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
+; GFX906-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]])
; GFX906-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
; GFX906-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
; GFX906-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
@@ -1376,13 +2499,13 @@ define double @test_atomicrmw_fmax_f64_global_system__amdgpu_no_fine_grained_mem
; GFX906: atomicrmw.end:
; GFX906-NEXT: ret double [[TMP6]]
;
-; GFX908-LABEL: define double @test_atomicrmw_fmax_f64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
+; GFX908-LABEL: define double @test_atomicrmw_fmin_f64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; GFX908-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]]
; GFX908: atomicrmw.start:
; GFX908-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
-; GFX908-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
+; GFX908-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]])
; GFX908-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
; GFX908-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
; GFX908-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
@@ -1393,28 +2516,28 @@ define double @test_atomicrmw_fmax_f64_global_system__amdgpu_no_fine_grained_mem
; GFX908: atomicrmw.end:
; GFX908-NEXT: ret double [[TMP6]]
;
-; GFX90A-LABEL: define double @test_atomicrmw_fmax_f64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
+; GFX90A-LABEL: define double @test_atomicrmw_fmin_f64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
-; GFX90A-NEXT: [[RES:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], double [[VALUE]] seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
+; GFX90A-NEXT: [[RES:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], double [[VALUE]] seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
; GFX90A-NEXT: ret double [[RES]]
;
-; GFX942-LABEL: define double @test_atomicrmw_fmax_f64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
+; GFX942-LABEL: define double @test_atomicrmw_fmin_f64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
-; GFX942-NEXT: [[RES:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], double [[VALUE]] seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
+; GFX942-NEXT: [[RES:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], double [[VALUE]] seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
; GFX942-NEXT: ret double [[RES]]
;
-; GFX10-LABEL: define double @test_atomicrmw_fmax_f64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
+; GFX10-LABEL: define double @test_atomicrmw_fmin_f64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
-; GFX10-NEXT: [[RES:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], double [[VALUE]] seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
+; GFX10-NEXT: [[RES:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], double [[VALUE]] seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
; GFX10-NEXT: ret double [[RES]]
;
-; GFX11-LABEL: define double @test_atomicrmw_fmax_f64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
+; GFX11-LABEL: define double @test_atomicrmw_fmin_f64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; GFX11-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]]
; GFX11: atomicrmw.start:
; GFX11-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
-; GFX11-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
+; GFX11-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]])
; GFX11-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
; GFX11-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
; GFX11-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
@@ -1425,13 +2548,13 @@ define double @test_atomicrmw_fmax_f64_global_system__amdgpu_no_fine_grained_mem
; GFX11: atomicrmw.end:
; GFX11-NEXT: ret double [[TMP6]]
;
-; GFX12-LABEL: define double @test_atomicrmw_fmax_f64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
+; GFX12-LABEL: define double @test_atomicrmw_fmin_f64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; GFX12-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
; GFX12-NEXT: br label [[ATOMICRMW_START:%.*]]
; GFX12: atomicrmw.start:
; GFX12-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
-; GFX12-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
+; GFX12-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]])
; GFX12-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
; GFX12-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
; GFX12-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
@@ -1442,140 +2565,12 @@ define double @test_atomicrmw_fmax_f64_global_system__amdgpu_no_fine_grained_mem
; GFX12: atomicrmw.end:
; GFX12-NEXT: ret double [[TMP6]]
;
- %res = atomicrmw fmax ptr addrspace(1) %ptr, double %value seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
- ret double %res
-}
-
-define double @test_atomicrmw_fmax_f64_global_system__amdgpu_ignore_denormal_mode(ptr addrspace(1) %ptr, double %value) {
-; COMMON-LABEL: define double @test_atomicrmw_fmax_f64_global_system__amdgpu_ignore_denormal_mode(
-; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
-; COMMON-NEXT: [[TMP3:%.*]] = alloca double, align 8, addrspace(5)
-; COMMON-NEXT: [[TMP4:%.*]] = alloca double, align 8, addrspace(5)
-; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
-; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
-; COMMON: atomicrmw.start:
-; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
-; COMMON-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
-; COMMON-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP3]])
-; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP3]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP4]])
-; COMMON-NEXT: store double [[TMP2]], ptr addrspace(5) [[TMP4]], align 8
-; COMMON-NEXT: [[TMP10:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP5]], ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[TMP4]], i32 5, i32 5)
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP4]])
-; COMMON-NEXT: [[TMP7:%.*]] = load double, ptr addrspace(5) [[TMP3]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP3]])
-; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } poison, double [[TMP7]], 0
-; COMMON-NEXT: [[TMP9:%.*]] = insertvalue { double, i1 } [[TMP8]], i1 [[TMP10]], 1
-; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP9]], 1
-; COMMON-NEXT: [[TMP6]] = extractvalue { double, i1 } [[TMP9]], 0
-; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
-; COMMON: atomicrmw.end:
-; COMMON-NEXT: ret double [[TMP6]]
-;
- %res = atomicrmw fmax ptr addrspace(1) %ptr, double %value seq_cst, align 4, !amdgpu.ignore.denormal.mode !0
- ret double %res
-}
-
-define double @test_atomicrmw_fmax_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, double %value) {
-; COMMON-LABEL: define double @test_atomicrmw_fmax_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(
-; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
-; COMMON-NEXT: [[TMP3:%.*]] = alloca double, align 8, addrspace(5)
-; COMMON-NEXT: [[TMP4:%.*]] = alloca double, align 8, addrspace(5)
-; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
-; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
-; COMMON: atomicrmw.start:
-; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
-; COMMON-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
-; COMMON-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP3]])
-; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP3]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP4]])
-; COMMON-NEXT: store double [[TMP2]], ptr addrspace(5) [[TMP4]], align 8
-; COMMON-NEXT: [[TMP10:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP5]], ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[TMP4]], i32 5, i32 5)
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP4]])
-; COMMON-NEXT: [[TMP7:%.*]] = load double, ptr addrspace(5) [[TMP3]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP3]])
-; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } poison, double [[TMP7]], 0
-; COMMON-NEXT: [[TMP9:%.*]] = insertvalue { double, i1 } [[TMP8]], i1 [[TMP10]], 1
-; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP9]], 1
-; COMMON-NEXT: [[TMP6]] = extractvalue { double, i1 } [[TMP9]], 0
-; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
-; COMMON: atomicrmw.end:
-; COMMON-NEXT: ret double [[TMP6]]
-;
- %res = atomicrmw fmax ptr addrspace(1) %ptr, double %value seq_cst, align 4, !amdgpu.no.fine.grained.memory !0, !amdgpu.ignore.denormal.mode !0
- ret double %res
-}
-
-define double @test_atomicrmw_fmax_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, double %value) {
-; COMMON-LABEL: define double @test_atomicrmw_fmax_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(
-; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
-; COMMON-NEXT: [[TMP3:%.*]] = alloca double, align 8, addrspace(5)
-; COMMON-NEXT: [[TMP4:%.*]] = alloca double, align 8, addrspace(5)
-; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
-; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
-; COMMON: atomicrmw.start:
-; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
-; COMMON-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
-; COMMON-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP3]])
-; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP3]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP4]])
-; COMMON-NEXT: store double [[TMP2]], ptr addrspace(5) [[TMP4]], align 8
-; COMMON-NEXT: [[TMP10:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP5]], ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[TMP4]], i32 5, i32 5)
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP4]])
-; COMMON-NEXT: [[TMP7:%.*]] = load double, ptr addrspace(5) [[TMP3]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP3]])
-; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } poison, double [[TMP7]], 0
-; COMMON-NEXT: [[TMP9:%.*]] = insertvalue { double, i1 } [[TMP8]], i1 [[TMP10]], 1
-; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP9]], 1
-; COMMON-NEXT: [[TMP6]] = extractvalue { double, i1 } [[TMP9]], 0
-; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
-; COMMON: atomicrmw.end:
-; COMMON-NEXT: ret double [[TMP6]]
-;
- %res = atomicrmw fmax ptr addrspace(1) %ptr, double %value seq_cst, align 4, !amdgpu.no.remote.memory !0, !amdgpu.ignore.denormal.mode !0
- ret double %res
-}
-
-define double @test_atomicrmw_fmax_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, double %value) {
-; COMMON-LABEL: define double @test_atomicrmw_fmax_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
-; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
-; COMMON-NEXT: [[TMP3:%.*]] = alloca double, align 8, addrspace(5)
-; COMMON-NEXT: [[TMP4:%.*]] = alloca double, align 8, addrspace(5)
-; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
-; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
-; COMMON: atomicrmw.start:
-; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
-; COMMON-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
-; COMMON-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP3]])
-; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP3]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP4]])
-; COMMON-NEXT: store double [[TMP2]], ptr addrspace(5) [[TMP4]], align 8
-; COMMON-NEXT: [[TMP10:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP5]], ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[TMP4]], i32 5, i32 5)
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP4]])
-; COMMON-NEXT: [[TMP7:%.*]] = load double, ptr addrspace(5) [[TMP3]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP3]])
-; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } poison, double [[TMP7]], 0
-; COMMON-NEXT: [[TMP9:%.*]] = insertvalue { double, i1 } [[TMP8]], i1 [[TMP10]], 1
-; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP9]], 1
-; COMMON-NEXT: [[TMP6]] = extractvalue { double, i1 } [[TMP9]], 0
-; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
-; COMMON: atomicrmw.end:
-; COMMON-NEXT: ret double [[TMP6]]
-;
- %res = atomicrmw fmax ptr addrspace(1) %ptr, double %value seq_cst, align 4, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0, !amdgpu.ignore.denormal.mode !0
+ %res = atomicrmw fmin ptr addrspace(1) %ptr, double %value seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
ret double %res
}
-;---------------------------------------------------------------------
-; atomicrmw fmin
-;---------------------------------------------------------------------
-
-define double @test_atomicrmw_fmin_f64_global_system(ptr addrspace(1) %ptr, double %value) {
-; COMMON-LABEL: define double @test_atomicrmw_fmin_f64_global_system(
+define double @test_atomicrmw_fmin_f64_global_system__amdgpu_ignore_denormal_mode(ptr addrspace(1) %ptr, double %value) {
+; COMMON-LABEL: define double @test_atomicrmw_fmin_f64_global_system__amdgpu_ignore_denormal_mode(
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
@@ -1592,12 +2587,12 @@ define double @test_atomicrmw_fmin_f64_global_system(ptr addrspace(1) %ptr, doub
; COMMON: atomicrmw.end:
; COMMON-NEXT: ret double [[TMP6]]
;
- %res = atomicrmw fmin ptr addrspace(1) %ptr, double %value seq_cst
+ %res = atomicrmw fmin ptr addrspace(1) %ptr, double %value seq_cst, align 8, !amdgpu.ignore.denormal.mode !0
ret double %res
}
-define double @test_atomicrmw_fmin_f64_global_system__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, double %value) {
-; GFX803-LABEL: define double @test_atomicrmw_fmin_f64_global_system__amdgpu_no_fine_grained_memory(
+define double @test_atomicrmw_fmin_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, double %value) {
+; GFX803-LABEL: define double @test_atomicrmw_fmin_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(
; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; GFX803-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]]
@@ -1614,7 +2609,7 @@ define double @test_atomicrmw_fmin_f64_global_system__amdgpu_no_fine_grained_mem
; GFX803: atomicrmw.end:
; GFX803-NEXT: ret double [[TMP6]]
;
-; GFX906-LABEL: define double @test_atomicrmw_fmin_f64_global_system__amdgpu_no_fine_grained_memory(
+; GFX906-LABEL: define double @test_atomicrmw_fmin_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(
; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; GFX906-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]]
@@ -1631,7 +2626,7 @@ define double @test_atomicrmw_fmin_f64_global_system__amdgpu_no_fine_grained_mem
; GFX906: atomicrmw.end:
; GFX906-NEXT: ret double [[TMP6]]
;
-; GFX908-LABEL: define double @test_atomicrmw_fmin_f64_global_system__amdgpu_no_fine_grained_memory(
+; GFX908-LABEL: define double @test_atomicrmw_fmin_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(
; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; GFX908-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]]
@@ -1648,22 +2643,22 @@ define double @test_atomicrmw_fmin_f64_global_system__amdgpu_no_fine_grained_mem
; GFX908: atomicrmw.end:
; GFX908-NEXT: ret double [[TMP6]]
;
-; GFX90A-LABEL: define double @test_atomicrmw_fmin_f64_global_system__amdgpu_no_fine_grained_memory(
+; GFX90A-LABEL: define double @test_atomicrmw_fmin_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(
; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
-; GFX90A-NEXT: [[RES:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], double [[VALUE]] seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]]
+; GFX90A-NEXT: [[RES:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], double [[VALUE]] seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
; GFX90A-NEXT: ret double [[RES]]
;
-; GFX942-LABEL: define double @test_atomicrmw_fmin_f64_global_system__amdgpu_no_fine_grained_memory(
+; GFX942-LABEL: define double @test_atomicrmw_fmin_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(
; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
-; GFX942-NEXT: [[RES:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], double [[VALUE]] seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]]
+; GFX942-NEXT: [[RES:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], double [[VALUE]] seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
; GFX942-NEXT: ret double [[RES]]
;
-; GFX10-LABEL: define double @test_atomicrmw_fmin_f64_global_system__amdgpu_no_fine_grained_memory(
+; GFX10-LABEL: define double @test_atomicrmw_fmin_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(
; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
-; GFX10-NEXT: [[RES:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], double [[VALUE]] seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]]
+; GFX10-NEXT: [[RES:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], double [[VALUE]] seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
; GFX10-NEXT: ret double [[RES]]
;
-; GFX11-LABEL: define double @test_atomicrmw_fmin_f64_global_system__amdgpu_no_fine_grained_memory(
+; GFX11-LABEL: define double @test_atomicrmw_fmin_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(
; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; GFX11-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]]
@@ -1680,7 +2675,7 @@ define double @test_atomicrmw_fmin_f64_global_system__amdgpu_no_fine_grained_mem
; GFX11: atomicrmw.end:
; GFX11-NEXT: ret double [[TMP6]]
;
-; GFX12-LABEL: define double @test_atomicrmw_fmin_f64_global_system__amdgpu_no_fine_grained_memory(
+; GFX12-LABEL: define double @test_atomicrmw_fmin_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(
; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; GFX12-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
; GFX12-NEXT: br label [[ATOMICRMW_START:%.*]]
@@ -1697,12 +2692,12 @@ define double @test_atomicrmw_fmin_f64_global_system__amdgpu_no_fine_grained_mem
; GFX12: atomicrmw.end:
; GFX12-NEXT: ret double [[TMP6]]
;
- %res = atomicrmw fmin ptr addrspace(1) %ptr, double %value seq_cst, !amdgpu.no.fine.grained.memory !0
+ %res = atomicrmw fmin ptr addrspace(1) %ptr, double %value seq_cst, align 8, !amdgpu.no.fine.grained.memory !0, !amdgpu.ignore.denormal.mode !0
ret double %res
}
-define double @test_atomicrmw_fmin_f64_global_system__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, double %value) {
-; GFX803-LABEL: define double @test_atomicrmw_fmin_f64_global_system__amdgpu_no_remote_memory(
+define double @test_atomicrmw_fmin_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, double %value) {
+; GFX803-LABEL: define double @test_atomicrmw_fmin_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(
; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; GFX803-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]]
@@ -1719,7 +2714,7 @@ define double @test_atomicrmw_fmin_f64_global_system__amdgpu_no_remote_memory(pt
; GFX803: atomicrmw.end:
; GFX803-NEXT: ret double [[TMP6]]
;
-; GFX906-LABEL: define double @test_atomicrmw_fmin_f64_global_system__amdgpu_no_remote_memory(
+; GFX906-LABEL: define double @test_atomicrmw_fmin_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(
; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; GFX906-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]]
@@ -1736,7 +2731,7 @@ define double @test_atomicrmw_fmin_f64_global_system__amdgpu_no_remote_memory(pt
; GFX906: atomicrmw.end:
; GFX906-NEXT: ret double [[TMP6]]
;
-; GFX908-LABEL: define double @test_atomicrmw_fmin_f64_global_system__amdgpu_no_remote_memory(
+; GFX908-LABEL: define double @test_atomicrmw_fmin_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(
; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; GFX908-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]]
@@ -1753,46 +2748,46 @@ define double @test_atomicrmw_fmin_f64_global_system__amdgpu_no_remote_memory(pt
; GFX908: atomicrmw.end:
; GFX908-NEXT: ret double [[TMP6]]
;
-; GFX90A-LABEL: define double @test_atomicrmw_fmin_f64_global_system__amdgpu_no_remote_memory(
+; GFX90A-LABEL: define double @test_atomicrmw_fmin_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(
; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; GFX90A-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]]
; GFX90A: atomicrmw.start:
-; GFX90A-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[RES:%.*]], [[ATOMICRMW_START]] ]
+; GFX90A-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
; GFX90A-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]])
; GFX90A-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
; GFX90A-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
; GFX90A-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] seq_cst seq_cst, align 8, !amdgpu.no.remote.memory [[META0]]
; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
-; GFX90A-NEXT: [[RES]] = bitcast i64 [[NEWLOADED]] to double
+; GFX90A-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX90A: atomicrmw.end:
-; GFX90A-NEXT: ret double [[RES]]
+; GFX90A-NEXT: ret double [[TMP6]]
;
-; GFX942-LABEL: define double @test_atomicrmw_fmin_f64_global_system__amdgpu_no_remote_memory(
+; GFX942-LABEL: define double @test_atomicrmw_fmin_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(
; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
-; GFX942-NEXT: [[RES:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], double [[VALUE]] seq_cst, align 8, !amdgpu.no.remote.memory [[META0]]
+; GFX942-NEXT: [[RES:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], double [[VALUE]] seq_cst, align 8, !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
; GFX942-NEXT: ret double [[RES]]
;
-; GFX10-LABEL: define double @test_atomicrmw_fmin_f64_global_system__amdgpu_no_remote_memory(
+; GFX10-LABEL: define double @test_atomicrmw_fmin_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(
; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; GFX10-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]]
; GFX10: atomicrmw.start:
-; GFX10-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[RES:%.*]], [[ATOMICRMW_START]] ]
+; GFX10-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
; GFX10-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]])
; GFX10-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
; GFX10-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
; GFX10-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] seq_cst seq_cst, align 8, !amdgpu.no.remote.memory [[META0]]
; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
-; GFX10-NEXT: [[RES]] = bitcast i64 [[NEWLOADED]] to double
+; GFX10-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX10: atomicrmw.end:
-; GFX10-NEXT: ret double [[RES]]
+; GFX10-NEXT: ret double [[TMP6]]
;
-; GFX11-LABEL: define double @test_atomicrmw_fmin_f64_global_system__amdgpu_no_remote_memory(
+; GFX11-LABEL: define double @test_atomicrmw_fmin_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(
; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; GFX11-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]]
@@ -1809,7 +2804,7 @@ define double @test_atomicrmw_fmin_f64_global_system__amdgpu_no_remote_memory(pt
; GFX11: atomicrmw.end:
; GFX11-NEXT: ret double [[TMP6]]
;
-; GFX12-LABEL: define double @test_atomicrmw_fmin_f64_global_system__amdgpu_no_remote_memory(
+; GFX12-LABEL: define double @test_atomicrmw_fmin_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(
; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; GFX12-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
; GFX12-NEXT: br label [[ATOMICRMW_START:%.*]]
@@ -1826,12 +2821,12 @@ define double @test_atomicrmw_fmin_f64_global_system__amdgpu_no_remote_memory(pt
; GFX12: atomicrmw.end:
; GFX12-NEXT: ret double [[TMP6]]
;
- %res = atomicrmw fmin ptr addrspace(1) %ptr, double %value seq_cst, !amdgpu.no.remote.memory !0
+ %res = atomicrmw fmin ptr addrspace(1) %ptr, double %value seq_cst, align 8, !amdgpu.no.remote.memory !0, !amdgpu.ignore.denormal.mode !0
ret double %res
}
-define double @test_atomicrmw_fmin_f64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, double %value) {
-; GFX803-LABEL: define double @test_atomicrmw_fmin_f64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
+define double @test_atomicrmw_fmin_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, double %value) {
+; GFX803-LABEL: define double @test_atomicrmw_fmin_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; GFX803-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]]
@@ -1848,7 +2843,7 @@ define double @test_atomicrmw_fmin_f64_global_system__amdgpu_no_fine_grained_mem
; GFX803: atomicrmw.end:
; GFX803-NEXT: ret double [[TMP6]]
;
-; GFX906-LABEL: define double @test_atomicrmw_fmin_f64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
+; GFX906-LABEL: define double @test_atomicrmw_fmin_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; GFX906-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]]
@@ -1865,7 +2860,7 @@ define double @test_atomicrmw_fmin_f64_global_system__amdgpu_no_fine_grained_mem
; GFX906: atomicrmw.end:
; GFX906-NEXT: ret double [[TMP6]]
;
-; GFX908-LABEL: define double @test_atomicrmw_fmin_f64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
+; GFX908-LABEL: define double @test_atomicrmw_fmin_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; GFX908-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]]
@@ -1882,22 +2877,22 @@ define double @test_atomicrmw_fmin_f64_global_system__amdgpu_no_fine_grained_mem
; GFX908: atomicrmw.end:
; GFX908-NEXT: ret double [[TMP6]]
;
-; GFX90A-LABEL: define double @test_atomicrmw_fmin_f64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
+; GFX90A-LABEL: define double @test_atomicrmw_fmin_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
-; GFX90A-NEXT: [[RES:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], double [[VALUE]] seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
+; GFX90A-NEXT: [[RES:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], double [[VALUE]] seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
; GFX90A-NEXT: ret double [[RES]]
;
-; GFX942-LABEL: define double @test_atomicrmw_fmin_f64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
+; GFX942-LABEL: define double @test_atomicrmw_fmin_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
-; GFX942-NEXT: [[RES:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], double [[VALUE]] seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
+; GFX942-NEXT: [[RES:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], double [[VALUE]] seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
; GFX942-NEXT: ret double [[RES]]
;
-; GFX10-LABEL: define double @test_atomicrmw_fmin_f64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
+; GFX10-LABEL: define double @test_atomicrmw_fmin_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
-; GFX10-NEXT: [[RES:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], double [[VALUE]] seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
+; GFX10-NEXT: [[RES:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], double [[VALUE]] seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
; GFX10-NEXT: ret double [[RES]]
;
-; GFX11-LABEL: define double @test_atomicrmw_fmin_f64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
+; GFX11-LABEL: define double @test_atomicrmw_fmin_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; GFX11-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]]
@@ -1914,7 +2909,7 @@ define double @test_atomicrmw_fmin_f64_global_system__amdgpu_no_fine_grained_mem
; GFX11: atomicrmw.end:
; GFX11-NEXT: ret double [[TMP6]]
;
-; GFX12-LABEL: define double @test_atomicrmw_fmin_f64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
+; GFX12-LABEL: define double @test_atomicrmw_fmin_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; GFX12-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
; GFX12-NEXT: br label [[ATOMICRMW_START:%.*]]
@@ -1931,131 +2926,7 @@ define double @test_atomicrmw_fmin_f64_global_system__amdgpu_no_fine_grained_mem
; GFX12: atomicrmw.end:
; GFX12-NEXT: ret double [[TMP6]]
;
- %res = atomicrmw fmin ptr addrspace(1) %ptr, double %value seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
- ret double %res
-}
-
-define double @test_atomicrmw_fmin_f64_global_system__amdgpu_ignore_denormal_mode(ptr addrspace(1) %ptr, double %value) {
-; COMMON-LABEL: define double @test_atomicrmw_fmin_f64_global_system__amdgpu_ignore_denormal_mode(
-; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
-; COMMON-NEXT: [[TMP3:%.*]] = alloca double, align 8, addrspace(5)
-; COMMON-NEXT: [[TMP4:%.*]] = alloca double, align 8, addrspace(5)
-; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
-; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
-; COMMON: atomicrmw.start:
-; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
-; COMMON-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]])
-; COMMON-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP3]])
-; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP3]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP4]])
-; COMMON-NEXT: store double [[TMP2]], ptr addrspace(5) [[TMP4]], align 8
-; COMMON-NEXT: [[TMP10:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP5]], ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[TMP4]], i32 5, i32 5)
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP4]])
-; COMMON-NEXT: [[TMP7:%.*]] = load double, ptr addrspace(5) [[TMP3]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP3]])
-; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } poison, double [[TMP7]], 0
-; COMMON-NEXT: [[TMP9:%.*]] = insertvalue { double, i1 } [[TMP8]], i1 [[TMP10]], 1
-; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP9]], 1
-; COMMON-NEXT: [[TMP6]] = extractvalue { double, i1 } [[TMP9]], 0
-; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
-; COMMON: atomicrmw.end:
-; COMMON-NEXT: ret double [[TMP6]]
-;
- %res = atomicrmw fmin ptr addrspace(1) %ptr, double %value seq_cst, align 4, !amdgpu.ignore.denormal.mode !0
- ret double %res
-}
-
-define double @test_atomicrmw_fmin_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, double %value) {
-; COMMON-LABEL: define double @test_atomicrmw_fmin_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(
-; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
-; COMMON-NEXT: [[TMP3:%.*]] = alloca double, align 8, addrspace(5)
-; COMMON-NEXT: [[TMP4:%.*]] = alloca double, align 8, addrspace(5)
-; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
-; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
-; COMMON: atomicrmw.start:
-; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
-; COMMON-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]])
-; COMMON-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP3]])
-; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP3]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP4]])
-; COMMON-NEXT: store double [[TMP2]], ptr addrspace(5) [[TMP4]], align 8
-; COMMON-NEXT: [[TMP10:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP5]], ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[TMP4]], i32 5, i32 5)
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP4]])
-; COMMON-NEXT: [[TMP7:%.*]] = load double, ptr addrspace(5) [[TMP3]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP3]])
-; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } poison, double [[TMP7]], 0
-; COMMON-NEXT: [[TMP9:%.*]] = insertvalue { double, i1 } [[TMP8]], i1 [[TMP10]], 1
-; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP9]], 1
-; COMMON-NEXT: [[TMP6]] = extractvalue { double, i1 } [[TMP9]], 0
-; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
-; COMMON: atomicrmw.end:
-; COMMON-NEXT: ret double [[TMP6]]
-;
- %res = atomicrmw fmin ptr addrspace(1) %ptr, double %value seq_cst, align 4, !amdgpu.no.fine.grained.memory !0, !amdgpu.ignore.denormal.mode !0
- ret double %res
-}
-
-define double @test_atomicrmw_fmin_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, double %value) {
-; COMMON-LABEL: define double @test_atomicrmw_fmin_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(
-; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
-; COMMON-NEXT: [[TMP3:%.*]] = alloca double, align 8, addrspace(5)
-; COMMON-NEXT: [[TMP4:%.*]] = alloca double, align 8, addrspace(5)
-; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
-; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
-; COMMON: atomicrmw.start:
-; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
-; COMMON-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]])
-; COMMON-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP3]])
-; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP3]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP4]])
-; COMMON-NEXT: store double [[TMP2]], ptr addrspace(5) [[TMP4]], align 8
-; COMMON-NEXT: [[TMP10:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP5]], ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[TMP4]], i32 5, i32 5)
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP4]])
-; COMMON-NEXT: [[TMP7:%.*]] = load double, ptr addrspace(5) [[TMP3]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP3]])
-; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } poison, double [[TMP7]], 0
-; COMMON-NEXT: [[TMP9:%.*]] = insertvalue { double, i1 } [[TMP8]], i1 [[TMP10]], 1
-; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP9]], 1
-; COMMON-NEXT: [[TMP6]] = extractvalue { double, i1 } [[TMP9]], 0
-; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
-; COMMON: atomicrmw.end:
-; COMMON-NEXT: ret double [[TMP6]]
-;
- %res = atomicrmw fmin ptr addrspace(1) %ptr, double %value seq_cst, align 4, !amdgpu.no.remote.memory !0, !amdgpu.ignore.denormal.mode !0
- ret double %res
-}
-
-define double @test_atomicrmw_fmin_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, double %value) {
-; COMMON-LABEL: define double @test_atomicrmw_fmin_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
-; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
-; COMMON-NEXT: [[TMP3:%.*]] = alloca double, align 8, addrspace(5)
-; COMMON-NEXT: [[TMP4:%.*]] = alloca double, align 8, addrspace(5)
-; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
-; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
-; COMMON: atomicrmw.start:
-; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
-; COMMON-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]])
-; COMMON-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP3]])
-; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP3]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP4]])
-; COMMON-NEXT: store double [[TMP2]], ptr addrspace(5) [[TMP4]], align 8
-; COMMON-NEXT: [[TMP10:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP5]], ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[TMP4]], i32 5, i32 5)
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP4]])
-; COMMON-NEXT: [[TMP7:%.*]] = load double, ptr addrspace(5) [[TMP3]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP3]])
-; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } poison, double [[TMP7]], 0
-; COMMON-NEXT: [[TMP9:%.*]] = insertvalue { double, i1 } [[TMP8]], i1 [[TMP10]], 1
-; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP9]], 1
-; COMMON-NEXT: [[TMP6]] = extractvalue { double, i1 } [[TMP9]], 0
-; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
-; COMMON: atomicrmw.end:
-; COMMON-NEXT: ret double [[TMP6]]
-;
- %res = atomicrmw fmin ptr addrspace(1) %ptr, double %value seq_cst, align 4, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0, !amdgpu.ignore.denormal.mode !0
+ %res = atomicrmw fmin ptr addrspace(1) %ptr, double %value seq_cst, align 8, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0, !amdgpu.ignore.denormal.mode !0
ret double %res
}
More information about the llvm-commits
mailing list