[llvm-branch-commits] [llvm] AtomicExpand: Stop using report_fatal_error (PR #147300)
Matt Arsenault via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Mon Jul 7 06:40:27 PDT 2025
https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/147300
Emit a context error and delete the instruction instead of calling
report_fatal_error. This allows removing the AMDGPU hack where the
atomic libcalls are falsely left marked as available. NVPTX later
copied the same hack, so remove it there too.
For now this just emits the generic error, which is not ideal: the
message lacks any useful context information (despite being passed the
instruction). It is also confusing in the failed atomicrmw case, since
the failure is reported at the intermediate cmpxchg that failed to
expand rather than at the original atomicrmw.
>From 4d46f60b03774704354e98ccea89d4c622c7d300 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Mon, 7 Jul 2025 21:25:22 +0900
Subject: [PATCH] AtomicExpand: Stop using report_fatal_error
Emit a context error and delete the instruction instead of calling
report_fatal_error. This allows removing the AMDGPU hack where the
atomic libcalls are falsely left marked as available. NVPTX later
copied the same hack, so remove it there too.
For now this just emits the generic error, which is not ideal: the
message lacks any useful context information (despite being passed the
instruction). It is also confusing in the failed atomicrmw case, since
the failure is reported at the intermediate cmpxchg that failed to
expand rather than at the original atomicrmw.
---
llvm/lib/CodeGen/AtomicExpandPass.cpp | 17 +-
llvm/lib/IR/RuntimeLibcalls.cpp | 6 +-
llvm/test/CodeGen/AMDGPU/atomic-oversize.ll | 10 -
.../CodeGen/AMDGPU/unsupported-atomics.ll | 55 ++
.../CodeGen/NVPTX/atomicrmw-expand.err.ll | 27 +
llvm/test/CodeGen/NVPTX/atomicrmw-expand.ll | 28 -
.../AMDGPU/expand-atomic-fp128.ll | 122 ---
.../AtomicExpand/AMDGPU/expand-atomic-i128.ll | 201 -----
.../AtomicExpand/AMDGPU/expand-atomic-mmra.ll | 25 -
.../AMDGPU/expand-atomicrmw-fp-vector.ll | 752 +-----------------
.../AtomicExpand/AMDGPU/unaligned-atomic.ll | 22 +-
.../TableGen/Basic/RuntimeLibcallsEmitter.cpp | 19 +-
12 files changed, 106 insertions(+), 1178 deletions(-)
delete mode 100644 llvm/test/CodeGen/AMDGPU/atomic-oversize.ll
create mode 100644 llvm/test/CodeGen/AMDGPU/unsupported-atomics.ll
create mode 100644 llvm/test/CodeGen/NVPTX/atomicrmw-expand.err.ll
delete mode 100644 llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-fp128.ll
delete mode 100644 llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-i128.ll
diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp
index 044f0732779f3..44295b44482e7 100644
--- a/llvm/lib/CodeGen/AtomicExpandPass.cpp
+++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp
@@ -65,6 +65,17 @@ class AtomicExpandImpl {
const DataLayout *DL = nullptr;
private:
+ void handleFailure(Instruction &FailedInst, const Twine &Msg) const {
+ LLVMContext &Ctx = FailedInst.getContext();
+
+ // TODO: Do not use generic error type
+ Ctx.emitError(&FailedInst, Msg);
+
+ if (!FailedInst.getType()->isVoidTy())
+ FailedInst.replaceAllUsesWith(PoisonValue::get(FailedInst.getType()));
+ FailedInst.eraseFromParent();
+ }
+
bool bracketInstWithFences(Instruction *I, AtomicOrdering Order);
IntegerType *getCorrespondingIntegerType(Type *T, const DataLayout &DL);
LoadInst *convertAtomicLoadToIntegerType(LoadInst *LI);
@@ -1744,7 +1755,7 @@ void AtomicExpandImpl::expandAtomicLoadToLibcall(LoadInst *I) {
I, Size, I->getAlign(), I->getPointerOperand(), nullptr, nullptr,
I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
if (!expanded)
- report_fatal_error("expandAtomicOpToLibcall shouldn't fail for Load");
+ handleFailure(*I, "unsupported atomic load");
}
void AtomicExpandImpl::expandAtomicStoreToLibcall(StoreInst *I) {
@@ -1757,7 +1768,7 @@ void AtomicExpandImpl::expandAtomicStoreToLibcall(StoreInst *I) {
I, Size, I->getAlign(), I->getPointerOperand(), I->getValueOperand(),
nullptr, I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
if (!expanded)
- report_fatal_error("expandAtomicOpToLibcall shouldn't fail for Store");
+ handleFailure(*I, "unsupported atomic store");
}
void AtomicExpandImpl::expandAtomicCASToLibcall(AtomicCmpXchgInst *I) {
@@ -1772,7 +1783,7 @@ void AtomicExpandImpl::expandAtomicCASToLibcall(AtomicCmpXchgInst *I) {
I->getCompareOperand(), I->getSuccessOrdering(), I->getFailureOrdering(),
Libcalls);
if (!expanded)
- report_fatal_error("expandAtomicOpToLibcall shouldn't fail for CAS");
+ handleFailure(*I, "unsupported cmpxchg");
}
static ArrayRef<RTLIB::Libcall> GetRMWLibcall(AtomicRMWInst::BinOp Op) {
diff --git a/llvm/lib/IR/RuntimeLibcalls.cpp b/llvm/lib/IR/RuntimeLibcalls.cpp
index 712f1a48d0b7b..b21504037be8f 100644
--- a/llvm/lib/IR/RuntimeLibcalls.cpp
+++ b/llvm/lib/IR/RuntimeLibcalls.cpp
@@ -455,10 +455,8 @@ void RuntimeLibcallsInfo::initLibcalls(const Triple &TT,
// Disable most libcalls on AMDGPU and NVPTX.
if (TT.isAMDGPU() || TT.isNVPTX()) {
- for (RTLIB::Libcall LC : RTLIB::libcalls()) {
- if (!isAtomicLibCall(LC))
- setLibcallImpl(LC, RTLIB::Unsupported);
- }
+ for (RTLIB::Libcall LC : RTLIB::libcalls())
+ setLibcallImpl(LC, RTLIB::Unsupported);
}
if (TT.isOSMSVCRT()) {
diff --git a/llvm/test/CodeGen/AMDGPU/atomic-oversize.ll b/llvm/test/CodeGen/AMDGPU/atomic-oversize.ll
deleted file mode 100644
index f62a93f523365..0000000000000
--- a/llvm/test/CodeGen/AMDGPU/atomic-oversize.ll
+++ /dev/null
@@ -1,10 +0,0 @@
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck %s
-
-define void @test(ptr %a) nounwind {
-; CHECK-LABEL: test:
-; CHECK: __atomic_load_16
-; CHECK: __atomic_store_16
- %1 = load atomic i128, ptr %a seq_cst, align 16
- store atomic i128 %1, ptr %a seq_cst, align 16
- ret void
-}
diff --git a/llvm/test/CodeGen/AMDGPU/unsupported-atomics.ll b/llvm/test/CodeGen/AMDGPU/unsupported-atomics.ll
new file mode 100644
index 0000000000000..f569c15c27e5d
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/unsupported-atomics.ll
@@ -0,0 +1,55 @@
+; RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=null %s 2>&1 | FileCheck %s
+
+; CHECK: error: unsupported atomic load
+define i128 @test_load_i128(ptr %p) nounwind {
+ %ld = load atomic i128, ptr %p seq_cst, align 16
+ ret i128 %ld
+}
+
+; CHECK: error: unsupported atomic store
+define void @test_store_i128(ptr %p, i128 %val) nounwind {
+ store atomic i128 %val, ptr %p seq_cst, align 16
+ ret void
+}
+
+; CHECK: error: unsupported cmpxchg
+define { i128, i1 } @cmpxchg_i128(ptr %p, i128 %cmp, i128 %val) nounwind {
+ %ret = cmpxchg ptr %p, i128 %cmp, i128 %val seq_cst monotonic
+ ret { i128, i1 } %ret
+}
+
+; CHECK: error: unsupported cmpxchg
+define i128 @atomicrmw_xchg_i128(ptr %p, i128 %val) nounwind {
+ %ret = atomicrmw xchg ptr %p, i128 %val seq_cst
+ ret i128 %ret
+}
+
+; CHECK: error: unsupported cmpxchg
+define i64 @atomicrmw_xchg_i64_align4(ptr %p, i64 %val) nounwind {
+ %ret = atomicrmw xchg ptr %p, i64 %val seq_cst, align 4
+ ret i64 %ret
+}
+
+; CHECK: error: unsupported cmpxchg
+define double @atomicrmw_fadd_f64_align4(ptr %p, double %val) nounwind {
+ %ret = atomicrmw fadd ptr %p, double %val seq_cst, align 4
+ ret double %ret
+}
+
+; CHECK: error: unsupported cmpxchg
+define fp128 @atomicrmw_fadd_f128_align4(ptr %p, fp128 %val) nounwind {
+ %ret = atomicrmw fadd ptr %p, fp128 %val seq_cst, align 4
+ ret fp128 %ret
+}
+
+; CHECK: error: unsupported cmpxchg
+define fp128 @atomicrmw_fadd_f128(ptr %p, fp128 %val) nounwind {
+ %ret = atomicrmw fadd ptr %p, fp128 %val seq_cst, align 16
+ ret fp128 %ret
+}
+
+; CHECK: error: unsupported cmpxchg
+define <2 x half> @test_atomicrmw_fadd_v2f16_global_agent_align2(ptr addrspace(1) %ptr, <2 x half> %value) {
+ %res = atomicrmw fadd ptr addrspace(1) %ptr, <2 x half> %value syncscope("agent") seq_cst, align 2
+ ret <2 x half> %res
+}
diff --git a/llvm/test/CodeGen/NVPTX/atomicrmw-expand.err.ll b/llvm/test/CodeGen/NVPTX/atomicrmw-expand.err.ll
new file mode 100644
index 0000000000000..b19f6d56b9a91
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/atomicrmw-expand.err.ll
@@ -0,0 +1,27 @@
+; RUN: not llc -mtriple=nvptx64 -mcpu=sm_30 -filetype=null %s 2>&1 | FileCheck %s
+
+; CHECK: error: unsupported cmpxchg
+; CHECK: error: unsupported cmpxchg
+; CHECK: error: unsupported cmpxchg
+; CHECK: error: unsupported cmpxchg
+define void @bitwise_i128(ptr %0, i128 %1) {
+entry:
+ %2 = atomicrmw and ptr %0, i128 %1 monotonic, align 16
+ %3 = atomicrmw or ptr %0, i128 %1 monotonic, align 16
+ %4 = atomicrmw xor ptr %0, i128 %1 monotonic, align 16
+ %5 = atomicrmw xchg ptr %0, i128 %1 monotonic, align 16
+ ret void
+}
+
+; CHECK: error: unsupported cmpxchg
+; CHECK: error: unsupported cmpxchg
+; CHECK: error: unsupported cmpxchg
+; CHECK: error: unsupported cmpxchg
+define void @minmax_i128(ptr %0, i128 %1) {
+entry:
+ %2 = atomicrmw min ptr %0, i128 %1 monotonic, align 16
+ %3 = atomicrmw max ptr %0, i128 %1 monotonic, align 16
+ %4 = atomicrmw umin ptr %0, i128 %1 monotonic, align 16
+ %5 = atomicrmw umax ptr %0, i128 %1 monotonic, align 16
+ ret void
+}
diff --git a/llvm/test/CodeGen/NVPTX/atomicrmw-expand.ll b/llvm/test/CodeGen/NVPTX/atomicrmw-expand.ll
index 2ce24ca78447e..c7a0c60ae1f4d 100644
--- a/llvm/test/CodeGen/NVPTX/atomicrmw-expand.ll
+++ b/llvm/test/CodeGen/NVPTX/atomicrmw-expand.ll
@@ -139,31 +139,3 @@ entry:
%5 = atomicrmw umax ptr %0, i16 %1 monotonic, align 2
ret void
}
-
-; CHECK-LABEL: bitwise_i128
-define void @bitwise_i128(ptr %0, i128 %1) {
-entry:
- ; ALL: __atomic_fetch_and_16
- %2 = atomicrmw and ptr %0, i128 %1 monotonic, align 16
- ; ALL: __atomic_fetch_or_16
- %3 = atomicrmw or ptr %0, i128 %1 monotonic, align 16
- ; ALL: __atomic_fetch_xor_16
- %4 = atomicrmw xor ptr %0, i128 %1 monotonic, align 16
- ; ALL: __atomic_exchange_16
- %5 = atomicrmw xchg ptr %0, i128 %1 monotonic, align 16
- ret void
-}
-
-; CHECK-LABEL: minmax_i128
-define void @minmax_i128(ptr %0, i128 %1) {
-entry:
- ; ALL: __atomic_compare_exchange_16
- %2 = atomicrmw min ptr %0, i128 %1 monotonic, align 16
- ; ALL: __atomic_compare_exchange_16
- %3 = atomicrmw max ptr %0, i128 %1 monotonic, align 16
- ; ALL: __atomic_compare_exchange_16
- %4 = atomicrmw umin ptr %0, i128 %1 monotonic, align 16
- ; ALL: __atomic_compare_exchange_16
- %5 = atomicrmw umax ptr %0, i128 %1 monotonic, align 16
- ret void
-}
diff --git a/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-fp128.ll b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-fp128.ll
deleted file mode 100644
index e70ab325dd8f3..0000000000000
--- a/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-fp128.ll
+++ /dev/null
@@ -1,122 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=atomic-expand %s | FileCheck %s
-
-define fp128 @test_atomicrmw_xchg_fp128_global_agent(ptr addrspace(1) %ptr, fp128 %value) {
-; CHECK-LABEL: @test_atomicrmw_xchg_fp128_global_agent(
-; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[PTR:%.*]] to ptr
-; CHECK-NEXT: [[TMP2:%.*]] = bitcast fp128 [[VALUE:%.*]] to i128
-; CHECK-NEXT: [[TMP3:%.*]] = call i128 @__atomic_exchange_16(ptr [[TMP1]], i128 [[TMP2]], i32 5)
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast i128 [[TMP3]] to fp128
-; CHECK-NEXT: ret fp128 [[TMP4]]
-;
- %res = atomicrmw xchg ptr addrspace(1) %ptr, fp128 %value syncscope("agent") seq_cst
- ret fp128 %res
-}
-
-define fp128 @test_atomicrmw_fadd_fp128_global_agent(ptr addrspace(1) %ptr, fp128 %value) {
-; CHECK-LABEL: @test_atomicrmw_fadd_fp128_global_agent(
-; CHECK-NEXT: [[TMP1:%.*]] = alloca fp128, align 8, addrspace(5)
-; CHECK-NEXT: [[TMP2:%.*]] = load fp128, ptr addrspace(1) [[PTR:%.*]], align 16
-; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
-; CHECK: atomicrmw.start:
-; CHECK-NEXT: [[LOADED:%.*]] = phi fp128 [ [[TMP2]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
-; CHECK-NEXT: [[NEW:%.*]] = fadd fp128 [[LOADED]], [[VALUE:%.*]]
-; CHECK-NEXT: [[TMP3:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 16, ptr addrspace(5) [[TMP1]])
-; CHECK-NEXT: store fp128 [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast fp128 [[NEW]] to i128
-; CHECK-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange_16(ptr [[TMP3]], ptr addrspace(5) [[TMP1]], i128 [[TMP4]], i32 5, i32 5)
-; CHECK-NEXT: [[TMP6:%.*]] = load fp128, ptr addrspace(5) [[TMP1]], align 8
-; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 16, ptr addrspace(5) [[TMP1]])
-; CHECK-NEXT: [[TMP7:%.*]] = insertvalue { fp128, i1 } poison, fp128 [[TMP6]], 0
-; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { fp128, i1 } [[TMP7]], i1 [[TMP5]], 1
-; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { fp128, i1 } [[TMP8]], 1
-; CHECK-NEXT: [[NEWLOADED]] = extractvalue { fp128, i1 } [[TMP8]], 0
-; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
-; CHECK: atomicrmw.end:
-; CHECK-NEXT: ret fp128 [[NEWLOADED]]
-;
- %res = atomicrmw fadd ptr addrspace(1) %ptr, fp128 %value syncscope("agent") seq_cst
- ret fp128 %res
-}
-
-define fp128 @test_atomicrmw_fsub_fp128_global_agent(ptr addrspace(1) %ptr, fp128 %value) {
-; CHECK-LABEL: @test_atomicrmw_fsub_fp128_global_agent(
-; CHECK-NEXT: [[TMP1:%.*]] = alloca fp128, align 8, addrspace(5)
-; CHECK-NEXT: [[TMP2:%.*]] = load fp128, ptr addrspace(1) [[PTR:%.*]], align 16
-; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
-; CHECK: atomicrmw.start:
-; CHECK-NEXT: [[LOADED:%.*]] = phi fp128 [ [[TMP2]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
-; CHECK-NEXT: [[NEW:%.*]] = fsub fp128 [[LOADED]], [[VALUE:%.*]]
-; CHECK-NEXT: [[TMP3:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 16, ptr addrspace(5) [[TMP1]])
-; CHECK-NEXT: store fp128 [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast fp128 [[NEW]] to i128
-; CHECK-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange_16(ptr [[TMP3]], ptr addrspace(5) [[TMP1]], i128 [[TMP4]], i32 5, i32 5)
-; CHECK-NEXT: [[TMP6:%.*]] = load fp128, ptr addrspace(5) [[TMP1]], align 8
-; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 16, ptr addrspace(5) [[TMP1]])
-; CHECK-NEXT: [[TMP7:%.*]] = insertvalue { fp128, i1 } poison, fp128 [[TMP6]], 0
-; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { fp128, i1 } [[TMP7]], i1 [[TMP5]], 1
-; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { fp128, i1 } [[TMP8]], 1
-; CHECK-NEXT: [[NEWLOADED]] = extractvalue { fp128, i1 } [[TMP8]], 0
-; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
-; CHECK: atomicrmw.end:
-; CHECK-NEXT: ret fp128 [[NEWLOADED]]
-;
- %res = atomicrmw fsub ptr addrspace(1) %ptr, fp128 %value syncscope("agent") seq_cst
- ret fp128 %res
-}
-
-define fp128 @test_atomicrmw_fmin_fp128_global_agent(ptr addrspace(1) %ptr, fp128 %value) {
-; CHECK-LABEL: @test_atomicrmw_fmin_fp128_global_agent(
-; CHECK-NEXT: [[TMP1:%.*]] = alloca fp128, align 8, addrspace(5)
-; CHECK-NEXT: [[TMP2:%.*]] = load fp128, ptr addrspace(1) [[PTR:%.*]], align 16
-; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
-; CHECK: atomicrmw.start:
-; CHECK-NEXT: [[LOADED:%.*]] = phi fp128 [ [[TMP2]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
-; CHECK-NEXT: [[TMP3:%.*]] = call fp128 @llvm.minnum.f128(fp128 [[LOADED]], fp128 [[VALUE:%.*]])
-; CHECK-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 16, ptr addrspace(5) [[TMP1]])
-; CHECK-NEXT: store fp128 [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast fp128 [[TMP3]] to i128
-; CHECK-NEXT: [[TMP6:%.*]] = call zeroext i1 @__atomic_compare_exchange_16(ptr [[TMP4]], ptr addrspace(5) [[TMP1]], i128 [[TMP5]], i32 5, i32 5)
-; CHECK-NEXT: [[TMP7:%.*]] = load fp128, ptr addrspace(5) [[TMP1]], align 8
-; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 16, ptr addrspace(5) [[TMP1]])
-; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { fp128, i1 } poison, fp128 [[TMP7]], 0
-; CHECK-NEXT: [[TMP9:%.*]] = insertvalue { fp128, i1 } [[TMP8]], i1 [[TMP6]], 1
-; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { fp128, i1 } [[TMP9]], 1
-; CHECK-NEXT: [[NEWLOADED]] = extractvalue { fp128, i1 } [[TMP9]], 0
-; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
-; CHECK: atomicrmw.end:
-; CHECK-NEXT: ret fp128 [[NEWLOADED]]
-;
- %res = atomicrmw fmin ptr addrspace(1) %ptr, fp128 %value syncscope("agent") seq_cst
- ret fp128 %res
-}
-
-define fp128 @test_atomicrmw_fmax_fp128_global_agent(ptr addrspace(1) %ptr, fp128 %value) {
-; CHECK-LABEL: @test_atomicrmw_fmax_fp128_global_agent(
-; CHECK-NEXT: [[TMP1:%.*]] = alloca fp128, align 8, addrspace(5)
-; CHECK-NEXT: [[TMP2:%.*]] = load fp128, ptr addrspace(1) [[PTR:%.*]], align 16
-; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
-; CHECK: atomicrmw.start:
-; CHECK-NEXT: [[LOADED:%.*]] = phi fp128 [ [[TMP2]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
-; CHECK-NEXT: [[TMP3:%.*]] = call fp128 @llvm.maxnum.f128(fp128 [[LOADED]], fp128 [[VALUE:%.*]])
-; CHECK-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 16, ptr addrspace(5) [[TMP1]])
-; CHECK-NEXT: store fp128 [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast fp128 [[TMP3]] to i128
-; CHECK-NEXT: [[TMP6:%.*]] = call zeroext i1 @__atomic_compare_exchange_16(ptr [[TMP4]], ptr addrspace(5) [[TMP1]], i128 [[TMP5]], i32 5, i32 5)
-; CHECK-NEXT: [[TMP7:%.*]] = load fp128, ptr addrspace(5) [[TMP1]], align 8
-; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 16, ptr addrspace(5) [[TMP1]])
-; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { fp128, i1 } poison, fp128 [[TMP7]], 0
-; CHECK-NEXT: [[TMP9:%.*]] = insertvalue { fp128, i1 } [[TMP8]], i1 [[TMP6]], 1
-; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { fp128, i1 } [[TMP9]], 1
-; CHECK-NEXT: [[NEWLOADED]] = extractvalue { fp128, i1 } [[TMP9]], 0
-; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
-; CHECK: atomicrmw.end:
-; CHECK-NEXT: ret fp128 [[NEWLOADED]]
-;
- %res = atomicrmw fmax ptr addrspace(1) %ptr, fp128 %value syncscope("agent") seq_cst
- ret fp128 %res
-}
diff --git a/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-i128.ll b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-i128.ll
deleted file mode 100644
index 37ccbd973bdeb..0000000000000
--- a/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-i128.ll
+++ /dev/null
@@ -1,201 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=atomic-expand %s | FileCheck %s
-
-define i128 @test_atomicrmw_xchg_i128_global(ptr addrspace(1) %ptr, i128 %value) {
-; CHECK-LABEL: @test_atomicrmw_xchg_i128_global(
-; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[PTR:%.*]] to ptr
-; CHECK-NEXT: [[TMP2:%.*]] = call i128 @__atomic_exchange_16(ptr [[TMP1]], i128 [[VALUE:%.*]], i32 5)
-; CHECK-NEXT: ret i128 [[TMP2]]
-;
- %res = atomicrmw xchg ptr addrspace(1) %ptr, i128 %value seq_cst
- ret i128 %res
-}
-
-define i128 @test_atomicrmw_add_i128_global(ptr addrspace(1) %ptr, i128 %value) {
-; CHECK-LABEL: @test_atomicrmw_add_i128_global(
-; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[PTR:%.*]] to ptr
-; CHECK-NEXT: [[TMP2:%.*]] = call i128 @__atomic_fetch_add_16(ptr [[TMP1]], i128 [[VALUE:%.*]], i32 5)
-; CHECK-NEXT: ret i128 [[TMP2]]
-;
- %res = atomicrmw add ptr addrspace(1) %ptr, i128 %value seq_cst
- ret i128 %res
-}
-
-define i128 @test_atomicrmw_sub_i128_global(ptr addrspace(1) %ptr, i128 %value) {
-; CHECK-LABEL: @test_atomicrmw_sub_i128_global(
-; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[PTR:%.*]] to ptr
-; CHECK-NEXT: [[TMP2:%.*]] = call i128 @__atomic_fetch_sub_16(ptr [[TMP1]], i128 [[VALUE:%.*]], i32 5)
-; CHECK-NEXT: ret i128 [[TMP2]]
-;
- %res = atomicrmw sub ptr addrspace(1) %ptr, i128 %value seq_cst
- ret i128 %res
-}
-
-define i128 @test_atomicrmw_and_i128_global(ptr addrspace(1) %ptr, i128 %value) {
-; CHECK-LABEL: @test_atomicrmw_and_i128_global(
-; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[PTR:%.*]] to ptr
-; CHECK-NEXT: [[TMP2:%.*]] = call i128 @__atomic_fetch_and_16(ptr [[TMP1]], i128 [[VALUE:%.*]], i32 5)
-; CHECK-NEXT: ret i128 [[TMP2]]
-;
- %res = atomicrmw and ptr addrspace(1) %ptr, i128 %value seq_cst
- ret i128 %res
-}
-
-define i128 @test_atomicrmw_nand_i128_global(ptr addrspace(1) %ptr, i128 %value) {
-; CHECK-LABEL: @test_atomicrmw_nand_i128_global(
-; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[PTR:%.*]] to ptr
-; CHECK-NEXT: [[TMP2:%.*]] = call i128 @__atomic_fetch_nand_16(ptr [[TMP1]], i128 [[VALUE:%.*]], i32 5)
-; CHECK-NEXT: ret i128 [[TMP2]]
-;
- %res = atomicrmw nand ptr addrspace(1) %ptr, i128 %value seq_cst
- ret i128 %res
-}
-
-define i128 @test_atomicrmw_or_i128_global(ptr addrspace(1) %ptr, i128 %value) {
-; CHECK-LABEL: @test_atomicrmw_or_i128_global(
-; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[PTR:%.*]] to ptr
-; CHECK-NEXT: [[TMP2:%.*]] = call i128 @__atomic_fetch_or_16(ptr [[TMP1]], i128 [[VALUE:%.*]], i32 5)
-; CHECK-NEXT: ret i128 [[TMP2]]
-;
- %res = atomicrmw or ptr addrspace(1) %ptr, i128 %value seq_cst
- ret i128 %res
-}
-
-define i128 @test_atomicrmw_xor_i128_global(ptr addrspace(1) %ptr, i128 %value) {
-; CHECK-LABEL: @test_atomicrmw_xor_i128_global(
-; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[PTR:%.*]] to ptr
-; CHECK-NEXT: [[TMP2:%.*]] = call i128 @__atomic_fetch_xor_16(ptr [[TMP1]], i128 [[VALUE:%.*]], i32 5)
-; CHECK-NEXT: ret i128 [[TMP2]]
-;
- %res = atomicrmw xor ptr addrspace(1) %ptr, i128 %value seq_cst
- ret i128 %res
-}
-
-define i128 @test_atomicrmw_max_i128_global(ptr addrspace(1) %ptr, i128 %value) {
-; CHECK-LABEL: @test_atomicrmw_max_i128_global(
-; CHECK-NEXT: [[TMP1:%.*]] = alloca i128, align 8, addrspace(5)
-; CHECK-NEXT: [[TMP2:%.*]] = load i128, ptr addrspace(1) [[PTR:%.*]], align 16
-; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
-; CHECK: atomicrmw.start:
-; CHECK-NEXT: [[LOADED:%.*]] = phi i128 [ [[TMP2]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
-; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt i128 [[LOADED]], [[VALUE:%.*]]
-; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP3]], i128 [[LOADED]], i128 [[VALUE]]
-; CHECK-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 16, ptr addrspace(5) [[TMP1]])
-; CHECK-NEXT: store i128 [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
-; CHECK-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange_16(ptr [[TMP4]], ptr addrspace(5) [[TMP1]], i128 [[NEW]], i32 5, i32 5)
-; CHECK-NEXT: [[TMP6:%.*]] = load i128, ptr addrspace(5) [[TMP1]], align 8
-; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 16, ptr addrspace(5) [[TMP1]])
-; CHECK-NEXT: [[TMP7:%.*]] = insertvalue { i128, i1 } poison, i128 [[TMP6]], 0
-; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { i128, i1 } [[TMP7]], i1 [[TMP5]], 1
-; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i128, i1 } [[TMP8]], 1
-; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i128, i1 } [[TMP8]], 0
-; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
-; CHECK: atomicrmw.end:
-; CHECK-NEXT: ret i128 [[NEWLOADED]]
-;
- %res = atomicrmw max ptr addrspace(1) %ptr, i128 %value seq_cst
- ret i128 %res
-}
-
-define i128 @test_atomicrmw_min_i128_global(ptr addrspace(1) %ptr, i128 %value) {
-; CHECK-LABEL: @test_atomicrmw_min_i128_global(
-; CHECK-NEXT: [[TMP1:%.*]] = alloca i128, align 8, addrspace(5)
-; CHECK-NEXT: [[TMP2:%.*]] = load i128, ptr addrspace(1) [[PTR:%.*]], align 16
-; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
-; CHECK: atomicrmw.start:
-; CHECK-NEXT: [[LOADED:%.*]] = phi i128 [ [[TMP2]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
-; CHECK-NEXT: [[TMP3:%.*]] = icmp sle i128 [[LOADED]], [[VALUE:%.*]]
-; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP3]], i128 [[LOADED]], i128 [[VALUE]]
-; CHECK-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 16, ptr addrspace(5) [[TMP1]])
-; CHECK-NEXT: store i128 [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
-; CHECK-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange_16(ptr [[TMP4]], ptr addrspace(5) [[TMP1]], i128 [[NEW]], i32 5, i32 5)
-; CHECK-NEXT: [[TMP6:%.*]] = load i128, ptr addrspace(5) [[TMP1]], align 8
-; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 16, ptr addrspace(5) [[TMP1]])
-; CHECK-NEXT: [[TMP7:%.*]] = insertvalue { i128, i1 } poison, i128 [[TMP6]], 0
-; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { i128, i1 } [[TMP7]], i1 [[TMP5]], 1
-; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i128, i1 } [[TMP8]], 1
-; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i128, i1 } [[TMP8]], 0
-; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
-; CHECK: atomicrmw.end:
-; CHECK-NEXT: ret i128 [[NEWLOADED]]
-;
- %res = atomicrmw min ptr addrspace(1) %ptr, i128 %value seq_cst
- ret i128 %res
-}
-
-define i128 @test_atomicrmw_umax_i128_global(ptr addrspace(1) %ptr, i128 %value) {
-; CHECK-LABEL: @test_atomicrmw_umax_i128_global(
-; CHECK-NEXT: [[TMP1:%.*]] = alloca i128, align 8, addrspace(5)
-; CHECK-NEXT: [[TMP2:%.*]] = load i128, ptr addrspace(1) [[PTR:%.*]], align 16
-; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
-; CHECK: atomicrmw.start:
-; CHECK-NEXT: [[LOADED:%.*]] = phi i128 [ [[TMP2]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
-; CHECK-NEXT: [[TMP3:%.*]] = icmp ugt i128 [[LOADED]], [[VALUE:%.*]]
-; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP3]], i128 [[LOADED]], i128 [[VALUE]]
-; CHECK-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 16, ptr addrspace(5) [[TMP1]])
-; CHECK-NEXT: store i128 [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
-; CHECK-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange_16(ptr [[TMP4]], ptr addrspace(5) [[TMP1]], i128 [[NEW]], i32 5, i32 5)
-; CHECK-NEXT: [[TMP6:%.*]] = load i128, ptr addrspace(5) [[TMP1]], align 8
-; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 16, ptr addrspace(5) [[TMP1]])
-; CHECK-NEXT: [[TMP7:%.*]] = insertvalue { i128, i1 } poison, i128 [[TMP6]], 0
-; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { i128, i1 } [[TMP7]], i1 [[TMP5]], 1
-; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i128, i1 } [[TMP8]], 1
-; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i128, i1 } [[TMP8]], 0
-; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
-; CHECK: atomicrmw.end:
-; CHECK-NEXT: ret i128 [[NEWLOADED]]
-;
- %res = atomicrmw umax ptr addrspace(1) %ptr, i128 %value seq_cst
- ret i128 %res
-}
-
-define i128 @test_atomicrmw_umin_i128_global(ptr addrspace(1) %ptr, i128 %value) {
-; CHECK-LABEL: @test_atomicrmw_umin_i128_global(
-; CHECK-NEXT: [[TMP1:%.*]] = alloca i128, align 8, addrspace(5)
-; CHECK-NEXT: [[TMP2:%.*]] = load i128, ptr addrspace(1) [[PTR:%.*]], align 16
-; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
-; CHECK: atomicrmw.start:
-; CHECK-NEXT: [[LOADED:%.*]] = phi i128 [ [[TMP2]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
-; CHECK-NEXT: [[TMP3:%.*]] = icmp ule i128 [[LOADED]], [[VALUE:%.*]]
-; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP3]], i128 [[LOADED]], i128 [[VALUE]]
-; CHECK-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 16, ptr addrspace(5) [[TMP1]])
-; CHECK-NEXT: store i128 [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
-; CHECK-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange_16(ptr [[TMP4]], ptr addrspace(5) [[TMP1]], i128 [[NEW]], i32 5, i32 5)
-; CHECK-NEXT: [[TMP6:%.*]] = load i128, ptr addrspace(5) [[TMP1]], align 8
-; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 16, ptr addrspace(5) [[TMP1]])
-; CHECK-NEXT: [[TMP7:%.*]] = insertvalue { i128, i1 } poison, i128 [[TMP6]], 0
-; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { i128, i1 } [[TMP7]], i1 [[TMP5]], 1
-; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i128, i1 } [[TMP8]], 1
-; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i128, i1 } [[TMP8]], 0
-; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
-; CHECK: atomicrmw.end:
-; CHECK-NEXT: ret i128 [[NEWLOADED]]
-;
- %res = atomicrmw umin ptr addrspace(1) %ptr, i128 %value seq_cst
- ret i128 %res
-}
-
-define i128 @test_cmpxchg_i128_global(ptr addrspace(1) %out, i128 %in, i128 %old) {
-; CHECK-LABEL: @test_cmpxchg_i128_global(
-; CHECK-NEXT: [[TMP1:%.*]] = alloca i128, align 8, addrspace(5)
-; CHECK-NEXT: [[GEP:%.*]] = getelementptr i128, ptr addrspace(1) [[OUT:%.*]], i64 4
-; CHECK-NEXT: [[TMP2:%.*]] = addrspacecast ptr addrspace(1) [[GEP]] to ptr
-; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 16, ptr addrspace(5) [[TMP1]])
-; CHECK-NEXT: store i128 [[OLD:%.*]], ptr addrspace(5) [[TMP1]], align 8
-; CHECK-NEXT: [[TMP3:%.*]] = call zeroext i1 @__atomic_compare_exchange_16(ptr [[TMP2]], ptr addrspace(5) [[TMP1]], i128 [[IN:%.*]], i32 5, i32 5)
-; CHECK-NEXT: [[TMP4:%.*]] = load i128, ptr addrspace(5) [[TMP1]], align 8
-; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 16, ptr addrspace(5) [[TMP1]])
-; CHECK-NEXT: [[TMP5:%.*]] = insertvalue { i128, i1 } poison, i128 [[TMP4]], 0
-; CHECK-NEXT: [[TMP6:%.*]] = insertvalue { i128, i1 } [[TMP5]], i1 [[TMP3]], 1
-; CHECK-NEXT: [[EXTRACT:%.*]] = extractvalue { i128, i1 } [[TMP6]], 0
-; CHECK-NEXT: ret i128 [[EXTRACT]]
-;
- %gep = getelementptr i128, ptr addrspace(1) %out, i64 4
- %res = cmpxchg ptr addrspace(1) %gep, i128 %old, i128 %in seq_cst seq_cst
- %extract = extractvalue {i128, i1} %res, 0
- ret i128 %extract
-}
diff --git a/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-mmra.ll b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-mmra.ll
index e79bb465563e8..20783969b14dc 100644
--- a/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-mmra.ll
+++ b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-mmra.ll
@@ -160,31 +160,6 @@ define void @syncscope_workgroup_nortn(ptr %addr, float %val) {
ret void
}
-define i32 @atomic_load_global_align1(ptr addrspace(1) %ptr) {
-; GFX90A-LABEL: define i32 @atomic_load_global_align1(
-; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]]) #[[ATTR0]] {
-; GFX90A-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; GFX90A-NEXT: [[TMP2:%.*]] = alloca i32, align 4, addrspace(5)
-; GFX90A-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[TMP2]])
-; GFX90A-NEXT: call void @__atomic_load(i64 4, ptr [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5)
-; GFX90A-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(5) [[TMP2]], align 4
-; GFX90A-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[TMP2]])
-; GFX90A-NEXT: ret i32 [[TMP3]]
-;
-; GFX1100-LABEL: define i32 @atomic_load_global_align1(
-; GFX1100-SAME: ptr addrspace(1) [[PTR:%.*]]) #[[ATTR0]] {
-; GFX1100-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; GFX1100-NEXT: [[TMP2:%.*]] = alloca i32, align 4, addrspace(5)
-; GFX1100-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[TMP2]])
-; GFX1100-NEXT: call void @__atomic_load(i64 4, ptr [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5)
-; GFX1100-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(5) [[TMP2]], align 4
-; GFX1100-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[TMP2]])
-; GFX1100-NEXT: ret i32 [[TMP3]]
-;
- %val = load atomic i32, ptr addrspace(1) %ptr seq_cst, align 1, !mmra !2
- ret i32 %val
-}
-
!0 = !{!"foo", !"bar"}
!1 = !{!"bux", !"baz"}
!2 = !{!0, !1}
diff --git a/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomicrmw-fp-vector.ll b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomicrmw-fp-vector.ll
index 5736a3fe38590..c388877f9c284 100644
--- a/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomicrmw-fp-vector.ll
+++ b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomicrmw-fp-vector.ll
@@ -7,71 +7,9 @@
; atomicrmw fadd
;---------------------------------------------------------------------
-define <2 x half> @test_atomicrmw_fadd_v2f16_global_agent_align2(ptr addrspace(1) %ptr, <2 x half> %value) {
-; CHECK-LABEL: define <2 x half> @test_atomicrmw_fadd_v2f16_global_agent_align2(
-; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x half> [[VALUE:%.*]]) #[[ATTR0:[0-9]+]] {
-; CHECK-NEXT: [[TMP1:%.*]] = alloca <2 x half>, align 4, addrspace(5)
-; CHECK-NEXT: [[TMP2:%.*]] = alloca <2 x half>, align 4, addrspace(5)
-; CHECK-NEXT: [[TMP3:%.*]] = load <2 x half>, ptr addrspace(1) [[PTR]], align 2
-; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
-; CHECK: atomicrmw.start:
-; CHECK-NEXT: [[LOADED:%.*]] = phi <2 x half> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
-; CHECK-NEXT: [[NEW:%.*]] = fadd <2 x half> [[LOADED]], [[VALUE]]
-; CHECK-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[TMP1]])
-; CHECK-NEXT: store <2 x half> [[LOADED]], ptr addrspace(5) [[TMP1]], align 4
-; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[TMP2]])
-; CHECK-NEXT: store <2 x half> [[NEW]], ptr addrspace(5) [[TMP2]], align 4
-; CHECK-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 4, ptr [[TMP4]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
-; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[TMP2]])
-; CHECK-NEXT: [[TMP6:%.*]] = load <2 x half>, ptr addrspace(5) [[TMP1]], align 4
-; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[TMP1]])
-; CHECK-NEXT: [[TMP7:%.*]] = insertvalue { <2 x half>, i1 } poison, <2 x half> [[TMP6]], 0
-; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <2 x half>, i1 } [[TMP7]], i1 [[TMP5]], 1
-; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <2 x half>, i1 } [[TMP8]], 1
-; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <2 x half>, i1 } [[TMP8]], 0
-; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
-; CHECK: atomicrmw.end:
-; CHECK-NEXT: ret <2 x half> [[NEWLOADED]]
-;
- %res = atomicrmw fadd ptr addrspace(1) %ptr, <2 x half> %value syncscope("agent") seq_cst, align 2
- ret <2 x half> %res
-}
-
-define <2 x bfloat> @test_atomicrmw_fadd_v2bf16_global_agent_align2(ptr addrspace(1) %ptr, <2 x bfloat> %value) {
-; CHECK-LABEL: define <2 x bfloat> @test_atomicrmw_fadd_v2bf16_global_agent_align2(
-; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = alloca <2 x bfloat>, align 4, addrspace(5)
-; CHECK-NEXT: [[TMP2:%.*]] = alloca <2 x bfloat>, align 4, addrspace(5)
-; CHECK-NEXT: [[TMP3:%.*]] = load <2 x bfloat>, ptr addrspace(1) [[PTR]], align 2
-; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
-; CHECK: atomicrmw.start:
-; CHECK-NEXT: [[LOADED:%.*]] = phi <2 x bfloat> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
-; CHECK-NEXT: [[NEW:%.*]] = fadd <2 x bfloat> [[LOADED]], [[VALUE]]
-; CHECK-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[TMP1]])
-; CHECK-NEXT: store <2 x bfloat> [[LOADED]], ptr addrspace(5) [[TMP1]], align 4
-; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[TMP2]])
-; CHECK-NEXT: store <2 x bfloat> [[NEW]], ptr addrspace(5) [[TMP2]], align 4
-; CHECK-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 4, ptr [[TMP4]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
-; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[TMP2]])
-; CHECK-NEXT: [[TMP6:%.*]] = load <2 x bfloat>, ptr addrspace(5) [[TMP1]], align 4
-; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[TMP1]])
-; CHECK-NEXT: [[TMP7:%.*]] = insertvalue { <2 x bfloat>, i1 } poison, <2 x bfloat> [[TMP6]], 0
-; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <2 x bfloat>, i1 } [[TMP7]], i1 [[TMP5]], 1
-; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <2 x bfloat>, i1 } [[TMP8]], 1
-; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <2 x bfloat>, i1 } [[TMP8]], 0
-; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
-; CHECK: atomicrmw.end:
-; CHECK-NEXT: ret <2 x bfloat> [[NEWLOADED]]
-;
- %res = atomicrmw fadd ptr addrspace(1) %ptr, <2 x bfloat> %value syncscope("agent") seq_cst, align 2
- ret <2 x bfloat> %res
-}
-
define <2 x half> @test_atomicrmw_fadd_v2f16_global_agent_align4(ptr addrspace(1) %ptr, <2 x half> %value) {
; GFX900-LABEL: define <2 x half> @test_atomicrmw_fadd_v2f16_global_agent_align4(
-; GFX900-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x half> [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX900-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x half> [[VALUE:%.*]]) #[[ATTR0:[0-9]+]] {
; GFX900-NEXT: [[TMP1:%.*]] = load <2 x half>, ptr addrspace(1) [[PTR]], align 4
; GFX900-NEXT: br label [[ATOMICRMW_START:%.*]]
; GFX900: atomicrmw.start:
@@ -88,7 +26,7 @@ define <2 x half> @test_atomicrmw_fadd_v2f16_global_agent_align4(ptr addrspace(1
; GFX900-NEXT: ret <2 x half> [[TMP5]]
;
; GFX90A-LABEL: define <2 x half> @test_atomicrmw_fadd_v2f16_global_agent_align4(
-; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x half> [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x half> [[VALUE:%.*]]) #[[ATTR0:[0-9]+]] {
; GFX90A-NEXT: [[TMP1:%.*]] = load <2 x half>, ptr addrspace(1) [[PTR]], align 4
; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]]
; GFX90A: atomicrmw.start:
@@ -105,7 +43,7 @@ define <2 x half> @test_atomicrmw_fadd_v2f16_global_agent_align4(ptr addrspace(1
; GFX90A-NEXT: ret <2 x half> [[RES]]
;
; GFX942-LABEL: define <2 x half> @test_atomicrmw_fadd_v2f16_global_agent_align4(
-; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x half> [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x half> [[VALUE:%.*]]) #[[ATTR0:[0-9]+]] {
; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], <2 x half> [[VALUE]] syncscope("agent") seq_cst, align 4
; GFX942-NEXT: ret <2 x half> [[RES]]
;
@@ -157,133 +95,9 @@ define <2 x bfloat> @test_atomicrmw_fadd_v2bf16_global_agent_align4(ptr addrspac
ret <2 x bfloat> %res
}
-define <4 x half> @test_atomicrmw_fadd_v4f16_global_agent_align2(ptr addrspace(1) %ptr, <4 x half> %value) {
-; CHECK-LABEL: define <4 x half> @test_atomicrmw_fadd_v4f16_global_agent_align2(
-; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <4 x half> [[VALUE:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = alloca <4 x half>, align 8, addrspace(5)
-; CHECK-NEXT: [[TMP2:%.*]] = alloca <4 x half>, align 8, addrspace(5)
-; CHECK-NEXT: [[TMP3:%.*]] = load <4 x half>, ptr addrspace(1) [[PTR]], align 2
-; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
-; CHECK: atomicrmw.start:
-; CHECK-NEXT: [[LOADED:%.*]] = phi <4 x half> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
-; CHECK-NEXT: [[NEW:%.*]] = fadd <4 x half> [[LOADED]], [[VALUE]]
-; CHECK-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]])
-; CHECK-NEXT: store <4 x half> [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
-; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
-; CHECK-NEXT: store <4 x half> [[NEW]], ptr addrspace(5) [[TMP2]], align 8
-; CHECK-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP4]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
-; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
-; CHECK-NEXT: [[TMP6:%.*]] = load <4 x half>, ptr addrspace(5) [[TMP1]], align 8
-; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]])
-; CHECK-NEXT: [[TMP7:%.*]] = insertvalue { <4 x half>, i1 } poison, <4 x half> [[TMP6]], 0
-; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <4 x half>, i1 } [[TMP7]], i1 [[TMP5]], 1
-; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <4 x half>, i1 } [[TMP8]], 1
-; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <4 x half>, i1 } [[TMP8]], 0
-; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
-; CHECK: atomicrmw.end:
-; CHECK-NEXT: ret <4 x half> [[NEWLOADED]]
-;
- %res = atomicrmw fadd ptr addrspace(1) %ptr, <4 x half> %value syncscope("agent") seq_cst, align 2
- ret <4 x half> %res
-}
-
-define <4 x bfloat> @test_atomicrmw_fadd_v4bf16_global_agent_align2(ptr addrspace(1) %ptr, <4 x bfloat> %value) {
-; CHECK-LABEL: define <4 x bfloat> @test_atomicrmw_fadd_v4bf16_global_agent_align2(
-; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <4 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = alloca <4 x bfloat>, align 8, addrspace(5)
-; CHECK-NEXT: [[TMP2:%.*]] = alloca <4 x bfloat>, align 8, addrspace(5)
-; CHECK-NEXT: [[TMP3:%.*]] = load <4 x bfloat>, ptr addrspace(1) [[PTR]], align 2
-; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
-; CHECK: atomicrmw.start:
-; CHECK-NEXT: [[LOADED:%.*]] = phi <4 x bfloat> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
-; CHECK-NEXT: [[NEW:%.*]] = fadd <4 x bfloat> [[LOADED]], [[VALUE]]
-; CHECK-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]])
-; CHECK-NEXT: store <4 x bfloat> [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
-; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
-; CHECK-NEXT: store <4 x bfloat> [[NEW]], ptr addrspace(5) [[TMP2]], align 8
-; CHECK-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP4]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
-; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
-; CHECK-NEXT: [[TMP6:%.*]] = load <4 x bfloat>, ptr addrspace(5) [[TMP1]], align 8
-; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]])
-; CHECK-NEXT: [[TMP7:%.*]] = insertvalue { <4 x bfloat>, i1 } poison, <4 x bfloat> [[TMP6]], 0
-; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <4 x bfloat>, i1 } [[TMP7]], i1 [[TMP5]], 1
-; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <4 x bfloat>, i1 } [[TMP8]], 1
-; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <4 x bfloat>, i1 } [[TMP8]], 0
-; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
-; CHECK: atomicrmw.end:
-; CHECK-NEXT: ret <4 x bfloat> [[NEWLOADED]]
-;
- %res = atomicrmw fadd ptr addrspace(1) %ptr, <4 x bfloat> %value syncscope("agent") seq_cst, align 2
- ret <4 x bfloat> %res
-}
-
-define <4 x half> @test_atomicrmw_fadd_v4f16_global_agent_align4(ptr addrspace(1) %ptr, <4 x half> %value) {
-; CHECK-LABEL: define <4 x half> @test_atomicrmw_fadd_v4f16_global_agent_align4(
-; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <4 x half> [[VALUE:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = alloca <4 x half>, align 8, addrspace(5)
-; CHECK-NEXT: [[TMP2:%.*]] = alloca <4 x half>, align 8, addrspace(5)
-; CHECK-NEXT: [[TMP3:%.*]] = load <4 x half>, ptr addrspace(1) [[PTR]], align 4
-; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
-; CHECK: atomicrmw.start:
-; CHECK-NEXT: [[LOADED:%.*]] = phi <4 x half> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
-; CHECK-NEXT: [[NEW:%.*]] = fadd <4 x half> [[LOADED]], [[VALUE]]
-; CHECK-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]])
-; CHECK-NEXT: store <4 x half> [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
-; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
-; CHECK-NEXT: store <4 x half> [[NEW]], ptr addrspace(5) [[TMP2]], align 8
-; CHECK-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP4]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
-; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
-; CHECK-NEXT: [[TMP6:%.*]] = load <4 x half>, ptr addrspace(5) [[TMP1]], align 8
-; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]])
-; CHECK-NEXT: [[TMP7:%.*]] = insertvalue { <4 x half>, i1 } poison, <4 x half> [[TMP6]], 0
-; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <4 x half>, i1 } [[TMP7]], i1 [[TMP5]], 1
-; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <4 x half>, i1 } [[TMP8]], 1
-; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <4 x half>, i1 } [[TMP8]], 0
-; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
-; CHECK: atomicrmw.end:
-; CHECK-NEXT: ret <4 x half> [[NEWLOADED]]
-;
- %res = atomicrmw fadd ptr addrspace(1) %ptr, <4 x half> %value syncscope("agent") seq_cst, align 4
- ret <4 x half> %res
-}
-
-define <4 x bfloat> @test_atomicrmw_fadd_v4bf16_global_agent_align4(ptr addrspace(1) %ptr, <4 x bfloat> %value) {
-; CHECK-LABEL: define <4 x bfloat> @test_atomicrmw_fadd_v4bf16_global_agent_align4(
-; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <4 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = alloca <4 x bfloat>, align 8, addrspace(5)
-; CHECK-NEXT: [[TMP2:%.*]] = alloca <4 x bfloat>, align 8, addrspace(5)
-; CHECK-NEXT: [[TMP3:%.*]] = load <4 x bfloat>, ptr addrspace(1) [[PTR]], align 4
-; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
-; CHECK: atomicrmw.start:
-; CHECK-NEXT: [[LOADED:%.*]] = phi <4 x bfloat> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
-; CHECK-NEXT: [[NEW:%.*]] = fadd <4 x bfloat> [[LOADED]], [[VALUE]]
-; CHECK-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]])
-; CHECK-NEXT: store <4 x bfloat> [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
-; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
-; CHECK-NEXT: store <4 x bfloat> [[NEW]], ptr addrspace(5) [[TMP2]], align 8
-; CHECK-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP4]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
-; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
-; CHECK-NEXT: [[TMP6:%.*]] = load <4 x bfloat>, ptr addrspace(5) [[TMP1]], align 8
-; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]])
-; CHECK-NEXT: [[TMP7:%.*]] = insertvalue { <4 x bfloat>, i1 } poison, <4 x bfloat> [[TMP6]], 0
-; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <4 x bfloat>, i1 } [[TMP7]], i1 [[TMP5]], 1
-; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <4 x bfloat>, i1 } [[TMP8]], 1
-; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <4 x bfloat>, i1 } [[TMP8]], 0
-; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
-; CHECK: atomicrmw.end:
-; CHECK-NEXT: ret <4 x bfloat> [[NEWLOADED]]
-;
- %res = atomicrmw fadd ptr addrspace(1) %ptr, <4 x bfloat> %value syncscope("agent") seq_cst, align 4
- ret <4 x bfloat> %res
-}
-
define <4 x half> @test_atomicrmw_fadd_v4f16_global_agent_align8(ptr addrspace(1) %ptr, <4 x half> %value) {
; CHECK-LABEL: define <4 x half> @test_atomicrmw_fadd_v4f16_global_agent_align8(
-; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <4 x half> [[VALUE:%.*]]) #[[ATTR0]] {
+; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <4 x half> [[VALUE:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x half>, ptr addrspace(1) [[PTR]], align 8
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
@@ -351,68 +165,6 @@ define <2 x float> @test_atomicrmw_fadd_v2f32_global_agent_align8(ptr addrspace(
; atomicrmw fsub
;---------------------------------------------------------------------
-define <2 x half> @test_atomicrmw_fsub_v2f16_global_agent_align2(ptr addrspace(1) %ptr, <2 x half> %value) {
-; CHECK-LABEL: define <2 x half> @test_atomicrmw_fsub_v2f16_global_agent_align2(
-; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x half> [[VALUE:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = alloca <2 x half>, align 4, addrspace(5)
-; CHECK-NEXT: [[TMP2:%.*]] = alloca <2 x half>, align 4, addrspace(5)
-; CHECK-NEXT: [[TMP3:%.*]] = load <2 x half>, ptr addrspace(1) [[PTR]], align 2
-; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
-; CHECK: atomicrmw.start:
-; CHECK-NEXT: [[LOADED:%.*]] = phi <2 x half> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
-; CHECK-NEXT: [[NEW:%.*]] = fsub <2 x half> [[LOADED]], [[VALUE]]
-; CHECK-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[TMP1]])
-; CHECK-NEXT: store <2 x half> [[LOADED]], ptr addrspace(5) [[TMP1]], align 4
-; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[TMP2]])
-; CHECK-NEXT: store <2 x half> [[NEW]], ptr addrspace(5) [[TMP2]], align 4
-; CHECK-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 4, ptr [[TMP4]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
-; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[TMP2]])
-; CHECK-NEXT: [[TMP6:%.*]] = load <2 x half>, ptr addrspace(5) [[TMP1]], align 4
-; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[TMP1]])
-; CHECK-NEXT: [[TMP7:%.*]] = insertvalue { <2 x half>, i1 } poison, <2 x half> [[TMP6]], 0
-; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <2 x half>, i1 } [[TMP7]], i1 [[TMP5]], 1
-; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <2 x half>, i1 } [[TMP8]], 1
-; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <2 x half>, i1 } [[TMP8]], 0
-; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
-; CHECK: atomicrmw.end:
-; CHECK-NEXT: ret <2 x half> [[NEWLOADED]]
-;
- %res = atomicrmw fsub ptr addrspace(1) %ptr, <2 x half> %value syncscope("agent") seq_cst, align 2
- ret <2 x half> %res
-}
-
-define <2 x bfloat> @test_atomicrmw_fsub_v2bf16_global_agent_align2(ptr addrspace(1) %ptr, <2 x bfloat> %value) {
-; CHECK-LABEL: define <2 x bfloat> @test_atomicrmw_fsub_v2bf16_global_agent_align2(
-; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = alloca <2 x bfloat>, align 4, addrspace(5)
-; CHECK-NEXT: [[TMP2:%.*]] = alloca <2 x bfloat>, align 4, addrspace(5)
-; CHECK-NEXT: [[TMP3:%.*]] = load <2 x bfloat>, ptr addrspace(1) [[PTR]], align 2
-; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
-; CHECK: atomicrmw.start:
-; CHECK-NEXT: [[LOADED:%.*]] = phi <2 x bfloat> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
-; CHECK-NEXT: [[NEW:%.*]] = fsub <2 x bfloat> [[LOADED]], [[VALUE]]
-; CHECK-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[TMP1]])
-; CHECK-NEXT: store <2 x bfloat> [[LOADED]], ptr addrspace(5) [[TMP1]], align 4
-; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[TMP2]])
-; CHECK-NEXT: store <2 x bfloat> [[NEW]], ptr addrspace(5) [[TMP2]], align 4
-; CHECK-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 4, ptr [[TMP4]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
-; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[TMP2]])
-; CHECK-NEXT: [[TMP6:%.*]] = load <2 x bfloat>, ptr addrspace(5) [[TMP1]], align 4
-; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[TMP1]])
-; CHECK-NEXT: [[TMP7:%.*]] = insertvalue { <2 x bfloat>, i1 } poison, <2 x bfloat> [[TMP6]], 0
-; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <2 x bfloat>, i1 } [[TMP7]], i1 [[TMP5]], 1
-; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <2 x bfloat>, i1 } [[TMP8]], 1
-; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <2 x bfloat>, i1 } [[TMP8]], 0
-; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
-; CHECK: atomicrmw.end:
-; CHECK-NEXT: ret <2 x bfloat> [[NEWLOADED]]
-;
- %res = atomicrmw fsub ptr addrspace(1) %ptr, <2 x bfloat> %value syncscope("agent") seq_cst, align 2
- ret <2 x bfloat> %res
-}
-
define <2 x half> @test_atomicrmw_fsub_v2f16_global_agent_align4(ptr addrspace(1) %ptr, <2 x half> %value) {
; CHECK-LABEL: define <2 x half> @test_atomicrmw_fsub_v2f16_global_agent_align4(
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x half> [[VALUE:%.*]]) #[[ATTR0]] {
@@ -457,130 +209,6 @@ define <2 x bfloat> @test_atomicrmw_fsub_v2bf16_global_agent_align4(ptr addrspac
ret <2 x bfloat> %res
}
-define <4 x half> @test_atomicrmw_fsub_v4f16_global_agent_align2(ptr addrspace(1) %ptr, <4 x half> %value) {
-; CHECK-LABEL: define <4 x half> @test_atomicrmw_fsub_v4f16_global_agent_align2(
-; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <4 x half> [[VALUE:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = alloca <4 x half>, align 8, addrspace(5)
-; CHECK-NEXT: [[TMP2:%.*]] = alloca <4 x half>, align 8, addrspace(5)
-; CHECK-NEXT: [[TMP3:%.*]] = load <4 x half>, ptr addrspace(1) [[PTR]], align 2
-; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
-; CHECK: atomicrmw.start:
-; CHECK-NEXT: [[LOADED:%.*]] = phi <4 x half> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
-; CHECK-NEXT: [[NEW:%.*]] = fsub <4 x half> [[LOADED]], [[VALUE]]
-; CHECK-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]])
-; CHECK-NEXT: store <4 x half> [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
-; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
-; CHECK-NEXT: store <4 x half> [[NEW]], ptr addrspace(5) [[TMP2]], align 8
-; CHECK-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP4]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
-; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
-; CHECK-NEXT: [[TMP6:%.*]] = load <4 x half>, ptr addrspace(5) [[TMP1]], align 8
-; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]])
-; CHECK-NEXT: [[TMP7:%.*]] = insertvalue { <4 x half>, i1 } poison, <4 x half> [[TMP6]], 0
-; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <4 x half>, i1 } [[TMP7]], i1 [[TMP5]], 1
-; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <4 x half>, i1 } [[TMP8]], 1
-; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <4 x half>, i1 } [[TMP8]], 0
-; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
-; CHECK: atomicrmw.end:
-; CHECK-NEXT: ret <4 x half> [[NEWLOADED]]
-;
- %res = atomicrmw fsub ptr addrspace(1) %ptr, <4 x half> %value syncscope("agent") seq_cst, align 2
- ret <4 x half> %res
-}
-
-define <4 x bfloat> @test_atomicrmw_fsub_v4bf16_global_agent_align2(ptr addrspace(1) %ptr, <4 x bfloat> %value) {
-; CHECK-LABEL: define <4 x bfloat> @test_atomicrmw_fsub_v4bf16_global_agent_align2(
-; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <4 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = alloca <4 x bfloat>, align 8, addrspace(5)
-; CHECK-NEXT: [[TMP2:%.*]] = alloca <4 x bfloat>, align 8, addrspace(5)
-; CHECK-NEXT: [[TMP3:%.*]] = load <4 x bfloat>, ptr addrspace(1) [[PTR]], align 2
-; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
-; CHECK: atomicrmw.start:
-; CHECK-NEXT: [[LOADED:%.*]] = phi <4 x bfloat> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
-; CHECK-NEXT: [[NEW:%.*]] = fsub <4 x bfloat> [[LOADED]], [[VALUE]]
-; CHECK-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]])
-; CHECK-NEXT: store <4 x bfloat> [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
-; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
-; CHECK-NEXT: store <4 x bfloat> [[NEW]], ptr addrspace(5) [[TMP2]], align 8
-; CHECK-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP4]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
-; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
-; CHECK-NEXT: [[TMP6:%.*]] = load <4 x bfloat>, ptr addrspace(5) [[TMP1]], align 8
-; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]])
-; CHECK-NEXT: [[TMP7:%.*]] = insertvalue { <4 x bfloat>, i1 } poison, <4 x bfloat> [[TMP6]], 0
-; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <4 x bfloat>, i1 } [[TMP7]], i1 [[TMP5]], 1
-; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <4 x bfloat>, i1 } [[TMP8]], 1
-; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <4 x bfloat>, i1 } [[TMP8]], 0
-; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
-; CHECK: atomicrmw.end:
-; CHECK-NEXT: ret <4 x bfloat> [[NEWLOADED]]
-;
- %res = atomicrmw fsub ptr addrspace(1) %ptr, <4 x bfloat> %value syncscope("agent") seq_cst, align 2
- ret <4 x bfloat> %res
-}
-
-define <4 x half> @test_atomicrmw_fsub_v4f16_global_agent_align4(ptr addrspace(1) %ptr, <4 x half> %value) {
-; CHECK-LABEL: define <4 x half> @test_atomicrmw_fsub_v4f16_global_agent_align4(
-; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <4 x half> [[VALUE:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = alloca <4 x half>, align 8, addrspace(5)
-; CHECK-NEXT: [[TMP2:%.*]] = alloca <4 x half>, align 8, addrspace(5)
-; CHECK-NEXT: [[TMP3:%.*]] = load <4 x half>, ptr addrspace(1) [[PTR]], align 4
-; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
-; CHECK: atomicrmw.start:
-; CHECK-NEXT: [[LOADED:%.*]] = phi <4 x half> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
-; CHECK-NEXT: [[NEW:%.*]] = fsub <4 x half> [[LOADED]], [[VALUE]]
-; CHECK-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]])
-; CHECK-NEXT: store <4 x half> [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
-; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
-; CHECK-NEXT: store <4 x half> [[NEW]], ptr addrspace(5) [[TMP2]], align 8
-; CHECK-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP4]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
-; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
-; CHECK-NEXT: [[TMP6:%.*]] = load <4 x half>, ptr addrspace(5) [[TMP1]], align 8
-; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]])
-; CHECK-NEXT: [[TMP7:%.*]] = insertvalue { <4 x half>, i1 } poison, <4 x half> [[TMP6]], 0
-; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <4 x half>, i1 } [[TMP7]], i1 [[TMP5]], 1
-; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <4 x half>, i1 } [[TMP8]], 1
-; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <4 x half>, i1 } [[TMP8]], 0
-; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
-; CHECK: atomicrmw.end:
-; CHECK-NEXT: ret <4 x half> [[NEWLOADED]]
-;
- %res = atomicrmw fsub ptr addrspace(1) %ptr, <4 x half> %value syncscope("agent") seq_cst, align 4
- ret <4 x half> %res
-}
-
-define <4 x bfloat> @test_atomicrmw_fsub_v4bf16_global_agent_align4(ptr addrspace(1) %ptr, <4 x bfloat> %value) {
-; CHECK-LABEL: define <4 x bfloat> @test_atomicrmw_fsub_v4bf16_global_agent_align4(
-; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <4 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = alloca <4 x bfloat>, align 8, addrspace(5)
-; CHECK-NEXT: [[TMP2:%.*]] = alloca <4 x bfloat>, align 8, addrspace(5)
-; CHECK-NEXT: [[TMP3:%.*]] = load <4 x bfloat>, ptr addrspace(1) [[PTR]], align 4
-; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
-; CHECK: atomicrmw.start:
-; CHECK-NEXT: [[LOADED:%.*]] = phi <4 x bfloat> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
-; CHECK-NEXT: [[NEW:%.*]] = fsub <4 x bfloat> [[LOADED]], [[VALUE]]
-; CHECK-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]])
-; CHECK-NEXT: store <4 x bfloat> [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
-; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
-; CHECK-NEXT: store <4 x bfloat> [[NEW]], ptr addrspace(5) [[TMP2]], align 8
-; CHECK-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP4]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
-; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
-; CHECK-NEXT: [[TMP6:%.*]] = load <4 x bfloat>, ptr addrspace(5) [[TMP1]], align 8
-; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]])
-; CHECK-NEXT: [[TMP7:%.*]] = insertvalue { <4 x bfloat>, i1 } poison, <4 x bfloat> [[TMP6]], 0
-; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <4 x bfloat>, i1 } [[TMP7]], i1 [[TMP5]], 1
-; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <4 x bfloat>, i1 } [[TMP8]], 1
-; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <4 x bfloat>, i1 } [[TMP8]], 0
-; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
-; CHECK: atomicrmw.end:
-; CHECK-NEXT: ret <4 x bfloat> [[NEWLOADED]]
-;
- %res = atomicrmw fsub ptr addrspace(1) %ptr, <4 x bfloat> %value syncscope("agent") seq_cst, align 4
- ret <4 x bfloat> %res
-}
-
define <4 x half> @test_atomicrmw_fsub_v4f16_global_agent_align8(ptr addrspace(1) %ptr, <4 x half> %value) {
; CHECK-LABEL: define <4 x half> @test_atomicrmw_fsub_v4f16_global_agent_align8(
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <4 x half> [[VALUE:%.*]]) #[[ATTR0]] {
@@ -651,68 +279,6 @@ define <2 x float> @test_atomicrmw_fsub_v2f32_global_agent_align8(ptr addrspace(
; atomicrmw fmin
;---------------------------------------------------------------------
-define <2 x half> @test_atomicrmw_fmin_v2f16_global_agent_align2(ptr addrspace(1) %ptr, <2 x half> %value) {
-; CHECK-LABEL: define <2 x half> @test_atomicrmw_fmin_v2f16_global_agent_align2(
-; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x half> [[VALUE:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = alloca <2 x half>, align 4, addrspace(5)
-; CHECK-NEXT: [[TMP2:%.*]] = alloca <2 x half>, align 4, addrspace(5)
-; CHECK-NEXT: [[TMP3:%.*]] = load <2 x half>, ptr addrspace(1) [[PTR]], align 2
-; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
-; CHECK: atomicrmw.start:
-; CHECK-NEXT: [[LOADED:%.*]] = phi <2 x half> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
-; CHECK-NEXT: [[TMP4:%.*]] = call <2 x half> @llvm.minnum.v2f16(<2 x half> [[LOADED]], <2 x half> [[VALUE]])
-; CHECK-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[TMP1]])
-; CHECK-NEXT: store <2 x half> [[LOADED]], ptr addrspace(5) [[TMP1]], align 4
-; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[TMP2]])
-; CHECK-NEXT: store <2 x half> [[TMP4]], ptr addrspace(5) [[TMP2]], align 4
-; CHECK-NEXT: [[TMP6:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 4, ptr [[TMP5]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
-; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[TMP2]])
-; CHECK-NEXT: [[TMP7:%.*]] = load <2 x half>, ptr addrspace(5) [[TMP1]], align 4
-; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[TMP1]])
-; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <2 x half>, i1 } poison, <2 x half> [[TMP7]], 0
-; CHECK-NEXT: [[TMP9:%.*]] = insertvalue { <2 x half>, i1 } [[TMP8]], i1 [[TMP6]], 1
-; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <2 x half>, i1 } [[TMP9]], 1
-; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <2 x half>, i1 } [[TMP9]], 0
-; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
-; CHECK: atomicrmw.end:
-; CHECK-NEXT: ret <2 x half> [[NEWLOADED]]
-;
- %res = atomicrmw fmin ptr addrspace(1) %ptr, <2 x half> %value syncscope("agent") seq_cst, align 2
- ret <2 x half> %res
-}
-
-define <2 x bfloat> @test_atomicrmw_fmin_v2bf16_global_agent_align2(ptr addrspace(1) %ptr, <2 x bfloat> %value) {
-; CHECK-LABEL: define <2 x bfloat> @test_atomicrmw_fmin_v2bf16_global_agent_align2(
-; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = alloca <2 x bfloat>, align 4, addrspace(5)
-; CHECK-NEXT: [[TMP2:%.*]] = alloca <2 x bfloat>, align 4, addrspace(5)
-; CHECK-NEXT: [[TMP3:%.*]] = load <2 x bfloat>, ptr addrspace(1) [[PTR]], align 2
-; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
-; CHECK: atomicrmw.start:
-; CHECK-NEXT: [[LOADED:%.*]] = phi <2 x bfloat> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
-; CHECK-NEXT: [[TMP4:%.*]] = call <2 x bfloat> @llvm.minnum.v2bf16(<2 x bfloat> [[LOADED]], <2 x bfloat> [[VALUE]])
-; CHECK-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[TMP1]])
-; CHECK-NEXT: store <2 x bfloat> [[LOADED]], ptr addrspace(5) [[TMP1]], align 4
-; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[TMP2]])
-; CHECK-NEXT: store <2 x bfloat> [[TMP4]], ptr addrspace(5) [[TMP2]], align 4
-; CHECK-NEXT: [[TMP6:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 4, ptr [[TMP5]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
-; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[TMP2]])
-; CHECK-NEXT: [[TMP7:%.*]] = load <2 x bfloat>, ptr addrspace(5) [[TMP1]], align 4
-; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[TMP1]])
-; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <2 x bfloat>, i1 } poison, <2 x bfloat> [[TMP7]], 0
-; CHECK-NEXT: [[TMP9:%.*]] = insertvalue { <2 x bfloat>, i1 } [[TMP8]], i1 [[TMP6]], 1
-; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <2 x bfloat>, i1 } [[TMP9]], 1
-; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <2 x bfloat>, i1 } [[TMP9]], 0
-; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
-; CHECK: atomicrmw.end:
-; CHECK-NEXT: ret <2 x bfloat> [[NEWLOADED]]
-;
- %res = atomicrmw fmin ptr addrspace(1) %ptr, <2 x bfloat> %value syncscope("agent") seq_cst, align 2
- ret <2 x bfloat> %res
-}
-
define <2 x half> @test_atomicrmw_fmin_v2f16_global_agent_align4(ptr addrspace(1) %ptr, <2 x half> %value) {
; CHECK-LABEL: define <2 x half> @test_atomicrmw_fmin_v2f16_global_agent_align4(
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x half> [[VALUE:%.*]]) #[[ATTR0]] {
@@ -757,130 +323,6 @@ define <2 x bfloat> @test_atomicrmw_fmin_v2bf16_global_agent_align4(ptr addrspac
ret <2 x bfloat> %res
}
-define <4 x half> @test_atomicrmw_fmin_v4f16_global_agent_align2(ptr addrspace(1) %ptr, <4 x half> %value) {
-; CHECK-LABEL: define <4 x half> @test_atomicrmw_fmin_v4f16_global_agent_align2(
-; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <4 x half> [[VALUE:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = alloca <4 x half>, align 8, addrspace(5)
-; CHECK-NEXT: [[TMP2:%.*]] = alloca <4 x half>, align 8, addrspace(5)
-; CHECK-NEXT: [[TMP3:%.*]] = load <4 x half>, ptr addrspace(1) [[PTR]], align 2
-; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
-; CHECK: atomicrmw.start:
-; CHECK-NEXT: [[LOADED:%.*]] = phi <4 x half> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
-; CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.minnum.v4f16(<4 x half> [[LOADED]], <4 x half> [[VALUE]])
-; CHECK-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]])
-; CHECK-NEXT: store <4 x half> [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
-; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
-; CHECK-NEXT: store <4 x half> [[TMP4]], ptr addrspace(5) [[TMP2]], align 8
-; CHECK-NEXT: [[TMP6:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP5]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
-; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
-; CHECK-NEXT: [[TMP7:%.*]] = load <4 x half>, ptr addrspace(5) [[TMP1]], align 8
-; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]])
-; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <4 x half>, i1 } poison, <4 x half> [[TMP7]], 0
-; CHECK-NEXT: [[TMP9:%.*]] = insertvalue { <4 x half>, i1 } [[TMP8]], i1 [[TMP6]], 1
-; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <4 x half>, i1 } [[TMP9]], 1
-; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <4 x half>, i1 } [[TMP9]], 0
-; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
-; CHECK: atomicrmw.end:
-; CHECK-NEXT: ret <4 x half> [[NEWLOADED]]
-;
- %res = atomicrmw fmin ptr addrspace(1) %ptr, <4 x half> %value syncscope("agent") seq_cst, align 2
- ret <4 x half> %res
-}
-
-define <4 x bfloat> @test_atomicrmw_fmin_v4bf16_global_agent_align2(ptr addrspace(1) %ptr, <4 x bfloat> %value) {
-; CHECK-LABEL: define <4 x bfloat> @test_atomicrmw_fmin_v4bf16_global_agent_align2(
-; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <4 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = alloca <4 x bfloat>, align 8, addrspace(5)
-; CHECK-NEXT: [[TMP2:%.*]] = alloca <4 x bfloat>, align 8, addrspace(5)
-; CHECK-NEXT: [[TMP3:%.*]] = load <4 x bfloat>, ptr addrspace(1) [[PTR]], align 2
-; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
-; CHECK: atomicrmw.start:
-; CHECK-NEXT: [[LOADED:%.*]] = phi <4 x bfloat> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
-; CHECK-NEXT: [[TMP4:%.*]] = call <4 x bfloat> @llvm.minnum.v4bf16(<4 x bfloat> [[LOADED]], <4 x bfloat> [[VALUE]])
-; CHECK-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]])
-; CHECK-NEXT: store <4 x bfloat> [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
-; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
-; CHECK-NEXT: store <4 x bfloat> [[TMP4]], ptr addrspace(5) [[TMP2]], align 8
-; CHECK-NEXT: [[TMP6:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP5]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
-; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
-; CHECK-NEXT: [[TMP7:%.*]] = load <4 x bfloat>, ptr addrspace(5) [[TMP1]], align 8
-; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]])
-; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <4 x bfloat>, i1 } poison, <4 x bfloat> [[TMP7]], 0
-; CHECK-NEXT: [[TMP9:%.*]] = insertvalue { <4 x bfloat>, i1 } [[TMP8]], i1 [[TMP6]], 1
-; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <4 x bfloat>, i1 } [[TMP9]], 1
-; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <4 x bfloat>, i1 } [[TMP9]], 0
-; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
-; CHECK: atomicrmw.end:
-; CHECK-NEXT: ret <4 x bfloat> [[NEWLOADED]]
-;
- %res = atomicrmw fmin ptr addrspace(1) %ptr, <4 x bfloat> %value syncscope("agent") seq_cst, align 2
- ret <4 x bfloat> %res
-}
-
-define <4 x half> @test_atomicrmw_fmin_v4f16_global_agent_align4(ptr addrspace(1) %ptr, <4 x half> %value) {
-; CHECK-LABEL: define <4 x half> @test_atomicrmw_fmin_v4f16_global_agent_align4(
-; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <4 x half> [[VALUE:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = alloca <4 x half>, align 8, addrspace(5)
-; CHECK-NEXT: [[TMP2:%.*]] = alloca <4 x half>, align 8, addrspace(5)
-; CHECK-NEXT: [[TMP3:%.*]] = load <4 x half>, ptr addrspace(1) [[PTR]], align 4
-; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
-; CHECK: atomicrmw.start:
-; CHECK-NEXT: [[LOADED:%.*]] = phi <4 x half> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
-; CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.minnum.v4f16(<4 x half> [[LOADED]], <4 x half> [[VALUE]])
-; CHECK-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]])
-; CHECK-NEXT: store <4 x half> [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
-; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
-; CHECK-NEXT: store <4 x half> [[TMP4]], ptr addrspace(5) [[TMP2]], align 8
-; CHECK-NEXT: [[TMP6:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP5]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
-; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
-; CHECK-NEXT: [[TMP7:%.*]] = load <4 x half>, ptr addrspace(5) [[TMP1]], align 8
-; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]])
-; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <4 x half>, i1 } poison, <4 x half> [[TMP7]], 0
-; CHECK-NEXT: [[TMP9:%.*]] = insertvalue { <4 x half>, i1 } [[TMP8]], i1 [[TMP6]], 1
-; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <4 x half>, i1 } [[TMP9]], 1
-; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <4 x half>, i1 } [[TMP9]], 0
-; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
-; CHECK: atomicrmw.end:
-; CHECK-NEXT: ret <4 x half> [[NEWLOADED]]
-;
- %res = atomicrmw fmin ptr addrspace(1) %ptr, <4 x half> %value syncscope("agent") seq_cst, align 4
- ret <4 x half> %res
-}
-
-define <4 x bfloat> @test_atomicrmw_fmin_v4bf16_global_agent_align4(ptr addrspace(1) %ptr, <4 x bfloat> %value) {
-; CHECK-LABEL: define <4 x bfloat> @test_atomicrmw_fmin_v4bf16_global_agent_align4(
-; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <4 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = alloca <4 x bfloat>, align 8, addrspace(5)
-; CHECK-NEXT: [[TMP2:%.*]] = alloca <4 x bfloat>, align 8, addrspace(5)
-; CHECK-NEXT: [[TMP3:%.*]] = load <4 x bfloat>, ptr addrspace(1) [[PTR]], align 4
-; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
-; CHECK: atomicrmw.start:
-; CHECK-NEXT: [[LOADED:%.*]] = phi <4 x bfloat> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
-; CHECK-NEXT: [[TMP4:%.*]] = call <4 x bfloat> @llvm.minnum.v4bf16(<4 x bfloat> [[LOADED]], <4 x bfloat> [[VALUE]])
-; CHECK-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]])
-; CHECK-NEXT: store <4 x bfloat> [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
-; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
-; CHECK-NEXT: store <4 x bfloat> [[TMP4]], ptr addrspace(5) [[TMP2]], align 8
-; CHECK-NEXT: [[TMP6:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP5]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
-; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
-; CHECK-NEXT: [[TMP7:%.*]] = load <4 x bfloat>, ptr addrspace(5) [[TMP1]], align 8
-; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]])
-; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <4 x bfloat>, i1 } poison, <4 x bfloat> [[TMP7]], 0
-; CHECK-NEXT: [[TMP9:%.*]] = insertvalue { <4 x bfloat>, i1 } [[TMP8]], i1 [[TMP6]], 1
-; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <4 x bfloat>, i1 } [[TMP9]], 1
-; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <4 x bfloat>, i1 } [[TMP9]], 0
-; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
-; CHECK: atomicrmw.end:
-; CHECK-NEXT: ret <4 x bfloat> [[NEWLOADED]]
-;
- %res = atomicrmw fmin ptr addrspace(1) %ptr, <4 x bfloat> %value syncscope("agent") seq_cst, align 4
- ret <4 x bfloat> %res
-}
-
define <4 x half> @test_atomicrmw_fmin_v4f16_global_agent_align8(ptr addrspace(1) %ptr, <4 x half> %value) {
; CHECK-LABEL: define <4 x half> @test_atomicrmw_fmin_v4f16_global_agent_align8(
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <4 x half> [[VALUE:%.*]]) #[[ATTR0]] {
@@ -951,68 +393,6 @@ define <2 x float> @test_atomicrmw_fmin_v2f32_global_agent_align8(ptr addrspace(
; atomicrmw fmax
;---------------------------------------------------------------------
-define <2 x half> @test_atomicrmw_fmax_v2f16_global_agent_align2(ptr addrspace(1) %ptr, <2 x half> %value) {
-; CHECK-LABEL: define <2 x half> @test_atomicrmw_fmax_v2f16_global_agent_align2(
-; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x half> [[VALUE:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = alloca <2 x half>, align 4, addrspace(5)
-; CHECK-NEXT: [[TMP2:%.*]] = alloca <2 x half>, align 4, addrspace(5)
-; CHECK-NEXT: [[TMP3:%.*]] = load <2 x half>, ptr addrspace(1) [[PTR]], align 2
-; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
-; CHECK: atomicrmw.start:
-; CHECK-NEXT: [[LOADED:%.*]] = phi <2 x half> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
-; CHECK-NEXT: [[TMP4:%.*]] = call <2 x half> @llvm.maxnum.v2f16(<2 x half> [[LOADED]], <2 x half> [[VALUE]])
-; CHECK-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[TMP1]])
-; CHECK-NEXT: store <2 x half> [[LOADED]], ptr addrspace(5) [[TMP1]], align 4
-; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[TMP2]])
-; CHECK-NEXT: store <2 x half> [[TMP4]], ptr addrspace(5) [[TMP2]], align 4
-; CHECK-NEXT: [[TMP6:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 4, ptr [[TMP5]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
-; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[TMP2]])
-; CHECK-NEXT: [[TMP7:%.*]] = load <2 x half>, ptr addrspace(5) [[TMP1]], align 4
-; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[TMP1]])
-; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <2 x half>, i1 } poison, <2 x half> [[TMP7]], 0
-; CHECK-NEXT: [[TMP9:%.*]] = insertvalue { <2 x half>, i1 } [[TMP8]], i1 [[TMP6]], 1
-; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <2 x half>, i1 } [[TMP9]], 1
-; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <2 x half>, i1 } [[TMP9]], 0
-; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
-; CHECK: atomicrmw.end:
-; CHECK-NEXT: ret <2 x half> [[NEWLOADED]]
-;
- %res = atomicrmw fmax ptr addrspace(1) %ptr, <2 x half> %value syncscope("agent") seq_cst, align 2
- ret <2 x half> %res
-}
-
-define <2 x bfloat> @test_atomicrmw_fmax_v2bf16_global_agent_align2(ptr addrspace(1) %ptr, <2 x bfloat> %value) {
-; CHECK-LABEL: define <2 x bfloat> @test_atomicrmw_fmax_v2bf16_global_agent_align2(
-; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = alloca <2 x bfloat>, align 4, addrspace(5)
-; CHECK-NEXT: [[TMP2:%.*]] = alloca <2 x bfloat>, align 4, addrspace(5)
-; CHECK-NEXT: [[TMP3:%.*]] = load <2 x bfloat>, ptr addrspace(1) [[PTR]], align 2
-; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
-; CHECK: atomicrmw.start:
-; CHECK-NEXT: [[LOADED:%.*]] = phi <2 x bfloat> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
-; CHECK-NEXT: [[TMP4:%.*]] = call <2 x bfloat> @llvm.maxnum.v2bf16(<2 x bfloat> [[LOADED]], <2 x bfloat> [[VALUE]])
-; CHECK-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[TMP1]])
-; CHECK-NEXT: store <2 x bfloat> [[LOADED]], ptr addrspace(5) [[TMP1]], align 4
-; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[TMP2]])
-; CHECK-NEXT: store <2 x bfloat> [[TMP4]], ptr addrspace(5) [[TMP2]], align 4
-; CHECK-NEXT: [[TMP6:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 4, ptr [[TMP5]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
-; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[TMP2]])
-; CHECK-NEXT: [[TMP7:%.*]] = load <2 x bfloat>, ptr addrspace(5) [[TMP1]], align 4
-; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[TMP1]])
-; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <2 x bfloat>, i1 } poison, <2 x bfloat> [[TMP7]], 0
-; CHECK-NEXT: [[TMP9:%.*]] = insertvalue { <2 x bfloat>, i1 } [[TMP8]], i1 [[TMP6]], 1
-; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <2 x bfloat>, i1 } [[TMP9]], 1
-; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <2 x bfloat>, i1 } [[TMP9]], 0
-; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
-; CHECK: atomicrmw.end:
-; CHECK-NEXT: ret <2 x bfloat> [[NEWLOADED]]
-;
- %res = atomicrmw fmax ptr addrspace(1) %ptr, <2 x bfloat> %value syncscope("agent") seq_cst, align 2
- ret <2 x bfloat> %res
-}
-
define <2 x half> @test_atomicrmw_fmax_v2f16_global_agent_align4(ptr addrspace(1) %ptr, <2 x half> %value) {
; CHECK-LABEL: define <2 x half> @test_atomicrmw_fmax_v2f16_global_agent_align4(
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x half> [[VALUE:%.*]]) #[[ATTR0]] {
@@ -1057,130 +437,6 @@ define <2 x bfloat> @test_atomicrmw_fmax_v2bf16_global_agent_align4(ptr addrspac
ret <2 x bfloat> %res
}
-define <4 x half> @test_atomicrmw_fmax_v4f16_global_agent_align2(ptr addrspace(1) %ptr, <4 x half> %value) {
-; CHECK-LABEL: define <4 x half> @test_atomicrmw_fmax_v4f16_global_agent_align2(
-; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <4 x half> [[VALUE:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = alloca <4 x half>, align 8, addrspace(5)
-; CHECK-NEXT: [[TMP2:%.*]] = alloca <4 x half>, align 8, addrspace(5)
-; CHECK-NEXT: [[TMP3:%.*]] = load <4 x half>, ptr addrspace(1) [[PTR]], align 2
-; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
-; CHECK: atomicrmw.start:
-; CHECK-NEXT: [[LOADED:%.*]] = phi <4 x half> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
-; CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.maxnum.v4f16(<4 x half> [[LOADED]], <4 x half> [[VALUE]])
-; CHECK-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]])
-; CHECK-NEXT: store <4 x half> [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
-; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
-; CHECK-NEXT: store <4 x half> [[TMP4]], ptr addrspace(5) [[TMP2]], align 8
-; CHECK-NEXT: [[TMP6:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP5]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
-; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
-; CHECK-NEXT: [[TMP7:%.*]] = load <4 x half>, ptr addrspace(5) [[TMP1]], align 8
-; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]])
-; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <4 x half>, i1 } poison, <4 x half> [[TMP7]], 0
-; CHECK-NEXT: [[TMP9:%.*]] = insertvalue { <4 x half>, i1 } [[TMP8]], i1 [[TMP6]], 1
-; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <4 x half>, i1 } [[TMP9]], 1
-; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <4 x half>, i1 } [[TMP9]], 0
-; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
-; CHECK: atomicrmw.end:
-; CHECK-NEXT: ret <4 x half> [[NEWLOADED]]
-;
- %res = atomicrmw fmax ptr addrspace(1) %ptr, <4 x half> %value syncscope("agent") seq_cst, align 2
- ret <4 x half> %res
-}
-
-define <4 x bfloat> @test_atomicrmw_fmax_v4bf16_global_agent_align2(ptr addrspace(1) %ptr, <4 x bfloat> %value) {
-; CHECK-LABEL: define <4 x bfloat> @test_atomicrmw_fmax_v4bf16_global_agent_align2(
-; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <4 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = alloca <4 x bfloat>, align 8, addrspace(5)
-; CHECK-NEXT: [[TMP2:%.*]] = alloca <4 x bfloat>, align 8, addrspace(5)
-; CHECK-NEXT: [[TMP3:%.*]] = load <4 x bfloat>, ptr addrspace(1) [[PTR]], align 2
-; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
-; CHECK: atomicrmw.start:
-; CHECK-NEXT: [[LOADED:%.*]] = phi <4 x bfloat> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
-; CHECK-NEXT: [[TMP4:%.*]] = call <4 x bfloat> @llvm.maxnum.v4bf16(<4 x bfloat> [[LOADED]], <4 x bfloat> [[VALUE]])
-; CHECK-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]])
-; CHECK-NEXT: store <4 x bfloat> [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
-; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
-; CHECK-NEXT: store <4 x bfloat> [[TMP4]], ptr addrspace(5) [[TMP2]], align 8
-; CHECK-NEXT: [[TMP6:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP5]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
-; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
-; CHECK-NEXT: [[TMP7:%.*]] = load <4 x bfloat>, ptr addrspace(5) [[TMP1]], align 8
-; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]])
-; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <4 x bfloat>, i1 } poison, <4 x bfloat> [[TMP7]], 0
-; CHECK-NEXT: [[TMP9:%.*]] = insertvalue { <4 x bfloat>, i1 } [[TMP8]], i1 [[TMP6]], 1
-; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <4 x bfloat>, i1 } [[TMP9]], 1
-; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <4 x bfloat>, i1 } [[TMP9]], 0
-; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
-; CHECK: atomicrmw.end:
-; CHECK-NEXT: ret <4 x bfloat> [[NEWLOADED]]
-;
- %res = atomicrmw fmax ptr addrspace(1) %ptr, <4 x bfloat> %value syncscope("agent") seq_cst, align 2
- ret <4 x bfloat> %res
-}
-
-define <4 x half> @test_atomicrmw_fmax_v4f16_global_agent_align4(ptr addrspace(1) %ptr, <4 x half> %value) {
-; CHECK-LABEL: define <4 x half> @test_atomicrmw_fmax_v4f16_global_agent_align4(
-; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <4 x half> [[VALUE:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = alloca <4 x half>, align 8, addrspace(5)
-; CHECK-NEXT: [[TMP2:%.*]] = alloca <4 x half>, align 8, addrspace(5)
-; CHECK-NEXT: [[TMP3:%.*]] = load <4 x half>, ptr addrspace(1) [[PTR]], align 4
-; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
-; CHECK: atomicrmw.start:
-; CHECK-NEXT: [[LOADED:%.*]] = phi <4 x half> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
-; CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.maxnum.v4f16(<4 x half> [[LOADED]], <4 x half> [[VALUE]])
-; CHECK-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]])
-; CHECK-NEXT: store <4 x half> [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
-; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
-; CHECK-NEXT: store <4 x half> [[TMP4]], ptr addrspace(5) [[TMP2]], align 8
-; CHECK-NEXT: [[TMP6:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP5]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
-; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
-; CHECK-NEXT: [[TMP7:%.*]] = load <4 x half>, ptr addrspace(5) [[TMP1]], align 8
-; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]])
-; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <4 x half>, i1 } poison, <4 x half> [[TMP7]], 0
-; CHECK-NEXT: [[TMP9:%.*]] = insertvalue { <4 x half>, i1 } [[TMP8]], i1 [[TMP6]], 1
-; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <4 x half>, i1 } [[TMP9]], 1
-; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <4 x half>, i1 } [[TMP9]], 0
-; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
-; CHECK: atomicrmw.end:
-; CHECK-NEXT: ret <4 x half> [[NEWLOADED]]
-;
- %res = atomicrmw fmax ptr addrspace(1) %ptr, <4 x half> %value syncscope("agent") seq_cst, align 4
- ret <4 x half> %res
-}
-
-define <4 x bfloat> @test_atomicrmw_fmax_v4bf16_global_agent_align4(ptr addrspace(1) %ptr, <4 x bfloat> %value) {
-; CHECK-LABEL: define <4 x bfloat> @test_atomicrmw_fmax_v4bf16_global_agent_align4(
-; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <4 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = alloca <4 x bfloat>, align 8, addrspace(5)
-; CHECK-NEXT: [[TMP2:%.*]] = alloca <4 x bfloat>, align 8, addrspace(5)
-; CHECK-NEXT: [[TMP3:%.*]] = load <4 x bfloat>, ptr addrspace(1) [[PTR]], align 4
-; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
-; CHECK: atomicrmw.start:
-; CHECK-NEXT: [[LOADED:%.*]] = phi <4 x bfloat> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
-; CHECK-NEXT: [[TMP4:%.*]] = call <4 x bfloat> @llvm.maxnum.v4bf16(<4 x bfloat> [[LOADED]], <4 x bfloat> [[VALUE]])
-; CHECK-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]])
-; CHECK-NEXT: store <4 x bfloat> [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
-; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
-; CHECK-NEXT: store <4 x bfloat> [[TMP4]], ptr addrspace(5) [[TMP2]], align 8
-; CHECK-NEXT: [[TMP6:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP5]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
-; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
-; CHECK-NEXT: [[TMP7:%.*]] = load <4 x bfloat>, ptr addrspace(5) [[TMP1]], align 8
-; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]])
-; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <4 x bfloat>, i1 } poison, <4 x bfloat> [[TMP7]], 0
-; CHECK-NEXT: [[TMP9:%.*]] = insertvalue { <4 x bfloat>, i1 } [[TMP8]], i1 [[TMP6]], 1
-; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <4 x bfloat>, i1 } [[TMP9]], 1
-; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <4 x bfloat>, i1 } [[TMP9]], 0
-; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
-; CHECK: atomicrmw.end:
-; CHECK-NEXT: ret <4 x bfloat> [[NEWLOADED]]
-;
- %res = atomicrmw fmax ptr addrspace(1) %ptr, <4 x bfloat> %value syncscope("agent") seq_cst, align 4
- ret <4 x bfloat> %res
-}
-
define <4 x half> @test_atomicrmw_fmax_v4f16_global_agent_align8(ptr addrspace(1) %ptr, <4 x half> %value) {
; CHECK-LABEL: define <4 x half> @test_atomicrmw_fmax_v4f16_global_agent_align8(
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <4 x half> [[VALUE:%.*]]) #[[ATTR0]] {
diff --git a/llvm/test/Transforms/AtomicExpand/AMDGPU/unaligned-atomic.ll b/llvm/test/Transforms/AtomicExpand/AMDGPU/unaligned-atomic.ll
index acf726a7de5e0..5ebba03c2cc73 100644
--- a/llvm/test/Transforms/AtomicExpand/AMDGPU/unaligned-atomic.ll
+++ b/llvm/test/Transforms/AtomicExpand/AMDGPU/unaligned-atomic.ll
@@ -1,29 +1,13 @@
-; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -passes=atomic-expand %s 2>&1 | FileCheck --check-prefix=GCN %s
+; RUN: not opt -disable-output -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -passes=atomic-expand %s 2>&1 | FileCheck --implicit-check-not=error %s
+; CHECK: error: unsupported atomic load
define i32 @atomic_load_global_align1(ptr addrspace(1) %ptr) {
-; GCN-LABEL: @atomic_load_global_align1(
-; GCN-NEXT: [[TMP2:%.*]] = addrspacecast ptr addrspace(1) [[PTR:%.*]] to ptr
-; GCN-NEXT: [[TMP3:%.*]] = alloca i32, align 4, addrspace(5)
-; GCN-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[TMP3]])
-; GCN-NEXT: call void @__atomic_load(i64 4, ptr [[TMP2]], ptr addrspace(5) [[TMP3]], i32 5)
-; GCN-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(5) [[TMP3]], align 4
-; GCN-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[TMP3]])
-; GCN-NEXT: ret i32 [[TMP5]]
-;
%val = load atomic i32, ptr addrspace(1) %ptr seq_cst, align 1
ret i32 %val
}
+; FIXME: The second error, for the atomic store in the next function, is not emitted.
define void @atomic_store_global_align1(ptr addrspace(1) %ptr, i32 %val) {
-; GCN-LABEL: @atomic_store_global_align1(
-; GCN-NEXT: [[TMP2:%.*]] = addrspacecast ptr addrspace(1) [[PTR:%.*]] to ptr
-; GCN-NEXT: [[TMP3:%.*]] = alloca i32, align 4, addrspace(5)
-; GCN-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[TMP3]])
-; GCN-NEXT: store i32 [[VAL:%.*]], ptr addrspace(5) [[TMP3]], align 4
-; GCN-NEXT: call void @__atomic_store(i64 4, ptr [[TMP2]], ptr addrspace(5) [[TMP3]], i32 0)
-; GCN-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[TMP3]])
-; GCN-NEXT: ret void
-;
store atomic i32 %val, ptr addrspace(1) %ptr monotonic, align 1
ret void
}
diff --git a/llvm/utils/TableGen/Basic/RuntimeLibcallsEmitter.cpp b/llvm/utils/TableGen/Basic/RuntimeLibcallsEmitter.cpp
index 9d77631862ee5..98420a45f21e4 100644
--- a/llvm/utils/TableGen/Basic/RuntimeLibcallsEmitter.cpp
+++ b/llvm/utils/TableGen/Basic/RuntimeLibcallsEmitter.cpp
@@ -313,25 +313,8 @@ void RuntimeLibcallEmitter::emitGetInitRuntimeLibcallNames(
void RuntimeLibcallEmitter::emitGetInitRuntimeLibcallUtils(
raw_ostream &OS) const {
- // FIXME: Hack we shouldn't really need
OS << "#ifdef GET_INIT_RUNTIME_LIBCALL_UTILS\n"
- "static inline bool isAtomicLibCall(llvm::RTLIB::Libcall LC) {\n"
- " switch (LC) {\n";
- for (const RuntimeLibcall &LibCall : RuntimeLibcallDefList) {
- StringRef Name = LibCall.getName();
- if (Name.contains("ATOMIC")) {
- OS << " case ";
- LibCall.emitEnumEntry(OS);
- OS << ":\n";
- }
- }
-
- OS << " return true;\n"
- " default:\n"
- " return false;\n"
- " }\n\n"
- " llvm_unreachable(\"covered switch over libcalls\");\n"
- "}\n#endif\n\n";
+ "#endif\n\n";
}
void RuntimeLibcallEmitter::run(raw_ostream &OS) {
More information about the llvm-branch-commits
mailing list