[llvm] Add out-of-line-atomics support to GlobalISel (PR #74588)
Thomas Preud'homme via llvm-commits
llvm-commits at lists.llvm.org
Fri Dec 15 07:09:36 PST 2023
https://github.com/RoboTux updated https://github.com/llvm/llvm-project/pull/74588
>From 57e9965465c921b137051b46c1d3e5e245f9cd38 Mon Sep 17 00:00:00 2001
From: Thomas Preud'homme <thomas.preudhomme at arm.com>
Date: Fri, 1 Dec 2023 12:01:52 +0000
Subject: [PATCH 1/6] Add out-of-line-atomics support to GlobalISel
This patch implements the GlobalISel counterpart to
4d7df43ffdb460dddb2877a886f75f45c3fee188.
---
.../CodeGen/GlobalISel/LegalizerHelper.cpp | 183 ++
.../AArch64/GISel/AArch64LegalizerInfo.cpp | 29 +-
.../aarch64-atomic-load-outline_atomics.ll | 48 +-
.../aarch64-atomic-store-outline_atomics.ll | 48 +-
.../aarch64-atomicrmw-outline_atomics.ll | 2380 +++++++----------
.../aarch64-cmpxchg-outline_atomics.ll | 1683 +++---------
.../AArch64/GlobalISel/arm64-atomic-128.ll | 239 ++
.../AArch64/GlobalISel/arm64-atomic.ll | 1367 ++++++++++
.../GlobalISel/legalizer-info-validation.mir | 2 -
9 files changed, 3157 insertions(+), 2822 deletions(-)
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 045fc78218daef..186937e597c5bc 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -765,6 +765,166 @@ llvm::createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
return LegalizerHelper::Legalized;
}
+static RTLIB::Libcall
+getOutlineAtomicLibcall(unsigned Opc, AtomicOrdering Order, uint64_t MemSize) {
+ unsigned ModeN, ModelN;
+ switch (MemSize) {
+ case 1:
+ ModeN = 0;
+ break;
+ case 2:
+ ModeN = 1;
+ break;
+ case 4:
+ ModeN = 2;
+ break;
+ case 8:
+ ModeN = 3;
+ break;
+ case 16:
+ ModeN = 4;
+ break;
+ default:
+ return RTLIB::UNKNOWN_LIBCALL;
+ }
+ // ModeN (from MemSize) is the table row; the ordering picks the column below.
+ switch (Order) {
+ case AtomicOrdering::Monotonic:
+ ModelN = 0;
+ break;
+ case AtomicOrdering::Acquire:
+ ModelN = 1;
+ break;
+ case AtomicOrdering::Release:
+ ModelN = 2;
+ break;
+ case AtomicOrdering::AcquireRelease:
+ case AtomicOrdering::SequentiallyConsistent:
+ ModelN = 3;
+ break;
+ default:
+ return RTLIB::UNKNOWN_LIBCALL;
+ }
+ // LCALL5(A) expands to a [size][ordering] table of A<size>_<ordering> names.
+#define LCALLS(A, B) \
+ { A##B##_RELAX, A##B##_ACQ, A##B##_REL, A##B##_ACQ_REL }
+#define LCALL5(A) \
+ LCALLS(A, 1), LCALLS(A, 2), LCALLS(A, 4), LCALLS(A, 8), LCALLS(A, 16)
+ switch (Opc) {
+ case TargetOpcode::G_ATOMIC_CMPXCHG:
+ case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
+ const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_CAS)};
+ return LC[ModeN][ModelN];
+ }
+ case TargetOpcode::G_ATOMICRMW_XCHG: {
+ const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_SWP)};
+ return LC[ModeN][ModelN];
+ }
+ case TargetOpcode::G_ATOMICRMW_ADD: {
+ const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDADD)};
+ return LC[ModeN][ModelN];
+ }
+ case TargetOpcode::G_ATOMICRMW_AND: {
+ const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDCLR)};
+ return LC[ModeN][ModelN];
+ }
+ case TargetOpcode::G_ATOMICRMW_OR: {
+ const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDSET)};
+ return LC[ModeN][ModelN];
+ }
+ case TargetOpcode::G_ATOMICRMW_XOR: {
+ const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDEOR)};
+ return LC[ModeN][ModelN];
+ }
+ default:
+ return RTLIB::UNKNOWN_LIBCALL;
+ }
+#undef LCALLS
+#undef LCALL5
+}
+
+static LegalizerHelper::LegalizeResult
+createAtomicLibcall(MachineIRBuilder &MIRBuilder, MachineInstr &MI) {
+ auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
+ MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
+
+ // Gather the call's return registers/type and arguments from MI's operands.
+ // G_ATOMIC_CMPXCHG[_WITH_SUCCESS]: defs are the old value (plus the success
+ // flag), uses are ptr, expected, new. G_ATOMICRMW_*: def is the old value,
+ // uses are ptr, val. Arguments are pushed value(s) first, pointer last.
+ Type *RetTy;
+ SmallVector<Register> RetRegs;
+ SmallVector<CallLowering::ArgInfo, 3> Args;
+ unsigned Opc = MI.getOpcode();
+ switch (Opc) {
+ case TargetOpcode::G_ATOMIC_CMPXCHG:
+ case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
+ Register Success;
+ LLT SuccessLLT;
+ auto [Ret, RetLLT, Mem, MemLLT, Cmp, CmpLLT, New, NewLLT] =
+ MI.getFirst4RegLLTs();
+ RetRegs.push_back(Ret);
+ RetTy = IntegerType::get(Ctx, RetLLT.getSizeInBits());
+ if (Opc == TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS) {
+ std::tie(Ret, RetLLT, Success, SuccessLLT, Mem, MemLLT, Cmp, CmpLLT, New,
+ NewLLT) = MI.getFirst5RegLLTs();
+ RetRegs.push_back(Success);
+ RetTy = StructType::get(
+ Ctx, {RetTy, IntegerType::get(Ctx, SuccessLLT.getSizeInBits())});
+ }
+ Args.push_back({Cmp, IntegerType::get(Ctx, CmpLLT.getSizeInBits()), 0});
+ Args.push_back({New, IntegerType::get(Ctx, NewLLT.getSizeInBits()), 0});
+ Args.push_back({Mem, PointerType::get(Ctx, MemLLT.getAddressSpace()), 0});
+ break;
+ }
+ case TargetOpcode::G_ATOMICRMW_XCHG:
+ case TargetOpcode::G_ATOMICRMW_ADD:
+ case TargetOpcode::G_ATOMICRMW_AND:
+ case TargetOpcode::G_ATOMICRMW_OR:
+ case TargetOpcode::G_ATOMICRMW_XOR: {
+ auto [Ret, RetLLT, Mem, MemLLT, Val, ValLLT] = MI.getFirst3RegLLTs();
+ RetRegs.push_back(Ret);
+ RetTy = IntegerType::get(Ctx, RetLLT.getSizeInBits());
+ if (Opc == TargetOpcode::G_ATOMICRMW_AND) {
+ Register Tmp = MRI.createGenericVirtualRegister(ValLLT);
+ MIRBuilder.buildXor(Tmp, MIRBuilder.buildConstant(ValLLT, -1), Val);
+ Val = Tmp;
+ }
+ Args.push_back({Val, IntegerType::get(Ctx, ValLLT.getSizeInBits()), 0});
+ Args.push_back({Mem, PointerType::get(Ctx, MemLLT.getAddressSpace()), 0});
+ break;
+ }
+ default:
+ llvm_unreachable("unsupported opcode");
+ }
+
+ auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
+ auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
+ auto &AtomicMI = cast<GMemOperation>(MI);
+ auto Ordering = AtomicMI.getMMO().getMergedOrdering();
+ uint64_t MemSize = AtomicMI.getMemSize();
+ RTLIB::Libcall RTLibcall = getOutlineAtomicLibcall(Opc, Ordering, MemSize);
+ const char *Name = TLI.getLibcallName(RTLibcall);
+
+ // Unsupported libcall on the target.
+ if (!Name) {
+ LLVM_DEBUG(dbgs() << ".. .. Could not find libcall name for "
+ << MIRBuilder.getTII().getName(Opc) << "\n");
+ return LegalizerHelper::UnableToLegalize;
+ }
+
+ CallLowering::CallLoweringInfo Info;
+ Info.CallConv = TLI.getLibcallCallingConv(RTLibcall);
+ Info.Callee = MachineOperand::CreateES(Name);
+ Info.OrigRet = CallLowering::ArgInfo(RetRegs, RetTy, 0);
+
+ std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
+ if (!CLI.lowerCall(MIRBuilder, Info))
+ return LegalizerHelper::UnableToLegalize;
+
+ return LegalizerHelper::Legalized;
+}
+
static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType,
Type *FromType) {
auto ToMVT = MVT::getVT(ToType);
@@ -1020,6 +1180,18 @@ LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) {
return Status;
break;
}
+ case TargetOpcode::G_ATOMICRMW_XCHG:
+ case TargetOpcode::G_ATOMICRMW_ADD:
+ case TargetOpcode::G_ATOMICRMW_AND:
+ case TargetOpcode::G_ATOMICRMW_OR:
+ case TargetOpcode::G_ATOMICRMW_XOR:
+ case TargetOpcode::G_ATOMIC_CMPXCHG:
+ case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
+ auto Status = createAtomicLibcall(MIRBuilder, MI);
+ if (Status != Legalized)
+ return Status;
+ break;
+ }
case TargetOpcode::G_BZERO:
case TargetOpcode::G_MEMCPY:
case TargetOpcode::G_MEMMOVE:
@@ -3793,6 +3965,17 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
return lowerTRUNC(MI);
GISEL_VECREDUCE_CASES_NONSEQ
return lowerVectorReduction(MI);
+ case G_ATOMICRMW_SUB: {
+ auto Val = MI.getOperand(2).getReg();
+ LLT ValLLT = MRI.getType(Val);
+ Register Tmp = MRI.createGenericVirtualRegister(ValLLT);
+ MIRBuilder.buildSub(Tmp, MIRBuilder.buildConstant(ValLLT, 0), Val);
+ auto [Ret, Mem] = MI.getFirst2Regs();
+ auto &MMO = cast<GMemOperation>(MI).getMMO();
+ MIRBuilder.buildAtomicRMWAdd(Ret, Mem, Tmp, MMO);
+ MI.eraseFromParent();
+ return Legalized;
+ }
}
}
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 21a412e9360dce..7fce3e501db57c 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -758,16 +758,39 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
all(typeInSet(0, {s8, s16, s32, s64, s128}), typeIs(2, p0)));
getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG)
+ .libcallIf([&ST](const LegalityQuery &Query) {
+ return ST.outlineAtomics() && !ST.hasLSE();
+ })
.customIf([](const LegalityQuery &Query) {
return Query.Types[0].getSizeInBits() == 128;
})
.clampScalar(0, s32, s64)
.legalIf(all(typeInSet(0, {s32, s64}), typeIs(1, p0)));
+ getActionDefinitionsBuilder({G_ATOMICRMW_XCHG, G_ATOMICRMW_ADD,
+ G_ATOMICRMW_AND, G_ATOMICRMW_OR,
+ G_ATOMICRMW_XOR})
+ .libcallIf([&ST](const LegalityQuery &Query) {
+ return ST.outlineAtomics() && !ST.hasLSE();
+ })
+ .clampScalar(0, s32, s64)
+ .legalIf(all(typeInSet(0, {s32, s64}), typeIs(1, p0)));
+
+ getActionDefinitionsBuilder(G_ATOMICRMW_SUB)
+ .lowerIf([&ST](const LegalityQuery &Query) {
+ return ST.outlineAtomics() && !ST.hasLSE();
+ })
+ .clampScalar(0, s32, s64)
+ .legalIf(all(typeInSet(0, {s32, s64}), typeIs(1, p0)));
+
+ // [U]Min/[U]Max RMW atomics are used in __sync_fetch_ libcalls so far.
+ // Don't outline them unless
+ // (1) high level <atomic> support approved:
+ // http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2020/p0493r1.pdf
+ // (2) low level libgcc and compiler-rt support implemented by:
+ // min/max outline atomics helpers
getActionDefinitionsBuilder(
- {G_ATOMICRMW_XCHG, G_ATOMICRMW_ADD, G_ATOMICRMW_SUB, G_ATOMICRMW_AND,
- G_ATOMICRMW_OR, G_ATOMICRMW_XOR, G_ATOMICRMW_MIN, G_ATOMICRMW_MAX,
- G_ATOMICRMW_UMIN, G_ATOMICRMW_UMAX})
+ {G_ATOMICRMW_MIN, G_ATOMICRMW_MAX, G_ATOMICRMW_UMIN, G_ATOMICRMW_UMAX})
.clampScalar(0, s32, s64)
.legalIf(all(typeInSet(0, {s32, s64}), typeIs(1, p0)));
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-load-outline_atomics.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-load-outline_atomics.ll
index fb4bef33d9b4ff..fccafb29addbc3 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-load-outline_atomics.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-load-outline_atomics.ll
@@ -229,11 +229,7 @@ define dso_local i64 @load_atomic_i64_aligned_seq_cst_const(ptr readonly %ptr) {
define dso_local i128 @load_atomic_i128_aligned_unordered(ptr %ptr) {
; -O0-LABEL: load_atomic_i128_aligned_unordered:
-; -O0: ldxp x0, x1, [x9]
-; -O0: cmp x0, x10
-; -O0: cmp x1, x10
-; -O0: stxp w8, x10, x10, [x9]
-; -O0: stxp w8, x0, x1, [x9]
+; -O0: bl __aarch64_cas16_relax
;
; -O1-LABEL: load_atomic_i128_aligned_unordered:
; -O1: ldxp x0, x1, [x8]
@@ -244,11 +240,7 @@ define dso_local i128 @load_atomic_i128_aligned_unordered(ptr %ptr) {
define dso_local i128 @load_atomic_i128_aligned_unordered_const(ptr readonly %ptr) {
; -O0-LABEL: load_atomic_i128_aligned_unordered_const:
-; -O0: ldxp x0, x1, [x9]
-; -O0: cmp x0, x10
-; -O0: cmp x1, x10
-; -O0: stxp w8, x10, x10, [x9]
-; -O0: stxp w8, x0, x1, [x9]
+; -O0: bl __aarch64_cas16_relax
;
; -O1-LABEL: load_atomic_i128_aligned_unordered_const:
; -O1: ldxp x0, x1, [x8]
@@ -259,11 +251,7 @@ define dso_local i128 @load_atomic_i128_aligned_unordered_const(ptr readonly %pt
define dso_local i128 @load_atomic_i128_aligned_monotonic(ptr %ptr) {
; -O0-LABEL: load_atomic_i128_aligned_monotonic:
-; -O0: ldxp x0, x1, [x9]
-; -O0: cmp x0, x10
-; -O0: cmp x1, x10
-; -O0: stxp w8, x10, x10, [x9]
-; -O0: stxp w8, x0, x1, [x9]
+; -O0: bl __aarch64_cas16_relax
;
; -O1-LABEL: load_atomic_i128_aligned_monotonic:
; -O1: ldxp x0, x1, [x8]
@@ -274,11 +262,7 @@ define dso_local i128 @load_atomic_i128_aligned_monotonic(ptr %ptr) {
define dso_local i128 @load_atomic_i128_aligned_monotonic_const(ptr readonly %ptr) {
; -O0-LABEL: load_atomic_i128_aligned_monotonic_const:
-; -O0: ldxp x0, x1, [x9]
-; -O0: cmp x0, x10
-; -O0: cmp x1, x10
-; -O0: stxp w8, x10, x10, [x9]
-; -O0: stxp w8, x0, x1, [x9]
+; -O0: bl __aarch64_cas16_relax
;
; -O1-LABEL: load_atomic_i128_aligned_monotonic_const:
; -O1: ldxp x0, x1, [x8]
@@ -289,11 +273,7 @@ define dso_local i128 @load_atomic_i128_aligned_monotonic_const(ptr readonly %pt
define dso_local i128 @load_atomic_i128_aligned_acquire(ptr %ptr) {
; -O0-LABEL: load_atomic_i128_aligned_acquire:
-; -O0: ldaxp x0, x1, [x9]
-; -O0: cmp x0, x10
-; -O0: cmp x1, x10
-; -O0: stxp w8, x10, x10, [x9]
-; -O0: stxp w8, x0, x1, [x9]
+; -O0: bl __aarch64_cas16_acq
;
; -O1-LABEL: load_atomic_i128_aligned_acquire:
; -O1: ldaxp x0, x1, [x8]
@@ -304,11 +284,7 @@ define dso_local i128 @load_atomic_i128_aligned_acquire(ptr %ptr) {
define dso_local i128 @load_atomic_i128_aligned_acquire_const(ptr readonly %ptr) {
; -O0-LABEL: load_atomic_i128_aligned_acquire_const:
-; -O0: ldaxp x0, x1, [x9]
-; -O0: cmp x0, x10
-; -O0: cmp x1, x10
-; -O0: stxp w8, x10, x10, [x9]
-; -O0: stxp w8, x0, x1, [x9]
+; -O0: bl __aarch64_cas16_acq
;
; -O1-LABEL: load_atomic_i128_aligned_acquire_const:
; -O1: ldaxp x0, x1, [x8]
@@ -319,11 +295,7 @@ define dso_local i128 @load_atomic_i128_aligned_acquire_const(ptr readonly %ptr)
define dso_local i128 @load_atomic_i128_aligned_seq_cst(ptr %ptr) {
; -O0-LABEL: load_atomic_i128_aligned_seq_cst:
-; -O0: ldaxp x0, x1, [x9]
-; -O0: cmp x0, x10
-; -O0: cmp x1, x10
-; -O0: stlxp w8, x10, x10, [x9]
-; -O0: stlxp w8, x0, x1, [x9]
+; -O0: bl __aarch64_cas16_acq_rel
;
; -O1-LABEL: load_atomic_i128_aligned_seq_cst:
; -O1: ldaxp x0, x1, [x8]
@@ -334,11 +306,7 @@ define dso_local i128 @load_atomic_i128_aligned_seq_cst(ptr %ptr) {
define dso_local i128 @load_atomic_i128_aligned_seq_cst_const(ptr readonly %ptr) {
; -O0-LABEL: load_atomic_i128_aligned_seq_cst_const:
-; -O0: ldaxp x0, x1, [x9]
-; -O0: cmp x0, x10
-; -O0: cmp x1, x10
-; -O0: stlxp w8, x10, x10, [x9]
-; -O0: stlxp w8, x0, x1, [x9]
+; -O0: bl __aarch64_cas16_acq_rel
;
; -O1-LABEL: load_atomic_i128_aligned_seq_cst_const:
; -O1: ldaxp x0, x1, [x8]
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-store-outline_atomics.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-store-outline_atomics.ll
index 3d204b734d4a03..e594561010464b 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-store-outline_atomics.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-store-outline_atomics.ll
@@ -117,14 +117,10 @@ define dso_local void @store_atomic_i64_aligned_seq_cst(i64 %value, ptr %ptr) {
define dso_local void @store_atomic_i128_aligned_unordered(i128 %value, ptr %ptr) {
; -O0-LABEL: store_atomic_i128_aligned_unordered:
-; -O0: ldxp x10, x9, [x11]
-; -O0: cmp x10, x12
-; -O0: cmp x9, x13
-; -O0: stxp w8, x14, x15, [x11]
-; -O0: stxp w8, x10, x9, [x11]
-; -O0: eor x8, x10, x8
-; -O0: eor x11, x9, x11
-; -O0: orr x8, x8, x11
+; -O0: bl __aarch64_cas16_relax
+; -O0: eor x8, x0, x8
+; -O0: eor x9, x1, x9
+; -O0: orr x8, x8, x9
; -O0: subs x8, x8, #0
;
; -O1-LABEL: store_atomic_i128_aligned_unordered:
@@ -136,14 +132,10 @@ define dso_local void @store_atomic_i128_aligned_unordered(i128 %value, ptr %ptr
define dso_local void @store_atomic_i128_aligned_monotonic(i128 %value, ptr %ptr) {
; -O0-LABEL: store_atomic_i128_aligned_monotonic:
-; -O0: ldxp x10, x9, [x11]
-; -O0: cmp x10, x12
-; -O0: cmp x9, x13
-; -O0: stxp w8, x14, x15, [x11]
-; -O0: stxp w8, x10, x9, [x11]
-; -O0: eor x8, x10, x8
-; -O0: eor x11, x9, x11
-; -O0: orr x8, x8, x11
+; -O0: bl __aarch64_cas16_relax
+; -O0: eor x8, x0, x8
+; -O0: eor x9, x1, x9
+; -O0: orr x8, x8, x9
; -O0: subs x8, x8, #0
;
; -O1-LABEL: store_atomic_i128_aligned_monotonic:
@@ -155,14 +147,10 @@ define dso_local void @store_atomic_i128_aligned_monotonic(i128 %value, ptr %ptr
define dso_local void @store_atomic_i128_aligned_release(i128 %value, ptr %ptr) {
; -O0-LABEL: store_atomic_i128_aligned_release:
-; -O0: ldxp x10, x9, [x11]
-; -O0: cmp x10, x12
-; -O0: cmp x9, x13
-; -O0: stlxp w8, x14, x15, [x11]
-; -O0: stlxp w8, x10, x9, [x11]
-; -O0: eor x8, x10, x8
-; -O0: eor x11, x9, x11
-; -O0: orr x8, x8, x11
+; -O0: bl __aarch64_cas16_rel
+; -O0: eor x8, x0, x8
+; -O0: eor x9, x1, x9
+; -O0: orr x8, x8, x9
; -O0: subs x8, x8, #0
;
; -O1-LABEL: store_atomic_i128_aligned_release:
@@ -174,14 +162,10 @@ define dso_local void @store_atomic_i128_aligned_release(i128 %value, ptr %ptr)
define dso_local void @store_atomic_i128_aligned_seq_cst(i128 %value, ptr %ptr) {
; -O0-LABEL: store_atomic_i128_aligned_seq_cst:
-; -O0: ldaxp x10, x9, [x11]
-; -O0: cmp x10, x12
-; -O0: cmp x9, x13
-; -O0: stlxp w8, x14, x15, [x11]
-; -O0: stlxp w8, x10, x9, [x11]
-; -O0: eor x8, x10, x8
-; -O0: eor x11, x9, x11
-; -O0: orr x8, x8, x11
+; -O0: bl __aarch64_cas16_acq_rel
+; -O0: eor x8, x0, x8
+; -O0: eor x9, x1, x9
+; -O0: orr x8, x8, x9
; -O0: subs x8, x8, #0
;
; -O1-LABEL: store_atomic_i128_aligned_seq_cst:
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-outline_atomics.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-outline_atomics.ll
index c660c139e35d44..e9b096e8c6c44b 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-outline_atomics.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-outline_atomics.ll
@@ -145,14 +145,10 @@ define dso_local i64 @atomicrmw_xchg_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
define dso_local i128 @atomicrmw_xchg_i128_aligned_monotonic(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_xchg_i128_aligned_monotonic:
-; -O0: ldxp x10, x9, [x11]
-; -O0: cmp x10, x12
-; -O0: cmp x9, x13
-; -O0: stxp w8, x14, x15, [x11]
-; -O0: stxp w8, x10, x9, [x11]
-; -O0: eor x8, x10, x8
-; -O0: eor x11, x9, x11
-; -O0: orr x8, x8, x11
+; -O0: bl __aarch64_cas16_relax
+; -O0: eor x8, x0, x8
+; -O0: eor x9, x1, x9
+; -O0: orr x8, x8, x9
; -O0: subs x8, x8, #0
;
; -O1-LABEL: atomicrmw_xchg_i128_aligned_monotonic:
@@ -164,14 +160,10 @@ define dso_local i128 @atomicrmw_xchg_i128_aligned_monotonic(ptr %ptr, i128 %val
define dso_local i128 @atomicrmw_xchg_i128_aligned_acquire(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_xchg_i128_aligned_acquire:
-; -O0: ldaxp x10, x9, [x11]
-; -O0: cmp x10, x12
-; -O0: cmp x9, x13
-; -O0: stxp w8, x14, x15, [x11]
-; -O0: stxp w8, x10, x9, [x11]
-; -O0: eor x8, x10, x8
-; -O0: eor x11, x9, x11
-; -O0: orr x8, x8, x11
+; -O0: bl __aarch64_cas16_acq
+; -O0: eor x8, x0, x8
+; -O0: eor x9, x1, x9
+; -O0: orr x8, x8, x9
; -O0: subs x8, x8, #0
;
; -O1-LABEL: atomicrmw_xchg_i128_aligned_acquire:
@@ -183,14 +175,10 @@ define dso_local i128 @atomicrmw_xchg_i128_aligned_acquire(ptr %ptr, i128 %value
define dso_local i128 @atomicrmw_xchg_i128_aligned_release(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_xchg_i128_aligned_release:
-; -O0: ldxp x10, x9, [x11]
-; -O0: cmp x10, x12
-; -O0: cmp x9, x13
-; -O0: stlxp w8, x14, x15, [x11]
-; -O0: stlxp w8, x10, x9, [x11]
-; -O0: eor x8, x10, x8
-; -O0: eor x11, x9, x11
-; -O0: orr x8, x8, x11
+; -O0: bl __aarch64_cas16_rel
+; -O0: eor x8, x0, x8
+; -O0: eor x9, x1, x9
+; -O0: orr x8, x8, x9
; -O0: subs x8, x8, #0
;
; -O1-LABEL: atomicrmw_xchg_i128_aligned_release:
@@ -202,14 +190,10 @@ define dso_local i128 @atomicrmw_xchg_i128_aligned_release(ptr %ptr, i128 %value
define dso_local i128 @atomicrmw_xchg_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_xchg_i128_aligned_acq_rel:
-; -O0: ldaxp x10, x9, [x11]
-; -O0: cmp x10, x12
-; -O0: cmp x9, x13
-; -O0: stlxp w8, x14, x15, [x11]
-; -O0: stlxp w8, x10, x9, [x11]
-; -O0: eor x8, x10, x8
-; -O0: eor x11, x9, x11
-; -O0: orr x8, x8, x11
+; -O0: bl __aarch64_cas16_acq_rel
+; -O0: eor x8, x0, x8
+; -O0: eor x9, x1, x9
+; -O0: orr x8, x8, x9
; -O0: subs x8, x8, #0
;
; -O1-LABEL: atomicrmw_xchg_i128_aligned_acq_rel:
@@ -221,14 +205,10 @@ define dso_local i128 @atomicrmw_xchg_i128_aligned_acq_rel(ptr %ptr, i128 %value
define dso_local i128 @atomicrmw_xchg_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_xchg_i128_aligned_seq_cst:
-; -O0: ldaxp x10, x9, [x11]
-; -O0: cmp x10, x12
-; -O0: cmp x9, x13
-; -O0: stlxp w8, x14, x15, [x11]
-; -O0: stlxp w8, x10, x9, [x11]
-; -O0: eor x8, x10, x8
-; -O0: eor x11, x9, x11
-; -O0: orr x8, x8, x11
+; -O0: bl __aarch64_cas16_acq_rel
+; -O0: eor x8, x0, x8
+; -O0: eor x9, x1, x9
+; -O0: orr x8, x8, x9
; -O0: subs x8, x8, #0
;
; -O1-LABEL: atomicrmw_xchg_i128_aligned_seq_cst:
@@ -555,16 +535,12 @@ define dso_local i64 @atomicrmw_add_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
define dso_local i128 @atomicrmw_add_i128_aligned_monotonic(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_add_i128_aligned_monotonic:
-; -O0: adds x14, x8, x10
+; -O0: adds x2, x8, x10
; -O0: subs w10, w10, #1
-; -O0: ldxp x10, x9, [x11]
-; -O0: cmp x10, x12
-; -O0: cmp x9, x13
-; -O0: stxp w8, x14, x15, [x11]
-; -O0: stxp w8, x10, x9, [x11]
-; -O0: eor x8, x10, x8
-; -O0: eor x11, x9, x11
-; -O0: orr x8, x8, x11
+; -O0: bl __aarch64_cas16_relax
+; -O0: eor x8, x0, x8
+; -O0: eor x9, x1, x9
+; -O0: orr x8, x8, x9
; -O0: subs x8, x8, #0
;
; -O1-LABEL: atomicrmw_add_i128_aligned_monotonic:
@@ -577,16 +553,12 @@ define dso_local i128 @atomicrmw_add_i128_aligned_monotonic(ptr %ptr, i128 %valu
define dso_local i128 @atomicrmw_add_i128_aligned_acquire(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_add_i128_aligned_acquire:
-; -O0: adds x14, x8, x10
+; -O0: adds x2, x8, x10
; -O0: subs w10, w10, #1
-; -O0: ldaxp x10, x9, [x11]
-; -O0: cmp x10, x12
-; -O0: cmp x9, x13
-; -O0: stxp w8, x14, x15, [x11]
-; -O0: stxp w8, x10, x9, [x11]
-; -O0: eor x8, x10, x8
-; -O0: eor x11, x9, x11
-; -O0: orr x8, x8, x11
+; -O0: bl __aarch64_cas16_acq
+; -O0: eor x8, x0, x8
+; -O0: eor x9, x1, x9
+; -O0: orr x8, x8, x9
; -O0: subs x8, x8, #0
;
; -O1-LABEL: atomicrmw_add_i128_aligned_acquire:
@@ -599,16 +571,12 @@ define dso_local i128 @atomicrmw_add_i128_aligned_acquire(ptr %ptr, i128 %value)
define dso_local i128 @atomicrmw_add_i128_aligned_release(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_add_i128_aligned_release:
-; -O0: adds x14, x8, x10
+; -O0: adds x2, x8, x10
; -O0: subs w10, w10, #1
-; -O0: ldxp x10, x9, [x11]
-; -O0: cmp x10, x12
-; -O0: cmp x9, x13
-; -O0: stlxp w8, x14, x15, [x11]
-; -O0: stlxp w8, x10, x9, [x11]
-; -O0: eor x8, x10, x8
-; -O0: eor x11, x9, x11
-; -O0: orr x8, x8, x11
+; -O0: bl __aarch64_cas16_rel
+; -O0: eor x8, x0, x8
+; -O0: eor x9, x1, x9
+; -O0: orr x8, x8, x9
; -O0: subs x8, x8, #0
;
; -O1-LABEL: atomicrmw_add_i128_aligned_release:
@@ -621,16 +589,12 @@ define dso_local i128 @atomicrmw_add_i128_aligned_release(ptr %ptr, i128 %value)
define dso_local i128 @atomicrmw_add_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_add_i128_aligned_acq_rel:
-; -O0: adds x14, x8, x10
+; -O0: adds x2, x8, x10
; -O0: subs w10, w10, #1
-; -O0: ldaxp x10, x9, [x11]
-; -O0: cmp x10, x12
-; -O0: cmp x9, x13
-; -O0: stlxp w8, x14, x15, [x11]
-; -O0: stlxp w8, x10, x9, [x11]
-; -O0: eor x8, x10, x8
-; -O0: eor x11, x9, x11
-; -O0: orr x8, x8, x11
+; -O0: bl __aarch64_cas16_acq_rel
+; -O0: eor x8, x0, x8
+; -O0: eor x9, x1, x9
+; -O0: orr x8, x8, x9
; -O0: subs x8, x8, #0
;
; -O1-LABEL: atomicrmw_add_i128_aligned_acq_rel:
@@ -643,16 +607,12 @@ define dso_local i128 @atomicrmw_add_i128_aligned_acq_rel(ptr %ptr, i128 %value)
define dso_local i128 @atomicrmw_add_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_add_i128_aligned_seq_cst:
-; -O0: adds x14, x8, x10
+; -O0: adds x2, x8, x10
; -O0: subs w10, w10, #1
-; -O0: ldaxp x10, x9, [x11]
-; -O0: cmp x10, x12
-; -O0: cmp x9, x13
-; -O0: stlxp w8, x14, x15, [x11]
-; -O0: stlxp w8, x10, x9, [x11]
-; -O0: eor x8, x10, x8
-; -O0: eor x11, x9, x11
-; -O0: orr x8, x8, x11
+; -O0: bl __aarch64_cas16_acq_rel
+; -O0: eor x8, x0, x8
+; -O0: eor x9, x1, x9
+; -O0: orr x8, x8, x9
; -O0: subs x8, x8, #0
;
; -O1-LABEL: atomicrmw_add_i128_aligned_seq_cst:
@@ -1170,15 +1130,11 @@ define dso_local i64 @atomicrmw_sub_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
define dso_local i128 @atomicrmw_sub_i128_aligned_monotonic(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_sub_i128_aligned_monotonic:
-; -O0: subs x14, x8, x10
-; -O0: ldxp x10, x9, [x11]
-; -O0: cmp x10, x12
-; -O0: cmp x9, x13
-; -O0: stxp w8, x14, x15, [x11]
-; -O0: stxp w8, x10, x9, [x11]
-; -O0: eor x8, x10, x8
-; -O0: eor x11, x9, x11
-; -O0: orr x8, x8, x11
+; -O0: subs x2, x8, x10
+; -O0: bl __aarch64_cas16_relax
+; -O0: eor x8, x0, x8
+; -O0: eor x9, x1, x9
+; -O0: orr x8, x8, x9
; -O0: subs x8, x8, #0
;
; -O1-LABEL: atomicrmw_sub_i128_aligned_monotonic:
@@ -1191,15 +1147,11 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_monotonic(ptr %ptr, i128 %valu
define dso_local i128 @atomicrmw_sub_i128_aligned_acquire(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_sub_i128_aligned_acquire:
-; -O0: subs x14, x8, x10
-; -O0: ldaxp x10, x9, [x11]
-; -O0: cmp x10, x12
-; -O0: cmp x9, x13
-; -O0: stxp w8, x14, x15, [x11]
-; -O0: stxp w8, x10, x9, [x11]
-; -O0: eor x8, x10, x8
-; -O0: eor x11, x9, x11
-; -O0: orr x8, x8, x11
+; -O0: subs x2, x8, x10
+; -O0: bl __aarch64_cas16_acq
+; -O0: eor x8, x0, x8
+; -O0: eor x9, x1, x9
+; -O0: orr x8, x8, x9
; -O0: subs x8, x8, #0
;
; -O1-LABEL: atomicrmw_sub_i128_aligned_acquire:
@@ -1212,15 +1164,11 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_acquire(ptr %ptr, i128 %value)
define dso_local i128 @atomicrmw_sub_i128_aligned_release(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_sub_i128_aligned_release:
-; -O0: subs x14, x8, x10
-; -O0: ldxp x10, x9, [x11]
-; -O0: cmp x10, x12
-; -O0: cmp x9, x13
-; -O0: stlxp w8, x14, x15, [x11]
-; -O0: stlxp w8, x10, x9, [x11]
-; -O0: eor x8, x10, x8
-; -O0: eor x11, x9, x11
-; -O0: orr x8, x8, x11
+; -O0: subs x2, x8, x10
+; -O0: bl __aarch64_cas16_rel
+; -O0: eor x8, x0, x8
+; -O0: eor x9, x1, x9
+; -O0: orr x8, x8, x9
; -O0: subs x8, x8, #0
;
; -O1-LABEL: atomicrmw_sub_i128_aligned_release:
@@ -1233,15 +1181,11 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_release(ptr %ptr, i128 %value)
define dso_local i128 @atomicrmw_sub_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_sub_i128_aligned_acq_rel:
-; -O0: subs x14, x8, x10
-; -O0: ldaxp x10, x9, [x11]
-; -O0: cmp x10, x12
-; -O0: cmp x9, x13
-; -O0: stlxp w8, x14, x15, [x11]
-; -O0: stlxp w8, x10, x9, [x11]
-; -O0: eor x8, x10, x8
-; -O0: eor x11, x9, x11
-; -O0: orr x8, x8, x11
+; -O0: subs x2, x8, x10
+; -O0: bl __aarch64_cas16_acq_rel
+; -O0: eor x8, x0, x8
+; -O0: eor x9, x1, x9
+; -O0: orr x8, x8, x9
; -O0: subs x8, x8, #0
;
; -O1-LABEL: atomicrmw_sub_i128_aligned_acq_rel:
@@ -1254,15 +1198,11 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_acq_rel(ptr %ptr, i128 %value)
define dso_local i128 @atomicrmw_sub_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_sub_i128_aligned_seq_cst:
-; -O0: subs x14, x8, x10
-; -O0: ldaxp x10, x9, [x11]
-; -O0: cmp x10, x12
-; -O0: cmp x9, x13
-; -O0: stlxp w8, x14, x15, [x11]
-; -O0: stlxp w8, x10, x9, [x11]
-; -O0: eor x8, x10, x8
-; -O0: eor x11, x9, x11
-; -O0: orr x8, x8, x11
+; -O0: subs x2, x8, x10
+; -O0: bl __aarch64_cas16_acq_rel
+; -O0: eor x8, x0, x8
+; -O0: eor x9, x1, x9
+; -O0: orr x8, x8, x9
; -O0: subs x8, x8, #0
;
; -O1-LABEL: atomicrmw_sub_i128_aligned_seq_cst:
@@ -1575,7 +1515,7 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
define dso_local i8 @atomicrmw_and_i8_aligned_monotonic(ptr %ptr, i8 %value) {
; -O0-LABEL: atomicrmw_and_i8_aligned_monotonic:
-; -O0: mvn w0, w8
+; -O0: eor w0, w8, w9
; -O0: bl __aarch64_ldclr1_relax
;
; -O1-LABEL: atomicrmw_and_i8_aligned_monotonic:
@@ -1587,7 +1527,7 @@ define dso_local i8 @atomicrmw_and_i8_aligned_monotonic(ptr %ptr, i8 %value) {
define dso_local i8 @atomicrmw_and_i8_aligned_acquire(ptr %ptr, i8 %value) {
; -O0-LABEL: atomicrmw_and_i8_aligned_acquire:
-; -O0: mvn w0, w8
+; -O0: eor w0, w8, w9
; -O0: bl __aarch64_ldclr1_acq
;
; -O1-LABEL: atomicrmw_and_i8_aligned_acquire:
@@ -1599,7 +1539,7 @@ define dso_local i8 @atomicrmw_and_i8_aligned_acquire(ptr %ptr, i8 %value) {
define dso_local i8 @atomicrmw_and_i8_aligned_release(ptr %ptr, i8 %value) {
; -O0-LABEL: atomicrmw_and_i8_aligned_release:
-; -O0: mvn w0, w8
+; -O0: eor w0, w8, w9
; -O0: bl __aarch64_ldclr1_rel
;
; -O1-LABEL: atomicrmw_and_i8_aligned_release:
@@ -1611,7 +1551,7 @@ define dso_local i8 @atomicrmw_and_i8_aligned_release(ptr %ptr, i8 %value) {
define dso_local i8 @atomicrmw_and_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
; -O0-LABEL: atomicrmw_and_i8_aligned_acq_rel:
-; -O0: mvn w0, w8
+; -O0: eor w0, w8, w9
; -O0: bl __aarch64_ldclr1_acq_rel
;
; -O1-LABEL: atomicrmw_and_i8_aligned_acq_rel:
@@ -1623,7 +1563,7 @@ define dso_local i8 @atomicrmw_and_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
define dso_local i8 @atomicrmw_and_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
; -O0-LABEL: atomicrmw_and_i8_aligned_seq_cst:
-; -O0: mvn w0, w8
+; -O0: eor w0, w8, w9
; -O0: bl __aarch64_ldclr1_acq_rel
;
; -O1-LABEL: atomicrmw_and_i8_aligned_seq_cst:
@@ -1635,7 +1575,7 @@ define dso_local i8 @atomicrmw_and_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
define dso_local i16 @atomicrmw_and_i16_aligned_monotonic(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_and_i16_aligned_monotonic:
-; -O0: mvn w0, w8
+; -O0: eor w0, w8, w9
; -O0: bl __aarch64_ldclr2_relax
;
; -O1-LABEL: atomicrmw_and_i16_aligned_monotonic:
@@ -1647,7 +1587,7 @@ define dso_local i16 @atomicrmw_and_i16_aligned_monotonic(ptr %ptr, i16 %value)
define dso_local i16 @atomicrmw_and_i16_aligned_acquire(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_and_i16_aligned_acquire:
-; -O0: mvn w0, w8
+; -O0: eor w0, w8, w9
; -O0: bl __aarch64_ldclr2_acq
;
; -O1-LABEL: atomicrmw_and_i16_aligned_acquire:
@@ -1659,7 +1599,7 @@ define dso_local i16 @atomicrmw_and_i16_aligned_acquire(ptr %ptr, i16 %value) {
define dso_local i16 @atomicrmw_and_i16_aligned_release(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_and_i16_aligned_release:
-; -O0: mvn w0, w8
+; -O0: eor w0, w8, w9
; -O0: bl __aarch64_ldclr2_rel
;
; -O1-LABEL: atomicrmw_and_i16_aligned_release:
@@ -1671,7 +1611,7 @@ define dso_local i16 @atomicrmw_and_i16_aligned_release(ptr %ptr, i16 %value) {
define dso_local i16 @atomicrmw_and_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_and_i16_aligned_acq_rel:
-; -O0: mvn w0, w8
+; -O0: eor w0, w8, w9
; -O0: bl __aarch64_ldclr2_acq_rel
;
; -O1-LABEL: atomicrmw_and_i16_aligned_acq_rel:
@@ -1683,7 +1623,7 @@ define dso_local i16 @atomicrmw_and_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
define dso_local i16 @atomicrmw_and_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_and_i16_aligned_seq_cst:
-; -O0: mvn w0, w8
+; -O0: eor w0, w8, w9
; -O0: bl __aarch64_ldclr2_acq_rel
;
; -O1-LABEL: atomicrmw_and_i16_aligned_seq_cst:
@@ -1695,7 +1635,7 @@ define dso_local i16 @atomicrmw_and_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
define dso_local i32 @atomicrmw_and_i32_aligned_monotonic(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_and_i32_aligned_monotonic:
-; -O0: mvn w0, w8
+; -O0: eor w0, w8, w9
; -O0: bl __aarch64_ldclr4_relax
;
; -O1-LABEL: atomicrmw_and_i32_aligned_monotonic:
@@ -1707,7 +1647,7 @@ define dso_local i32 @atomicrmw_and_i32_aligned_monotonic(ptr %ptr, i32 %value)
define dso_local i32 @atomicrmw_and_i32_aligned_acquire(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_and_i32_aligned_acquire:
-; -O0: mvn w0, w8
+; -O0: eor w0, w8, w9
; -O0: bl __aarch64_ldclr4_acq
;
; -O1-LABEL: atomicrmw_and_i32_aligned_acquire:
@@ -1719,7 +1659,7 @@ define dso_local i32 @atomicrmw_and_i32_aligned_acquire(ptr %ptr, i32 %value) {
define dso_local i32 @atomicrmw_and_i32_aligned_release(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_and_i32_aligned_release:
-; -O0: mvn w0, w8
+; -O0: eor w0, w8, w9
; -O0: bl __aarch64_ldclr4_rel
;
; -O1-LABEL: atomicrmw_and_i32_aligned_release:
@@ -1731,7 +1671,7 @@ define dso_local i32 @atomicrmw_and_i32_aligned_release(ptr %ptr, i32 %value) {
define dso_local i32 @atomicrmw_and_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_and_i32_aligned_acq_rel:
-; -O0: mvn w0, w8
+; -O0: eor w0, w8, w9
; -O0: bl __aarch64_ldclr4_acq_rel
;
; -O1-LABEL: atomicrmw_and_i32_aligned_acq_rel:
@@ -1743,7 +1683,7 @@ define dso_local i32 @atomicrmw_and_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
define dso_local i32 @atomicrmw_and_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_and_i32_aligned_seq_cst:
-; -O0: mvn w0, w8
+; -O0: eor w0, w8, w9
; -O0: bl __aarch64_ldclr4_acq_rel
;
; -O1-LABEL: atomicrmw_and_i32_aligned_seq_cst:
@@ -1755,7 +1695,7 @@ define dso_local i32 @atomicrmw_and_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
define dso_local i64 @atomicrmw_and_i64_aligned_monotonic(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_and_i64_aligned_monotonic:
-; -O0: mvn x0, x8
+; -O0: eor x0, x8, x9
; -O0: bl __aarch64_ldclr8_relax
;
; -O1-LABEL: atomicrmw_and_i64_aligned_monotonic:
@@ -1767,7 +1707,7 @@ define dso_local i64 @atomicrmw_and_i64_aligned_monotonic(ptr %ptr, i64 %value)
define dso_local i64 @atomicrmw_and_i64_aligned_acquire(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_and_i64_aligned_acquire:
-; -O0: mvn x0, x8
+; -O0: eor x0, x8, x9
; -O0: bl __aarch64_ldclr8_acq
;
; -O1-LABEL: atomicrmw_and_i64_aligned_acquire:
@@ -1779,7 +1719,7 @@ define dso_local i64 @atomicrmw_and_i64_aligned_acquire(ptr %ptr, i64 %value) {
define dso_local i64 @atomicrmw_and_i64_aligned_release(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_and_i64_aligned_release:
-; -O0: mvn x0, x8
+; -O0: eor x0, x8, x9
; -O0: bl __aarch64_ldclr8_rel
;
; -O1-LABEL: atomicrmw_and_i64_aligned_release:
@@ -1791,7 +1731,7 @@ define dso_local i64 @atomicrmw_and_i64_aligned_release(ptr %ptr, i64 %value) {
define dso_local i64 @atomicrmw_and_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_and_i64_aligned_acq_rel:
-; -O0: mvn x0, x8
+; -O0: eor x0, x8, x9
; -O0: bl __aarch64_ldclr8_acq_rel
;
; -O1-LABEL: atomicrmw_and_i64_aligned_acq_rel:
@@ -1803,7 +1743,7 @@ define dso_local i64 @atomicrmw_and_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
define dso_local i64 @atomicrmw_and_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_and_i64_aligned_seq_cst:
-; -O0: mvn x0, x8
+; -O0: eor x0, x8, x9
; -O0: bl __aarch64_ldclr8_acq_rel
;
; -O1-LABEL: atomicrmw_and_i64_aligned_seq_cst:
@@ -1815,16 +1755,12 @@ define dso_local i64 @atomicrmw_and_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
define dso_local i128 @atomicrmw_and_i128_aligned_monotonic(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_and_i128_aligned_monotonic:
-; -O0: and x14, x8, x10
-; -O0: and x15, x8, x9
-; -O0: ldxp x10, x9, [x11]
-; -O0: cmp x10, x12
-; -O0: cmp x9, x13
-; -O0: stxp w8, x14, x15, [x11]
-; -O0: stxp w8, x10, x9, [x11]
-; -O0: eor x8, x10, x8
-; -O0: eor x11, x9, x11
-; -O0: orr x8, x8, x11
+; -O0: and x2, x8, x10
+; -O0: and x3, x8, x9
+; -O0: bl __aarch64_cas16_relax
+; -O0: eor x8, x0, x8
+; -O0: eor x9, x1, x9
+; -O0: orr x8, x8, x9
; -O0: subs x8, x8, #0
;
; -O1-LABEL: atomicrmw_and_i128_aligned_monotonic:
@@ -1838,16 +1774,12 @@ define dso_local i128 @atomicrmw_and_i128_aligned_monotonic(ptr %ptr, i128 %valu
define dso_local i128 @atomicrmw_and_i128_aligned_acquire(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_and_i128_aligned_acquire:
-; -O0: and x14, x8, x10
-; -O0: and x15, x8, x9
-; -O0: ldaxp x10, x9, [x11]
-; -O0: cmp x10, x12
-; -O0: cmp x9, x13
-; -O0: stxp w8, x14, x15, [x11]
-; -O0: stxp w8, x10, x9, [x11]
-; -O0: eor x8, x10, x8
-; -O0: eor x11, x9, x11
-; -O0: orr x8, x8, x11
+; -O0: and x2, x8, x10
+; -O0: and x3, x8, x9
+; -O0: bl __aarch64_cas16_acq
+; -O0: eor x8, x0, x8
+; -O0: eor x9, x1, x9
+; -O0: orr x8, x8, x9
; -O0: subs x8, x8, #0
;
; -O1-LABEL: atomicrmw_and_i128_aligned_acquire:
@@ -1861,16 +1793,12 @@ define dso_local i128 @atomicrmw_and_i128_aligned_acquire(ptr %ptr, i128 %value)
define dso_local i128 @atomicrmw_and_i128_aligned_release(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_and_i128_aligned_release:
-; -O0: and x14, x8, x10
-; -O0: and x15, x8, x9
-; -O0: ldxp x10, x9, [x11]
-; -O0: cmp x10, x12
-; -O0: cmp x9, x13
-; -O0: stlxp w8, x14, x15, [x11]
-; -O0: stlxp w8, x10, x9, [x11]
-; -O0: eor x8, x10, x8
-; -O0: eor x11, x9, x11
-; -O0: orr x8, x8, x11
+; -O0: and x2, x8, x10
+; -O0: and x3, x8, x9
+; -O0: bl __aarch64_cas16_rel
+; -O0: eor x8, x0, x8
+; -O0: eor x9, x1, x9
+; -O0: orr x8, x8, x9
; -O0: subs x8, x8, #0
;
; -O1-LABEL: atomicrmw_and_i128_aligned_release:
@@ -1884,16 +1812,12 @@ define dso_local i128 @atomicrmw_and_i128_aligned_release(ptr %ptr, i128 %value)
define dso_local i128 @atomicrmw_and_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_and_i128_aligned_acq_rel:
-; -O0: and x14, x8, x10
-; -O0: and x15, x8, x9
-; -O0: ldaxp x10, x9, [x11]
-; -O0: cmp x10, x12
-; -O0: cmp x9, x13
-; -O0: stlxp w8, x14, x15, [x11]
-; -O0: stlxp w8, x10, x9, [x11]
-; -O0: eor x8, x10, x8
-; -O0: eor x11, x9, x11
-; -O0: orr x8, x8, x11
+; -O0: and x2, x8, x10
+; -O0: and x3, x8, x9
+; -O0: bl __aarch64_cas16_acq_rel
+; -O0: eor x8, x0, x8
+; -O0: eor x9, x1, x9
+; -O0: orr x8, x8, x9
; -O0: subs x8, x8, #0
;
; -O1-LABEL: atomicrmw_and_i128_aligned_acq_rel:
@@ -1907,16 +1831,12 @@ define dso_local i128 @atomicrmw_and_i128_aligned_acq_rel(ptr %ptr, i128 %value)
define dso_local i128 @atomicrmw_and_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_and_i128_aligned_seq_cst:
-; -O0: and x14, x8, x10
-; -O0: and x15, x8, x9
-; -O0: ldaxp x10, x9, [x11]
-; -O0: cmp x10, x12
-; -O0: cmp x9, x13
-; -O0: stlxp w8, x14, x15, [x11]
-; -O0: stlxp w8, x10, x9, [x11]
-; -O0: eor x8, x10, x8
-; -O0: eor x11, x9, x11
-; -O0: orr x8, x8, x11
+; -O0: and x2, x8, x10
+; -O0: and x3, x8, x9
+; -O0: bl __aarch64_cas16_acq_rel
+; -O0: eor x8, x0, x8
+; -O0: eor x9, x1, x9
+; -O0: orr x8, x8, x9
; -O0: subs x8, x8, #0
;
; -O1-LABEL: atomicrmw_and_i128_aligned_seq_cst:
@@ -1930,7 +1850,7 @@ define dso_local i128 @atomicrmw_and_i128_aligned_seq_cst(ptr %ptr, i128 %value)
define dso_local i8 @atomicrmw_and_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
; -O0-LABEL: atomicrmw_and_i8_unaligned_monotonic:
-; -O0: mvn w0, w8
+; -O0: eor w0, w8, w9
; -O0: bl __aarch64_ldclr1_relax
;
; -O1-LABEL: atomicrmw_and_i8_unaligned_monotonic:
@@ -1942,7 +1862,7 @@ define dso_local i8 @atomicrmw_and_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
define dso_local i8 @atomicrmw_and_i8_unaligned_acquire(ptr %ptr, i8 %value) {
; -O0-LABEL: atomicrmw_and_i8_unaligned_acquire:
-; -O0: mvn w0, w8
+; -O0: eor w0, w8, w9
; -O0: bl __aarch64_ldclr1_acq
;
; -O1-LABEL: atomicrmw_and_i8_unaligned_acquire:
@@ -1954,7 +1874,7 @@ define dso_local i8 @atomicrmw_and_i8_unaligned_acquire(ptr %ptr, i8 %value) {
define dso_local i8 @atomicrmw_and_i8_unaligned_release(ptr %ptr, i8 %value) {
; -O0-LABEL: atomicrmw_and_i8_unaligned_release:
-; -O0: mvn w0, w8
+; -O0: eor w0, w8, w9
; -O0: bl __aarch64_ldclr1_rel
;
; -O1-LABEL: atomicrmw_and_i8_unaligned_release:
@@ -1966,7 +1886,7 @@ define dso_local i8 @atomicrmw_and_i8_unaligned_release(ptr %ptr, i8 %value) {
define dso_local i8 @atomicrmw_and_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
; -O0-LABEL: atomicrmw_and_i8_unaligned_acq_rel:
-; -O0: mvn w0, w8
+; -O0: eor w0, w8, w9
; -O0: bl __aarch64_ldclr1_acq_rel
;
; -O1-LABEL: atomicrmw_and_i8_unaligned_acq_rel:
@@ -1978,7 +1898,7 @@ define dso_local i8 @atomicrmw_and_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
define dso_local i8 @atomicrmw_and_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
; -O0-LABEL: atomicrmw_and_i8_unaligned_seq_cst:
-; -O0: mvn w0, w8
+; -O0: eor w0, w8, w9
; -O0: bl __aarch64_ldclr1_acq_rel
;
; -O1-LABEL: atomicrmw_and_i8_unaligned_seq_cst:
@@ -2245,13 +2165,11 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
define dso_local i8 @atomicrmw_nand_i8_aligned_monotonic(ptr %ptr, i8 %value) {
; -O0-LABEL: atomicrmw_nand_i8_aligned_monotonic:
-; -O0: and w8, w10, w8
-; -O0: mvn w12, w8
-; -O0: ldaxrb w9, [x11]
-; -O0: cmp w9, w10, uxtb
-; -O0: stlxrb w8, w12, [x11]
-; -O0: and w8, w9, #0xff
-; -O0: subs w8, w8, w10, uxtb
+; -O0: and w8, w0, w8
+; -O0: mvn w1, w8
+; -O0: bl __aarch64_cas1_relax
+; -O0: and w8, w0, #0xff
+; -O0: subs w8, w8, w9, uxtb
;
; -O1-LABEL: atomicrmw_nand_i8_aligned_monotonic:
; -O1: ldxrb w8, [x0]
@@ -2264,13 +2182,11 @@ define dso_local i8 @atomicrmw_nand_i8_aligned_monotonic(ptr %ptr, i8 %value) {
define dso_local i8 @atomicrmw_nand_i8_aligned_acquire(ptr %ptr, i8 %value) {
; -O0-LABEL: atomicrmw_nand_i8_aligned_acquire:
-; -O0: and w8, w10, w8
-; -O0: mvn w12, w8
-; -O0: ldaxrb w9, [x11]
-; -O0: cmp w9, w10, uxtb
-; -O0: stlxrb w8, w12, [x11]
-; -O0: and w8, w9, #0xff
-; -O0: subs w8, w8, w10, uxtb
+; -O0: and w8, w0, w8
+; -O0: mvn w1, w8
+; -O0: bl __aarch64_cas1_acq
+; -O0: and w8, w0, #0xff
+; -O0: subs w8, w8, w9, uxtb
;
; -O1-LABEL: atomicrmw_nand_i8_aligned_acquire:
; -O1: ldaxrb w8, [x0]
@@ -2283,13 +2199,11 @@ define dso_local i8 @atomicrmw_nand_i8_aligned_acquire(ptr %ptr, i8 %value) {
define dso_local i8 @atomicrmw_nand_i8_aligned_release(ptr %ptr, i8 %value) {
; -O0-LABEL: atomicrmw_nand_i8_aligned_release:
-; -O0: and w8, w10, w8
-; -O0: mvn w12, w8
-; -O0: ldaxrb w9, [x11]
-; -O0: cmp w9, w10, uxtb
-; -O0: stlxrb w8, w12, [x11]
-; -O0: and w8, w9, #0xff
-; -O0: subs w8, w8, w10, uxtb
+; -O0: and w8, w0, w8
+; -O0: mvn w1, w8
+; -O0: bl __aarch64_cas1_rel
+; -O0: and w8, w0, #0xff
+; -O0: subs w8, w8, w9, uxtb
;
; -O1-LABEL: atomicrmw_nand_i8_aligned_release:
; -O1: ldxrb w8, [x0]
@@ -2302,13 +2216,11 @@ define dso_local i8 @atomicrmw_nand_i8_aligned_release(ptr %ptr, i8 %value) {
define dso_local i8 @atomicrmw_nand_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
; -O0-LABEL: atomicrmw_nand_i8_aligned_acq_rel:
-; -O0: and w8, w10, w8
-; -O0: mvn w12, w8
-; -O0: ldaxrb w9, [x11]
-; -O0: cmp w9, w10, uxtb
-; -O0: stlxrb w8, w12, [x11]
-; -O0: and w8, w9, #0xff
-; -O0: subs w8, w8, w10, uxtb
+; -O0: and w8, w0, w8
+; -O0: mvn w1, w8
+; -O0: bl __aarch64_cas1_acq_rel
+; -O0: and w8, w0, #0xff
+; -O0: subs w8, w8, w9, uxtb
;
; -O1-LABEL: atomicrmw_nand_i8_aligned_acq_rel:
; -O1: ldaxrb w8, [x0]
@@ -2321,13 +2233,11 @@ define dso_local i8 @atomicrmw_nand_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
define dso_local i8 @atomicrmw_nand_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
; -O0-LABEL: atomicrmw_nand_i8_aligned_seq_cst:
-; -O0: and w8, w10, w8
-; -O0: mvn w12, w8
-; -O0: ldaxrb w9, [x11]
-; -O0: cmp w9, w10, uxtb
-; -O0: stlxrb w8, w12, [x11]
-; -O0: and w8, w9, #0xff
-; -O0: subs w8, w8, w10, uxtb
+; -O0: and w8, w0, w8
+; -O0: mvn w1, w8
+; -O0: bl __aarch64_cas1_acq_rel
+; -O0: and w8, w0, #0xff
+; -O0: subs w8, w8, w9, uxtb
;
; -O1-LABEL: atomicrmw_nand_i8_aligned_seq_cst:
; -O1: ldaxrb w8, [x0]
@@ -2340,12 +2250,10 @@ define dso_local i8 @atomicrmw_nand_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
define dso_local i16 @atomicrmw_nand_i16_aligned_monotonic(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_nand_i16_aligned_monotonic:
-; -O0: and w9, w8, w9
-; -O0: mvn w12, w9
-; -O0: ldaxrh w9, [x11]
-; -O0: cmp w9, w8, uxth
-; -O0: stlxrh w10, w12, [x11]
-; -O0: subs w8, w8, w9, uxth
+; -O0: and w8, w0, w8
+; -O0: mvn w1, w8
+; -O0: bl __aarch64_cas2_relax
+; -O0: subs w8, w8, w0, uxth
;
; -O1-LABEL: atomicrmw_nand_i16_aligned_monotonic:
; -O1: ldxrh w8, [x0]
@@ -2358,12 +2266,10 @@ define dso_local i16 @atomicrmw_nand_i16_aligned_monotonic(ptr %ptr, i16 %value)
define dso_local i16 @atomicrmw_nand_i16_aligned_acquire(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_nand_i16_aligned_acquire:
-; -O0: and w9, w8, w9
-; -O0: mvn w12, w9
-; -O0: ldaxrh w9, [x11]
-; -O0: cmp w9, w8, uxth
-; -O0: stlxrh w10, w12, [x11]
-; -O0: subs w8, w8, w9, uxth
+; -O0: and w8, w0, w8
+; -O0: mvn w1, w8
+; -O0: bl __aarch64_cas2_acq
+; -O0: subs w8, w8, w0, uxth
;
; -O1-LABEL: atomicrmw_nand_i16_aligned_acquire:
; -O1: ldaxrh w8, [x0]
@@ -2376,12 +2282,10 @@ define dso_local i16 @atomicrmw_nand_i16_aligned_acquire(ptr %ptr, i16 %value) {
define dso_local i16 @atomicrmw_nand_i16_aligned_release(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_nand_i16_aligned_release:
-; -O0: and w9, w8, w9
-; -O0: mvn w12, w9
-; -O0: ldaxrh w9, [x11]
-; -O0: cmp w9, w8, uxth
-; -O0: stlxrh w10, w12, [x11]
-; -O0: subs w8, w8, w9, uxth
+; -O0: and w8, w0, w8
+; -O0: mvn w1, w8
+; -O0: bl __aarch64_cas2_rel
+; -O0: subs w8, w8, w0, uxth
;
; -O1-LABEL: atomicrmw_nand_i16_aligned_release:
; -O1: ldxrh w8, [x0]
@@ -2394,12 +2298,10 @@ define dso_local i16 @atomicrmw_nand_i16_aligned_release(ptr %ptr, i16 %value) {
define dso_local i16 @atomicrmw_nand_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_nand_i16_aligned_acq_rel:
-; -O0: and w9, w8, w9
-; -O0: mvn w12, w9
-; -O0: ldaxrh w9, [x11]
-; -O0: cmp w9, w8, uxth
-; -O0: stlxrh w10, w12, [x11]
-; -O0: subs w8, w8, w9, uxth
+; -O0: and w8, w0, w8
+; -O0: mvn w1, w8
+; -O0: bl __aarch64_cas2_acq_rel
+; -O0: subs w8, w8, w0, uxth
;
; -O1-LABEL: atomicrmw_nand_i16_aligned_acq_rel:
; -O1: ldaxrh w8, [x0]
@@ -2412,12 +2314,10 @@ define dso_local i16 @atomicrmw_nand_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
define dso_local i16 @atomicrmw_nand_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_nand_i16_aligned_seq_cst:
-; -O0: and w9, w8, w9
-; -O0: mvn w12, w9
-; -O0: ldaxrh w9, [x11]
-; -O0: cmp w9, w8, uxth
-; -O0: stlxrh w10, w12, [x11]
-; -O0: subs w8, w8, w9, uxth
+; -O0: and w8, w0, w8
+; -O0: mvn w1, w8
+; -O0: bl __aarch64_cas2_acq_rel
+; -O0: subs w8, w8, w0, uxth
;
; -O1-LABEL: atomicrmw_nand_i16_aligned_seq_cst:
; -O1: ldaxrh w8, [x0]
@@ -2430,12 +2330,10 @@ define dso_local i16 @atomicrmw_nand_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
define dso_local i32 @atomicrmw_nand_i32_aligned_monotonic(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_nand_i32_aligned_monotonic:
-; -O0: and w9, w8, w9
-; -O0: mvn w12, w9
-; -O0: ldaxr w9, [x11]
-; -O0: cmp w9, w8
-; -O0: stlxr w10, w12, [x11]
-; -O0: subs w8, w9, w8
+; -O0: and w8, w0, w8
+; -O0: mvn w1, w8
+; -O0: bl __aarch64_cas4_relax
+; -O0: subs w8, w0, w8
;
; -O1-LABEL: atomicrmw_nand_i32_aligned_monotonic:
; -O1: ldxr w8, [x0]
@@ -2448,12 +2346,10 @@ define dso_local i32 @atomicrmw_nand_i32_aligned_monotonic(ptr %ptr, i32 %value)
define dso_local i32 @atomicrmw_nand_i32_aligned_acquire(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_nand_i32_aligned_acquire:
-; -O0: and w9, w8, w9
-; -O0: mvn w12, w9
-; -O0: ldaxr w9, [x11]
-; -O0: cmp w9, w8
-; -O0: stlxr w10, w12, [x11]
-; -O0: subs w8, w9, w8
+; -O0: and w8, w0, w8
+; -O0: mvn w1, w8
+; -O0: bl __aarch64_cas4_acq
+; -O0: subs w8, w0, w8
;
; -O1-LABEL: atomicrmw_nand_i32_aligned_acquire:
; -O1: ldaxr w8, [x0]
@@ -2466,12 +2362,10 @@ define dso_local i32 @atomicrmw_nand_i32_aligned_acquire(ptr %ptr, i32 %value) {
define dso_local i32 @atomicrmw_nand_i32_aligned_release(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_nand_i32_aligned_release:
-; -O0: and w9, w8, w9
-; -O0: mvn w12, w9
-; -O0: ldaxr w9, [x11]
-; -O0: cmp w9, w8
-; -O0: stlxr w10, w12, [x11]
-; -O0: subs w8, w9, w8
+; -O0: and w8, w0, w8
+; -O0: mvn w1, w8
+; -O0: bl __aarch64_cas4_rel
+; -O0: subs w8, w0, w8
;
; -O1-LABEL: atomicrmw_nand_i32_aligned_release:
; -O1: ldxr w8, [x0]
@@ -2484,12 +2378,10 @@ define dso_local i32 @atomicrmw_nand_i32_aligned_release(ptr %ptr, i32 %value) {
define dso_local i32 @atomicrmw_nand_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_nand_i32_aligned_acq_rel:
-; -O0: and w9, w8, w9
-; -O0: mvn w12, w9
-; -O0: ldaxr w9, [x11]
-; -O0: cmp w9, w8
-; -O0: stlxr w10, w12, [x11]
-; -O0: subs w8, w9, w8
+; -O0: and w8, w0, w8
+; -O0: mvn w1, w8
+; -O0: bl __aarch64_cas4_acq_rel
+; -O0: subs w8, w0, w8
;
; -O1-LABEL: atomicrmw_nand_i32_aligned_acq_rel:
; -O1: ldaxr w8, [x0]
@@ -2502,12 +2394,10 @@ define dso_local i32 @atomicrmw_nand_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
define dso_local i32 @atomicrmw_nand_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_nand_i32_aligned_seq_cst:
-; -O0: and w9, w8, w9
-; -O0: mvn w12, w9
-; -O0: ldaxr w9, [x11]
-; -O0: cmp w9, w8
-; -O0: stlxr w10, w12, [x11]
-; -O0: subs w8, w9, w8
+; -O0: and w8, w0, w8
+; -O0: mvn w1, w8
+; -O0: bl __aarch64_cas4_acq_rel
+; -O0: subs w8, w0, w8
;
; -O1-LABEL: atomicrmw_nand_i32_aligned_seq_cst:
; -O1: ldaxr w8, [x0]
@@ -2520,12 +2410,10 @@ define dso_local i32 @atomicrmw_nand_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
define dso_local i64 @atomicrmw_nand_i64_aligned_monotonic(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_nand_i64_aligned_monotonic:
-; -O0: and x9, x8, x9
-; -O0: mvn x12, x9
-; -O0: ldaxr x9, [x11]
-; -O0: cmp x9, x8
-; -O0: stlxr w10, x12, [x11]
-; -O0: subs x8, x9, x8
+; -O0: and x8, x0, x8
+; -O0: mvn x1, x8
+; -O0: bl __aarch64_cas8_relax
+; -O0: subs x8, x0, x8
;
; -O1-LABEL: atomicrmw_nand_i64_aligned_monotonic:
; -O1: ldxr x0, [x8]
@@ -2538,12 +2426,10 @@ define dso_local i64 @atomicrmw_nand_i64_aligned_monotonic(ptr %ptr, i64 %value)
define dso_local i64 @atomicrmw_nand_i64_aligned_acquire(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_nand_i64_aligned_acquire:
-; -O0: and x9, x8, x9
-; -O0: mvn x12, x9
-; -O0: ldaxr x9, [x11]
-; -O0: cmp x9, x8
-; -O0: stlxr w10, x12, [x11]
-; -O0: subs x8, x9, x8
+; -O0: and x8, x0, x8
+; -O0: mvn x1, x8
+; -O0: bl __aarch64_cas8_acq
+; -O0: subs x8, x0, x8
;
; -O1-LABEL: atomicrmw_nand_i64_aligned_acquire:
; -O1: ldaxr x0, [x8]
@@ -2556,12 +2442,10 @@ define dso_local i64 @atomicrmw_nand_i64_aligned_acquire(ptr %ptr, i64 %value) {
define dso_local i64 @atomicrmw_nand_i64_aligned_release(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_nand_i64_aligned_release:
-; -O0: and x9, x8, x9
-; -O0: mvn x12, x9
-; -O0: ldaxr x9, [x11]
-; -O0: cmp x9, x8
-; -O0: stlxr w10, x12, [x11]
-; -O0: subs x8, x9, x8
+; -O0: and x8, x0, x8
+; -O0: mvn x1, x8
+; -O0: bl __aarch64_cas8_rel
+; -O0: subs x8, x0, x8
;
; -O1-LABEL: atomicrmw_nand_i64_aligned_release:
; -O1: ldxr x0, [x8]
@@ -2574,12 +2458,10 @@ define dso_local i64 @atomicrmw_nand_i64_aligned_release(ptr %ptr, i64 %value) {
define dso_local i64 @atomicrmw_nand_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_nand_i64_aligned_acq_rel:
-; -O0: and x9, x8, x9
-; -O0: mvn x12, x9
-; -O0: ldaxr x9, [x11]
-; -O0: cmp x9, x8
-; -O0: stlxr w10, x12, [x11]
-; -O0: subs x8, x9, x8
+; -O0: and x8, x0, x8
+; -O0: mvn x1, x8
+; -O0: bl __aarch64_cas8_acq_rel
+; -O0: subs x8, x0, x8
;
; -O1-LABEL: atomicrmw_nand_i64_aligned_acq_rel:
; -O1: ldaxr x0, [x8]
@@ -2592,12 +2474,10 @@ define dso_local i64 @atomicrmw_nand_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
define dso_local i64 @atomicrmw_nand_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_nand_i64_aligned_seq_cst:
-; -O0: and x9, x8, x9
-; -O0: mvn x12, x9
-; -O0: ldaxr x9, [x11]
-; -O0: cmp x9, x8
-; -O0: stlxr w10, x12, [x11]
-; -O0: subs x8, x9, x8
+; -O0: and x8, x0, x8
+; -O0: mvn x1, x8
+; -O0: bl __aarch64_cas8_acq_rel
+; -O0: subs x8, x0, x8
;
; -O1-LABEL: atomicrmw_nand_i64_aligned_seq_cst:
; -O1: ldaxr x0, [x8]
@@ -2612,16 +2492,12 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_monotonic(ptr %ptr, i128 %val
; -O0-LABEL: atomicrmw_nand_i128_aligned_monotonic:
; -O0: and x9, x8, x9
; -O0: and x8, x8, x10
-; -O0: mvn x14, x9
-; -O0: mvn x15, x8
-; -O0: ldxp x10, x9, [x11]
-; -O0: cmp x10, x12
-; -O0: cmp x9, x13
-; -O0: stxp w8, x14, x15, [x11]
-; -O0: stxp w8, x10, x9, [x11]
-; -O0: eor x8, x10, x8
-; -O0: eor x11, x9, x11
-; -O0: orr x8, x8, x11
+; -O0: mvn x2, x9
+; -O0: mvn x3, x8
+; -O0: bl __aarch64_cas16_relax
+; -O0: eor x8, x0, x8
+; -O0: eor x9, x1, x9
+; -O0: orr x8, x8, x9
; -O0: subs x8, x8, #0
;
; -O1-LABEL: atomicrmw_nand_i128_aligned_monotonic:
@@ -2639,16 +2515,12 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_acquire(ptr %ptr, i128 %value
; -O0-LABEL: atomicrmw_nand_i128_aligned_acquire:
; -O0: and x9, x8, x9
; -O0: and x8, x8, x10
-; -O0: mvn x14, x9
-; -O0: mvn x15, x8
-; -O0: ldaxp x10, x9, [x11]
-; -O0: cmp x10, x12
-; -O0: cmp x9, x13
-; -O0: stxp w8, x14, x15, [x11]
-; -O0: stxp w8, x10, x9, [x11]
-; -O0: eor x8, x10, x8
-; -O0: eor x11, x9, x11
-; -O0: orr x8, x8, x11
+; -O0: mvn x2, x9
+; -O0: mvn x3, x8
+; -O0: bl __aarch64_cas16_acq
+; -O0: eor x8, x0, x8
+; -O0: eor x9, x1, x9
+; -O0: orr x8, x8, x9
; -O0: subs x8, x8, #0
;
; -O1-LABEL: atomicrmw_nand_i128_aligned_acquire:
@@ -2666,16 +2538,12 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_release(ptr %ptr, i128 %value
; -O0-LABEL: atomicrmw_nand_i128_aligned_release:
; -O0: and x9, x8, x9
; -O0: and x8, x8, x10
-; -O0: mvn x14, x9
-; -O0: mvn x15, x8
-; -O0: ldxp x10, x9, [x11]
-; -O0: cmp x10, x12
-; -O0: cmp x9, x13
-; -O0: stlxp w8, x14, x15, [x11]
-; -O0: stlxp w8, x10, x9, [x11]
-; -O0: eor x8, x10, x8
-; -O0: eor x11, x9, x11
-; -O0: orr x8, x8, x11
+; -O0: mvn x2, x9
+; -O0: mvn x3, x8
+; -O0: bl __aarch64_cas16_rel
+; -O0: eor x8, x0, x8
+; -O0: eor x9, x1, x9
+; -O0: orr x8, x8, x9
; -O0: subs x8, x8, #0
;
; -O1-LABEL: atomicrmw_nand_i128_aligned_release:
@@ -2693,16 +2561,12 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_acq_rel(ptr %ptr, i128 %value
; -O0-LABEL: atomicrmw_nand_i128_aligned_acq_rel:
; -O0: and x9, x8, x9
; -O0: and x8, x8, x10
-; -O0: mvn x14, x9
-; -O0: mvn x15, x8
-; -O0: ldaxp x10, x9, [x11]
-; -O0: cmp x10, x12
-; -O0: cmp x9, x13
-; -O0: stlxp w8, x14, x15, [x11]
-; -O0: stlxp w8, x10, x9, [x11]
-; -O0: eor x8, x10, x8
-; -O0: eor x11, x9, x11
-; -O0: orr x8, x8, x11
+; -O0: mvn x2, x9
+; -O0: mvn x3, x8
+; -O0: bl __aarch64_cas16_acq_rel
+; -O0: eor x8, x0, x8
+; -O0: eor x9, x1, x9
+; -O0: orr x8, x8, x9
; -O0: subs x8, x8, #0
;
; -O1-LABEL: atomicrmw_nand_i128_aligned_acq_rel:
@@ -2720,16 +2584,12 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_seq_cst(ptr %ptr, i128 %value
; -O0-LABEL: atomicrmw_nand_i128_aligned_seq_cst:
; -O0: and x9, x8, x9
; -O0: and x8, x8, x10
-; -O0: mvn x14, x9
-; -O0: mvn x15, x8
-; -O0: ldaxp x10, x9, [x11]
-; -O0: cmp x10, x12
-; -O0: cmp x9, x13
-; -O0: stlxp w8, x14, x15, [x11]
-; -O0: stlxp w8, x10, x9, [x11]
-; -O0: eor x8, x10, x8
-; -O0: eor x11, x9, x11
-; -O0: orr x8, x8, x11
+; -O0: mvn x2, x9
+; -O0: mvn x3, x8
+; -O0: bl __aarch64_cas16_acq_rel
+; -O0: eor x8, x0, x8
+; -O0: eor x9, x1, x9
+; -O0: orr x8, x8, x9
; -O0: subs x8, x8, #0
;
; -O1-LABEL: atomicrmw_nand_i128_aligned_seq_cst:
@@ -2745,13 +2605,11 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_seq_cst(ptr %ptr, i128 %value
define dso_local i8 @atomicrmw_nand_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
; -O0-LABEL: atomicrmw_nand_i8_unaligned_monotonic:
-; -O0: and w8, w10, w8
-; -O0: mvn w12, w8
-; -O0: ldaxrb w9, [x11]
-; -O0: cmp w9, w10, uxtb
-; -O0: stlxrb w8, w12, [x11]
-; -O0: and w8, w9, #0xff
-; -O0: subs w8, w8, w10, uxtb
+; -O0: and w8, w0, w8
+; -O0: mvn w1, w8
+; -O0: bl __aarch64_cas1_relax
+; -O0: and w8, w0, #0xff
+; -O0: subs w8, w8, w9, uxtb
;
; -O1-LABEL: atomicrmw_nand_i8_unaligned_monotonic:
; -O1: ldxrb w8, [x0]
@@ -2764,13 +2622,11 @@ define dso_local i8 @atomicrmw_nand_i8_unaligned_monotonic(ptr %ptr, i8 %value)
define dso_local i8 @atomicrmw_nand_i8_unaligned_acquire(ptr %ptr, i8 %value) {
; -O0-LABEL: atomicrmw_nand_i8_unaligned_acquire:
-; -O0: and w8, w10, w8
-; -O0: mvn w12, w8
-; -O0: ldaxrb w9, [x11]
-; -O0: cmp w9, w10, uxtb
-; -O0: stlxrb w8, w12, [x11]
-; -O0: and w8, w9, #0xff
-; -O0: subs w8, w8, w10, uxtb
+; -O0: and w8, w0, w8
+; -O0: mvn w1, w8
+; -O0: bl __aarch64_cas1_acq
+; -O0: and w8, w0, #0xff
+; -O0: subs w8, w8, w9, uxtb
;
; -O1-LABEL: atomicrmw_nand_i8_unaligned_acquire:
; -O1: ldaxrb w8, [x0]
@@ -2783,13 +2639,11 @@ define dso_local i8 @atomicrmw_nand_i8_unaligned_acquire(ptr %ptr, i8 %value) {
define dso_local i8 @atomicrmw_nand_i8_unaligned_release(ptr %ptr, i8 %value) {
; -O0-LABEL: atomicrmw_nand_i8_unaligned_release:
-; -O0: and w8, w10, w8
-; -O0: mvn w12, w8
-; -O0: ldaxrb w9, [x11]
-; -O0: cmp w9, w10, uxtb
-; -O0: stlxrb w8, w12, [x11]
-; -O0: and w8, w9, #0xff
-; -O0: subs w8, w8, w10, uxtb
+; -O0: and w8, w0, w8
+; -O0: mvn w1, w8
+; -O0: bl __aarch64_cas1_rel
+; -O0: and w8, w0, #0xff
+; -O0: subs w8, w8, w9, uxtb
;
; -O1-LABEL: atomicrmw_nand_i8_unaligned_release:
; -O1: ldxrb w8, [x0]
@@ -2802,13 +2656,11 @@ define dso_local i8 @atomicrmw_nand_i8_unaligned_release(ptr %ptr, i8 %value) {
define dso_local i8 @atomicrmw_nand_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
; -O0-LABEL: atomicrmw_nand_i8_unaligned_acq_rel:
-; -O0: and w8, w10, w8
-; -O0: mvn w12, w8
-; -O0: ldaxrb w9, [x11]
-; -O0: cmp w9, w10, uxtb
-; -O0: stlxrb w8, w12, [x11]
-; -O0: and w8, w9, #0xff
-; -O0: subs w8, w8, w10, uxtb
+; -O0: and w8, w0, w8
+; -O0: mvn w1, w8
+; -O0: bl __aarch64_cas1_acq_rel
+; -O0: and w8, w0, #0xff
+; -O0: subs w8, w8, w9, uxtb
;
; -O1-LABEL: atomicrmw_nand_i8_unaligned_acq_rel:
; -O1: ldaxrb w8, [x0]
@@ -2821,13 +2673,11 @@ define dso_local i8 @atomicrmw_nand_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
define dso_local i8 @atomicrmw_nand_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
; -O0-LABEL: atomicrmw_nand_i8_unaligned_seq_cst:
-; -O0: and w8, w10, w8
-; -O0: mvn w12, w8
-; -O0: ldaxrb w9, [x11]
-; -O0: cmp w9, w10, uxtb
-; -O0: stlxrb w8, w12, [x11]
-; -O0: and w8, w9, #0xff
-; -O0: subs w8, w8, w10, uxtb
+; -O0: and w8, w0, w8
+; -O0: mvn w1, w8
+; -O0: bl __aarch64_cas1_acq_rel
+; -O0: and w8, w0, #0xff
+; -O0: subs w8, w8, w9, uxtb
;
; -O1-LABEL: atomicrmw_nand_i8_unaligned_seq_cst:
; -O1: ldaxrb w8, [x0]
@@ -3285,16 +3135,12 @@ define dso_local i64 @atomicrmw_or_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
define dso_local i128 @atomicrmw_or_i128_aligned_monotonic(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_or_i128_aligned_monotonic:
-; -O0: orr x14, x8, x10
-; -O0: orr x15, x8, x9
-; -O0: ldxp x10, x9, [x11]
-; -O0: cmp x10, x12
-; -O0: cmp x9, x13
-; -O0: stxp w8, x14, x15, [x11]
-; -O0: stxp w8, x10, x9, [x11]
-; -O0: eor x8, x10, x8
-; -O0: eor x11, x9, x11
-; -O0: orr x8, x8, x11
+; -O0: orr x2, x8, x10
+; -O0: orr x3, x8, x9
+; -O0: bl __aarch64_cas16_relax
+; -O0: eor x8, x0, x8
+; -O0: eor x9, x1, x9
+; -O0: orr x8, x8, x9
; -O0: subs x8, x8, #0
;
; -O1-LABEL: atomicrmw_or_i128_aligned_monotonic:
@@ -3308,16 +3154,12 @@ define dso_local i128 @atomicrmw_or_i128_aligned_monotonic(ptr %ptr, i128 %value
define dso_local i128 @atomicrmw_or_i128_aligned_acquire(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_or_i128_aligned_acquire:
-; -O0: orr x14, x8, x10
-; -O0: orr x15, x8, x9
-; -O0: ldaxp x10, x9, [x11]
-; -O0: cmp x10, x12
-; -O0: cmp x9, x13
-; -O0: stxp w8, x14, x15, [x11]
-; -O0: stxp w8, x10, x9, [x11]
-; -O0: eor x8, x10, x8
-; -O0: eor x11, x9, x11
-; -O0: orr x8, x8, x11
+; -O0: orr x2, x8, x10
+; -O0: orr x3, x8, x9
+; -O0: bl __aarch64_cas16_acq
+; -O0: eor x8, x0, x8
+; -O0: eor x9, x1, x9
+; -O0: orr x8, x8, x9
; -O0: subs x8, x8, #0
;
; -O1-LABEL: atomicrmw_or_i128_aligned_acquire:
@@ -3331,16 +3173,12 @@ define dso_local i128 @atomicrmw_or_i128_aligned_acquire(ptr %ptr, i128 %value)
define dso_local i128 @atomicrmw_or_i128_aligned_release(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_or_i128_aligned_release:
-; -O0: orr x14, x8, x10
-; -O0: orr x15, x8, x9
-; -O0: ldxp x10, x9, [x11]
-; -O0: cmp x10, x12
-; -O0: cmp x9, x13
-; -O0: stlxp w8, x14, x15, [x11]
-; -O0: stlxp w8, x10, x9, [x11]
-; -O0: eor x8, x10, x8
-; -O0: eor x11, x9, x11
-; -O0: orr x8, x8, x11
+; -O0: orr x2, x8, x10
+; -O0: orr x3, x8, x9
+; -O0: bl __aarch64_cas16_rel
+; -O0: eor x8, x0, x8
+; -O0: eor x9, x1, x9
+; -O0: orr x8, x8, x9
; -O0: subs x8, x8, #0
;
; -O1-LABEL: atomicrmw_or_i128_aligned_release:
@@ -3354,16 +3192,12 @@ define dso_local i128 @atomicrmw_or_i128_aligned_release(ptr %ptr, i128 %value)
define dso_local i128 @atomicrmw_or_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_or_i128_aligned_acq_rel:
-; -O0: orr x14, x8, x10
-; -O0: orr x15, x8, x9
-; -O0: ldaxp x10, x9, [x11]
-; -O0: cmp x10, x12
-; -O0: cmp x9, x13
-; -O0: stlxp w8, x14, x15, [x11]
-; -O0: stlxp w8, x10, x9, [x11]
-; -O0: eor x8, x10, x8
-; -O0: eor x11, x9, x11
-; -O0: orr x8, x8, x11
+; -O0: orr x2, x8, x10
+; -O0: orr x3, x8, x9
+; -O0: bl __aarch64_cas16_acq_rel
+; -O0: eor x8, x0, x8
+; -O0: eor x9, x1, x9
+; -O0: orr x8, x8, x9
; -O0: subs x8, x8, #0
;
; -O1-LABEL: atomicrmw_or_i128_aligned_acq_rel:
@@ -3377,16 +3211,12 @@ define dso_local i128 @atomicrmw_or_i128_aligned_acq_rel(ptr %ptr, i128 %value)
define dso_local i128 @atomicrmw_or_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_or_i128_aligned_seq_cst:
-; -O0: orr x14, x8, x10
-; -O0: orr x15, x8, x9
-; -O0: ldaxp x10, x9, [x11]
-; -O0: cmp x10, x12
-; -O0: cmp x9, x13
-; -O0: stlxp w8, x14, x15, [x11]
-; -O0: stlxp w8, x10, x9, [x11]
-; -O0: eor x8, x10, x8
-; -O0: eor x11, x9, x11
-; -O0: orr x8, x8, x11
+; -O0: orr x2, x8, x10
+; -O0: orr x3, x8, x9
+; -O0: bl __aarch64_cas16_acq_rel
+; -O0: eor x8, x0, x8
+; -O0: eor x9, x1, x9
+; -O0: orr x8, x8, x9
; -O0: subs x8, x8, #0
;
; -O1-LABEL: atomicrmw_or_i128_aligned_seq_cst:
@@ -3830,16 +3660,12 @@ define dso_local i64 @atomicrmw_xor_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
define dso_local i128 @atomicrmw_xor_i128_aligned_monotonic(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_xor_i128_aligned_monotonic:
-; -O0: eor x14, x8, x10
-; -O0: eor x15, x8, x9
-; -O0: ldxp x10, x9, [x11]
-; -O0: cmp x10, x12
-; -O0: cmp x9, x13
-; -O0: stxp w8, x14, x15, [x11]
-; -O0: stxp w8, x10, x9, [x11]
-; -O0: eor x8, x10, x8
-; -O0: eor x11, x9, x11
-; -O0: orr x8, x8, x11
+; -O0: eor x2, x8, x10
+; -O0: eor x3, x8, x9
+; -O0: bl __aarch64_cas16_relax
+; -O0: eor x8, x0, x8
+; -O0: eor x9, x1, x9
+; -O0: orr x8, x8, x9
; -O0: subs x8, x8, #0
;
; -O1-LABEL: atomicrmw_xor_i128_aligned_monotonic:
@@ -3853,16 +3679,12 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_monotonic(ptr %ptr, i128 %valu
define dso_local i128 @atomicrmw_xor_i128_aligned_acquire(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_xor_i128_aligned_acquire:
-; -O0: eor x14, x8, x10
-; -O0: eor x15, x8, x9
-; -O0: ldaxp x10, x9, [x11]
-; -O0: cmp x10, x12
-; -O0: cmp x9, x13
-; -O0: stxp w8, x14, x15, [x11]
-; -O0: stxp w8, x10, x9, [x11]
-; -O0: eor x8, x10, x8
-; -O0: eor x11, x9, x11
-; -O0: orr x8, x8, x11
+; -O0: eor x2, x8, x10
+; -O0: eor x3, x8, x9
+; -O0: bl __aarch64_cas16_acq
+; -O0: eor x8, x0, x8
+; -O0: eor x9, x1, x9
+; -O0: orr x8, x8, x9
; -O0: subs x8, x8, #0
;
; -O1-LABEL: atomicrmw_xor_i128_aligned_acquire:
@@ -3876,16 +3698,12 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_acquire(ptr %ptr, i128 %value)
define dso_local i128 @atomicrmw_xor_i128_aligned_release(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_xor_i128_aligned_release:
-; -O0: eor x14, x8, x10
-; -O0: eor x15, x8, x9
-; -O0: ldxp x10, x9, [x11]
-; -O0: cmp x10, x12
-; -O0: cmp x9, x13
-; -O0: stlxp w8, x14, x15, [x11]
-; -O0: stlxp w8, x10, x9, [x11]
-; -O0: eor x8, x10, x8
-; -O0: eor x11, x9, x11
-; -O0: orr x8, x8, x11
+; -O0: eor x2, x8, x10
+; -O0: eor x3, x8, x9
+; -O0: bl __aarch64_cas16_rel
+; -O0: eor x8, x0, x8
+; -O0: eor x9, x1, x9
+; -O0: orr x8, x8, x9
; -O0: subs x8, x8, #0
;
; -O1-LABEL: atomicrmw_xor_i128_aligned_release:
@@ -3899,16 +3717,12 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_release(ptr %ptr, i128 %value)
define dso_local i128 @atomicrmw_xor_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_xor_i128_aligned_acq_rel:
-; -O0: eor x14, x8, x10
-; -O0: eor x15, x8, x9
-; -O0: ldaxp x10, x9, [x11]
-; -O0: cmp x10, x12
-; -O0: cmp x9, x13
-; -O0: stlxp w8, x14, x15, [x11]
-; -O0: stlxp w8, x10, x9, [x11]
-; -O0: eor x8, x10, x8
-; -O0: eor x11, x9, x11
-; -O0: orr x8, x8, x11
+; -O0: eor x2, x8, x10
+; -O0: eor x3, x8, x9
+; -O0: bl __aarch64_cas16_acq_rel
+; -O0: eor x8, x0, x8
+; -O0: eor x9, x1, x9
+; -O0: orr x8, x8, x9
; -O0: subs x8, x8, #0
;
; -O1-LABEL: atomicrmw_xor_i128_aligned_acq_rel:
@@ -3922,16 +3736,12 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_acq_rel(ptr %ptr, i128 %value)
define dso_local i128 @atomicrmw_xor_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_xor_i128_aligned_seq_cst:
-; -O0: eor x14, x8, x10
-; -O0: eor x15, x8, x9
-; -O0: ldaxp x10, x9, [x11]
-; -O0: cmp x10, x12
-; -O0: cmp x9, x13
-; -O0: stlxp w8, x14, x15, [x11]
-; -O0: stlxp w8, x10, x9, [x11]
-; -O0: eor x8, x10, x8
-; -O0: eor x11, x9, x11
-; -O0: orr x8, x8, x11
+; -O0: eor x2, x8, x10
+; -O0: eor x3, x8, x9
+; -O0: bl __aarch64_cas16_acq_rel
+; -O0: eor x8, x0, x8
+; -O0: eor x9, x1, x9
+; -O0: orr x8, x8, x9
; -O0: subs x8, x8, #0
;
; -O1-LABEL: atomicrmw_xor_i128_aligned_seq_cst:
@@ -4235,14 +4045,12 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
define dso_local i8 @atomicrmw_max_i8_aligned_monotonic(ptr %ptr, i8 %value) {
; -O0-LABEL: atomicrmw_max_i8_aligned_monotonic:
-; -O0: sxtb w9, w10
+; -O0: sxtb w9, w0
; -O0: subs w9, w9, w8, sxtb
-; -O0: csel w12, w10, w8, gt
-; -O0: ldaxrb w9, [x11]
-; -O0: cmp w9, w10, uxtb
-; -O0: stlxrb w8, w12, [x11]
-; -O0: and w8, w9, #0xff
-; -O0: subs w8, w8, w10, uxtb
+; -O0: csel w1, w0, w8, gt
+; -O0: bl __aarch64_cas1_relax
+; -O0: and w8, w0, #0xff
+; -O0: subs w8, w8, w9, uxtb
;
; -O1-LABEL: atomicrmw_max_i8_aligned_monotonic:
; -O1: ldxrb w9, [x0]
@@ -4256,14 +4064,12 @@ define dso_local i8 @atomicrmw_max_i8_aligned_monotonic(ptr %ptr, i8 %value) {
define dso_local i8 @atomicrmw_max_i8_aligned_acquire(ptr %ptr, i8 %value) {
; -O0-LABEL: atomicrmw_max_i8_aligned_acquire:
-; -O0: sxtb w9, w10
+; -O0: sxtb w9, w0
; -O0: subs w9, w9, w8, sxtb
-; -O0: csel w12, w10, w8, gt
-; -O0: ldaxrb w9, [x11]
-; -O0: cmp w9, w10, uxtb
-; -O0: stlxrb w8, w12, [x11]
-; -O0: and w8, w9, #0xff
-; -O0: subs w8, w8, w10, uxtb
+; -O0: csel w1, w0, w8, gt
+; -O0: bl __aarch64_cas1_acq
+; -O0: and w8, w0, #0xff
+; -O0: subs w8, w8, w9, uxtb
;
; -O1-LABEL: atomicrmw_max_i8_aligned_acquire:
; -O1: ldaxrb w9, [x0]
@@ -4277,14 +4083,12 @@ define dso_local i8 @atomicrmw_max_i8_aligned_acquire(ptr %ptr, i8 %value) {
define dso_local i8 @atomicrmw_max_i8_aligned_release(ptr %ptr, i8 %value) {
; -O0-LABEL: atomicrmw_max_i8_aligned_release:
-; -O0: sxtb w9, w10
+; -O0: sxtb w9, w0
; -O0: subs w9, w9, w8, sxtb
-; -O0: csel w12, w10, w8, gt
-; -O0: ldaxrb w9, [x11]
-; -O0: cmp w9, w10, uxtb
-; -O0: stlxrb w8, w12, [x11]
-; -O0: and w8, w9, #0xff
-; -O0: subs w8, w8, w10, uxtb
+; -O0: csel w1, w0, w8, gt
+; -O0: bl __aarch64_cas1_rel
+; -O0: and w8, w0, #0xff
+; -O0: subs w8, w8, w9, uxtb
;
; -O1-LABEL: atomicrmw_max_i8_aligned_release:
; -O1: ldxrb w9, [x0]
@@ -4298,14 +4102,12 @@ define dso_local i8 @atomicrmw_max_i8_aligned_release(ptr %ptr, i8 %value) {
define dso_local i8 @atomicrmw_max_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
; -O0-LABEL: atomicrmw_max_i8_aligned_acq_rel:
-; -O0: sxtb w9, w10
+; -O0: sxtb w9, w0
; -O0: subs w9, w9, w8, sxtb
-; -O0: csel w12, w10, w8, gt
-; -O0: ldaxrb w9, [x11]
-; -O0: cmp w9, w10, uxtb
-; -O0: stlxrb w8, w12, [x11]
-; -O0: and w8, w9, #0xff
-; -O0: subs w8, w8, w10, uxtb
+; -O0: csel w1, w0, w8, gt
+; -O0: bl __aarch64_cas1_acq_rel
+; -O0: and w8, w0, #0xff
+; -O0: subs w8, w8, w9, uxtb
;
; -O1-LABEL: atomicrmw_max_i8_aligned_acq_rel:
; -O1: ldaxrb w9, [x0]
@@ -4319,14 +4121,12 @@ define dso_local i8 @atomicrmw_max_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
define dso_local i8 @atomicrmw_max_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
; -O0-LABEL: atomicrmw_max_i8_aligned_seq_cst:
-; -O0: sxtb w9, w10
+; -O0: sxtb w9, w0
; -O0: subs w9, w9, w8, sxtb
-; -O0: csel w12, w10, w8, gt
-; -O0: ldaxrb w9, [x11]
-; -O0: cmp w9, w10, uxtb
-; -O0: stlxrb w8, w12, [x11]
-; -O0: and w8, w9, #0xff
-; -O0: subs w8, w8, w10, uxtb
+; -O0: csel w1, w0, w8, gt
+; -O0: bl __aarch64_cas1_acq_rel
+; -O0: and w8, w0, #0xff
+; -O0: subs w8, w8, w9, uxtb
;
; -O1-LABEL: atomicrmw_max_i8_aligned_seq_cst:
; -O1: ldaxrb w9, [x0]
@@ -4340,13 +4140,11 @@ define dso_local i8 @atomicrmw_max_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
define dso_local i16 @atomicrmw_max_i16_aligned_monotonic(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_max_i16_aligned_monotonic:
-; -O0: sxth w10, w8
-; -O0: subs w10, w10, w9, sxth
-; -O0: csel w12, w8, w9, gt
-; -O0: ldaxrh w9, [x11]
-; -O0: cmp w9, w8, uxth
-; -O0: stlxrh w10, w12, [x11]
-; -O0: subs w8, w8, w9, uxth
+; -O0: sxth w9, w0
+; -O0: subs w9, w9, w8, sxth
+; -O0: csel w1, w0, w8, gt
+; -O0: bl __aarch64_cas2_relax
+; -O0: subs w8, w8, w0, uxth
;
; -O1-LABEL: atomicrmw_max_i16_aligned_monotonic:
; -O1: ldxrh w9, [x0]
@@ -4360,13 +4158,11 @@ define dso_local i16 @atomicrmw_max_i16_aligned_monotonic(ptr %ptr, i16 %value)
define dso_local i16 @atomicrmw_max_i16_aligned_acquire(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_max_i16_aligned_acquire:
-; -O0: sxth w10, w8
-; -O0: subs w10, w10, w9, sxth
-; -O0: csel w12, w8, w9, gt
-; -O0: ldaxrh w9, [x11]
-; -O0: cmp w9, w8, uxth
-; -O0: stlxrh w10, w12, [x11]
-; -O0: subs w8, w8, w9, uxth
+; -O0: sxth w9, w0
+; -O0: subs w9, w9, w8, sxth
+; -O0: csel w1, w0, w8, gt
+; -O0: bl __aarch64_cas2_acq
+; -O0: subs w8, w8, w0, uxth
;
; -O1-LABEL: atomicrmw_max_i16_aligned_acquire:
; -O1: ldaxrh w9, [x0]
@@ -4380,13 +4176,11 @@ define dso_local i16 @atomicrmw_max_i16_aligned_acquire(ptr %ptr, i16 %value) {
define dso_local i16 @atomicrmw_max_i16_aligned_release(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_max_i16_aligned_release:
-; -O0: sxth w10, w8
-; -O0: subs w10, w10, w9, sxth
-; -O0: csel w12, w8, w9, gt
-; -O0: ldaxrh w9, [x11]
-; -O0: cmp w9, w8, uxth
-; -O0: stlxrh w10, w12, [x11]
-; -O0: subs w8, w8, w9, uxth
+; -O0: sxth w9, w0
+; -O0: subs w9, w9, w8, sxth
+; -O0: csel w1, w0, w8, gt
+; -O0: bl __aarch64_cas2_rel
+; -O0: subs w8, w8, w0, uxth
;
; -O1-LABEL: atomicrmw_max_i16_aligned_release:
; -O1: ldxrh w9, [x0]
@@ -4400,13 +4194,11 @@ define dso_local i16 @atomicrmw_max_i16_aligned_release(ptr %ptr, i16 %value) {
define dso_local i16 @atomicrmw_max_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_max_i16_aligned_acq_rel:
-; -O0: sxth w10, w8
-; -O0: subs w10, w10, w9, sxth
-; -O0: csel w12, w8, w9, gt
-; -O0: ldaxrh w9, [x11]
-; -O0: cmp w9, w8, uxth
-; -O0: stlxrh w10, w12, [x11]
-; -O0: subs w8, w8, w9, uxth
+; -O0: sxth w9, w0
+; -O0: subs w9, w9, w8, sxth
+; -O0: csel w1, w0, w8, gt
+; -O0: bl __aarch64_cas2_acq_rel
+; -O0: subs w8, w8, w0, uxth
;
; -O1-LABEL: atomicrmw_max_i16_aligned_acq_rel:
; -O1: ldaxrh w9, [x0]
@@ -4420,13 +4212,11 @@ define dso_local i16 @atomicrmw_max_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
define dso_local i16 @atomicrmw_max_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_max_i16_aligned_seq_cst:
-; -O0: sxth w10, w8
-; -O0: subs w10, w10, w9, sxth
-; -O0: csel w12, w8, w9, gt
-; -O0: ldaxrh w9, [x11]
-; -O0: cmp w9, w8, uxth
-; -O0: stlxrh w10, w12, [x11]
-; -O0: subs w8, w8, w9, uxth
+; -O0: sxth w9, w0
+; -O0: subs w9, w9, w8, sxth
+; -O0: csel w1, w0, w8, gt
+; -O0: bl __aarch64_cas2_acq_rel
+; -O0: subs w8, w8, w0, uxth
;
; -O1-LABEL: atomicrmw_max_i16_aligned_seq_cst:
; -O1: ldaxrh w9, [x0]
@@ -4440,12 +4230,10 @@ define dso_local i16 @atomicrmw_max_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
define dso_local i32 @atomicrmw_max_i32_aligned_monotonic(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_max_i32_aligned_monotonic:
-; -O0: subs w10, w8, w9
-; -O0: csel w12, w8, w9, gt
-; -O0: ldaxr w9, [x11]
-; -O0: cmp w9, w8
-; -O0: stlxr w10, w12, [x11]
-; -O0: subs w8, w9, w8
+; -O0: subs w9, w0, w8
+; -O0: csel w1, w0, w8, gt
+; -O0: bl __aarch64_cas4_relax
+; -O0: subs w8, w0, w8
;
; -O1-LABEL: atomicrmw_max_i32_aligned_monotonic:
; -O1: ldxr w8, [x0]
@@ -4458,12 +4246,10 @@ define dso_local i32 @atomicrmw_max_i32_aligned_monotonic(ptr %ptr, i32 %value)
define dso_local i32 @atomicrmw_max_i32_aligned_acquire(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_max_i32_aligned_acquire:
-; -O0: subs w10, w8, w9
-; -O0: csel w12, w8, w9, gt
-; -O0: ldaxr w9, [x11]
-; -O0: cmp w9, w8
-; -O0: stlxr w10, w12, [x11]
-; -O0: subs w8, w9, w8
+; -O0: subs w9, w0, w8
+; -O0: csel w1, w0, w8, gt
+; -O0: bl __aarch64_cas4_acq
+; -O0: subs w8, w0, w8
;
; -O1-LABEL: atomicrmw_max_i32_aligned_acquire:
; -O1: ldaxr w8, [x0]
@@ -4476,12 +4262,10 @@ define dso_local i32 @atomicrmw_max_i32_aligned_acquire(ptr %ptr, i32 %value) {
define dso_local i32 @atomicrmw_max_i32_aligned_release(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_max_i32_aligned_release:
-; -O0: subs w10, w8, w9
-; -O0: csel w12, w8, w9, gt
-; -O0: ldaxr w9, [x11]
-; -O0: cmp w9, w8
-; -O0: stlxr w10, w12, [x11]
-; -O0: subs w8, w9, w8
+; -O0: subs w9, w0, w8
+; -O0: csel w1, w0, w8, gt
+; -O0: bl __aarch64_cas4_rel
+; -O0: subs w8, w0, w8
;
; -O1-LABEL: atomicrmw_max_i32_aligned_release:
; -O1: ldxr w8, [x0]
@@ -4494,12 +4278,10 @@ define dso_local i32 @atomicrmw_max_i32_aligned_release(ptr %ptr, i32 %value) {
define dso_local i32 @atomicrmw_max_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_max_i32_aligned_acq_rel:
-; -O0: subs w10, w8, w9
-; -O0: csel w12, w8, w9, gt
-; -O0: ldaxr w9, [x11]
-; -O0: cmp w9, w8
-; -O0: stlxr w10, w12, [x11]
-; -O0: subs w8, w9, w8
+; -O0: subs w9, w0, w8
+; -O0: csel w1, w0, w8, gt
+; -O0: bl __aarch64_cas4_acq_rel
+; -O0: subs w8, w0, w8
;
; -O1-LABEL: atomicrmw_max_i32_aligned_acq_rel:
; -O1: ldaxr w8, [x0]
@@ -4512,12 +4294,10 @@ define dso_local i32 @atomicrmw_max_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
define dso_local i32 @atomicrmw_max_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_max_i32_aligned_seq_cst:
-; -O0: subs w10, w8, w9
-; -O0: csel w12, w8, w9, gt
-; -O0: ldaxr w9, [x11]
-; -O0: cmp w9, w8
-; -O0: stlxr w10, w12, [x11]
-; -O0: subs w8, w9, w8
+; -O0: subs w9, w0, w8
+; -O0: csel w1, w0, w8, gt
+; -O0: bl __aarch64_cas4_acq_rel
+; -O0: subs w8, w0, w8
;
; -O1-LABEL: atomicrmw_max_i32_aligned_seq_cst:
; -O1: ldaxr w8, [x0]
@@ -4530,12 +4310,10 @@ define dso_local i32 @atomicrmw_max_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
define dso_local i64 @atomicrmw_max_i64_aligned_monotonic(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_max_i64_aligned_monotonic:
-; -O0: subs x10, x8, x9
-; -O0: csel x12, x8, x9, gt
-; -O0: ldaxr x9, [x11]
-; -O0: cmp x9, x8
-; -O0: stlxr w10, x12, [x11]
-; -O0: subs x8, x9, x8
+; -O0: subs x9, x0, x8
+; -O0: csel x1, x0, x8, gt
+; -O0: bl __aarch64_cas8_relax
+; -O0: subs x8, x0, x8
;
; -O1-LABEL: atomicrmw_max_i64_aligned_monotonic:
; -O1: ldxr x0, [x8]
@@ -4548,12 +4326,10 @@ define dso_local i64 @atomicrmw_max_i64_aligned_monotonic(ptr %ptr, i64 %value)
define dso_local i64 @atomicrmw_max_i64_aligned_acquire(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_max_i64_aligned_acquire:
-; -O0: subs x10, x8, x9
-; -O0: csel x12, x8, x9, gt
-; -O0: ldaxr x9, [x11]
-; -O0: cmp x9, x8
-; -O0: stlxr w10, x12, [x11]
-; -O0: subs x8, x9, x8
+; -O0: subs x9, x0, x8
+; -O0: csel x1, x0, x8, gt
+; -O0: bl __aarch64_cas8_acq
+; -O0: subs x8, x0, x8
;
; -O1-LABEL: atomicrmw_max_i64_aligned_acquire:
; -O1: ldaxr x0, [x8]
@@ -4566,12 +4342,10 @@ define dso_local i64 @atomicrmw_max_i64_aligned_acquire(ptr %ptr, i64 %value) {
define dso_local i64 @atomicrmw_max_i64_aligned_release(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_max_i64_aligned_release:
-; -O0: subs x10, x8, x9
-; -O0: csel x12, x8, x9, gt
-; -O0: ldaxr x9, [x11]
-; -O0: cmp x9, x8
-; -O0: stlxr w10, x12, [x11]
-; -O0: subs x8, x9, x8
+; -O0: subs x9, x0, x8
+; -O0: csel x1, x0, x8, gt
+; -O0: bl __aarch64_cas8_rel
+; -O0: subs x8, x0, x8
;
; -O1-LABEL: atomicrmw_max_i64_aligned_release:
; -O1: ldxr x0, [x8]
@@ -4584,12 +4358,10 @@ define dso_local i64 @atomicrmw_max_i64_aligned_release(ptr %ptr, i64 %value) {
define dso_local i64 @atomicrmw_max_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_max_i64_aligned_acq_rel:
-; -O0: subs x10, x8, x9
-; -O0: csel x12, x8, x9, gt
-; -O0: ldaxr x9, [x11]
-; -O0: cmp x9, x8
-; -O0: stlxr w10, x12, [x11]
-; -O0: subs x8, x9, x8
+; -O0: subs x9, x0, x8
+; -O0: csel x1, x0, x8, gt
+; -O0: bl __aarch64_cas8_acq_rel
+; -O0: subs x8, x0, x8
;
; -O1-LABEL: atomicrmw_max_i64_aligned_acq_rel:
; -O1: ldaxr x0, [x8]
@@ -4602,12 +4374,10 @@ define dso_local i64 @atomicrmw_max_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
define dso_local i64 @atomicrmw_max_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_max_i64_aligned_seq_cst:
-; -O0: subs x10, x8, x9
-; -O0: csel x12, x8, x9, gt
-; -O0: ldaxr x9, [x11]
-; -O0: cmp x9, x8
-; -O0: stlxr w10, x12, [x11]
-; -O0: subs x8, x9, x8
+; -O0: subs x9, x0, x8
+; -O0: csel x1, x0, x8, gt
+; -O0: bl __aarch64_cas8_acq_rel
+; -O0: subs x8, x0, x8
;
; -O1-LABEL: atomicrmw_max_i64_aligned_seq_cst:
; -O1: ldaxr x0, [x8]
@@ -4621,21 +4391,17 @@ define dso_local i64 @atomicrmw_max_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
define dso_local i128 @atomicrmw_max_i128_aligned_monotonic(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_max_i128_aligned_monotonic:
; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x12
-; -O0: subs x13, x13, x9
+; -O0: subs x8, x8, x11
+; -O0: subs x12, x12, x9
; -O0: csel w10, w8, w10, eq
-; -O0: ands w13, w10, #0x1
-; -O0: csel x14, x8, x12, ne
+; -O0: ands w12, w10, #0x1
+; -O0: csel x2, x8, x11, ne
; -O0: ands w10, w10, #0x1
-; -O0: csel x15, x8, x9, ne
-; -O0: ldxp x10, x9, [x11]
-; -O0: cmp x10, x12
-; -O0: cmp x9, x13
-; -O0: stxp w8, x14, x15, [x11]
-; -O0: stxp w8, x10, x9, [x11]
-; -O0: eor x8, x10, x8
-; -O0: eor x11, x9, x11
-; -O0: orr x8, x8, x11
+; -O0: csel x3, x8, x9, ne
+; -O0: bl __aarch64_cas16_relax
+; -O0: eor x8, x0, x8
+; -O0: eor x9, x1, x9
+; -O0: orr x8, x8, x9
; -O0: subs x8, x8, #0
;
; -O1-LABEL: atomicrmw_max_i128_aligned_monotonic:
@@ -4651,21 +4417,17 @@ define dso_local i128 @atomicrmw_max_i128_aligned_monotonic(ptr %ptr, i128 %valu
define dso_local i128 @atomicrmw_max_i128_aligned_acquire(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_max_i128_aligned_acquire:
; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x12
-; -O0: subs x13, x13, x9
+; -O0: subs x8, x8, x11
+; -O0: subs x12, x12, x9
; -O0: csel w10, w8, w10, eq
-; -O0: ands w13, w10, #0x1
-; -O0: csel x14, x8, x12, ne
+; -O0: ands w12, w10, #0x1
+; -O0: csel x2, x8, x11, ne
; -O0: ands w10, w10, #0x1
-; -O0: csel x15, x8, x9, ne
-; -O0: ldaxp x10, x9, [x11]
-; -O0: cmp x10, x12
-; -O0: cmp x9, x13
-; -O0: stxp w8, x14, x15, [x11]
-; -O0: stxp w8, x10, x9, [x11]
-; -O0: eor x8, x10, x8
-; -O0: eor x11, x9, x11
-; -O0: orr x8, x8, x11
+; -O0: csel x3, x8, x9, ne
+; -O0: bl __aarch64_cas16_acq
+; -O0: eor x8, x0, x8
+; -O0: eor x9, x1, x9
+; -O0: orr x8, x8, x9
; -O0: subs x8, x8, #0
;
; -O1-LABEL: atomicrmw_max_i128_aligned_acquire:
@@ -4681,21 +4443,17 @@ define dso_local i128 @atomicrmw_max_i128_aligned_acquire(ptr %ptr, i128 %value)
define dso_local i128 @atomicrmw_max_i128_aligned_release(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_max_i128_aligned_release:
; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x12
-; -O0: subs x13, x13, x9
+; -O0: subs x8, x8, x11
+; -O0: subs x12, x12, x9
; -O0: csel w10, w8, w10, eq
-; -O0: ands w13, w10, #0x1
-; -O0: csel x14, x8, x12, ne
+; -O0: ands w12, w10, #0x1
+; -O0: csel x2, x8, x11, ne
; -O0: ands w10, w10, #0x1
-; -O0: csel x15, x8, x9, ne
-; -O0: ldxp x10, x9, [x11]
-; -O0: cmp x10, x12
-; -O0: cmp x9, x13
-; -O0: stlxp w8, x14, x15, [x11]
-; -O0: stlxp w8, x10, x9, [x11]
-; -O0: eor x8, x10, x8
-; -O0: eor x11, x9, x11
-; -O0: orr x8, x8, x11
+; -O0: csel x3, x8, x9, ne
+; -O0: bl __aarch64_cas16_rel
+; -O0: eor x8, x0, x8
+; -O0: eor x9, x1, x9
+; -O0: orr x8, x8, x9
; -O0: subs x8, x8, #0
;
; -O1-LABEL: atomicrmw_max_i128_aligned_release:
@@ -4711,21 +4469,17 @@ define dso_local i128 @atomicrmw_max_i128_aligned_release(ptr %ptr, i128 %value)
define dso_local i128 @atomicrmw_max_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_max_i128_aligned_acq_rel:
; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x12
-; -O0: subs x13, x13, x9
+; -O0: subs x8, x8, x11
+; -O0: subs x12, x12, x9
; -O0: csel w10, w8, w10, eq
-; -O0: ands w13, w10, #0x1
-; -O0: csel x14, x8, x12, ne
+; -O0: ands w12, w10, #0x1
+; -O0: csel x2, x8, x11, ne
; -O0: ands w10, w10, #0x1
-; -O0: csel x15, x8, x9, ne
-; -O0: ldaxp x10, x9, [x11]
-; -O0: cmp x10, x12
-; -O0: cmp x9, x13
-; -O0: stlxp w8, x14, x15, [x11]
-; -O0: stlxp w8, x10, x9, [x11]
-; -O0: eor x8, x10, x8
-; -O0: eor x11, x9, x11
-; -O0: orr x8, x8, x11
+; -O0: csel x3, x8, x9, ne
+; -O0: bl __aarch64_cas16_acq_rel
+; -O0: eor x8, x0, x8
+; -O0: eor x9, x1, x9
+; -O0: orr x8, x8, x9
; -O0: subs x8, x8, #0
;
; -O1-LABEL: atomicrmw_max_i128_aligned_acq_rel:
@@ -4741,21 +4495,17 @@ define dso_local i128 @atomicrmw_max_i128_aligned_acq_rel(ptr %ptr, i128 %value)
define dso_local i128 @atomicrmw_max_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_max_i128_aligned_seq_cst:
; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x12
-; -O0: subs x13, x13, x9
+; -O0: subs x8, x8, x11
+; -O0: subs x12, x12, x9
; -O0: csel w10, w8, w10, eq
-; -O0: ands w13, w10, #0x1
-; -O0: csel x14, x8, x12, ne
+; -O0: ands w12, w10, #0x1
+; -O0: csel x2, x8, x11, ne
; -O0: ands w10, w10, #0x1
-; -O0: csel x15, x8, x9, ne
-; -O0: ldaxp x10, x9, [x11]
-; -O0: cmp x10, x12
-; -O0: cmp x9, x13
-; -O0: stlxp w8, x14, x15, [x11]
-; -O0: stlxp w8, x10, x9, [x11]
-; -O0: eor x8, x10, x8
-; -O0: eor x11, x9, x11
-; -O0: orr x8, x8, x11
+; -O0: csel x3, x8, x9, ne
+; -O0: bl __aarch64_cas16_acq_rel
+; -O0: eor x8, x0, x8
+; -O0: eor x9, x1, x9
+; -O0: orr x8, x8, x9
; -O0: subs x8, x8, #0
;
; -O1-LABEL: atomicrmw_max_i128_aligned_seq_cst:
@@ -4770,14 +4520,12 @@ define dso_local i128 @atomicrmw_max_i128_aligned_seq_cst(ptr %ptr, i128 %value)
define dso_local i8 @atomicrmw_max_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
; -O0-LABEL: atomicrmw_max_i8_unaligned_monotonic:
-; -O0: sxtb w9, w10
+; -O0: sxtb w9, w0
; -O0: subs w9, w9, w8, sxtb
-; -O0: csel w12, w10, w8, gt
-; -O0: ldaxrb w9, [x11]
-; -O0: cmp w9, w10, uxtb
-; -O0: stlxrb w8, w12, [x11]
-; -O0: and w8, w9, #0xff
-; -O0: subs w8, w8, w10, uxtb
+; -O0: csel w1, w0, w8, gt
+; -O0: bl __aarch64_cas1_relax
+; -O0: and w8, w0, #0xff
+; -O0: subs w8, w8, w9, uxtb
;
; -O1-LABEL: atomicrmw_max_i8_unaligned_monotonic:
; -O1: ldxrb w9, [x0]
@@ -4791,14 +4539,12 @@ define dso_local i8 @atomicrmw_max_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
define dso_local i8 @atomicrmw_max_i8_unaligned_acquire(ptr %ptr, i8 %value) {
; -O0-LABEL: atomicrmw_max_i8_unaligned_acquire:
-; -O0: sxtb w9, w10
+; -O0: sxtb w9, w0
; -O0: subs w9, w9, w8, sxtb
-; -O0: csel w12, w10, w8, gt
-; -O0: ldaxrb w9, [x11]
-; -O0: cmp w9, w10, uxtb
-; -O0: stlxrb w8, w12, [x11]
-; -O0: and w8, w9, #0xff
-; -O0: subs w8, w8, w10, uxtb
+; -O0: csel w1, w0, w8, gt
+; -O0: bl __aarch64_cas1_acq
+; -O0: and w8, w0, #0xff
+; -O0: subs w8, w8, w9, uxtb
;
; -O1-LABEL: atomicrmw_max_i8_unaligned_acquire:
; -O1: ldaxrb w9, [x0]
@@ -4812,14 +4558,12 @@ define dso_local i8 @atomicrmw_max_i8_unaligned_acquire(ptr %ptr, i8 %value) {
define dso_local i8 @atomicrmw_max_i8_unaligned_release(ptr %ptr, i8 %value) {
; -O0-LABEL: atomicrmw_max_i8_unaligned_release:
-; -O0: sxtb w9, w10
+; -O0: sxtb w9, w0
; -O0: subs w9, w9, w8, sxtb
-; -O0: csel w12, w10, w8, gt
-; -O0: ldaxrb w9, [x11]
-; -O0: cmp w9, w10, uxtb
-; -O0: stlxrb w8, w12, [x11]
-; -O0: and w8, w9, #0xff
-; -O0: subs w8, w8, w10, uxtb
+; -O0: csel w1, w0, w8, gt
+; -O0: bl __aarch64_cas1_rel
+; -O0: and w8, w0, #0xff
+; -O0: subs w8, w8, w9, uxtb
;
; -O1-LABEL: atomicrmw_max_i8_unaligned_release:
; -O1: ldxrb w9, [x0]
@@ -4833,14 +4577,12 @@ define dso_local i8 @atomicrmw_max_i8_unaligned_release(ptr %ptr, i8 %value) {
define dso_local i8 @atomicrmw_max_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
; -O0-LABEL: atomicrmw_max_i8_unaligned_acq_rel:
-; -O0: sxtb w9, w10
+; -O0: sxtb w9, w0
; -O0: subs w9, w9, w8, sxtb
-; -O0: csel w12, w10, w8, gt
-; -O0: ldaxrb w9, [x11]
-; -O0: cmp w9, w10, uxtb
-; -O0: stlxrb w8, w12, [x11]
-; -O0: and w8, w9, #0xff
-; -O0: subs w8, w8, w10, uxtb
+; -O0: csel w1, w0, w8, gt
+; -O0: bl __aarch64_cas1_acq_rel
+; -O0: and w8, w0, #0xff
+; -O0: subs w8, w8, w9, uxtb
;
; -O1-LABEL: atomicrmw_max_i8_unaligned_acq_rel:
; -O1: ldaxrb w9, [x0]
@@ -4854,14 +4596,12 @@ define dso_local i8 @atomicrmw_max_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
define dso_local i8 @atomicrmw_max_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
; -O0-LABEL: atomicrmw_max_i8_unaligned_seq_cst:
-; -O0: sxtb w9, w10
+; -O0: sxtb w9, w0
; -O0: subs w9, w9, w8, sxtb
-; -O0: csel w12, w10, w8, gt
-; -O0: ldaxrb w9, [x11]
-; -O0: cmp w9, w10, uxtb
-; -O0: stlxrb w8, w12, [x11]
-; -O0: and w8, w9, #0xff
-; -O0: subs w8, w8, w10, uxtb
+; -O0: csel w1, w0, w8, gt
+; -O0: bl __aarch64_cas1_acq_rel
+; -O0: and w8, w0, #0xff
+; -O0: subs w8, w8, w9, uxtb
;
; -O1-LABEL: atomicrmw_max_i8_unaligned_seq_cst:
; -O1: ldaxrb w9, [x0]
@@ -5205,14 +4945,12 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
define dso_local i8 @atomicrmw_min_i8_aligned_monotonic(ptr %ptr, i8 %value) {
; -O0-LABEL: atomicrmw_min_i8_aligned_monotonic:
-; -O0: sxtb w9, w10
+; -O0: sxtb w9, w0
; -O0: subs w9, w9, w8, sxtb
-; -O0: csel w12, w10, w8, le
-; -O0: ldaxrb w9, [x11]
-; -O0: cmp w9, w10, uxtb
-; -O0: stlxrb w8, w12, [x11]
-; -O0: and w8, w9, #0xff
-; -O0: subs w8, w8, w10, uxtb
+; -O0: csel w1, w0, w8, le
+; -O0: bl __aarch64_cas1_relax
+; -O0: and w8, w0, #0xff
+; -O0: subs w8, w8, w9, uxtb
;
; -O1-LABEL: atomicrmw_min_i8_aligned_monotonic:
; -O1: ldxrb w9, [x0]
@@ -5226,14 +4964,12 @@ define dso_local i8 @atomicrmw_min_i8_aligned_monotonic(ptr %ptr, i8 %value) {
define dso_local i8 @atomicrmw_min_i8_aligned_acquire(ptr %ptr, i8 %value) {
; -O0-LABEL: atomicrmw_min_i8_aligned_acquire:
-; -O0: sxtb w9, w10
+; -O0: sxtb w9, w0
; -O0: subs w9, w9, w8, sxtb
-; -O0: csel w12, w10, w8, le
-; -O0: ldaxrb w9, [x11]
-; -O0: cmp w9, w10, uxtb
-; -O0: stlxrb w8, w12, [x11]
-; -O0: and w8, w9, #0xff
-; -O0: subs w8, w8, w10, uxtb
+; -O0: csel w1, w0, w8, le
+; -O0: bl __aarch64_cas1_acq
+; -O0: and w8, w0, #0xff
+; -O0: subs w8, w8, w9, uxtb
;
; -O1-LABEL: atomicrmw_min_i8_aligned_acquire:
; -O1: ldaxrb w9, [x0]
@@ -5247,14 +4983,12 @@ define dso_local i8 @atomicrmw_min_i8_aligned_acquire(ptr %ptr, i8 %value) {
define dso_local i8 @atomicrmw_min_i8_aligned_release(ptr %ptr, i8 %value) {
; -O0-LABEL: atomicrmw_min_i8_aligned_release:
-; -O0: sxtb w9, w10
+; -O0: sxtb w9, w0
; -O0: subs w9, w9, w8, sxtb
-; -O0: csel w12, w10, w8, le
-; -O0: ldaxrb w9, [x11]
-; -O0: cmp w9, w10, uxtb
-; -O0: stlxrb w8, w12, [x11]
-; -O0: and w8, w9, #0xff
-; -O0: subs w8, w8, w10, uxtb
+; -O0: csel w1, w0, w8, le
+; -O0: bl __aarch64_cas1_rel
+; -O0: and w8, w0, #0xff
+; -O0: subs w8, w8, w9, uxtb
;
; -O1-LABEL: atomicrmw_min_i8_aligned_release:
; -O1: ldxrb w9, [x0]
@@ -5268,14 +5002,12 @@ define dso_local i8 @atomicrmw_min_i8_aligned_release(ptr %ptr, i8 %value) {
define dso_local i8 @atomicrmw_min_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
; -O0-LABEL: atomicrmw_min_i8_aligned_acq_rel:
-; -O0: sxtb w9, w10
+; -O0: sxtb w9, w0
; -O0: subs w9, w9, w8, sxtb
-; -O0: csel w12, w10, w8, le
-; -O0: ldaxrb w9, [x11]
-; -O0: cmp w9, w10, uxtb
-; -O0: stlxrb w8, w12, [x11]
-; -O0: and w8, w9, #0xff
-; -O0: subs w8, w8, w10, uxtb
+; -O0: csel w1, w0, w8, le
+; -O0: bl __aarch64_cas1_acq_rel
+; -O0: and w8, w0, #0xff
+; -O0: subs w8, w8, w9, uxtb
;
; -O1-LABEL: atomicrmw_min_i8_aligned_acq_rel:
; -O1: ldaxrb w9, [x0]
@@ -5289,14 +5021,12 @@ define dso_local i8 @atomicrmw_min_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
define dso_local i8 @atomicrmw_min_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
; -O0-LABEL: atomicrmw_min_i8_aligned_seq_cst:
-; -O0: sxtb w9, w10
+; -O0: sxtb w9, w0
; -O0: subs w9, w9, w8, sxtb
-; -O0: csel w12, w10, w8, le
-; -O0: ldaxrb w9, [x11]
-; -O0: cmp w9, w10, uxtb
-; -O0: stlxrb w8, w12, [x11]
-; -O0: and w8, w9, #0xff
-; -O0: subs w8, w8, w10, uxtb
+; -O0: csel w1, w0, w8, le
+; -O0: bl __aarch64_cas1_acq_rel
+; -O0: and w8, w0, #0xff
+; -O0: subs w8, w8, w9, uxtb
;
; -O1-LABEL: atomicrmw_min_i8_aligned_seq_cst:
; -O1: ldaxrb w9, [x0]
@@ -5310,13 +5040,11 @@ define dso_local i8 @atomicrmw_min_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
define dso_local i16 @atomicrmw_min_i16_aligned_monotonic(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_min_i16_aligned_monotonic:
-; -O0: sxth w10, w8
-; -O0: subs w10, w10, w9, sxth
-; -O0: csel w12, w8, w9, le
-; -O0: ldaxrh w9, [x11]
-; -O0: cmp w9, w8, uxth
-; -O0: stlxrh w10, w12, [x11]
-; -O0: subs w8, w8, w9, uxth
+; -O0: sxth w9, w0
+; -O0: subs w9, w9, w8, sxth
+; -O0: csel w1, w0, w8, le
+; -O0: bl __aarch64_cas2_relax
+; -O0: subs w8, w8, w0, uxth
;
; -O1-LABEL: atomicrmw_min_i16_aligned_monotonic:
; -O1: ldxrh w9, [x0]
@@ -5330,13 +5058,11 @@ define dso_local i16 @atomicrmw_min_i16_aligned_monotonic(ptr %ptr, i16 %value)
define dso_local i16 @atomicrmw_min_i16_aligned_acquire(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_min_i16_aligned_acquire:
-; -O0: sxth w10, w8
-; -O0: subs w10, w10, w9, sxth
-; -O0: csel w12, w8, w9, le
-; -O0: ldaxrh w9, [x11]
-; -O0: cmp w9, w8, uxth
-; -O0: stlxrh w10, w12, [x11]
-; -O0: subs w8, w8, w9, uxth
+; -O0: sxth w9, w0
+; -O0: subs w9, w9, w8, sxth
+; -O0: csel w1, w0, w8, le
+; -O0: bl __aarch64_cas2_acq
+; -O0: subs w8, w8, w0, uxth
;
; -O1-LABEL: atomicrmw_min_i16_aligned_acquire:
; -O1: ldaxrh w9, [x0]
@@ -5350,13 +5076,11 @@ define dso_local i16 @atomicrmw_min_i16_aligned_acquire(ptr %ptr, i16 %value) {
define dso_local i16 @atomicrmw_min_i16_aligned_release(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_min_i16_aligned_release:
-; -O0: sxth w10, w8
-; -O0: subs w10, w10, w9, sxth
-; -O0: csel w12, w8, w9, le
-; -O0: ldaxrh w9, [x11]
-; -O0: cmp w9, w8, uxth
-; -O0: stlxrh w10, w12, [x11]
-; -O0: subs w8, w8, w9, uxth
+; -O0: sxth w9, w0
+; -O0: subs w9, w9, w8, sxth
+; -O0: csel w1, w0, w8, le
+; -O0: bl __aarch64_cas2_rel
+; -O0: subs w8, w8, w0, uxth
;
; -O1-LABEL: atomicrmw_min_i16_aligned_release:
; -O1: ldxrh w9, [x0]
@@ -5370,13 +5094,11 @@ define dso_local i16 @atomicrmw_min_i16_aligned_release(ptr %ptr, i16 %value) {
define dso_local i16 @atomicrmw_min_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_min_i16_aligned_acq_rel:
-; -O0: sxth w10, w8
-; -O0: subs w10, w10, w9, sxth
-; -O0: csel w12, w8, w9, le
-; -O0: ldaxrh w9, [x11]
-; -O0: cmp w9, w8, uxth
-; -O0: stlxrh w10, w12, [x11]
-; -O0: subs w8, w8, w9, uxth
+; -O0: sxth w9, w0
+; -O0: subs w9, w9, w8, sxth
+; -O0: csel w1, w0, w8, le
+; -O0: bl __aarch64_cas2_acq_rel
+; -O0: subs w8, w8, w0, uxth
;
; -O1-LABEL: atomicrmw_min_i16_aligned_acq_rel:
; -O1: ldaxrh w9, [x0]
@@ -5390,13 +5112,11 @@ define dso_local i16 @atomicrmw_min_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
define dso_local i16 @atomicrmw_min_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_min_i16_aligned_seq_cst:
-; -O0: sxth w10, w8
-; -O0: subs w10, w10, w9, sxth
-; -O0: csel w12, w8, w9, le
-; -O0: ldaxrh w9, [x11]
-; -O0: cmp w9, w8, uxth
-; -O0: stlxrh w10, w12, [x11]
-; -O0: subs w8, w8, w9, uxth
+; -O0: sxth w9, w0
+; -O0: subs w9, w9, w8, sxth
+; -O0: csel w1, w0, w8, le
+; -O0: bl __aarch64_cas2_acq_rel
+; -O0: subs w8, w8, w0, uxth
;
; -O1-LABEL: atomicrmw_min_i16_aligned_seq_cst:
; -O1: ldaxrh w9, [x0]
@@ -5410,12 +5130,10 @@ define dso_local i16 @atomicrmw_min_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
define dso_local i32 @atomicrmw_min_i32_aligned_monotonic(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_min_i32_aligned_monotonic:
-; -O0: subs w10, w8, w9
-; -O0: csel w12, w8, w9, le
-; -O0: ldaxr w9, [x11]
-; -O0: cmp w9, w8
-; -O0: stlxr w10, w12, [x11]
-; -O0: subs w8, w9, w8
+; -O0: subs w9, w0, w8
+; -O0: csel w1, w0, w8, le
+; -O0: bl __aarch64_cas4_relax
+; -O0: subs w8, w0, w8
;
; -O1-LABEL: atomicrmw_min_i32_aligned_monotonic:
; -O1: ldxr w8, [x0]
@@ -5428,12 +5146,10 @@ define dso_local i32 @atomicrmw_min_i32_aligned_monotonic(ptr %ptr, i32 %value)
define dso_local i32 @atomicrmw_min_i32_aligned_acquire(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_min_i32_aligned_acquire:
-; -O0: subs w10, w8, w9
-; -O0: csel w12, w8, w9, le
-; -O0: ldaxr w9, [x11]
-; -O0: cmp w9, w8
-; -O0: stlxr w10, w12, [x11]
-; -O0: subs w8, w9, w8
+; -O0: subs w9, w0, w8
+; -O0: csel w1, w0, w8, le
+; -O0: bl __aarch64_cas4_acq
+; -O0: subs w8, w0, w8
;
; -O1-LABEL: atomicrmw_min_i32_aligned_acquire:
; -O1: ldaxr w8, [x0]
@@ -5446,12 +5162,10 @@ define dso_local i32 @atomicrmw_min_i32_aligned_acquire(ptr %ptr, i32 %value) {
define dso_local i32 @atomicrmw_min_i32_aligned_release(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_min_i32_aligned_release:
-; -O0: subs w10, w8, w9
-; -O0: csel w12, w8, w9, le
-; -O0: ldaxr w9, [x11]
-; -O0: cmp w9, w8
-; -O0: stlxr w10, w12, [x11]
-; -O0: subs w8, w9, w8
+; -O0: subs w9, w0, w8
+; -O0: csel w1, w0, w8, le
+; -O0: bl __aarch64_cas4_rel
+; -O0: subs w8, w0, w8
;
; -O1-LABEL: atomicrmw_min_i32_aligned_release:
; -O1: ldxr w8, [x0]
@@ -5464,12 +5178,10 @@ define dso_local i32 @atomicrmw_min_i32_aligned_release(ptr %ptr, i32 %value) {
define dso_local i32 @atomicrmw_min_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_min_i32_aligned_acq_rel:
-; -O0: subs w10, w8, w9
-; -O0: csel w12, w8, w9, le
-; -O0: ldaxr w9, [x11]
-; -O0: cmp w9, w8
-; -O0: stlxr w10, w12, [x11]
-; -O0: subs w8, w9, w8
+; -O0: subs w9, w0, w8
+; -O0: csel w1, w0, w8, le
+; -O0: bl __aarch64_cas4_acq_rel
+; -O0: subs w8, w0, w8
;
; -O1-LABEL: atomicrmw_min_i32_aligned_acq_rel:
; -O1: ldaxr w8, [x0]
@@ -5482,12 +5194,10 @@ define dso_local i32 @atomicrmw_min_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
define dso_local i32 @atomicrmw_min_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_min_i32_aligned_seq_cst:
-; -O0: subs w10, w8, w9
-; -O0: csel w12, w8, w9, le
-; -O0: ldaxr w9, [x11]
-; -O0: cmp w9, w8
-; -O0: stlxr w10, w12, [x11]
-; -O0: subs w8, w9, w8
+; -O0: subs w9, w0, w8
+; -O0: csel w1, w0, w8, le
+; -O0: bl __aarch64_cas4_acq_rel
+; -O0: subs w8, w0, w8
;
; -O1-LABEL: atomicrmw_min_i32_aligned_seq_cst:
; -O1: ldaxr w8, [x0]
@@ -5500,12 +5210,10 @@ define dso_local i32 @atomicrmw_min_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
define dso_local i64 @atomicrmw_min_i64_aligned_monotonic(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_min_i64_aligned_monotonic:
-; -O0: subs x10, x8, x9
-; -O0: csel x12, x8, x9, le
-; -O0: ldaxr x9, [x11]
-; -O0: cmp x9, x8
-; -O0: stlxr w10, x12, [x11]
-; -O0: subs x8, x9, x8
+; -O0: subs x9, x0, x8
+; -O0: csel x1, x0, x8, le
+; -O0: bl __aarch64_cas8_relax
+; -O0: subs x8, x0, x8
;
; -O1-LABEL: atomicrmw_min_i64_aligned_monotonic:
; -O1: ldxr x0, [x8]
@@ -5518,12 +5226,10 @@ define dso_local i64 @atomicrmw_min_i64_aligned_monotonic(ptr %ptr, i64 %value)
define dso_local i64 @atomicrmw_min_i64_aligned_acquire(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_min_i64_aligned_acquire:
-; -O0: subs x10, x8, x9
-; -O0: csel x12, x8, x9, le
-; -O0: ldaxr x9, [x11]
-; -O0: cmp x9, x8
-; -O0: stlxr w10, x12, [x11]
-; -O0: subs x8, x9, x8
+; -O0: subs x9, x0, x8
+; -O0: csel x1, x0, x8, le
+; -O0: bl __aarch64_cas8_acq
+; -O0: subs x8, x0, x8
;
; -O1-LABEL: atomicrmw_min_i64_aligned_acquire:
; -O1: ldaxr x0, [x8]
@@ -5536,12 +5242,10 @@ define dso_local i64 @atomicrmw_min_i64_aligned_acquire(ptr %ptr, i64 %value) {
define dso_local i64 @atomicrmw_min_i64_aligned_release(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_min_i64_aligned_release:
-; -O0: subs x10, x8, x9
-; -O0: csel x12, x8, x9, le
-; -O0: ldaxr x9, [x11]
-; -O0: cmp x9, x8
-; -O0: stlxr w10, x12, [x11]
-; -O0: subs x8, x9, x8
+; -O0: subs x9, x0, x8
+; -O0: csel x1, x0, x8, le
+; -O0: bl __aarch64_cas8_rel
+; -O0: subs x8, x0, x8
;
; -O1-LABEL: atomicrmw_min_i64_aligned_release:
; -O1: ldxr x0, [x8]
@@ -5554,12 +5258,10 @@ define dso_local i64 @atomicrmw_min_i64_aligned_release(ptr %ptr, i64 %value) {
define dso_local i64 @atomicrmw_min_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_min_i64_aligned_acq_rel:
-; -O0: subs x10, x8, x9
-; -O0: csel x12, x8, x9, le
-; -O0: ldaxr x9, [x11]
-; -O0: cmp x9, x8
-; -O0: stlxr w10, x12, [x11]
-; -O0: subs x8, x9, x8
+; -O0: subs x9, x0, x8
+; -O0: csel x1, x0, x8, le
+; -O0: bl __aarch64_cas8_acq_rel
+; -O0: subs x8, x0, x8
;
; -O1-LABEL: atomicrmw_min_i64_aligned_acq_rel:
; -O1: ldaxr x0, [x8]
@@ -5572,12 +5274,10 @@ define dso_local i64 @atomicrmw_min_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
define dso_local i64 @atomicrmw_min_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_min_i64_aligned_seq_cst:
-; -O0: subs x10, x8, x9
-; -O0: csel x12, x8, x9, le
-; -O0: ldaxr x9, [x11]
-; -O0: cmp x9, x8
-; -O0: stlxr w10, x12, [x11]
-; -O0: subs x8, x9, x8
+; -O0: subs x9, x0, x8
+; -O0: csel x1, x0, x8, le
+; -O0: bl __aarch64_cas8_acq_rel
+; -O0: subs x8, x0, x8
;
; -O1-LABEL: atomicrmw_min_i64_aligned_seq_cst:
; -O1: ldaxr x0, [x8]
@@ -5591,21 +5291,17 @@ define dso_local i64 @atomicrmw_min_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
define dso_local i128 @atomicrmw_min_i128_aligned_monotonic(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_min_i128_aligned_monotonic:
; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x12
-; -O0: subs x13, x13, x9
+; -O0: subs x8, x8, x11
+; -O0: subs x12, x12, x9
; -O0: csel w10, w8, w10, eq
-; -O0: ands w13, w10, #0x1
-; -O0: csel x14, x8, x12, ne
+; -O0: ands w12, w10, #0x1
+; -O0: csel x2, x8, x11, ne
; -O0: ands w10, w10, #0x1
-; -O0: csel x15, x8, x9, ne
-; -O0: ldxp x10, x9, [x11]
-; -O0: cmp x10, x12
-; -O0: cmp x9, x13
-; -O0: stxp w8, x14, x15, [x11]
-; -O0: stxp w8, x10, x9, [x11]
-; -O0: eor x8, x10, x8
-; -O0: eor x11, x9, x11
-; -O0: orr x8, x8, x11
+; -O0: csel x3, x8, x9, ne
+; -O0: bl __aarch64_cas16_relax
+; -O0: eor x8, x0, x8
+; -O0: eor x9, x1, x9
+; -O0: orr x8, x8, x9
; -O0: subs x8, x8, #0
;
; -O1-LABEL: atomicrmw_min_i128_aligned_monotonic:
@@ -5621,21 +5317,17 @@ define dso_local i128 @atomicrmw_min_i128_aligned_monotonic(ptr %ptr, i128 %valu
define dso_local i128 @atomicrmw_min_i128_aligned_acquire(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_min_i128_aligned_acquire:
; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x12
-; -O0: subs x13, x13, x9
+; -O0: subs x8, x8, x11
+; -O0: subs x12, x12, x9
; -O0: csel w10, w8, w10, eq
-; -O0: ands w13, w10, #0x1
-; -O0: csel x14, x8, x12, ne
+; -O0: ands w12, w10, #0x1
+; -O0: csel x2, x8, x11, ne
; -O0: ands w10, w10, #0x1
-; -O0: csel x15, x8, x9, ne
-; -O0: ldaxp x10, x9, [x11]
-; -O0: cmp x10, x12
-; -O0: cmp x9, x13
-; -O0: stxp w8, x14, x15, [x11]
-; -O0: stxp w8, x10, x9, [x11]
-; -O0: eor x8, x10, x8
-; -O0: eor x11, x9, x11
-; -O0: orr x8, x8, x11
+; -O0: csel x3, x8, x9, ne
+; -O0: bl __aarch64_cas16_acq
+; -O0: eor x8, x0, x8
+; -O0: eor x9, x1, x9
+; -O0: orr x8, x8, x9
; -O0: subs x8, x8, #0
;
; -O1-LABEL: atomicrmw_min_i128_aligned_acquire:
@@ -5651,21 +5343,17 @@ define dso_local i128 @atomicrmw_min_i128_aligned_acquire(ptr %ptr, i128 %value)
define dso_local i128 @atomicrmw_min_i128_aligned_release(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_min_i128_aligned_release:
; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x12
-; -O0: subs x13, x13, x9
+; -O0: subs x8, x8, x11
+; -O0: subs x12, x12, x9
; -O0: csel w10, w8, w10, eq
-; -O0: ands w13, w10, #0x1
-; -O0: csel x14, x8, x12, ne
+; -O0: ands w12, w10, #0x1
+; -O0: csel x2, x8, x11, ne
; -O0: ands w10, w10, #0x1
-; -O0: csel x15, x8, x9, ne
-; -O0: ldxp x10, x9, [x11]
-; -O0: cmp x10, x12
-; -O0: cmp x9, x13
-; -O0: stlxp w8, x14, x15, [x11]
-; -O0: stlxp w8, x10, x9, [x11]
-; -O0: eor x8, x10, x8
-; -O0: eor x11, x9, x11
-; -O0: orr x8, x8, x11
+; -O0: csel x3, x8, x9, ne
+; -O0: bl __aarch64_cas16_rel
+; -O0: eor x8, x0, x8
+; -O0: eor x9, x1, x9
+; -O0: orr x8, x8, x9
; -O0: subs x8, x8, #0
;
; -O1-LABEL: atomicrmw_min_i128_aligned_release:
@@ -5681,21 +5369,17 @@ define dso_local i128 @atomicrmw_min_i128_aligned_release(ptr %ptr, i128 %value)
define dso_local i128 @atomicrmw_min_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_min_i128_aligned_acq_rel:
; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x12
-; -O0: subs x13, x13, x9
+; -O0: subs x8, x8, x11
+; -O0: subs x12, x12, x9
; -O0: csel w10, w8, w10, eq
-; -O0: ands w13, w10, #0x1
-; -O0: csel x14, x8, x12, ne
+; -O0: ands w12, w10, #0x1
+; -O0: csel x2, x8, x11, ne
; -O0: ands w10, w10, #0x1
-; -O0: csel x15, x8, x9, ne
-; -O0: ldaxp x10, x9, [x11]
-; -O0: cmp x10, x12
-; -O0: cmp x9, x13
-; -O0: stlxp w8, x14, x15, [x11]
-; -O0: stlxp w8, x10, x9, [x11]
-; -O0: eor x8, x10, x8
-; -O0: eor x11, x9, x11
-; -O0: orr x8, x8, x11
+; -O0: csel x3, x8, x9, ne
+; -O0: bl __aarch64_cas16_acq_rel
+; -O0: eor x8, x0, x8
+; -O0: eor x9, x1, x9
+; -O0: orr x8, x8, x9
; -O0: subs x8, x8, #0
;
; -O1-LABEL: atomicrmw_min_i128_aligned_acq_rel:
@@ -5711,21 +5395,17 @@ define dso_local i128 @atomicrmw_min_i128_aligned_acq_rel(ptr %ptr, i128 %value)
define dso_local i128 @atomicrmw_min_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_min_i128_aligned_seq_cst:
; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x12
-; -O0: subs x13, x13, x9
+; -O0: subs x8, x8, x11
+; -O0: subs x12, x12, x9
; -O0: csel w10, w8, w10, eq
-; -O0: ands w13, w10, #0x1
-; -O0: csel x14, x8, x12, ne
+; -O0: ands w12, w10, #0x1
+; -O0: csel x2, x8, x11, ne
; -O0: ands w10, w10, #0x1
-; -O0: csel x15, x8, x9, ne
-; -O0: ldaxp x10, x9, [x11]
-; -O0: cmp x10, x12
-; -O0: cmp x9, x13
-; -O0: stlxp w8, x14, x15, [x11]
-; -O0: stlxp w8, x10, x9, [x11]
-; -O0: eor x8, x10, x8
-; -O0: eor x11, x9, x11
-; -O0: orr x8, x8, x11
+; -O0: csel x3, x8, x9, ne
+; -O0: bl __aarch64_cas16_acq_rel
+; -O0: eor x8, x0, x8
+; -O0: eor x9, x1, x9
+; -O0: orr x8, x8, x9
; -O0: subs x8, x8, #0
;
; -O1-LABEL: atomicrmw_min_i128_aligned_seq_cst:
@@ -5740,14 +5420,12 @@ define dso_local i128 @atomicrmw_min_i128_aligned_seq_cst(ptr %ptr, i128 %value)
define dso_local i8 @atomicrmw_min_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
; -O0-LABEL: atomicrmw_min_i8_unaligned_monotonic:
-; -O0: sxtb w9, w10
+; -O0: sxtb w9, w0
; -O0: subs w9, w9, w8, sxtb
-; -O0: csel w12, w10, w8, le
-; -O0: ldaxrb w9, [x11]
-; -O0: cmp w9, w10, uxtb
-; -O0: stlxrb w8, w12, [x11]
-; -O0: and w8, w9, #0xff
-; -O0: subs w8, w8, w10, uxtb
+; -O0: csel w1, w0, w8, le
+; -O0: bl __aarch64_cas1_relax
+; -O0: and w8, w0, #0xff
+; -O0: subs w8, w8, w9, uxtb
;
; -O1-LABEL: atomicrmw_min_i8_unaligned_monotonic:
; -O1: ldxrb w9, [x0]
@@ -5761,14 +5439,12 @@ define dso_local i8 @atomicrmw_min_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
define dso_local i8 @atomicrmw_min_i8_unaligned_acquire(ptr %ptr, i8 %value) {
; -O0-LABEL: atomicrmw_min_i8_unaligned_acquire:
-; -O0: sxtb w9, w10
+; -O0: sxtb w9, w0
; -O0: subs w9, w9, w8, sxtb
-; -O0: csel w12, w10, w8, le
-; -O0: ldaxrb w9, [x11]
-; -O0: cmp w9, w10, uxtb
-; -O0: stlxrb w8, w12, [x11]
-; -O0: and w8, w9, #0xff
-; -O0: subs w8, w8, w10, uxtb
+; -O0: csel w1, w0, w8, le
+; -O0: bl __aarch64_cas1_acq
+; -O0: and w8, w0, #0xff
+; -O0: subs w8, w8, w9, uxtb
;
; -O1-LABEL: atomicrmw_min_i8_unaligned_acquire:
; -O1: ldaxrb w9, [x0]
@@ -5782,14 +5458,12 @@ define dso_local i8 @atomicrmw_min_i8_unaligned_acquire(ptr %ptr, i8 %value) {
define dso_local i8 @atomicrmw_min_i8_unaligned_release(ptr %ptr, i8 %value) {
; -O0-LABEL: atomicrmw_min_i8_unaligned_release:
-; -O0: sxtb w9, w10
+; -O0: sxtb w9, w0
; -O0: subs w9, w9, w8, sxtb
-; -O0: csel w12, w10, w8, le
-; -O0: ldaxrb w9, [x11]
-; -O0: cmp w9, w10, uxtb
-; -O0: stlxrb w8, w12, [x11]
-; -O0: and w8, w9, #0xff
-; -O0: subs w8, w8, w10, uxtb
+; -O0: csel w1, w0, w8, le
+; -O0: bl __aarch64_cas1_rel
+; -O0: and w8, w0, #0xff
+; -O0: subs w8, w8, w9, uxtb
;
; -O1-LABEL: atomicrmw_min_i8_unaligned_release:
; -O1: ldxrb w9, [x0]
@@ -5803,14 +5477,12 @@ define dso_local i8 @atomicrmw_min_i8_unaligned_release(ptr %ptr, i8 %value) {
define dso_local i8 @atomicrmw_min_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
; -O0-LABEL: atomicrmw_min_i8_unaligned_acq_rel:
-; -O0: sxtb w9, w10
+; -O0: sxtb w9, w0
; -O0: subs w9, w9, w8, sxtb
-; -O0: csel w12, w10, w8, le
-; -O0: ldaxrb w9, [x11]
-; -O0: cmp w9, w10, uxtb
-; -O0: stlxrb w8, w12, [x11]
-; -O0: and w8, w9, #0xff
-; -O0: subs w8, w8, w10, uxtb
+; -O0: csel w1, w0, w8, le
+; -O0: bl __aarch64_cas1_acq_rel
+; -O0: and w8, w0, #0xff
+; -O0: subs w8, w8, w9, uxtb
;
; -O1-LABEL: atomicrmw_min_i8_unaligned_acq_rel:
; -O1: ldaxrb w9, [x0]
@@ -5824,14 +5496,12 @@ define dso_local i8 @atomicrmw_min_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
define dso_local i8 @atomicrmw_min_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
; -O0-LABEL: atomicrmw_min_i8_unaligned_seq_cst:
-; -O0: sxtb w9, w10
+; -O0: sxtb w9, w0
; -O0: subs w9, w9, w8, sxtb
-; -O0: csel w12, w10, w8, le
-; -O0: ldaxrb w9, [x11]
-; -O0: cmp w9, w10, uxtb
-; -O0: stlxrb w8, w12, [x11]
-; -O0: and w8, w9, #0xff
-; -O0: subs w8, w8, w10, uxtb
+; -O0: csel w1, w0, w8, le
+; -O0: bl __aarch64_cas1_acq_rel
+; -O0: and w8, w0, #0xff
+; -O0: subs w8, w8, w9, uxtb
;
; -O1-LABEL: atomicrmw_min_i8_unaligned_seq_cst:
; -O1: ldaxrb w9, [x0]
@@ -6175,14 +5845,12 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
define dso_local i8 @atomicrmw_umax_i8_aligned_monotonic(ptr %ptr, i8 %value) {
; -O0-LABEL: atomicrmw_umax_i8_aligned_monotonic:
-; -O0: and w9, w10, #0xff
+; -O0: and w9, w0, #0xff
; -O0: subs w9, w9, w8, uxtb
-; -O0: csel w12, w10, w8, hi
-; -O0: ldaxrb w9, [x11]
-; -O0: cmp w9, w10, uxtb
-; -O0: stlxrb w8, w12, [x11]
-; -O0: and w8, w9, #0xff
-; -O0: subs w8, w8, w10, uxtb
+; -O0: csel w1, w0, w8, hi
+; -O0: bl __aarch64_cas1_relax
+; -O0: and w8, w0, #0xff
+; -O0: subs w8, w8, w9, uxtb
;
; -O1-LABEL: atomicrmw_umax_i8_aligned_monotonic:
; -O1: and w9, w1, #0xff
@@ -6196,14 +5864,12 @@ define dso_local i8 @atomicrmw_umax_i8_aligned_monotonic(ptr %ptr, i8 %value) {
define dso_local i8 @atomicrmw_umax_i8_aligned_acquire(ptr %ptr, i8 %value) {
; -O0-LABEL: atomicrmw_umax_i8_aligned_acquire:
-; -O0: and w9, w10, #0xff
+; -O0: and w9, w0, #0xff
; -O0: subs w9, w9, w8, uxtb
-; -O0: csel w12, w10, w8, hi
-; -O0: ldaxrb w9, [x11]
-; -O0: cmp w9, w10, uxtb
-; -O0: stlxrb w8, w12, [x11]
-; -O0: and w8, w9, #0xff
-; -O0: subs w8, w8, w10, uxtb
+; -O0: csel w1, w0, w8, hi
+; -O0: bl __aarch64_cas1_acq
+; -O0: and w8, w0, #0xff
+; -O0: subs w8, w8, w9, uxtb
;
; -O1-LABEL: atomicrmw_umax_i8_aligned_acquire:
; -O1: and w9, w1, #0xff
@@ -6217,14 +5883,12 @@ define dso_local i8 @atomicrmw_umax_i8_aligned_acquire(ptr %ptr, i8 %value) {
define dso_local i8 @atomicrmw_umax_i8_aligned_release(ptr %ptr, i8 %value) {
; -O0-LABEL: atomicrmw_umax_i8_aligned_release:
-; -O0: and w9, w10, #0xff
+; -O0: and w9, w0, #0xff
; -O0: subs w9, w9, w8, uxtb
-; -O0: csel w12, w10, w8, hi
-; -O0: ldaxrb w9, [x11]
-; -O0: cmp w9, w10, uxtb
-; -O0: stlxrb w8, w12, [x11]
-; -O0: and w8, w9, #0xff
-; -O0: subs w8, w8, w10, uxtb
+; -O0: csel w1, w0, w8, hi
+; -O0: bl __aarch64_cas1_rel
+; -O0: and w8, w0, #0xff
+; -O0: subs w8, w8, w9, uxtb
;
; -O1-LABEL: atomicrmw_umax_i8_aligned_release:
; -O1: and w9, w1, #0xff
@@ -6238,14 +5902,12 @@ define dso_local i8 @atomicrmw_umax_i8_aligned_release(ptr %ptr, i8 %value) {
define dso_local i8 @atomicrmw_umax_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
; -O0-LABEL: atomicrmw_umax_i8_aligned_acq_rel:
-; -O0: and w9, w10, #0xff
+; -O0: and w9, w0, #0xff
; -O0: subs w9, w9, w8, uxtb
-; -O0: csel w12, w10, w8, hi
-; -O0: ldaxrb w9, [x11]
-; -O0: cmp w9, w10, uxtb
-; -O0: stlxrb w8, w12, [x11]
-; -O0: and w8, w9, #0xff
-; -O0: subs w8, w8, w10, uxtb
+; -O0: csel w1, w0, w8, hi
+; -O0: bl __aarch64_cas1_acq_rel
+; -O0: and w8, w0, #0xff
+; -O0: subs w8, w8, w9, uxtb
;
; -O1-LABEL: atomicrmw_umax_i8_aligned_acq_rel:
; -O1: and w9, w1, #0xff
@@ -6259,14 +5921,12 @@ define dso_local i8 @atomicrmw_umax_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
define dso_local i8 @atomicrmw_umax_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
; -O0-LABEL: atomicrmw_umax_i8_aligned_seq_cst:
-; -O0: and w9, w10, #0xff
+; -O0: and w9, w0, #0xff
; -O0: subs w9, w9, w8, uxtb
-; -O0: csel w12, w10, w8, hi
-; -O0: ldaxrb w9, [x11]
-; -O0: cmp w9, w10, uxtb
-; -O0: stlxrb w8, w12, [x11]
-; -O0: and w8, w9, #0xff
-; -O0: subs w8, w8, w10, uxtb
+; -O0: csel w1, w0, w8, hi
+; -O0: bl __aarch64_cas1_acq_rel
+; -O0: and w8, w0, #0xff
+; -O0: subs w8, w8, w9, uxtb
;
; -O1-LABEL: atomicrmw_umax_i8_aligned_seq_cst:
; -O1: and w9, w1, #0xff
@@ -6280,12 +5940,10 @@ define dso_local i8 @atomicrmw_umax_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
define dso_local i16 @atomicrmw_umax_i16_aligned_monotonic(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_umax_i16_aligned_monotonic:
-; -O0: subs w10, w10, w9, uxth
-; -O0: csel w12, w8, w9, hi
-; -O0: ldaxrh w9, [x11]
-; -O0: cmp w9, w8, uxth
-; -O0: stlxrh w10, w12, [x11]
-; -O0: subs w8, w8, w9, uxth
+; -O0: subs w9, w9, w8, uxth
+; -O0: csel w1, w0, w8, hi
+; -O0: bl __aarch64_cas2_relax
+; -O0: subs w8, w8, w0, uxth
;
; -O1-LABEL: atomicrmw_umax_i16_aligned_monotonic:
; -O1: and w9, w1, #0xffff
@@ -6299,12 +5957,10 @@ define dso_local i16 @atomicrmw_umax_i16_aligned_monotonic(ptr %ptr, i16 %value)
define dso_local i16 @atomicrmw_umax_i16_aligned_acquire(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_umax_i16_aligned_acquire:
-; -O0: subs w10, w10, w9, uxth
-; -O0: csel w12, w8, w9, hi
-; -O0: ldaxrh w9, [x11]
-; -O0: cmp w9, w8, uxth
-; -O0: stlxrh w10, w12, [x11]
-; -O0: subs w8, w8, w9, uxth
+; -O0: subs w9, w9, w8, uxth
+; -O0: csel w1, w0, w8, hi
+; -O0: bl __aarch64_cas2_acq
+; -O0: subs w8, w8, w0, uxth
;
; -O1-LABEL: atomicrmw_umax_i16_aligned_acquire:
; -O1: and w9, w1, #0xffff
@@ -6318,12 +5974,10 @@ define dso_local i16 @atomicrmw_umax_i16_aligned_acquire(ptr %ptr, i16 %value) {
define dso_local i16 @atomicrmw_umax_i16_aligned_release(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_umax_i16_aligned_release:
-; -O0: subs w10, w10, w9, uxth
-; -O0: csel w12, w8, w9, hi
-; -O0: ldaxrh w9, [x11]
-; -O0: cmp w9, w8, uxth
-; -O0: stlxrh w10, w12, [x11]
-; -O0: subs w8, w8, w9, uxth
+; -O0: subs w9, w9, w8, uxth
+; -O0: csel w1, w0, w8, hi
+; -O0: bl __aarch64_cas2_rel
+; -O0: subs w8, w8, w0, uxth
;
; -O1-LABEL: atomicrmw_umax_i16_aligned_release:
; -O1: and w9, w1, #0xffff
@@ -6337,12 +5991,10 @@ define dso_local i16 @atomicrmw_umax_i16_aligned_release(ptr %ptr, i16 %value) {
define dso_local i16 @atomicrmw_umax_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_umax_i16_aligned_acq_rel:
-; -O0: subs w10, w10, w9, uxth
-; -O0: csel w12, w8, w9, hi
-; -O0: ldaxrh w9, [x11]
-; -O0: cmp w9, w8, uxth
-; -O0: stlxrh w10, w12, [x11]
-; -O0: subs w8, w8, w9, uxth
+; -O0: subs w9, w9, w8, uxth
+; -O0: csel w1, w0, w8, hi
+; -O0: bl __aarch64_cas2_acq_rel
+; -O0: subs w8, w8, w0, uxth
;
; -O1-LABEL: atomicrmw_umax_i16_aligned_acq_rel:
; -O1: and w9, w1, #0xffff
@@ -6356,12 +6008,10 @@ define dso_local i16 @atomicrmw_umax_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
define dso_local i16 @atomicrmw_umax_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_umax_i16_aligned_seq_cst:
-; -O0: subs w10, w10, w9, uxth
-; -O0: csel w12, w8, w9, hi
-; -O0: ldaxrh w9, [x11]
-; -O0: cmp w9, w8, uxth
-; -O0: stlxrh w10, w12, [x11]
-; -O0: subs w8, w8, w9, uxth
+; -O0: subs w9, w9, w8, uxth
+; -O0: csel w1, w0, w8, hi
+; -O0: bl __aarch64_cas2_acq_rel
+; -O0: subs w8, w8, w0, uxth
;
; -O1-LABEL: atomicrmw_umax_i16_aligned_seq_cst:
; -O1: and w9, w1, #0xffff
@@ -6375,12 +6025,10 @@ define dso_local i16 @atomicrmw_umax_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
define dso_local i32 @atomicrmw_umax_i32_aligned_monotonic(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_umax_i32_aligned_monotonic:
-; -O0: subs w10, w8, w9
-; -O0: csel w12, w8, w9, hi
-; -O0: ldaxr w9, [x11]
-; -O0: cmp w9, w8
-; -O0: stlxr w10, w12, [x11]
-; -O0: subs w8, w9, w8
+; -O0: subs w9, w0, w8
+; -O0: csel w1, w0, w8, hi
+; -O0: bl __aarch64_cas4_relax
+; -O0: subs w8, w0, w8
;
; -O1-LABEL: atomicrmw_umax_i32_aligned_monotonic:
; -O1: ldxr w8, [x0]
@@ -6393,12 +6041,10 @@ define dso_local i32 @atomicrmw_umax_i32_aligned_monotonic(ptr %ptr, i32 %value)
define dso_local i32 @atomicrmw_umax_i32_aligned_acquire(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_umax_i32_aligned_acquire:
-; -O0: subs w10, w8, w9
-; -O0: csel w12, w8, w9, hi
-; -O0: ldaxr w9, [x11]
-; -O0: cmp w9, w8
-; -O0: stlxr w10, w12, [x11]
-; -O0: subs w8, w9, w8
+; -O0: subs w9, w0, w8
+; -O0: csel w1, w0, w8, hi
+; -O0: bl __aarch64_cas4_acq
+; -O0: subs w8, w0, w8
;
; -O1-LABEL: atomicrmw_umax_i32_aligned_acquire:
; -O1: ldaxr w8, [x0]
@@ -6411,12 +6057,10 @@ define dso_local i32 @atomicrmw_umax_i32_aligned_acquire(ptr %ptr, i32 %value) {
define dso_local i32 @atomicrmw_umax_i32_aligned_release(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_umax_i32_aligned_release:
-; -O0: subs w10, w8, w9
-; -O0: csel w12, w8, w9, hi
-; -O0: ldaxr w9, [x11]
-; -O0: cmp w9, w8
-; -O0: stlxr w10, w12, [x11]
-; -O0: subs w8, w9, w8
+; -O0: subs w9, w0, w8
+; -O0: csel w1, w0, w8, hi
+; -O0: bl __aarch64_cas4_rel
+; -O0: subs w8, w0, w8
;
; -O1-LABEL: atomicrmw_umax_i32_aligned_release:
; -O1: ldxr w8, [x0]
@@ -6429,12 +6073,10 @@ define dso_local i32 @atomicrmw_umax_i32_aligned_release(ptr %ptr, i32 %value) {
define dso_local i32 @atomicrmw_umax_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_umax_i32_aligned_acq_rel:
-; -O0: subs w10, w8, w9
-; -O0: csel w12, w8, w9, hi
-; -O0: ldaxr w9, [x11]
-; -O0: cmp w9, w8
-; -O0: stlxr w10, w12, [x11]
-; -O0: subs w8, w9, w8
+; -O0: subs w9, w0, w8
+; -O0: csel w1, w0, w8, hi
+; -O0: bl __aarch64_cas4_acq_rel
+; -O0: subs w8, w0, w8
;
; -O1-LABEL: atomicrmw_umax_i32_aligned_acq_rel:
; -O1: ldaxr w8, [x0]
@@ -6447,12 +6089,10 @@ define dso_local i32 @atomicrmw_umax_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
define dso_local i32 @atomicrmw_umax_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_umax_i32_aligned_seq_cst:
-; -O0: subs w10, w8, w9
-; -O0: csel w12, w8, w9, hi
-; -O0: ldaxr w9, [x11]
-; -O0: cmp w9, w8
-; -O0: stlxr w10, w12, [x11]
-; -O0: subs w8, w9, w8
+; -O0: subs w9, w0, w8
+; -O0: csel w1, w0, w8, hi
+; -O0: bl __aarch64_cas4_acq_rel
+; -O0: subs w8, w0, w8
;
; -O1-LABEL: atomicrmw_umax_i32_aligned_seq_cst:
; -O1: ldaxr w8, [x0]
@@ -6465,12 +6105,10 @@ define dso_local i32 @atomicrmw_umax_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
define dso_local i64 @atomicrmw_umax_i64_aligned_monotonic(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_umax_i64_aligned_monotonic:
-; -O0: subs x10, x8, x9
-; -O0: csel x12, x8, x9, hi
-; -O0: ldaxr x9, [x11]
-; -O0: cmp x9, x8
-; -O0: stlxr w10, x12, [x11]
-; -O0: subs x8, x9, x8
+; -O0: subs x9, x0, x8
+; -O0: csel x1, x0, x8, hi
+; -O0: bl __aarch64_cas8_relax
+; -O0: subs x8, x0, x8
;
; -O1-LABEL: atomicrmw_umax_i64_aligned_monotonic:
; -O1: ldxr x0, [x8]
@@ -6483,12 +6121,10 @@ define dso_local i64 @atomicrmw_umax_i64_aligned_monotonic(ptr %ptr, i64 %value)
define dso_local i64 @atomicrmw_umax_i64_aligned_acquire(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_umax_i64_aligned_acquire:
-; -O0: subs x10, x8, x9
-; -O0: csel x12, x8, x9, hi
-; -O0: ldaxr x9, [x11]
-; -O0: cmp x9, x8
-; -O0: stlxr w10, x12, [x11]
-; -O0: subs x8, x9, x8
+; -O0: subs x9, x0, x8
+; -O0: csel x1, x0, x8, hi
+; -O0: bl __aarch64_cas8_acq
+; -O0: subs x8, x0, x8
;
; -O1-LABEL: atomicrmw_umax_i64_aligned_acquire:
; -O1: ldaxr x0, [x8]
@@ -6501,12 +6137,10 @@ define dso_local i64 @atomicrmw_umax_i64_aligned_acquire(ptr %ptr, i64 %value) {
define dso_local i64 @atomicrmw_umax_i64_aligned_release(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_umax_i64_aligned_release:
-; -O0: subs x10, x8, x9
-; -O0: csel x12, x8, x9, hi
-; -O0: ldaxr x9, [x11]
-; -O0: cmp x9, x8
-; -O0: stlxr w10, x12, [x11]
-; -O0: subs x8, x9, x8
+; -O0: subs x9, x0, x8
+; -O0: csel x1, x0, x8, hi
+; -O0: bl __aarch64_cas8_rel
+; -O0: subs x8, x0, x8
;
; -O1-LABEL: atomicrmw_umax_i64_aligned_release:
; -O1: ldxr x0, [x8]
@@ -6519,12 +6153,10 @@ define dso_local i64 @atomicrmw_umax_i64_aligned_release(ptr %ptr, i64 %value) {
define dso_local i64 @atomicrmw_umax_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_umax_i64_aligned_acq_rel:
-; -O0: subs x10, x8, x9
-; -O0: csel x12, x8, x9, hi
-; -O0: ldaxr x9, [x11]
-; -O0: cmp x9, x8
-; -O0: stlxr w10, x12, [x11]
-; -O0: subs x8, x9, x8
+; -O0: subs x9, x0, x8
+; -O0: csel x1, x0, x8, hi
+; -O0: bl __aarch64_cas8_acq_rel
+; -O0: subs x8, x0, x8
;
; -O1-LABEL: atomicrmw_umax_i64_aligned_acq_rel:
; -O1: ldaxr x0, [x8]
@@ -6537,12 +6169,10 @@ define dso_local i64 @atomicrmw_umax_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
define dso_local i64 @atomicrmw_umax_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_umax_i64_aligned_seq_cst:
-; -O0: subs x10, x8, x9
-; -O0: csel x12, x8, x9, hi
-; -O0: ldaxr x9, [x11]
-; -O0: cmp x9, x8
-; -O0: stlxr w10, x12, [x11]
-; -O0: subs x8, x9, x8
+; -O0: subs x9, x0, x8
+; -O0: csel x1, x0, x8, hi
+; -O0: bl __aarch64_cas8_acq_rel
+; -O0: subs x8, x0, x8
;
; -O1-LABEL: atomicrmw_umax_i64_aligned_seq_cst:
; -O1: ldaxr x0, [x8]
@@ -6556,21 +6186,17 @@ define dso_local i64 @atomicrmw_umax_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
define dso_local i128 @atomicrmw_umax_i128_aligned_monotonic(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_umax_i128_aligned_monotonic:
; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x12
-; -O0: subs x13, x13, x9
+; -O0: subs x8, x8, x11
+; -O0: subs x12, x12, x9
; -O0: csel w10, w8, w10, eq
-; -O0: ands w13, w10, #0x1
-; -O0: csel x14, x8, x12, ne
+; -O0: ands w12, w10, #0x1
+; -O0: csel x2, x8, x11, ne
; -O0: ands w10, w10, #0x1
-; -O0: csel x15, x8, x9, ne
-; -O0: ldxp x10, x9, [x11]
-; -O0: cmp x10, x12
-; -O0: cmp x9, x13
-; -O0: stxp w8, x14, x15, [x11]
-; -O0: stxp w8, x10, x9, [x11]
-; -O0: eor x8, x10, x8
-; -O0: eor x11, x9, x11
-; -O0: orr x8, x8, x11
+; -O0: csel x3, x8, x9, ne
+; -O0: bl __aarch64_cas16_relax
+; -O0: eor x8, x0, x8
+; -O0: eor x9, x1, x9
+; -O0: orr x8, x8, x9
; -O0: subs x8, x8, #0
;
; -O1-LABEL: atomicrmw_umax_i128_aligned_monotonic:
@@ -6586,21 +6212,17 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_monotonic(ptr %ptr, i128 %val
define dso_local i128 @atomicrmw_umax_i128_aligned_acquire(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_umax_i128_aligned_acquire:
; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x12
-; -O0: subs x13, x13, x9
+; -O0: subs x8, x8, x11
+; -O0: subs x12, x12, x9
; -O0: csel w10, w8, w10, eq
-; -O0: ands w13, w10, #0x1
-; -O0: csel x14, x8, x12, ne
+; -O0: ands w12, w10, #0x1
+; -O0: csel x2, x8, x11, ne
; -O0: ands w10, w10, #0x1
-; -O0: csel x15, x8, x9, ne
-; -O0: ldaxp x10, x9, [x11]
-; -O0: cmp x10, x12
-; -O0: cmp x9, x13
-; -O0: stxp w8, x14, x15, [x11]
-; -O0: stxp w8, x10, x9, [x11]
-; -O0: eor x8, x10, x8
-; -O0: eor x11, x9, x11
-; -O0: orr x8, x8, x11
+; -O0: csel x3, x8, x9, ne
+; -O0: bl __aarch64_cas16_acq
+; -O0: eor x8, x0, x8
+; -O0: eor x9, x1, x9
+; -O0: orr x8, x8, x9
; -O0: subs x8, x8, #0
;
; -O1-LABEL: atomicrmw_umax_i128_aligned_acquire:
@@ -6616,21 +6238,17 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_acquire(ptr %ptr, i128 %value
define dso_local i128 @atomicrmw_umax_i128_aligned_release(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_umax_i128_aligned_release:
; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x12
-; -O0: subs x13, x13, x9
+; -O0: subs x8, x8, x11
+; -O0: subs x12, x12, x9
; -O0: csel w10, w8, w10, eq
-; -O0: ands w13, w10, #0x1
-; -O0: csel x14, x8, x12, ne
+; -O0: ands w12, w10, #0x1
+; -O0: csel x2, x8, x11, ne
; -O0: ands w10, w10, #0x1
-; -O0: csel x15, x8, x9, ne
-; -O0: ldxp x10, x9, [x11]
-; -O0: cmp x10, x12
-; -O0: cmp x9, x13
-; -O0: stlxp w8, x14, x15, [x11]
-; -O0: stlxp w8, x10, x9, [x11]
-; -O0: eor x8, x10, x8
-; -O0: eor x11, x9, x11
-; -O0: orr x8, x8, x11
+; -O0: csel x3, x8, x9, ne
+; -O0: bl __aarch64_cas16_rel
+; -O0: eor x8, x0, x8
+; -O0: eor x9, x1, x9
+; -O0: orr x8, x8, x9
; -O0: subs x8, x8, #0
;
; -O1-LABEL: atomicrmw_umax_i128_aligned_release:
@@ -6646,21 +6264,17 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_release(ptr %ptr, i128 %value
define dso_local i128 @atomicrmw_umax_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_umax_i128_aligned_acq_rel:
; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x12
-; -O0: subs x13, x13, x9
+; -O0: subs x8, x8, x11
+; -O0: subs x12, x12, x9
; -O0: csel w10, w8, w10, eq
-; -O0: ands w13, w10, #0x1
-; -O0: csel x14, x8, x12, ne
+; -O0: ands w12, w10, #0x1
+; -O0: csel x2, x8, x11, ne
; -O0: ands w10, w10, #0x1
-; -O0: csel x15, x8, x9, ne
-; -O0: ldaxp x10, x9, [x11]
-; -O0: cmp x10, x12
-; -O0: cmp x9, x13
-; -O0: stlxp w8, x14, x15, [x11]
-; -O0: stlxp w8, x10, x9, [x11]
-; -O0: eor x8, x10, x8
-; -O0: eor x11, x9, x11
-; -O0: orr x8, x8, x11
+; -O0: csel x3, x8, x9, ne
+; -O0: bl __aarch64_cas16_acq_rel
+; -O0: eor x8, x0, x8
+; -O0: eor x9, x1, x9
+; -O0: orr x8, x8, x9
; -O0: subs x8, x8, #0
;
; -O1-LABEL: atomicrmw_umax_i128_aligned_acq_rel:
@@ -6676,21 +6290,17 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_acq_rel(ptr %ptr, i128 %value
define dso_local i128 @atomicrmw_umax_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_umax_i128_aligned_seq_cst:
; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x12
-; -O0: subs x13, x13, x9
+; -O0: subs x8, x8, x11
+; -O0: subs x12, x12, x9
; -O0: csel w10, w8, w10, eq
-; -O0: ands w13, w10, #0x1
-; -O0: csel x14, x8, x12, ne
+; -O0: ands w12, w10, #0x1
+; -O0: csel x2, x8, x11, ne
; -O0: ands w10, w10, #0x1
-; -O0: csel x15, x8, x9, ne
-; -O0: ldaxp x10, x9, [x11]
-; -O0: cmp x10, x12
-; -O0: cmp x9, x13
-; -O0: stlxp w8, x14, x15, [x11]
-; -O0: stlxp w8, x10, x9, [x11]
-; -O0: eor x8, x10, x8
-; -O0: eor x11, x9, x11
-; -O0: orr x8, x8, x11
+; -O0: csel x3, x8, x9, ne
+; -O0: bl __aarch64_cas16_acq_rel
+; -O0: eor x8, x0, x8
+; -O0: eor x9, x1, x9
+; -O0: orr x8, x8, x9
; -O0: subs x8, x8, #0
;
; -O1-LABEL: atomicrmw_umax_i128_aligned_seq_cst:
@@ -6705,14 +6315,12 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_seq_cst(ptr %ptr, i128 %value
define dso_local i8 @atomicrmw_umax_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
; -O0-LABEL: atomicrmw_umax_i8_unaligned_monotonic:
-; -O0: and w9, w10, #0xff
+; -O0: and w9, w0, #0xff
; -O0: subs w9, w9, w8, uxtb
-; -O0: csel w12, w10, w8, hi
-; -O0: ldaxrb w9, [x11]
-; -O0: cmp w9, w10, uxtb
-; -O0: stlxrb w8, w12, [x11]
-; -O0: and w8, w9, #0xff
-; -O0: subs w8, w8, w10, uxtb
+; -O0: csel w1, w0, w8, hi
+; -O0: bl __aarch64_cas1_relax
+; -O0: and w8, w0, #0xff
+; -O0: subs w8, w8, w9, uxtb
;
; -O1-LABEL: atomicrmw_umax_i8_unaligned_monotonic:
; -O1: and w9, w1, #0xff
@@ -6726,14 +6334,12 @@ define dso_local i8 @atomicrmw_umax_i8_unaligned_monotonic(ptr %ptr, i8 %value)
define dso_local i8 @atomicrmw_umax_i8_unaligned_acquire(ptr %ptr, i8 %value) {
; -O0-LABEL: atomicrmw_umax_i8_unaligned_acquire:
-; -O0: and w9, w10, #0xff
+; -O0: and w9, w0, #0xff
; -O0: subs w9, w9, w8, uxtb
-; -O0: csel w12, w10, w8, hi
-; -O0: ldaxrb w9, [x11]
-; -O0: cmp w9, w10, uxtb
-; -O0: stlxrb w8, w12, [x11]
-; -O0: and w8, w9, #0xff
-; -O0: subs w8, w8, w10, uxtb
+; -O0: csel w1, w0, w8, hi
+; -O0: bl __aarch64_cas1_acq
+; -O0: and w8, w0, #0xff
+; -O0: subs w8, w8, w9, uxtb
;
; -O1-LABEL: atomicrmw_umax_i8_unaligned_acquire:
; -O1: and w9, w1, #0xff
@@ -6747,14 +6353,12 @@ define dso_local i8 @atomicrmw_umax_i8_unaligned_acquire(ptr %ptr, i8 %value) {
define dso_local i8 @atomicrmw_umax_i8_unaligned_release(ptr %ptr, i8 %value) {
; -O0-LABEL: atomicrmw_umax_i8_unaligned_release:
-; -O0: and w9, w10, #0xff
+; -O0: and w9, w0, #0xff
; -O0: subs w9, w9, w8, uxtb
-; -O0: csel w12, w10, w8, hi
-; -O0: ldaxrb w9, [x11]
-; -O0: cmp w9, w10, uxtb
-; -O0: stlxrb w8, w12, [x11]
-; -O0: and w8, w9, #0xff
-; -O0: subs w8, w8, w10, uxtb
+; -O0: csel w1, w0, w8, hi
+; -O0: bl __aarch64_cas1_rel
+; -O0: and w8, w0, #0xff
+; -O0: subs w8, w8, w9, uxtb
;
; -O1-LABEL: atomicrmw_umax_i8_unaligned_release:
; -O1: and w9, w1, #0xff
@@ -6768,14 +6372,12 @@ define dso_local i8 @atomicrmw_umax_i8_unaligned_release(ptr %ptr, i8 %value) {
define dso_local i8 @atomicrmw_umax_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
; -O0-LABEL: atomicrmw_umax_i8_unaligned_acq_rel:
-; -O0: and w9, w10, #0xff
+; -O0: and w9, w0, #0xff
; -O0: subs w9, w9, w8, uxtb
-; -O0: csel w12, w10, w8, hi
-; -O0: ldaxrb w9, [x11]
-; -O0: cmp w9, w10, uxtb
-; -O0: stlxrb w8, w12, [x11]
-; -O0: and w8, w9, #0xff
-; -O0: subs w8, w8, w10, uxtb
+; -O0: csel w1, w0, w8, hi
+; -O0: bl __aarch64_cas1_acq_rel
+; -O0: and w8, w0, #0xff
+; -O0: subs w8, w8, w9, uxtb
;
; -O1-LABEL: atomicrmw_umax_i8_unaligned_acq_rel:
; -O1: and w9, w1, #0xff
@@ -6789,14 +6391,12 @@ define dso_local i8 @atomicrmw_umax_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
define dso_local i8 @atomicrmw_umax_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
; -O0-LABEL: atomicrmw_umax_i8_unaligned_seq_cst:
-; -O0: and w9, w10, #0xff
+; -O0: and w9, w0, #0xff
; -O0: subs w9, w9, w8, uxtb
-; -O0: csel w12, w10, w8, hi
-; -O0: ldaxrb w9, [x11]
-; -O0: cmp w9, w10, uxtb
-; -O0: stlxrb w8, w12, [x11]
-; -O0: and w8, w9, #0xff
-; -O0: subs w8, w8, w10, uxtb
+; -O0: csel w1, w0, w8, hi
+; -O0: bl __aarch64_cas1_acq_rel
+; -O0: and w8, w0, #0xff
+; -O0: subs w8, w8, w9, uxtb
;
; -O1-LABEL: atomicrmw_umax_i8_unaligned_seq_cst:
; -O1: and w9, w1, #0xff
@@ -7135,14 +6735,12 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_seq_cst(ptr %ptr, i128 %val
define dso_local i8 @atomicrmw_umin_i8_aligned_monotonic(ptr %ptr, i8 %value) {
; -O0-LABEL: atomicrmw_umin_i8_aligned_monotonic:
-; -O0: and w9, w10, #0xff
+; -O0: and w9, w0, #0xff
; -O0: subs w9, w9, w8, uxtb
-; -O0: csel w12, w10, w8, ls
-; -O0: ldaxrb w9, [x11]
-; -O0: cmp w9, w10, uxtb
-; -O0: stlxrb w8, w12, [x11]
-; -O0: and w8, w9, #0xff
-; -O0: subs w8, w8, w10, uxtb
+; -O0: csel w1, w0, w8, ls
+; -O0: bl __aarch64_cas1_relax
+; -O0: and w8, w0, #0xff
+; -O0: subs w8, w8, w9, uxtb
;
; -O1-LABEL: atomicrmw_umin_i8_aligned_monotonic:
; -O1: and w9, w1, #0xff
@@ -7156,14 +6754,12 @@ define dso_local i8 @atomicrmw_umin_i8_aligned_monotonic(ptr %ptr, i8 %value) {
define dso_local i8 @atomicrmw_umin_i8_aligned_acquire(ptr %ptr, i8 %value) {
; -O0-LABEL: atomicrmw_umin_i8_aligned_acquire:
-; -O0: and w9, w10, #0xff
+; -O0: and w9, w0, #0xff
; -O0: subs w9, w9, w8, uxtb
-; -O0: csel w12, w10, w8, ls
-; -O0: ldaxrb w9, [x11]
-; -O0: cmp w9, w10, uxtb
-; -O0: stlxrb w8, w12, [x11]
-; -O0: and w8, w9, #0xff
-; -O0: subs w8, w8, w10, uxtb
+; -O0: csel w1, w0, w8, ls
+; -O0: bl __aarch64_cas1_acq
+; -O0: and w8, w0, #0xff
+; -O0: subs w8, w8, w9, uxtb
;
; -O1-LABEL: atomicrmw_umin_i8_aligned_acquire:
; -O1: and w9, w1, #0xff
@@ -7177,14 +6773,12 @@ define dso_local i8 @atomicrmw_umin_i8_aligned_acquire(ptr %ptr, i8 %value) {
define dso_local i8 @atomicrmw_umin_i8_aligned_release(ptr %ptr, i8 %value) {
; -O0-LABEL: atomicrmw_umin_i8_aligned_release:
-; -O0: and w9, w10, #0xff
+; -O0: and w9, w0, #0xff
; -O0: subs w9, w9, w8, uxtb
-; -O0: csel w12, w10, w8, ls
-; -O0: ldaxrb w9, [x11]
-; -O0: cmp w9, w10, uxtb
-; -O0: stlxrb w8, w12, [x11]
-; -O0: and w8, w9, #0xff
-; -O0: subs w8, w8, w10, uxtb
+; -O0: csel w1, w0, w8, ls
+; -O0: bl __aarch64_cas1_rel
+; -O0: and w8, w0, #0xff
+; -O0: subs w8, w8, w9, uxtb
;
; -O1-LABEL: atomicrmw_umin_i8_aligned_release:
; -O1: and w9, w1, #0xff
@@ -7198,14 +6792,12 @@ define dso_local i8 @atomicrmw_umin_i8_aligned_release(ptr %ptr, i8 %value) {
define dso_local i8 @atomicrmw_umin_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
; -O0-LABEL: atomicrmw_umin_i8_aligned_acq_rel:
-; -O0: and w9, w10, #0xff
+; -O0: and w9, w0, #0xff
; -O0: subs w9, w9, w8, uxtb
-; -O0: csel w12, w10, w8, ls
-; -O0: ldaxrb w9, [x11]
-; -O0: cmp w9, w10, uxtb
-; -O0: stlxrb w8, w12, [x11]
-; -O0: and w8, w9, #0xff
-; -O0: subs w8, w8, w10, uxtb
+; -O0: csel w1, w0, w8, ls
+; -O0: bl __aarch64_cas1_acq_rel
+; -O0: and w8, w0, #0xff
+; -O0: subs w8, w8, w9, uxtb
;
; -O1-LABEL: atomicrmw_umin_i8_aligned_acq_rel:
; -O1: and w9, w1, #0xff
@@ -7219,14 +6811,12 @@ define dso_local i8 @atomicrmw_umin_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
define dso_local i8 @atomicrmw_umin_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
; -O0-LABEL: atomicrmw_umin_i8_aligned_seq_cst:
-; -O0: and w9, w10, #0xff
+; -O0: and w9, w0, #0xff
; -O0: subs w9, w9, w8, uxtb
-; -O0: csel w12, w10, w8, ls
-; -O0: ldaxrb w9, [x11]
-; -O0: cmp w9, w10, uxtb
-; -O0: stlxrb w8, w12, [x11]
-; -O0: and w8, w9, #0xff
-; -O0: subs w8, w8, w10, uxtb
+; -O0: csel w1, w0, w8, ls
+; -O0: bl __aarch64_cas1_acq_rel
+; -O0: and w8, w0, #0xff
+; -O0: subs w8, w8, w9, uxtb
;
; -O1-LABEL: atomicrmw_umin_i8_aligned_seq_cst:
; -O1: and w9, w1, #0xff
@@ -7240,12 +6830,10 @@ define dso_local i8 @atomicrmw_umin_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
define dso_local i16 @atomicrmw_umin_i16_aligned_monotonic(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_umin_i16_aligned_monotonic:
-; -O0: subs w10, w10, w9, uxth
-; -O0: csel w12, w8, w9, ls
-; -O0: ldaxrh w9, [x11]
-; -O0: cmp w9, w8, uxth
-; -O0: stlxrh w10, w12, [x11]
-; -O0: subs w8, w8, w9, uxth
+; -O0: subs w9, w9, w8, uxth
+; -O0: csel w1, w0, w8, ls
+; -O0: bl __aarch64_cas2_relax
+; -O0: subs w8, w8, w0, uxth
;
; -O1-LABEL: atomicrmw_umin_i16_aligned_monotonic:
; -O1: and w9, w1, #0xffff
@@ -7259,12 +6847,10 @@ define dso_local i16 @atomicrmw_umin_i16_aligned_monotonic(ptr %ptr, i16 %value)
define dso_local i16 @atomicrmw_umin_i16_aligned_acquire(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_umin_i16_aligned_acquire:
-; -O0: subs w10, w10, w9, uxth
-; -O0: csel w12, w8, w9, ls
-; -O0: ldaxrh w9, [x11]
-; -O0: cmp w9, w8, uxth
-; -O0: stlxrh w10, w12, [x11]
-; -O0: subs w8, w8, w9, uxth
+; -O0: subs w9, w9, w8, uxth
+; -O0: csel w1, w0, w8, ls
+; -O0: bl __aarch64_cas2_acq
+; -O0: subs w8, w8, w0, uxth
;
; -O1-LABEL: atomicrmw_umin_i16_aligned_acquire:
; -O1: and w9, w1, #0xffff
@@ -7278,12 +6864,10 @@ define dso_local i16 @atomicrmw_umin_i16_aligned_acquire(ptr %ptr, i16 %value) {
define dso_local i16 @atomicrmw_umin_i16_aligned_release(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_umin_i16_aligned_release:
-; -O0: subs w10, w10, w9, uxth
-; -O0: csel w12, w8, w9, ls
-; -O0: ldaxrh w9, [x11]
-; -O0: cmp w9, w8, uxth
-; -O0: stlxrh w10, w12, [x11]
-; -O0: subs w8, w8, w9, uxth
+; -O0: subs w9, w9, w8, uxth
+; -O0: csel w1, w0, w8, ls
+; -O0: bl __aarch64_cas2_rel
+; -O0: subs w8, w8, w0, uxth
;
; -O1-LABEL: atomicrmw_umin_i16_aligned_release:
; -O1: and w9, w1, #0xffff
@@ -7297,12 +6881,10 @@ define dso_local i16 @atomicrmw_umin_i16_aligned_release(ptr %ptr, i16 %value) {
define dso_local i16 @atomicrmw_umin_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_umin_i16_aligned_acq_rel:
-; -O0: subs w10, w10, w9, uxth
-; -O0: csel w12, w8, w9, ls
-; -O0: ldaxrh w9, [x11]
-; -O0: cmp w9, w8, uxth
-; -O0: stlxrh w10, w12, [x11]
-; -O0: subs w8, w8, w9, uxth
+; -O0: subs w9, w9, w8, uxth
+; -O0: csel w1, w0, w8, ls
+; -O0: bl __aarch64_cas2_acq_rel
+; -O0: subs w8, w8, w0, uxth
;
; -O1-LABEL: atomicrmw_umin_i16_aligned_acq_rel:
; -O1: and w9, w1, #0xffff
@@ -7316,12 +6898,10 @@ define dso_local i16 @atomicrmw_umin_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
define dso_local i16 @atomicrmw_umin_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_umin_i16_aligned_seq_cst:
-; -O0: subs w10, w10, w9, uxth
-; -O0: csel w12, w8, w9, ls
-; -O0: ldaxrh w9, [x11]
-; -O0: cmp w9, w8, uxth
-; -O0: stlxrh w10, w12, [x11]
-; -O0: subs w8, w8, w9, uxth
+; -O0: subs w9, w9, w8, uxth
+; -O0: csel w1, w0, w8, ls
+; -O0: bl __aarch64_cas2_acq_rel
+; -O0: subs w8, w8, w0, uxth
;
; -O1-LABEL: atomicrmw_umin_i16_aligned_seq_cst:
; -O1: and w9, w1, #0xffff
@@ -7335,12 +6915,10 @@ define dso_local i16 @atomicrmw_umin_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
define dso_local i32 @atomicrmw_umin_i32_aligned_monotonic(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_umin_i32_aligned_monotonic:
-; -O0: subs w10, w8, w9
-; -O0: csel w12, w8, w9, ls
-; -O0: ldaxr w9, [x11]
-; -O0: cmp w9, w8
-; -O0: stlxr w10, w12, [x11]
-; -O0: subs w8, w9, w8
+; -O0: subs w9, w0, w8
+; -O0: csel w1, w0, w8, ls
+; -O0: bl __aarch64_cas4_relax
+; -O0: subs w8, w0, w8
;
; -O1-LABEL: atomicrmw_umin_i32_aligned_monotonic:
; -O1: ldxr w8, [x0]
@@ -7353,12 +6931,10 @@ define dso_local i32 @atomicrmw_umin_i32_aligned_monotonic(ptr %ptr, i32 %value)
define dso_local i32 @atomicrmw_umin_i32_aligned_acquire(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_umin_i32_aligned_acquire:
-; -O0: subs w10, w8, w9
-; -O0: csel w12, w8, w9, ls
-; -O0: ldaxr w9, [x11]
-; -O0: cmp w9, w8
-; -O0: stlxr w10, w12, [x11]
-; -O0: subs w8, w9, w8
+; -O0: subs w9, w0, w8
+; -O0: csel w1, w0, w8, ls
+; -O0: bl __aarch64_cas4_acq
+; -O0: subs w8, w0, w8
;
; -O1-LABEL: atomicrmw_umin_i32_aligned_acquire:
; -O1: ldaxr w8, [x0]
@@ -7371,12 +6947,10 @@ define dso_local i32 @atomicrmw_umin_i32_aligned_acquire(ptr %ptr, i32 %value) {
define dso_local i32 @atomicrmw_umin_i32_aligned_release(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_umin_i32_aligned_release:
-; -O0: subs w10, w8, w9
-; -O0: csel w12, w8, w9, ls
-; -O0: ldaxr w9, [x11]
-; -O0: cmp w9, w8
-; -O0: stlxr w10, w12, [x11]
-; -O0: subs w8, w9, w8
+; -O0: subs w9, w0, w8
+; -O0: csel w1, w0, w8, ls
+; -O0: bl __aarch64_cas4_rel
+; -O0: subs w8, w0, w8
;
; -O1-LABEL: atomicrmw_umin_i32_aligned_release:
; -O1: ldxr w8, [x0]
@@ -7389,12 +6963,10 @@ define dso_local i32 @atomicrmw_umin_i32_aligned_release(ptr %ptr, i32 %value) {
define dso_local i32 @atomicrmw_umin_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_umin_i32_aligned_acq_rel:
-; -O0: subs w10, w8, w9
-; -O0: csel w12, w8, w9, ls
-; -O0: ldaxr w9, [x11]
-; -O0: cmp w9, w8
-; -O0: stlxr w10, w12, [x11]
-; -O0: subs w8, w9, w8
+; -O0: subs w9, w0, w8
+; -O0: csel w1, w0, w8, ls
+; -O0: bl __aarch64_cas4_acq_rel
+; -O0: subs w8, w0, w8
;
; -O1-LABEL: atomicrmw_umin_i32_aligned_acq_rel:
; -O1: ldaxr w8, [x0]
@@ -7407,12 +6979,10 @@ define dso_local i32 @atomicrmw_umin_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
define dso_local i32 @atomicrmw_umin_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_umin_i32_aligned_seq_cst:
-; -O0: subs w10, w8, w9
-; -O0: csel w12, w8, w9, ls
-; -O0: ldaxr w9, [x11]
-; -O0: cmp w9, w8
-; -O0: stlxr w10, w12, [x11]
-; -O0: subs w8, w9, w8
+; -O0: subs w9, w0, w8
+; -O0: csel w1, w0, w8, ls
+; -O0: bl __aarch64_cas4_acq_rel
+; -O0: subs w8, w0, w8
;
; -O1-LABEL: atomicrmw_umin_i32_aligned_seq_cst:
; -O1: ldaxr w8, [x0]
@@ -7425,12 +6995,10 @@ define dso_local i32 @atomicrmw_umin_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
define dso_local i64 @atomicrmw_umin_i64_aligned_monotonic(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_umin_i64_aligned_monotonic:
-; -O0: subs x10, x8, x9
-; -O0: csel x12, x8, x9, ls
-; -O0: ldaxr x9, [x11]
-; -O0: cmp x9, x8
-; -O0: stlxr w10, x12, [x11]
-; -O0: subs x8, x9, x8
+; -O0: subs x9, x0, x8
+; -O0: csel x1, x0, x8, ls
+; -O0: bl __aarch64_cas8_relax
+; -O0: subs x8, x0, x8
;
; -O1-LABEL: atomicrmw_umin_i64_aligned_monotonic:
; -O1: ldxr x0, [x8]
@@ -7443,12 +7011,10 @@ define dso_local i64 @atomicrmw_umin_i64_aligned_monotonic(ptr %ptr, i64 %value)
define dso_local i64 @atomicrmw_umin_i64_aligned_acquire(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_umin_i64_aligned_acquire:
-; -O0: subs x10, x8, x9
-; -O0: csel x12, x8, x9, ls
-; -O0: ldaxr x9, [x11]
-; -O0: cmp x9, x8
-; -O0: stlxr w10, x12, [x11]
-; -O0: subs x8, x9, x8
+; -O0: subs x9, x0, x8
+; -O0: csel x1, x0, x8, ls
+; -O0: bl __aarch64_cas8_acq
+; -O0: subs x8, x0, x8
;
; -O1-LABEL: atomicrmw_umin_i64_aligned_acquire:
; -O1: ldaxr x0, [x8]
@@ -7461,12 +7027,10 @@ define dso_local i64 @atomicrmw_umin_i64_aligned_acquire(ptr %ptr, i64 %value) {
define dso_local i64 @atomicrmw_umin_i64_aligned_release(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_umin_i64_aligned_release:
-; -O0: subs x10, x8, x9
-; -O0: csel x12, x8, x9, ls
-; -O0: ldaxr x9, [x11]
-; -O0: cmp x9, x8
-; -O0: stlxr w10, x12, [x11]
-; -O0: subs x8, x9, x8
+; -O0: subs x9, x0, x8
+; -O0: csel x1, x0, x8, ls
+; -O0: bl __aarch64_cas8_rel
+; -O0: subs x8, x0, x8
;
; -O1-LABEL: atomicrmw_umin_i64_aligned_release:
; -O1: ldxr x0, [x8]
@@ -7479,12 +7043,10 @@ define dso_local i64 @atomicrmw_umin_i64_aligned_release(ptr %ptr, i64 %value) {
define dso_local i64 @atomicrmw_umin_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_umin_i64_aligned_acq_rel:
-; -O0: subs x10, x8, x9
-; -O0: csel x12, x8, x9, ls
-; -O0: ldaxr x9, [x11]
-; -O0: cmp x9, x8
-; -O0: stlxr w10, x12, [x11]
-; -O0: subs x8, x9, x8
+; -O0: subs x9, x0, x8
+; -O0: csel x1, x0, x8, ls
+; -O0: bl __aarch64_cas8_acq_rel
+; -O0: subs x8, x0, x8
;
; -O1-LABEL: atomicrmw_umin_i64_aligned_acq_rel:
; -O1: ldaxr x0, [x8]
@@ -7497,12 +7059,10 @@ define dso_local i64 @atomicrmw_umin_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
define dso_local i64 @atomicrmw_umin_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_umin_i64_aligned_seq_cst:
-; -O0: subs x10, x8, x9
-; -O0: csel x12, x8, x9, ls
-; -O0: ldaxr x9, [x11]
-; -O0: cmp x9, x8
-; -O0: stlxr w10, x12, [x11]
-; -O0: subs x8, x9, x8
+; -O0: subs x9, x0, x8
+; -O0: csel x1, x0, x8, ls
+; -O0: bl __aarch64_cas8_acq_rel
+; -O0: subs x8, x0, x8
;
; -O1-LABEL: atomicrmw_umin_i64_aligned_seq_cst:
; -O1: ldaxr x0, [x8]
@@ -7516,21 +7076,17 @@ define dso_local i64 @atomicrmw_umin_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
define dso_local i128 @atomicrmw_umin_i128_aligned_monotonic(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_umin_i128_aligned_monotonic:
; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x12
-; -O0: subs x13, x13, x9
+; -O0: subs x8, x8, x11
+; -O0: subs x12, x12, x9
; -O0: csel w10, w8, w10, eq
-; -O0: ands w13, w10, #0x1
-; -O0: csel x14, x8, x12, ne
+; -O0: ands w12, w10, #0x1
+; -O0: csel x2, x8, x11, ne
; -O0: ands w10, w10, #0x1
-; -O0: csel x15, x8, x9, ne
-; -O0: ldxp x10, x9, [x11]
-; -O0: cmp x10, x12
-; -O0: cmp x9, x13
-; -O0: stxp w8, x14, x15, [x11]
-; -O0: stxp w8, x10, x9, [x11]
-; -O0: eor x8, x10, x8
-; -O0: eor x11, x9, x11
-; -O0: orr x8, x8, x11
+; -O0: csel x3, x8, x9, ne
+; -O0: bl __aarch64_cas16_relax
+; -O0: eor x8, x0, x8
+; -O0: eor x9, x1, x9
+; -O0: orr x8, x8, x9
; -O0: subs x8, x8, #0
;
; -O1-LABEL: atomicrmw_umin_i128_aligned_monotonic:
@@ -7546,21 +7102,17 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_monotonic(ptr %ptr, i128 %val
define dso_local i128 @atomicrmw_umin_i128_aligned_acquire(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_umin_i128_aligned_acquire:
; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x12
-; -O0: subs x13, x13, x9
+; -O0: subs x8, x8, x11
+; -O0: subs x12, x12, x9
; -O0: csel w10, w8, w10, eq
-; -O0: ands w13, w10, #0x1
-; -O0: csel x14, x8, x12, ne
+; -O0: ands w12, w10, #0x1
+; -O0: csel x2, x8, x11, ne
; -O0: ands w10, w10, #0x1
-; -O0: csel x15, x8, x9, ne
-; -O0: ldaxp x10, x9, [x11]
-; -O0: cmp x10, x12
-; -O0: cmp x9, x13
-; -O0: stxp w8, x14, x15, [x11]
-; -O0: stxp w8, x10, x9, [x11]
-; -O0: eor x8, x10, x8
-; -O0: eor x11, x9, x11
-; -O0: orr x8, x8, x11
+; -O0: csel x3, x8, x9, ne
+; -O0: bl __aarch64_cas16_acq
+; -O0: eor x8, x0, x8
+; -O0: eor x9, x1, x9
+; -O0: orr x8, x8, x9
; -O0: subs x8, x8, #0
;
; -O1-LABEL: atomicrmw_umin_i128_aligned_acquire:
@@ -7576,21 +7128,17 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_acquire(ptr %ptr, i128 %value
define dso_local i128 @atomicrmw_umin_i128_aligned_release(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_umin_i128_aligned_release:
; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x12
-; -O0: subs x13, x13, x9
+; -O0: subs x8, x8, x11
+; -O0: subs x12, x12, x9
; -O0: csel w10, w8, w10, eq
-; -O0: ands w13, w10, #0x1
-; -O0: csel x14, x8, x12, ne
+; -O0: ands w12, w10, #0x1
+; -O0: csel x2, x8, x11, ne
; -O0: ands w10, w10, #0x1
-; -O0: csel x15, x8, x9, ne
-; -O0: ldxp x10, x9, [x11]
-; -O0: cmp x10, x12
-; -O0: cmp x9, x13
-; -O0: stlxp w8, x14, x15, [x11]
-; -O0: stlxp w8, x10, x9, [x11]
-; -O0: eor x8, x10, x8
-; -O0: eor x11, x9, x11
-; -O0: orr x8, x8, x11
+; -O0: csel x3, x8, x9, ne
+; -O0: bl __aarch64_cas16_rel
+; -O0: eor x8, x0, x8
+; -O0: eor x9, x1, x9
+; -O0: orr x8, x8, x9
; -O0: subs x8, x8, #0
;
; -O1-LABEL: atomicrmw_umin_i128_aligned_release:
@@ -7606,21 +7154,17 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_release(ptr %ptr, i128 %value
define dso_local i128 @atomicrmw_umin_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_umin_i128_aligned_acq_rel:
; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x12
-; -O0: subs x13, x13, x9
+; -O0: subs x8, x8, x11
+; -O0: subs x12, x12, x9
; -O0: csel w10, w8, w10, eq
-; -O0: ands w13, w10, #0x1
-; -O0: csel x14, x8, x12, ne
+; -O0: ands w12, w10, #0x1
+; -O0: csel x2, x8, x11, ne
; -O0: ands w10, w10, #0x1
-; -O0: csel x15, x8, x9, ne
-; -O0: ldaxp x10, x9, [x11]
-; -O0: cmp x10, x12
-; -O0: cmp x9, x13
-; -O0: stlxp w8, x14, x15, [x11]
-; -O0: stlxp w8, x10, x9, [x11]
-; -O0: eor x8, x10, x8
-; -O0: eor x11, x9, x11
-; -O0: orr x8, x8, x11
+; -O0: csel x3, x8, x9, ne
+; -O0: bl __aarch64_cas16_acq_rel
+; -O0: eor x8, x0, x8
+; -O0: eor x9, x1, x9
+; -O0: orr x8, x8, x9
; -O0: subs x8, x8, #0
;
; -O1-LABEL: atomicrmw_umin_i128_aligned_acq_rel:
@@ -7636,21 +7180,17 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_acq_rel(ptr %ptr, i128 %value
define dso_local i128 @atomicrmw_umin_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_umin_i128_aligned_seq_cst:
; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x12
-; -O0: subs x13, x13, x9
+; -O0: subs x8, x8, x11
+; -O0: subs x12, x12, x9
; -O0: csel w10, w8, w10, eq
-; -O0: ands w13, w10, #0x1
-; -O0: csel x14, x8, x12, ne
+; -O0: ands w12, w10, #0x1
+; -O0: csel x2, x8, x11, ne
; -O0: ands w10, w10, #0x1
-; -O0: csel x15, x8, x9, ne
-; -O0: ldaxp x10, x9, [x11]
-; -O0: cmp x10, x12
-; -O0: cmp x9, x13
-; -O0: stlxp w8, x14, x15, [x11]
-; -O0: stlxp w8, x10, x9, [x11]
-; -O0: eor x8, x10, x8
-; -O0: eor x11, x9, x11
-; -O0: orr x8, x8, x11
+; -O0: csel x3, x8, x9, ne
+; -O0: bl __aarch64_cas16_acq_rel
+; -O0: eor x8, x0, x8
+; -O0: eor x9, x1, x9
+; -O0: orr x8, x8, x9
; -O0: subs x8, x8, #0
;
; -O1-LABEL: atomicrmw_umin_i128_aligned_seq_cst:
@@ -7665,14 +7205,12 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_seq_cst(ptr %ptr, i128 %value
define dso_local i8 @atomicrmw_umin_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
; -O0-LABEL: atomicrmw_umin_i8_unaligned_monotonic:
-; -O0: and w9, w10, #0xff
+; -O0: and w9, w0, #0xff
; -O0: subs w9, w9, w8, uxtb
-; -O0: csel w12, w10, w8, ls
-; -O0: ldaxrb w9, [x11]
-; -O0: cmp w9, w10, uxtb
-; -O0: stlxrb w8, w12, [x11]
-; -O0: and w8, w9, #0xff
-; -O0: subs w8, w8, w10, uxtb
+; -O0: csel w1, w0, w8, ls
+; -O0: bl __aarch64_cas1_relax
+; -O0: and w8, w0, #0xff
+; -O0: subs w8, w8, w9, uxtb
;
; -O1-LABEL: atomicrmw_umin_i8_unaligned_monotonic:
; -O1: and w9, w1, #0xff
@@ -7686,14 +7224,12 @@ define dso_local i8 @atomicrmw_umin_i8_unaligned_monotonic(ptr %ptr, i8 %value)
define dso_local i8 @atomicrmw_umin_i8_unaligned_acquire(ptr %ptr, i8 %value) {
; -O0-LABEL: atomicrmw_umin_i8_unaligned_acquire:
-; -O0: and w9, w10, #0xff
+; -O0: and w9, w0, #0xff
; -O0: subs w9, w9, w8, uxtb
-; -O0: csel w12, w10, w8, ls
-; -O0: ldaxrb w9, [x11]
-; -O0: cmp w9, w10, uxtb
-; -O0: stlxrb w8, w12, [x11]
-; -O0: and w8, w9, #0xff
-; -O0: subs w8, w8, w10, uxtb
+; -O0: csel w1, w0, w8, ls
+; -O0: bl __aarch64_cas1_acq
+; -O0: and w8, w0, #0xff
+; -O0: subs w8, w8, w9, uxtb
;
; -O1-LABEL: atomicrmw_umin_i8_unaligned_acquire:
; -O1: and w9, w1, #0xff
@@ -7707,14 +7243,12 @@ define dso_local i8 @atomicrmw_umin_i8_unaligned_acquire(ptr %ptr, i8 %value) {
define dso_local i8 @atomicrmw_umin_i8_unaligned_release(ptr %ptr, i8 %value) {
; -O0-LABEL: atomicrmw_umin_i8_unaligned_release:
-; -O0: and w9, w10, #0xff
+; -O0: and w9, w0, #0xff
; -O0: subs w9, w9, w8, uxtb
-; -O0: csel w12, w10, w8, ls
-; -O0: ldaxrb w9, [x11]
-; -O0: cmp w9, w10, uxtb
-; -O0: stlxrb w8, w12, [x11]
-; -O0: and w8, w9, #0xff
-; -O0: subs w8, w8, w10, uxtb
+; -O0: csel w1, w0, w8, ls
+; -O0: bl __aarch64_cas1_rel
+; -O0: and w8, w0, #0xff
+; -O0: subs w8, w8, w9, uxtb
;
; -O1-LABEL: atomicrmw_umin_i8_unaligned_release:
; -O1: and w9, w1, #0xff
@@ -7728,14 +7262,12 @@ define dso_local i8 @atomicrmw_umin_i8_unaligned_release(ptr %ptr, i8 %value) {
define dso_local i8 @atomicrmw_umin_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
; -O0-LABEL: atomicrmw_umin_i8_unaligned_acq_rel:
-; -O0: and w9, w10, #0xff
+; -O0: and w9, w0, #0xff
; -O0: subs w9, w9, w8, uxtb
-; -O0: csel w12, w10, w8, ls
-; -O0: ldaxrb w9, [x11]
-; -O0: cmp w9, w10, uxtb
-; -O0: stlxrb w8, w12, [x11]
-; -O0: and w8, w9, #0xff
-; -O0: subs w8, w8, w10, uxtb
+; -O0: csel w1, w0, w8, ls
+; -O0: bl __aarch64_cas1_acq_rel
+; -O0: and w8, w0, #0xff
+; -O0: subs w8, w8, w9, uxtb
;
; -O1-LABEL: atomicrmw_umin_i8_unaligned_acq_rel:
; -O1: and w9, w1, #0xff
@@ -7749,14 +7281,12 @@ define dso_local i8 @atomicrmw_umin_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
define dso_local i8 @atomicrmw_umin_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
; -O0-LABEL: atomicrmw_umin_i8_unaligned_seq_cst:
-; -O0: and w9, w10, #0xff
+; -O0: and w9, w0, #0xff
; -O0: subs w9, w9, w8, uxtb
-; -O0: csel w12, w10, w8, ls
-; -O0: ldaxrb w9, [x11]
-; -O0: cmp w9, w10, uxtb
-; -O0: stlxrb w8, w12, [x11]
-; -O0: and w8, w9, #0xff
-; -O0: subs w8, w8, w10, uxtb
+; -O0: csel w1, w0, w8, ls
+; -O0: bl __aarch64_cas1_acq_rel
+; -O0: and w8, w0, #0xff
+; -O0: subs w8, w8, w9, uxtb
;
; -O1-LABEL: atomicrmw_umin_i8_unaligned_seq_cst:
; -O1: and w9, w1, #0xff
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-cmpxchg-outline_atomics.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-cmpxchg-outline_atomics.ll
index 403e4770e17f9c..86c040cc359359 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-cmpxchg-outline_atomics.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-cmpxchg-outline_atomics.ll
@@ -4,2400 +4,1440 @@
; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mattr=+outline-atomics -O1 | FileCheck %s --check-prefixes=CHECK,-O1
define dso_local i8 @cmpxchg_i8_aligned_monotonic_monotonic(i8 %expected, i8 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i8_aligned_monotonic_monotonic:
-; -O0: ldaxrb w0, [x2]
-; -O0: cmp w0, w9, uxtb
-; -O0: stlxrb w8, w1, [x2]
-;
-; -O1-LABEL: cmpxchg_i8_aligned_monotonic_monotonic:
-; -O1: bl __aarch64_cas1_relax
+; CHECK-LABEL: cmpxchg_i8_aligned_monotonic_monotonic:
+; CHECK: bl __aarch64_cas1_relax
%pair = cmpxchg ptr %ptr, i8 %expected, i8 %new monotonic monotonic, align 1
%r = extractvalue { i8, i1 } %pair, 0
ret i8 %r
}
define dso_local i8 @cmpxchg_i8_aligned_monotonic_monotonic_weak(i8 %expected, i8 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i8_aligned_monotonic_monotonic_weak:
-; -O0: ldaxrb w0, [x2]
-; -O0: cmp w0, w9, uxtb
-; -O0: stlxrb w8, w1, [x2]
-;
-; -O1-LABEL: cmpxchg_i8_aligned_monotonic_monotonic_weak:
-; -O1: bl __aarch64_cas1_relax
+; CHECK-LABEL: cmpxchg_i8_aligned_monotonic_monotonic_weak:
+; CHECK: bl __aarch64_cas1_relax
%pair = cmpxchg weak ptr %ptr, i8 %expected, i8 %new monotonic monotonic, align 1
%r = extractvalue { i8, i1 } %pair, 0
ret i8 %r
}
define dso_local i8 @cmpxchg_i8_aligned_monotonic_acquire(i8 %expected, i8 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i8_aligned_monotonic_acquire:
-; -O0: ldaxrb w0, [x2]
-; -O0: cmp w0, w9, uxtb
-; -O0: stlxrb w8, w1, [x2]
-;
-; -O1-LABEL: cmpxchg_i8_aligned_monotonic_acquire:
-; -O1: bl __aarch64_cas1_acq
+; CHECK-LABEL: cmpxchg_i8_aligned_monotonic_acquire:
+; CHECK: bl __aarch64_cas1_acq
%pair = cmpxchg ptr %ptr, i8 %expected, i8 %new monotonic acquire, align 1
%r = extractvalue { i8, i1 } %pair, 0
ret i8 %r
}
define dso_local i8 @cmpxchg_i8_aligned_monotonic_acquire_weak(i8 %expected, i8 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i8_aligned_monotonic_acquire_weak:
-; -O0: ldaxrb w0, [x2]
-; -O0: cmp w0, w9, uxtb
-; -O0: stlxrb w8, w1, [x2]
-;
-; -O1-LABEL: cmpxchg_i8_aligned_monotonic_acquire_weak:
-; -O1: bl __aarch64_cas1_acq
+; CHECK-LABEL: cmpxchg_i8_aligned_monotonic_acquire_weak:
+; CHECK: bl __aarch64_cas1_acq
%pair = cmpxchg weak ptr %ptr, i8 %expected, i8 %new monotonic acquire, align 1
%r = extractvalue { i8, i1 } %pair, 0
ret i8 %r
}
define dso_local i8 @cmpxchg_i8_aligned_monotonic_seq_cst(i8 %expected, i8 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i8_aligned_monotonic_seq_cst:
-; -O0: ldaxrb w0, [x2]
-; -O0: cmp w0, w9, uxtb
-; -O0: stlxrb w8, w1, [x2]
-;
-; -O1-LABEL: cmpxchg_i8_aligned_monotonic_seq_cst:
-; -O1: bl __aarch64_cas1_acq_rel
+; CHECK-LABEL: cmpxchg_i8_aligned_monotonic_seq_cst:
+; CHECK: bl __aarch64_cas1_acq_rel
%pair = cmpxchg ptr %ptr, i8 %expected, i8 %new monotonic seq_cst, align 1
%r = extractvalue { i8, i1 } %pair, 0
ret i8 %r
}
define dso_local i8 @cmpxchg_i8_aligned_monotonic_seq_cst_weak(i8 %expected, i8 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i8_aligned_monotonic_seq_cst_weak:
-; -O0: ldaxrb w0, [x2]
-; -O0: cmp w0, w9, uxtb
-; -O0: stlxrb w8, w1, [x2]
-;
-; -O1-LABEL: cmpxchg_i8_aligned_monotonic_seq_cst_weak:
-; -O1: bl __aarch64_cas1_acq_rel
+; CHECK-LABEL: cmpxchg_i8_aligned_monotonic_seq_cst_weak:
+; CHECK: bl __aarch64_cas1_acq_rel
%pair = cmpxchg weak ptr %ptr, i8 %expected, i8 %new monotonic seq_cst, align 1
%r = extractvalue { i8, i1 } %pair, 0
ret i8 %r
}
define dso_local i8 @cmpxchg_i8_aligned_acquire_monotonic(i8 %expected, i8 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i8_aligned_acquire_monotonic:
-; -O0: ldaxrb w0, [x2]
-; -O0: cmp w0, w9, uxtb
-; -O0: stlxrb w8, w1, [x2]
-;
-; -O1-LABEL: cmpxchg_i8_aligned_acquire_monotonic:
-; -O1: bl __aarch64_cas1_acq
+; CHECK-LABEL: cmpxchg_i8_aligned_acquire_monotonic:
+; CHECK: bl __aarch64_cas1_acq
%pair = cmpxchg ptr %ptr, i8 %expected, i8 %new acquire monotonic, align 1
%r = extractvalue { i8, i1 } %pair, 0
ret i8 %r
}
define dso_local i8 @cmpxchg_i8_aligned_acquire_monotonic_weak(i8 %expected, i8 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i8_aligned_acquire_monotonic_weak:
-; -O0: ldaxrb w0, [x2]
-; -O0: cmp w0, w9, uxtb
-; -O0: stlxrb w8, w1, [x2]
-;
-; -O1-LABEL: cmpxchg_i8_aligned_acquire_monotonic_weak:
-; -O1: bl __aarch64_cas1_acq
+; CHECK-LABEL: cmpxchg_i8_aligned_acquire_monotonic_weak:
+; CHECK: bl __aarch64_cas1_acq
%pair = cmpxchg weak ptr %ptr, i8 %expected, i8 %new acquire monotonic, align 1
%r = extractvalue { i8, i1 } %pair, 0
ret i8 %r
}
define dso_local i8 @cmpxchg_i8_aligned_acquire_acquire(i8 %expected, i8 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i8_aligned_acquire_acquire:
-; -O0: ldaxrb w0, [x2]
-; -O0: cmp w0, w9, uxtb
-; -O0: stlxrb w8, w1, [x2]
-;
-; -O1-LABEL: cmpxchg_i8_aligned_acquire_acquire:
-; -O1: bl __aarch64_cas1_acq
+; CHECK-LABEL: cmpxchg_i8_aligned_acquire_acquire:
+; CHECK: bl __aarch64_cas1_acq
%pair = cmpxchg ptr %ptr, i8 %expected, i8 %new acquire acquire, align 1
%r = extractvalue { i8, i1 } %pair, 0
ret i8 %r
}
define dso_local i8 @cmpxchg_i8_aligned_acquire_acquire_weak(i8 %expected, i8 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i8_aligned_acquire_acquire_weak:
-; -O0: ldaxrb w0, [x2]
-; -O0: cmp w0, w9, uxtb
-; -O0: stlxrb w8, w1, [x2]
-;
-; -O1-LABEL: cmpxchg_i8_aligned_acquire_acquire_weak:
-; -O1: bl __aarch64_cas1_acq
+; CHECK-LABEL: cmpxchg_i8_aligned_acquire_acquire_weak:
+; CHECK: bl __aarch64_cas1_acq
%pair = cmpxchg weak ptr %ptr, i8 %expected, i8 %new acquire acquire, align 1
%r = extractvalue { i8, i1 } %pair, 0
ret i8 %r
}
define dso_local i8 @cmpxchg_i8_aligned_acquire_seq_cst(i8 %expected, i8 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i8_aligned_acquire_seq_cst:
-; -O0: ldaxrb w0, [x2]
-; -O0: cmp w0, w9, uxtb
-; -O0: stlxrb w8, w1, [x2]
-;
-; -O1-LABEL: cmpxchg_i8_aligned_acquire_seq_cst:
-; -O1: bl __aarch64_cas1_acq_rel
+; CHECK-LABEL: cmpxchg_i8_aligned_acquire_seq_cst:
+; CHECK: bl __aarch64_cas1_acq_rel
%pair = cmpxchg ptr %ptr, i8 %expected, i8 %new acquire seq_cst, align 1
%r = extractvalue { i8, i1 } %pair, 0
ret i8 %r
}
define dso_local i8 @cmpxchg_i8_aligned_acquire_seq_cst_weak(i8 %expected, i8 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i8_aligned_acquire_seq_cst_weak:
-; -O0: ldaxrb w0, [x2]
-; -O0: cmp w0, w9, uxtb
-; -O0: stlxrb w8, w1, [x2]
-;
-; -O1-LABEL: cmpxchg_i8_aligned_acquire_seq_cst_weak:
-; -O1: bl __aarch64_cas1_acq_rel
+; CHECK-LABEL: cmpxchg_i8_aligned_acquire_seq_cst_weak:
+; CHECK: bl __aarch64_cas1_acq_rel
%pair = cmpxchg weak ptr %ptr, i8 %expected, i8 %new acquire seq_cst, align 1
%r = extractvalue { i8, i1 } %pair, 0
ret i8 %r
}
define dso_local i8 @cmpxchg_i8_aligned_release_monotonic(i8 %expected, i8 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i8_aligned_release_monotonic:
-; -O0: ldaxrb w0, [x2]
-; -O0: cmp w0, w9, uxtb
-; -O0: stlxrb w8, w1, [x2]
-;
-; -O1-LABEL: cmpxchg_i8_aligned_release_monotonic:
-; -O1: bl __aarch64_cas1_rel
+; CHECK-LABEL: cmpxchg_i8_aligned_release_monotonic:
+; CHECK: bl __aarch64_cas1_rel
%pair = cmpxchg ptr %ptr, i8 %expected, i8 %new release monotonic, align 1
%r = extractvalue { i8, i1 } %pair, 0
ret i8 %r
}
define dso_local i8 @cmpxchg_i8_aligned_release_monotonic_weak(i8 %expected, i8 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i8_aligned_release_monotonic_weak:
-; -O0: ldaxrb w0, [x2]
-; -O0: cmp w0, w9, uxtb
-; -O0: stlxrb w8, w1, [x2]
-;
-; -O1-LABEL: cmpxchg_i8_aligned_release_monotonic_weak:
-; -O1: bl __aarch64_cas1_rel
+; CHECK-LABEL: cmpxchg_i8_aligned_release_monotonic_weak:
+; CHECK: bl __aarch64_cas1_rel
%pair = cmpxchg weak ptr %ptr, i8 %expected, i8 %new release monotonic, align 1
%r = extractvalue { i8, i1 } %pair, 0
ret i8 %r
}
define dso_local i8 @cmpxchg_i8_aligned_release_acquire(i8 %expected, i8 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i8_aligned_release_acquire:
-; -O0: ldaxrb w0, [x2]
-; -O0: cmp w0, w9, uxtb
-; -O0: stlxrb w8, w1, [x2]
-;
-; -O1-LABEL: cmpxchg_i8_aligned_release_acquire:
-; -O1: bl __aarch64_cas1_acq_rel
+; CHECK-LABEL: cmpxchg_i8_aligned_release_acquire:
+; CHECK: bl __aarch64_cas1_acq_rel
%pair = cmpxchg ptr %ptr, i8 %expected, i8 %new release acquire, align 1
%r = extractvalue { i8, i1 } %pair, 0
ret i8 %r
}
define dso_local i8 @cmpxchg_i8_aligned_release_acquire_weak(i8 %expected, i8 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i8_aligned_release_acquire_weak:
-; -O0: ldaxrb w0, [x2]
-; -O0: cmp w0, w9, uxtb
-; -O0: stlxrb w8, w1, [x2]
-;
-; -O1-LABEL: cmpxchg_i8_aligned_release_acquire_weak:
-; -O1: bl __aarch64_cas1_acq_rel
+; CHECK-LABEL: cmpxchg_i8_aligned_release_acquire_weak:
+; CHECK: bl __aarch64_cas1_acq_rel
%pair = cmpxchg weak ptr %ptr, i8 %expected, i8 %new release acquire, align 1
%r = extractvalue { i8, i1 } %pair, 0
ret i8 %r
}
define dso_local i8 @cmpxchg_i8_aligned_release_seq_cst(i8 %expected, i8 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i8_aligned_release_seq_cst:
-; -O0: ldaxrb w0, [x2]
-; -O0: cmp w0, w9, uxtb
-; -O0: stlxrb w8, w1, [x2]
-;
-; -O1-LABEL: cmpxchg_i8_aligned_release_seq_cst:
-; -O1: bl __aarch64_cas1_acq_rel
+; CHECK-LABEL: cmpxchg_i8_aligned_release_seq_cst:
+; CHECK: bl __aarch64_cas1_acq_rel
%pair = cmpxchg ptr %ptr, i8 %expected, i8 %new release seq_cst, align 1
%r = extractvalue { i8, i1 } %pair, 0
ret i8 %r
}
define dso_local i8 @cmpxchg_i8_aligned_release_seq_cst_weak(i8 %expected, i8 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i8_aligned_release_seq_cst_weak:
-; -O0: ldaxrb w0, [x2]
-; -O0: cmp w0, w9, uxtb
-; -O0: stlxrb w8, w1, [x2]
-;
-; -O1-LABEL: cmpxchg_i8_aligned_release_seq_cst_weak:
-; -O1: bl __aarch64_cas1_acq_rel
+; CHECK-LABEL: cmpxchg_i8_aligned_release_seq_cst_weak:
+; CHECK: bl __aarch64_cas1_acq_rel
%pair = cmpxchg weak ptr %ptr, i8 %expected, i8 %new release seq_cst, align 1
%r = extractvalue { i8, i1 } %pair, 0
ret i8 %r
}
define dso_local i8 @cmpxchg_i8_aligned_acq_rel_monotonic(i8 %expected, i8 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i8_aligned_acq_rel_monotonic:
-; -O0: ldaxrb w0, [x2]
-; -O0: cmp w0, w9, uxtb
-; -O0: stlxrb w8, w1, [x2]
-;
-; -O1-LABEL: cmpxchg_i8_aligned_acq_rel_monotonic:
-; -O1: bl __aarch64_cas1_acq_rel
+; CHECK-LABEL: cmpxchg_i8_aligned_acq_rel_monotonic:
+; CHECK: bl __aarch64_cas1_acq_rel
%pair = cmpxchg ptr %ptr, i8 %expected, i8 %new acq_rel monotonic, align 1
%r = extractvalue { i8, i1 } %pair, 0
ret i8 %r
}
define dso_local i8 @cmpxchg_i8_aligned_acq_rel_monotonic_weak(i8 %expected, i8 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i8_aligned_acq_rel_monotonic_weak:
-; -O0: ldaxrb w0, [x2]
-; -O0: cmp w0, w9, uxtb
-; -O0: stlxrb w8, w1, [x2]
-;
-; -O1-LABEL: cmpxchg_i8_aligned_acq_rel_monotonic_weak:
-; -O1: bl __aarch64_cas1_acq_rel
+; CHECK-LABEL: cmpxchg_i8_aligned_acq_rel_monotonic_weak:
+; CHECK: bl __aarch64_cas1_acq_rel
%pair = cmpxchg weak ptr %ptr, i8 %expected, i8 %new acq_rel monotonic, align 1
%r = extractvalue { i8, i1 } %pair, 0
ret i8 %r
}
define dso_local i8 @cmpxchg_i8_aligned_acq_rel_acquire(i8 %expected, i8 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i8_aligned_acq_rel_acquire:
-; -O0: ldaxrb w0, [x2]
-; -O0: cmp w0, w9, uxtb
-; -O0: stlxrb w8, w1, [x2]
-;
-; -O1-LABEL: cmpxchg_i8_aligned_acq_rel_acquire:
-; -O1: bl __aarch64_cas1_acq_rel
+; CHECK-LABEL: cmpxchg_i8_aligned_acq_rel_acquire:
+; CHECK: bl __aarch64_cas1_acq_rel
%pair = cmpxchg ptr %ptr, i8 %expected, i8 %new acq_rel acquire, align 1
%r = extractvalue { i8, i1 } %pair, 0
ret i8 %r
}
define dso_local i8 @cmpxchg_i8_aligned_acq_rel_acquire_weak(i8 %expected, i8 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i8_aligned_acq_rel_acquire_weak:
-; -O0: ldaxrb w0, [x2]
-; -O0: cmp w0, w9, uxtb
-; -O0: stlxrb w8, w1, [x2]
-;
-; -O1-LABEL: cmpxchg_i8_aligned_acq_rel_acquire_weak:
-; -O1: bl __aarch64_cas1_acq_rel
+; CHECK-LABEL: cmpxchg_i8_aligned_acq_rel_acquire_weak:
+; CHECK: bl __aarch64_cas1_acq_rel
%pair = cmpxchg weak ptr %ptr, i8 %expected, i8 %new acq_rel acquire, align 1
%r = extractvalue { i8, i1 } %pair, 0
ret i8 %r
}
define dso_local i8 @cmpxchg_i8_aligned_acq_rel_seq_cst(i8 %expected, i8 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i8_aligned_acq_rel_seq_cst:
-; -O0: ldaxrb w0, [x2]
-; -O0: cmp w0, w9, uxtb
-; -O0: stlxrb w8, w1, [x2]
-;
-; -O1-LABEL: cmpxchg_i8_aligned_acq_rel_seq_cst:
-; -O1: bl __aarch64_cas1_acq_rel
+; CHECK-LABEL: cmpxchg_i8_aligned_acq_rel_seq_cst:
+; CHECK: bl __aarch64_cas1_acq_rel
%pair = cmpxchg ptr %ptr, i8 %expected, i8 %new acq_rel seq_cst, align 1
%r = extractvalue { i8, i1 } %pair, 0
ret i8 %r
}
define dso_local i8 @cmpxchg_i8_aligned_acq_rel_seq_cst_weak(i8 %expected, i8 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i8_aligned_acq_rel_seq_cst_weak:
-; -O0: ldaxrb w0, [x2]
-; -O0: cmp w0, w9, uxtb
-; -O0: stlxrb w8, w1, [x2]
-;
-; -O1-LABEL: cmpxchg_i8_aligned_acq_rel_seq_cst_weak:
-; -O1: bl __aarch64_cas1_acq_rel
+; CHECK-LABEL: cmpxchg_i8_aligned_acq_rel_seq_cst_weak:
+; CHECK: bl __aarch64_cas1_acq_rel
%pair = cmpxchg weak ptr %ptr, i8 %expected, i8 %new acq_rel seq_cst, align 1
%r = extractvalue { i8, i1 } %pair, 0
ret i8 %r
}
define dso_local i8 @cmpxchg_i8_aligned_seq_cst_monotonic(i8 %expected, i8 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i8_aligned_seq_cst_monotonic:
-; -O0: ldaxrb w0, [x2]
-; -O0: cmp w0, w9, uxtb
-; -O0: stlxrb w8, w1, [x2]
-;
-; -O1-LABEL: cmpxchg_i8_aligned_seq_cst_monotonic:
-; -O1: bl __aarch64_cas1_acq_rel
+; CHECK-LABEL: cmpxchg_i8_aligned_seq_cst_monotonic:
+; CHECK: bl __aarch64_cas1_acq_rel
%pair = cmpxchg ptr %ptr, i8 %expected, i8 %new seq_cst monotonic, align 1
%r = extractvalue { i8, i1 } %pair, 0
ret i8 %r
}
define dso_local i8 @cmpxchg_i8_aligned_seq_cst_monotonic_weak(i8 %expected, i8 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i8_aligned_seq_cst_monotonic_weak:
-; -O0: ldaxrb w0, [x2]
-; -O0: cmp w0, w9, uxtb
-; -O0: stlxrb w8, w1, [x2]
-;
-; -O1-LABEL: cmpxchg_i8_aligned_seq_cst_monotonic_weak:
-; -O1: bl __aarch64_cas1_acq_rel
+; CHECK-LABEL: cmpxchg_i8_aligned_seq_cst_monotonic_weak:
+; CHECK: bl __aarch64_cas1_acq_rel
%pair = cmpxchg weak ptr %ptr, i8 %expected, i8 %new seq_cst monotonic, align 1
%r = extractvalue { i8, i1 } %pair, 0
ret i8 %r
}
define dso_local i8 @cmpxchg_i8_aligned_seq_cst_acquire(i8 %expected, i8 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i8_aligned_seq_cst_acquire:
-; -O0: ldaxrb w0, [x2]
-; -O0: cmp w0, w9, uxtb
-; -O0: stlxrb w8, w1, [x2]
-;
-; -O1-LABEL: cmpxchg_i8_aligned_seq_cst_acquire:
-; -O1: bl __aarch64_cas1_acq_rel
+; CHECK-LABEL: cmpxchg_i8_aligned_seq_cst_acquire:
+; CHECK: bl __aarch64_cas1_acq_rel
%pair = cmpxchg ptr %ptr, i8 %expected, i8 %new seq_cst acquire, align 1
%r = extractvalue { i8, i1 } %pair, 0
ret i8 %r
}
define dso_local i8 @cmpxchg_i8_aligned_seq_cst_acquire_weak(i8 %expected, i8 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i8_aligned_seq_cst_acquire_weak:
-; -O0: ldaxrb w0, [x2]
-; -O0: cmp w0, w9, uxtb
-; -O0: stlxrb w8, w1, [x2]
-;
-; -O1-LABEL: cmpxchg_i8_aligned_seq_cst_acquire_weak:
-; -O1: bl __aarch64_cas1_acq_rel
+; CHECK-LABEL: cmpxchg_i8_aligned_seq_cst_acquire_weak:
+; CHECK: bl __aarch64_cas1_acq_rel
%pair = cmpxchg weak ptr %ptr, i8 %expected, i8 %new seq_cst acquire, align 1
%r = extractvalue { i8, i1 } %pair, 0
ret i8 %r
}
define dso_local i8 @cmpxchg_i8_aligned_seq_cst_seq_cst(i8 %expected, i8 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i8_aligned_seq_cst_seq_cst:
-; -O0: ldaxrb w0, [x2]
-; -O0: cmp w0, w9, uxtb
-; -O0: stlxrb w8, w1, [x2]
-;
-; -O1-LABEL: cmpxchg_i8_aligned_seq_cst_seq_cst:
-; -O1: bl __aarch64_cas1_acq_rel
+; CHECK-LABEL: cmpxchg_i8_aligned_seq_cst_seq_cst:
+; CHECK: bl __aarch64_cas1_acq_rel
%pair = cmpxchg ptr %ptr, i8 %expected, i8 %new seq_cst seq_cst, align 1
%r = extractvalue { i8, i1 } %pair, 0
ret i8 %r
}
define dso_local i8 @cmpxchg_i8_aligned_seq_cst_seq_cst_weak(i8 %expected, i8 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i8_aligned_seq_cst_seq_cst_weak:
-; -O0: ldaxrb w0, [x2]
-; -O0: cmp w0, w9, uxtb
-; -O0: stlxrb w8, w1, [x2]
-;
-; -O1-LABEL: cmpxchg_i8_aligned_seq_cst_seq_cst_weak:
-; -O1: bl __aarch64_cas1_acq_rel
+; CHECK-LABEL: cmpxchg_i8_aligned_seq_cst_seq_cst_weak:
+; CHECK: bl __aarch64_cas1_acq_rel
%pair = cmpxchg weak ptr %ptr, i8 %expected, i8 %new seq_cst seq_cst, align 1
%r = extractvalue { i8, i1 } %pair, 0
ret i8 %r
}
define dso_local i16 @cmpxchg_i16_aligned_monotonic_monotonic(i16 %expected, i16 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i16_aligned_monotonic_monotonic:
-; -O0: ldaxrh w0, [x2]
-; -O0: cmp w0, w9, uxth
-; -O0: stlxrh w8, w1, [x2]
-;
-; -O1-LABEL: cmpxchg_i16_aligned_monotonic_monotonic:
-; -O1: bl __aarch64_cas2_relax
+; CHECK-LABEL: cmpxchg_i16_aligned_monotonic_monotonic:
+; CHECK: bl __aarch64_cas2_relax
%pair = cmpxchg ptr %ptr, i16 %expected, i16 %new monotonic monotonic, align 2
%r = extractvalue { i16, i1 } %pair, 0
ret i16 %r
}
define dso_local i16 @cmpxchg_i16_aligned_monotonic_monotonic_weak(i16 %expected, i16 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i16_aligned_monotonic_monotonic_weak:
-; -O0: ldaxrh w0, [x2]
-; -O0: cmp w0, w9, uxth
-; -O0: stlxrh w8, w1, [x2]
-;
-; -O1-LABEL: cmpxchg_i16_aligned_monotonic_monotonic_weak:
-; -O1: bl __aarch64_cas2_relax
+; CHECK-LABEL: cmpxchg_i16_aligned_monotonic_monotonic_weak:
+; CHECK: bl __aarch64_cas2_relax
%pair = cmpxchg weak ptr %ptr, i16 %expected, i16 %new monotonic monotonic, align 2
%r = extractvalue { i16, i1 } %pair, 0
ret i16 %r
}
define dso_local i16 @cmpxchg_i16_aligned_monotonic_acquire(i16 %expected, i16 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i16_aligned_monotonic_acquire:
-; -O0: ldaxrh w0, [x2]
-; -O0: cmp w0, w9, uxth
-; -O0: stlxrh w8, w1, [x2]
-;
-; -O1-LABEL: cmpxchg_i16_aligned_monotonic_acquire:
-; -O1: bl __aarch64_cas2_acq
+; CHECK-LABEL: cmpxchg_i16_aligned_monotonic_acquire:
+; CHECK: bl __aarch64_cas2_acq
%pair = cmpxchg ptr %ptr, i16 %expected, i16 %new monotonic acquire, align 2
%r = extractvalue { i16, i1 } %pair, 0
ret i16 %r
}
define dso_local i16 @cmpxchg_i16_aligned_monotonic_acquire_weak(i16 %expected, i16 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i16_aligned_monotonic_acquire_weak:
-; -O0: ldaxrh w0, [x2]
-; -O0: cmp w0, w9, uxth
-; -O0: stlxrh w8, w1, [x2]
-;
-; -O1-LABEL: cmpxchg_i16_aligned_monotonic_acquire_weak:
-; -O1: bl __aarch64_cas2_acq
+; CHECK-LABEL: cmpxchg_i16_aligned_monotonic_acquire_weak:
+; CHECK: bl __aarch64_cas2_acq
%pair = cmpxchg weak ptr %ptr, i16 %expected, i16 %new monotonic acquire, align 2
%r = extractvalue { i16, i1 } %pair, 0
ret i16 %r
}
define dso_local i16 @cmpxchg_i16_aligned_monotonic_seq_cst(i16 %expected, i16 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i16_aligned_monotonic_seq_cst:
-; -O0: ldaxrh w0, [x2]
-; -O0: cmp w0, w9, uxth
-; -O0: stlxrh w8, w1, [x2]
-;
-; -O1-LABEL: cmpxchg_i16_aligned_monotonic_seq_cst:
-; -O1: bl __aarch64_cas2_acq_rel
+; CHECK-LABEL: cmpxchg_i16_aligned_monotonic_seq_cst:
+; CHECK: bl __aarch64_cas2_acq_rel
%pair = cmpxchg ptr %ptr, i16 %expected, i16 %new monotonic seq_cst, align 2
%r = extractvalue { i16, i1 } %pair, 0
ret i16 %r
}
define dso_local i16 @cmpxchg_i16_aligned_monotonic_seq_cst_weak(i16 %expected, i16 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i16_aligned_monotonic_seq_cst_weak:
-; -O0: ldaxrh w0, [x2]
-; -O0: cmp w0, w9, uxth
-; -O0: stlxrh w8, w1, [x2]
-;
-; -O1-LABEL: cmpxchg_i16_aligned_monotonic_seq_cst_weak:
-; -O1: bl __aarch64_cas2_acq_rel
+; CHECK-LABEL: cmpxchg_i16_aligned_monotonic_seq_cst_weak:
+; CHECK: bl __aarch64_cas2_acq_rel
%pair = cmpxchg weak ptr %ptr, i16 %expected, i16 %new monotonic seq_cst, align 2
%r = extractvalue { i16, i1 } %pair, 0
ret i16 %r
}
define dso_local i16 @cmpxchg_i16_aligned_acquire_monotonic(i16 %expected, i16 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i16_aligned_acquire_monotonic:
-; -O0: ldaxrh w0, [x2]
-; -O0: cmp w0, w9, uxth
-; -O0: stlxrh w8, w1, [x2]
-;
-; -O1-LABEL: cmpxchg_i16_aligned_acquire_monotonic:
-; -O1: bl __aarch64_cas2_acq
+; CHECK-LABEL: cmpxchg_i16_aligned_acquire_monotonic:
+; CHECK: bl __aarch64_cas2_acq
%pair = cmpxchg ptr %ptr, i16 %expected, i16 %new acquire monotonic, align 2
%r = extractvalue { i16, i1 } %pair, 0
ret i16 %r
}
define dso_local i16 @cmpxchg_i16_aligned_acquire_monotonic_weak(i16 %expected, i16 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i16_aligned_acquire_monotonic_weak:
-; -O0: ldaxrh w0, [x2]
-; -O0: cmp w0, w9, uxth
-; -O0: stlxrh w8, w1, [x2]
-;
-; -O1-LABEL: cmpxchg_i16_aligned_acquire_monotonic_weak:
-; -O1: bl __aarch64_cas2_acq
+; CHECK-LABEL: cmpxchg_i16_aligned_acquire_monotonic_weak:
+; CHECK: bl __aarch64_cas2_acq
%pair = cmpxchg weak ptr %ptr, i16 %expected, i16 %new acquire monotonic, align 2
%r = extractvalue { i16, i1 } %pair, 0
ret i16 %r
}
define dso_local i16 @cmpxchg_i16_aligned_acquire_acquire(i16 %expected, i16 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i16_aligned_acquire_acquire:
-; -O0: ldaxrh w0, [x2]
-; -O0: cmp w0, w9, uxth
-; -O0: stlxrh w8, w1, [x2]
-;
-; -O1-LABEL: cmpxchg_i16_aligned_acquire_acquire:
-; -O1: bl __aarch64_cas2_acq
+; CHECK-LABEL: cmpxchg_i16_aligned_acquire_acquire:
+; CHECK: bl __aarch64_cas2_acq
%pair = cmpxchg ptr %ptr, i16 %expected, i16 %new acquire acquire, align 2
%r = extractvalue { i16, i1 } %pair, 0
ret i16 %r
}
define dso_local i16 @cmpxchg_i16_aligned_acquire_acquire_weak(i16 %expected, i16 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i16_aligned_acquire_acquire_weak:
-; -O0: ldaxrh w0, [x2]
-; -O0: cmp w0, w9, uxth
-; -O0: stlxrh w8, w1, [x2]
-;
-; -O1-LABEL: cmpxchg_i16_aligned_acquire_acquire_weak:
-; -O1: bl __aarch64_cas2_acq
+; CHECK-LABEL: cmpxchg_i16_aligned_acquire_acquire_weak:
+; CHECK: bl __aarch64_cas2_acq
%pair = cmpxchg weak ptr %ptr, i16 %expected, i16 %new acquire acquire, align 2
%r = extractvalue { i16, i1 } %pair, 0
ret i16 %r
}
define dso_local i16 @cmpxchg_i16_aligned_acquire_seq_cst(i16 %expected, i16 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i16_aligned_acquire_seq_cst:
-; -O0: ldaxrh w0, [x2]
-; -O0: cmp w0, w9, uxth
-; -O0: stlxrh w8, w1, [x2]
-;
-; -O1-LABEL: cmpxchg_i16_aligned_acquire_seq_cst:
-; -O1: bl __aarch64_cas2_acq_rel
+; CHECK-LABEL: cmpxchg_i16_aligned_acquire_seq_cst:
+; CHECK: bl __aarch64_cas2_acq_rel
%pair = cmpxchg ptr %ptr, i16 %expected, i16 %new acquire seq_cst, align 2
%r = extractvalue { i16, i1 } %pair, 0
ret i16 %r
}
define dso_local i16 @cmpxchg_i16_aligned_acquire_seq_cst_weak(i16 %expected, i16 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i16_aligned_acquire_seq_cst_weak:
-; -O0: ldaxrh w0, [x2]
-; -O0: cmp w0, w9, uxth
-; -O0: stlxrh w8, w1, [x2]
-;
-; -O1-LABEL: cmpxchg_i16_aligned_acquire_seq_cst_weak:
-; -O1: bl __aarch64_cas2_acq_rel
+; CHECK-LABEL: cmpxchg_i16_aligned_acquire_seq_cst_weak:
+; CHECK: bl __aarch64_cas2_acq_rel
%pair = cmpxchg weak ptr %ptr, i16 %expected, i16 %new acquire seq_cst, align 2
%r = extractvalue { i16, i1 } %pair, 0
ret i16 %r
}
define dso_local i16 @cmpxchg_i16_aligned_release_monotonic(i16 %expected, i16 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i16_aligned_release_monotonic:
-; -O0: ldaxrh w0, [x2]
-; -O0: cmp w0, w9, uxth
-; -O0: stlxrh w8, w1, [x2]
-;
-; -O1-LABEL: cmpxchg_i16_aligned_release_monotonic:
-; -O1: bl __aarch64_cas2_rel
+; CHECK-LABEL: cmpxchg_i16_aligned_release_monotonic:
+; CHECK: bl __aarch64_cas2_rel
%pair = cmpxchg ptr %ptr, i16 %expected, i16 %new release monotonic, align 2
%r = extractvalue { i16, i1 } %pair, 0
ret i16 %r
}
define dso_local i16 @cmpxchg_i16_aligned_release_monotonic_weak(i16 %expected, i16 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i16_aligned_release_monotonic_weak:
-; -O0: ldaxrh w0, [x2]
-; -O0: cmp w0, w9, uxth
-; -O0: stlxrh w8, w1, [x2]
-;
-; -O1-LABEL: cmpxchg_i16_aligned_release_monotonic_weak:
-; -O1: bl __aarch64_cas2_rel
+; CHECK-LABEL: cmpxchg_i16_aligned_release_monotonic_weak:
+; CHECK: bl __aarch64_cas2_rel
%pair = cmpxchg weak ptr %ptr, i16 %expected, i16 %new release monotonic, align 2
%r = extractvalue { i16, i1 } %pair, 0
ret i16 %r
}
define dso_local i16 @cmpxchg_i16_aligned_release_acquire(i16 %expected, i16 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i16_aligned_release_acquire:
-; -O0: ldaxrh w0, [x2]
-; -O0: cmp w0, w9, uxth
-; -O0: stlxrh w8, w1, [x2]
-;
-; -O1-LABEL: cmpxchg_i16_aligned_release_acquire:
-; -O1: bl __aarch64_cas2_acq_rel
+; CHECK-LABEL: cmpxchg_i16_aligned_release_acquire:
+; CHECK: bl __aarch64_cas2_acq_rel
%pair = cmpxchg ptr %ptr, i16 %expected, i16 %new release acquire, align 2
%r = extractvalue { i16, i1 } %pair, 0
ret i16 %r
}
define dso_local i16 @cmpxchg_i16_aligned_release_acquire_weak(i16 %expected, i16 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i16_aligned_release_acquire_weak:
-; -O0: ldaxrh w0, [x2]
-; -O0: cmp w0, w9, uxth
-; -O0: stlxrh w8, w1, [x2]
-;
-; -O1-LABEL: cmpxchg_i16_aligned_release_acquire_weak:
-; -O1: bl __aarch64_cas2_acq_rel
+; CHECK-LABEL: cmpxchg_i16_aligned_release_acquire_weak:
+; CHECK: bl __aarch64_cas2_acq_rel
%pair = cmpxchg weak ptr %ptr, i16 %expected, i16 %new release acquire, align 2
%r = extractvalue { i16, i1 } %pair, 0
ret i16 %r
}
define dso_local i16 @cmpxchg_i16_aligned_release_seq_cst(i16 %expected, i16 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i16_aligned_release_seq_cst:
-; -O0: ldaxrh w0, [x2]
-; -O0: cmp w0, w9, uxth
-; -O0: stlxrh w8, w1, [x2]
-;
-; -O1-LABEL: cmpxchg_i16_aligned_release_seq_cst:
-; -O1: bl __aarch64_cas2_acq_rel
+; CHECK-LABEL: cmpxchg_i16_aligned_release_seq_cst:
+; CHECK: bl __aarch64_cas2_acq_rel
%pair = cmpxchg ptr %ptr, i16 %expected, i16 %new release seq_cst, align 2
%r = extractvalue { i16, i1 } %pair, 0
ret i16 %r
}
define dso_local i16 @cmpxchg_i16_aligned_release_seq_cst_weak(i16 %expected, i16 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i16_aligned_release_seq_cst_weak:
-; -O0: ldaxrh w0, [x2]
-; -O0: cmp w0, w9, uxth
-; -O0: stlxrh w8, w1, [x2]
-;
-; -O1-LABEL: cmpxchg_i16_aligned_release_seq_cst_weak:
-; -O1: bl __aarch64_cas2_acq_rel
+; CHECK-LABEL: cmpxchg_i16_aligned_release_seq_cst_weak:
+; CHECK: bl __aarch64_cas2_acq_rel
%pair = cmpxchg weak ptr %ptr, i16 %expected, i16 %new release seq_cst, align 2
%r = extractvalue { i16, i1 } %pair, 0
ret i16 %r
}
define dso_local i16 @cmpxchg_i16_aligned_acq_rel_monotonic(i16 %expected, i16 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i16_aligned_acq_rel_monotonic:
-; -O0: ldaxrh w0, [x2]
-; -O0: cmp w0, w9, uxth
-; -O0: stlxrh w8, w1, [x2]
-;
-; -O1-LABEL: cmpxchg_i16_aligned_acq_rel_monotonic:
-; -O1: bl __aarch64_cas2_acq_rel
+; CHECK-LABEL: cmpxchg_i16_aligned_acq_rel_monotonic:
+; CHECK: bl __aarch64_cas2_acq_rel
%pair = cmpxchg ptr %ptr, i16 %expected, i16 %new acq_rel monotonic, align 2
%r = extractvalue { i16, i1 } %pair, 0
ret i16 %r
}
define dso_local i16 @cmpxchg_i16_aligned_acq_rel_monotonic_weak(i16 %expected, i16 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i16_aligned_acq_rel_monotonic_weak:
-; -O0: ldaxrh w0, [x2]
-; -O0: cmp w0, w9, uxth
-; -O0: stlxrh w8, w1, [x2]
-;
-; -O1-LABEL: cmpxchg_i16_aligned_acq_rel_monotonic_weak:
-; -O1: bl __aarch64_cas2_acq_rel
+; CHECK-LABEL: cmpxchg_i16_aligned_acq_rel_monotonic_weak:
+; CHECK: bl __aarch64_cas2_acq_rel
%pair = cmpxchg weak ptr %ptr, i16 %expected, i16 %new acq_rel monotonic, align 2
%r = extractvalue { i16, i1 } %pair, 0
ret i16 %r
}
define dso_local i16 @cmpxchg_i16_aligned_acq_rel_acquire(i16 %expected, i16 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i16_aligned_acq_rel_acquire:
-; -O0: ldaxrh w0, [x2]
-; -O0: cmp w0, w9, uxth
-; -O0: stlxrh w8, w1, [x2]
-;
-; -O1-LABEL: cmpxchg_i16_aligned_acq_rel_acquire:
-; -O1: bl __aarch64_cas2_acq_rel
+; CHECK-LABEL: cmpxchg_i16_aligned_acq_rel_acquire:
+; CHECK: bl __aarch64_cas2_acq_rel
%pair = cmpxchg ptr %ptr, i16 %expected, i16 %new acq_rel acquire, align 2
%r = extractvalue { i16, i1 } %pair, 0
ret i16 %r
}
define dso_local i16 @cmpxchg_i16_aligned_acq_rel_acquire_weak(i16 %expected, i16 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i16_aligned_acq_rel_acquire_weak:
-; -O0: ldaxrh w0, [x2]
-; -O0: cmp w0, w9, uxth
-; -O0: stlxrh w8, w1, [x2]
-;
-; -O1-LABEL: cmpxchg_i16_aligned_acq_rel_acquire_weak:
-; -O1: bl __aarch64_cas2_acq_rel
+; CHECK-LABEL: cmpxchg_i16_aligned_acq_rel_acquire_weak:
+; CHECK: bl __aarch64_cas2_acq_rel
%pair = cmpxchg weak ptr %ptr, i16 %expected, i16 %new acq_rel acquire, align 2
%r = extractvalue { i16, i1 } %pair, 0
ret i16 %r
}
define dso_local i16 @cmpxchg_i16_aligned_acq_rel_seq_cst(i16 %expected, i16 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i16_aligned_acq_rel_seq_cst:
-; -O0: ldaxrh w0, [x2]
-; -O0: cmp w0, w9, uxth
-; -O0: stlxrh w8, w1, [x2]
-;
-; -O1-LABEL: cmpxchg_i16_aligned_acq_rel_seq_cst:
-; -O1: bl __aarch64_cas2_acq_rel
+; CHECK-LABEL: cmpxchg_i16_aligned_acq_rel_seq_cst:
+; CHECK: bl __aarch64_cas2_acq_rel
%pair = cmpxchg ptr %ptr, i16 %expected, i16 %new acq_rel seq_cst, align 2
%r = extractvalue { i16, i1 } %pair, 0
ret i16 %r
}
define dso_local i16 @cmpxchg_i16_aligned_acq_rel_seq_cst_weak(i16 %expected, i16 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i16_aligned_acq_rel_seq_cst_weak:
-; -O0: ldaxrh w0, [x2]
-; -O0: cmp w0, w9, uxth
-; -O0: stlxrh w8, w1, [x2]
-;
-; -O1-LABEL: cmpxchg_i16_aligned_acq_rel_seq_cst_weak:
-; -O1: bl __aarch64_cas2_acq_rel
+; CHECK-LABEL: cmpxchg_i16_aligned_acq_rel_seq_cst_weak:
+; CHECK: bl __aarch64_cas2_acq_rel
%pair = cmpxchg weak ptr %ptr, i16 %expected, i16 %new acq_rel seq_cst, align 2
%r = extractvalue { i16, i1 } %pair, 0
ret i16 %r
}
define dso_local i16 @cmpxchg_i16_aligned_seq_cst_monotonic(i16 %expected, i16 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i16_aligned_seq_cst_monotonic:
-; -O0: ldaxrh w0, [x2]
-; -O0: cmp w0, w9, uxth
-; -O0: stlxrh w8, w1, [x2]
-;
-; -O1-LABEL: cmpxchg_i16_aligned_seq_cst_monotonic:
-; -O1: bl __aarch64_cas2_acq_rel
+; CHECK-LABEL: cmpxchg_i16_aligned_seq_cst_monotonic:
+; CHECK: bl __aarch64_cas2_acq_rel
%pair = cmpxchg ptr %ptr, i16 %expected, i16 %new seq_cst monotonic, align 2
%r = extractvalue { i16, i1 } %pair, 0
ret i16 %r
}
define dso_local i16 @cmpxchg_i16_aligned_seq_cst_monotonic_weak(i16 %expected, i16 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i16_aligned_seq_cst_monotonic_weak:
-; -O0: ldaxrh w0, [x2]
-; -O0: cmp w0, w9, uxth
-; -O0: stlxrh w8, w1, [x2]
-;
-; -O1-LABEL: cmpxchg_i16_aligned_seq_cst_monotonic_weak:
-; -O1: bl __aarch64_cas2_acq_rel
+; CHECK-LABEL: cmpxchg_i16_aligned_seq_cst_monotonic_weak:
+; CHECK: bl __aarch64_cas2_acq_rel
%pair = cmpxchg weak ptr %ptr, i16 %expected, i16 %new seq_cst monotonic, align 2
%r = extractvalue { i16, i1 } %pair, 0
ret i16 %r
}
define dso_local i16 @cmpxchg_i16_aligned_seq_cst_acquire(i16 %expected, i16 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i16_aligned_seq_cst_acquire:
-; -O0: ldaxrh w0, [x2]
-; -O0: cmp w0, w9, uxth
-; -O0: stlxrh w8, w1, [x2]
-;
-; -O1-LABEL: cmpxchg_i16_aligned_seq_cst_acquire:
-; -O1: bl __aarch64_cas2_acq_rel
+; CHECK-LABEL: cmpxchg_i16_aligned_seq_cst_acquire:
+; CHECK: bl __aarch64_cas2_acq_rel
%pair = cmpxchg ptr %ptr, i16 %expected, i16 %new seq_cst acquire, align 2
%r = extractvalue { i16, i1 } %pair, 0
ret i16 %r
}
define dso_local i16 @cmpxchg_i16_aligned_seq_cst_acquire_weak(i16 %expected, i16 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i16_aligned_seq_cst_acquire_weak:
-; -O0: ldaxrh w0, [x2]
-; -O0: cmp w0, w9, uxth
-; -O0: stlxrh w8, w1, [x2]
-;
-; -O1-LABEL: cmpxchg_i16_aligned_seq_cst_acquire_weak:
-; -O1: bl __aarch64_cas2_acq_rel
+; CHECK-LABEL: cmpxchg_i16_aligned_seq_cst_acquire_weak:
+; CHECK: bl __aarch64_cas2_acq_rel
%pair = cmpxchg weak ptr %ptr, i16 %expected, i16 %new seq_cst acquire, align 2
%r = extractvalue { i16, i1 } %pair, 0
ret i16 %r
}
define dso_local i16 @cmpxchg_i16_aligned_seq_cst_seq_cst(i16 %expected, i16 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i16_aligned_seq_cst_seq_cst:
-; -O0: ldaxrh w0, [x2]
-; -O0: cmp w0, w9, uxth
-; -O0: stlxrh w8, w1, [x2]
-;
-; -O1-LABEL: cmpxchg_i16_aligned_seq_cst_seq_cst:
-; -O1: bl __aarch64_cas2_acq_rel
+; CHECK-LABEL: cmpxchg_i16_aligned_seq_cst_seq_cst:
+; CHECK: bl __aarch64_cas2_acq_rel
%pair = cmpxchg ptr %ptr, i16 %expected, i16 %new seq_cst seq_cst, align 2
%r = extractvalue { i16, i1 } %pair, 0
ret i16 %r
}
define dso_local i16 @cmpxchg_i16_aligned_seq_cst_seq_cst_weak(i16 %expected, i16 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i16_aligned_seq_cst_seq_cst_weak:
-; -O0: ldaxrh w0, [x2]
-; -O0: cmp w0, w9, uxth
-; -O0: stlxrh w8, w1, [x2]
-;
-; -O1-LABEL: cmpxchg_i16_aligned_seq_cst_seq_cst_weak:
-; -O1: bl __aarch64_cas2_acq_rel
+; CHECK-LABEL: cmpxchg_i16_aligned_seq_cst_seq_cst_weak:
+; CHECK: bl __aarch64_cas2_acq_rel
%pair = cmpxchg weak ptr %ptr, i16 %expected, i16 %new seq_cst seq_cst, align 2
%r = extractvalue { i16, i1 } %pair, 0
ret i16 %r
}
define dso_local i32 @cmpxchg_i32_aligned_monotonic_monotonic(i32 %expected, i32 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i32_aligned_monotonic_monotonic:
-; -O0: ldaxr w0, [x2]
-; -O0: cmp w0, w9
-; -O0: stlxr w8, w1, [x2]
-;
-; -O1-LABEL: cmpxchg_i32_aligned_monotonic_monotonic:
-; -O1: bl __aarch64_cas4_relax
+; CHECK-LABEL: cmpxchg_i32_aligned_monotonic_monotonic:
+; CHECK: bl __aarch64_cas4_relax
%pair = cmpxchg ptr %ptr, i32 %expected, i32 %new monotonic monotonic, align 4
%r = extractvalue { i32, i1 } %pair, 0
ret i32 %r
}
define dso_local i32 @cmpxchg_i32_aligned_monotonic_monotonic_weak(i32 %expected, i32 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i32_aligned_monotonic_monotonic_weak:
-; -O0: ldaxr w0, [x2]
-; -O0: cmp w0, w9
-; -O0: stlxr w8, w1, [x2]
-;
-; -O1-LABEL: cmpxchg_i32_aligned_monotonic_monotonic_weak:
-; -O1: bl __aarch64_cas4_relax
+; CHECK-LABEL: cmpxchg_i32_aligned_monotonic_monotonic_weak:
+; CHECK: bl __aarch64_cas4_relax
%pair = cmpxchg weak ptr %ptr, i32 %expected, i32 %new monotonic monotonic, align 4
%r = extractvalue { i32, i1 } %pair, 0
ret i32 %r
}
define dso_local i32 @cmpxchg_i32_aligned_monotonic_acquire(i32 %expected, i32 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i32_aligned_monotonic_acquire:
-; -O0: ldaxr w0, [x2]
-; -O0: cmp w0, w9
-; -O0: stlxr w8, w1, [x2]
-;
-; -O1-LABEL: cmpxchg_i32_aligned_monotonic_acquire:
-; -O1: bl __aarch64_cas4_acq
+; CHECK-LABEL: cmpxchg_i32_aligned_monotonic_acquire:
+; CHECK: bl __aarch64_cas4_acq
%pair = cmpxchg ptr %ptr, i32 %expected, i32 %new monotonic acquire, align 4
%r = extractvalue { i32, i1 } %pair, 0
ret i32 %r
}
define dso_local i32 @cmpxchg_i32_aligned_monotonic_acquire_weak(i32 %expected, i32 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i32_aligned_monotonic_acquire_weak:
-; -O0: ldaxr w0, [x2]
-; -O0: cmp w0, w9
-; -O0: stlxr w8, w1, [x2]
-;
-; -O1-LABEL: cmpxchg_i32_aligned_monotonic_acquire_weak:
-; -O1: bl __aarch64_cas4_acq
+; CHECK-LABEL: cmpxchg_i32_aligned_monotonic_acquire_weak:
+; CHECK: bl __aarch64_cas4_acq
%pair = cmpxchg weak ptr %ptr, i32 %expected, i32 %new monotonic acquire, align 4
%r = extractvalue { i32, i1 } %pair, 0
ret i32 %r
}
define dso_local i32 @cmpxchg_i32_aligned_monotonic_seq_cst(i32 %expected, i32 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i32_aligned_monotonic_seq_cst:
-; -O0: ldaxr w0, [x2]
-; -O0: cmp w0, w9
-; -O0: stlxr w8, w1, [x2]
-;
-; -O1-LABEL: cmpxchg_i32_aligned_monotonic_seq_cst:
-; -O1: bl __aarch64_cas4_acq_rel
+; CHECK-LABEL: cmpxchg_i32_aligned_monotonic_seq_cst:
+; CHECK: bl __aarch64_cas4_acq_rel
%pair = cmpxchg ptr %ptr, i32 %expected, i32 %new monotonic seq_cst, align 4
%r = extractvalue { i32, i1 } %pair, 0
ret i32 %r
}
define dso_local i32 @cmpxchg_i32_aligned_monotonic_seq_cst_weak(i32 %expected, i32 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i32_aligned_monotonic_seq_cst_weak:
-; -O0: ldaxr w0, [x2]
-; -O0: cmp w0, w9
-; -O0: stlxr w8, w1, [x2]
-;
-; -O1-LABEL: cmpxchg_i32_aligned_monotonic_seq_cst_weak:
-; -O1: bl __aarch64_cas4_acq_rel
+; CHECK-LABEL: cmpxchg_i32_aligned_monotonic_seq_cst_weak:
+; CHECK: bl __aarch64_cas4_acq_rel
%pair = cmpxchg weak ptr %ptr, i32 %expected, i32 %new monotonic seq_cst, align 4
%r = extractvalue { i32, i1 } %pair, 0
ret i32 %r
}
define dso_local i32 @cmpxchg_i32_aligned_acquire_monotonic(i32 %expected, i32 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i32_aligned_acquire_monotonic:
-; -O0: ldaxr w0, [x2]
-; -O0: cmp w0, w9
-; -O0: stlxr w8, w1, [x2]
-;
-; -O1-LABEL: cmpxchg_i32_aligned_acquire_monotonic:
-; -O1: bl __aarch64_cas4_acq
+; CHECK-LABEL: cmpxchg_i32_aligned_acquire_monotonic:
+; CHECK: bl __aarch64_cas4_acq
%pair = cmpxchg ptr %ptr, i32 %expected, i32 %new acquire monotonic, align 4
%r = extractvalue { i32, i1 } %pair, 0
ret i32 %r
}
define dso_local i32 @cmpxchg_i32_aligned_acquire_monotonic_weak(i32 %expected, i32 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i32_aligned_acquire_monotonic_weak:
-; -O0: ldaxr w0, [x2]
-; -O0: cmp w0, w9
-; -O0: stlxr w8, w1, [x2]
-;
-; -O1-LABEL: cmpxchg_i32_aligned_acquire_monotonic_weak:
-; -O1: bl __aarch64_cas4_acq
+; CHECK-LABEL: cmpxchg_i32_aligned_acquire_monotonic_weak:
+; CHECK: bl __aarch64_cas4_acq
%pair = cmpxchg weak ptr %ptr, i32 %expected, i32 %new acquire monotonic, align 4
%r = extractvalue { i32, i1 } %pair, 0
ret i32 %r
}
define dso_local i32 @cmpxchg_i32_aligned_acquire_acquire(i32 %expected, i32 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i32_aligned_acquire_acquire:
-; -O0: ldaxr w0, [x2]
-; -O0: cmp w0, w9
-; -O0: stlxr w8, w1, [x2]
-;
-; -O1-LABEL: cmpxchg_i32_aligned_acquire_acquire:
-; -O1: bl __aarch64_cas4_acq
+; CHECK-LABEL: cmpxchg_i32_aligned_acquire_acquire:
+; CHECK: bl __aarch64_cas4_acq
%pair = cmpxchg ptr %ptr, i32 %expected, i32 %new acquire acquire, align 4
%r = extractvalue { i32, i1 } %pair, 0
ret i32 %r
}
define dso_local i32 @cmpxchg_i32_aligned_acquire_acquire_weak(i32 %expected, i32 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i32_aligned_acquire_acquire_weak:
-; -O0: ldaxr w0, [x2]
-; -O0: cmp w0, w9
-; -O0: stlxr w8, w1, [x2]
-;
-; -O1-LABEL: cmpxchg_i32_aligned_acquire_acquire_weak:
-; -O1: bl __aarch64_cas4_acq
+; CHECK-LABEL: cmpxchg_i32_aligned_acquire_acquire_weak:
+; CHECK: bl __aarch64_cas4_acq
%pair = cmpxchg weak ptr %ptr, i32 %expected, i32 %new acquire acquire, align 4
%r = extractvalue { i32, i1 } %pair, 0
ret i32 %r
}
define dso_local i32 @cmpxchg_i32_aligned_acquire_seq_cst(i32 %expected, i32 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i32_aligned_acquire_seq_cst:
-; -O0: ldaxr w0, [x2]
-; -O0: cmp w0, w9
-; -O0: stlxr w8, w1, [x2]
-;
-; -O1-LABEL: cmpxchg_i32_aligned_acquire_seq_cst:
-; -O1: bl __aarch64_cas4_acq_rel
+; CHECK-LABEL: cmpxchg_i32_aligned_acquire_seq_cst:
+; CHECK: bl __aarch64_cas4_acq_rel
%pair = cmpxchg ptr %ptr, i32 %expected, i32 %new acquire seq_cst, align 4
%r = extractvalue { i32, i1 } %pair, 0
ret i32 %r
}
define dso_local i32 @cmpxchg_i32_aligned_acquire_seq_cst_weak(i32 %expected, i32 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i32_aligned_acquire_seq_cst_weak:
-; -O0: ldaxr w0, [x2]
-; -O0: cmp w0, w9
-; -O0: stlxr w8, w1, [x2]
-;
-; -O1-LABEL: cmpxchg_i32_aligned_acquire_seq_cst_weak:
-; -O1: bl __aarch64_cas4_acq_rel
+; CHECK-LABEL: cmpxchg_i32_aligned_acquire_seq_cst_weak:
+; CHECK: bl __aarch64_cas4_acq_rel
%pair = cmpxchg weak ptr %ptr, i32 %expected, i32 %new acquire seq_cst, align 4
%r = extractvalue { i32, i1 } %pair, 0
ret i32 %r
}
define dso_local i32 @cmpxchg_i32_aligned_release_monotonic(i32 %expected, i32 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i32_aligned_release_monotonic:
-; -O0: ldaxr w0, [x2]
-; -O0: cmp w0, w9
-; -O0: stlxr w8, w1, [x2]
-;
-; -O1-LABEL: cmpxchg_i32_aligned_release_monotonic:
-; -O1: bl __aarch64_cas4_rel
+; CHECK-LABEL: cmpxchg_i32_aligned_release_monotonic:
+; CHECK: bl __aarch64_cas4_rel
%pair = cmpxchg ptr %ptr, i32 %expected, i32 %new release monotonic, align 4
%r = extractvalue { i32, i1 } %pair, 0
ret i32 %r
}
define dso_local i32 @cmpxchg_i32_aligned_release_monotonic_weak(i32 %expected, i32 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i32_aligned_release_monotonic_weak:
-; -O0: ldaxr w0, [x2]
-; -O0: cmp w0, w9
-; -O0: stlxr w8, w1, [x2]
-;
-; -O1-LABEL: cmpxchg_i32_aligned_release_monotonic_weak:
-; -O1: bl __aarch64_cas4_rel
+; CHECK-LABEL: cmpxchg_i32_aligned_release_monotonic_weak:
+; CHECK: bl __aarch64_cas4_rel
%pair = cmpxchg weak ptr %ptr, i32 %expected, i32 %new release monotonic, align 4
%r = extractvalue { i32, i1 } %pair, 0
ret i32 %r
}
define dso_local i32 @cmpxchg_i32_aligned_release_acquire(i32 %expected, i32 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i32_aligned_release_acquire:
-; -O0: ldaxr w0, [x2]
-; -O0: cmp w0, w9
-; -O0: stlxr w8, w1, [x2]
-;
-; -O1-LABEL: cmpxchg_i32_aligned_release_acquire:
-; -O1: bl __aarch64_cas4_acq_rel
+; CHECK-LABEL: cmpxchg_i32_aligned_release_acquire:
+; CHECK: bl __aarch64_cas4_acq_rel
%pair = cmpxchg ptr %ptr, i32 %expected, i32 %new release acquire, align 4
%r = extractvalue { i32, i1 } %pair, 0
ret i32 %r
}
define dso_local i32 @cmpxchg_i32_aligned_release_acquire_weak(i32 %expected, i32 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i32_aligned_release_acquire_weak:
-; -O0: ldaxr w0, [x2]
-; -O0: cmp w0, w9
-; -O0: stlxr w8, w1, [x2]
-;
-; -O1-LABEL: cmpxchg_i32_aligned_release_acquire_weak:
-; -O1: bl __aarch64_cas4_acq_rel
+; CHECK-LABEL: cmpxchg_i32_aligned_release_acquire_weak:
+; CHECK: bl __aarch64_cas4_acq_rel
%pair = cmpxchg weak ptr %ptr, i32 %expected, i32 %new release acquire, align 4
%r = extractvalue { i32, i1 } %pair, 0
ret i32 %r
}
define dso_local i32 @cmpxchg_i32_aligned_release_seq_cst(i32 %expected, i32 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i32_aligned_release_seq_cst:
-; -O0: ldaxr w0, [x2]
-; -O0: cmp w0, w9
-; -O0: stlxr w8, w1, [x2]
-;
-; -O1-LABEL: cmpxchg_i32_aligned_release_seq_cst:
-; -O1: bl __aarch64_cas4_acq_rel
+; CHECK-LABEL: cmpxchg_i32_aligned_release_seq_cst:
+; CHECK: bl __aarch64_cas4_acq_rel
%pair = cmpxchg ptr %ptr, i32 %expected, i32 %new release seq_cst, align 4
%r = extractvalue { i32, i1 } %pair, 0
ret i32 %r
}
define dso_local i32 @cmpxchg_i32_aligned_release_seq_cst_weak(i32 %expected, i32 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i32_aligned_release_seq_cst_weak:
-; -O0: ldaxr w0, [x2]
-; -O0: cmp w0, w9
-; -O0: stlxr w8, w1, [x2]
-;
-; -O1-LABEL: cmpxchg_i32_aligned_release_seq_cst_weak:
-; -O1: bl __aarch64_cas4_acq_rel
+; CHECK-LABEL: cmpxchg_i32_aligned_release_seq_cst_weak:
+; CHECK: bl __aarch64_cas4_acq_rel
%pair = cmpxchg weak ptr %ptr, i32 %expected, i32 %new release seq_cst, align 4
%r = extractvalue { i32, i1 } %pair, 0
ret i32 %r
}
define dso_local i32 @cmpxchg_i32_aligned_acq_rel_monotonic(i32 %expected, i32 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i32_aligned_acq_rel_monotonic:
-; -O0: ldaxr w0, [x2]
-; -O0: cmp w0, w9
-; -O0: stlxr w8, w1, [x2]
-;
-; -O1-LABEL: cmpxchg_i32_aligned_acq_rel_monotonic:
-; -O1: bl __aarch64_cas4_acq_rel
+; CHECK-LABEL: cmpxchg_i32_aligned_acq_rel_monotonic:
+; CHECK: bl __aarch64_cas4_acq_rel
%pair = cmpxchg ptr %ptr, i32 %expected, i32 %new acq_rel monotonic, align 4
%r = extractvalue { i32, i1 } %pair, 0
ret i32 %r
}
define dso_local i32 @cmpxchg_i32_aligned_acq_rel_monotonic_weak(i32 %expected, i32 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i32_aligned_acq_rel_monotonic_weak:
-; -O0: ldaxr w0, [x2]
-; -O0: cmp w0, w9
-; -O0: stlxr w8, w1, [x2]
-;
-; -O1-LABEL: cmpxchg_i32_aligned_acq_rel_monotonic_weak:
-; -O1: bl __aarch64_cas4_acq_rel
+; CHECK-LABEL: cmpxchg_i32_aligned_acq_rel_monotonic_weak:
+; CHECK: bl __aarch64_cas4_acq_rel
%pair = cmpxchg weak ptr %ptr, i32 %expected, i32 %new acq_rel monotonic, align 4
%r = extractvalue { i32, i1 } %pair, 0
ret i32 %r
}
define dso_local i32 @cmpxchg_i32_aligned_acq_rel_acquire(i32 %expected, i32 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i32_aligned_acq_rel_acquire:
-; -O0: ldaxr w0, [x2]
-; -O0: cmp w0, w9
-; -O0: stlxr w8, w1, [x2]
-;
-; -O1-LABEL: cmpxchg_i32_aligned_acq_rel_acquire:
-; -O1: bl __aarch64_cas4_acq_rel
+; CHECK-LABEL: cmpxchg_i32_aligned_acq_rel_acquire:
+; CHECK: bl __aarch64_cas4_acq_rel
%pair = cmpxchg ptr %ptr, i32 %expected, i32 %new acq_rel acquire, align 4
%r = extractvalue { i32, i1 } %pair, 0
ret i32 %r
}
define dso_local i32 @cmpxchg_i32_aligned_acq_rel_acquire_weak(i32 %expected, i32 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i32_aligned_acq_rel_acquire_weak:
-; -O0: ldaxr w0, [x2]
-; -O0: cmp w0, w9
-; -O0: stlxr w8, w1, [x2]
-;
-; -O1-LABEL: cmpxchg_i32_aligned_acq_rel_acquire_weak:
-; -O1: bl __aarch64_cas4_acq_rel
+; CHECK-LABEL: cmpxchg_i32_aligned_acq_rel_acquire_weak:
+; CHECK: bl __aarch64_cas4_acq_rel
%pair = cmpxchg weak ptr %ptr, i32 %expected, i32 %new acq_rel acquire, align 4
%r = extractvalue { i32, i1 } %pair, 0
ret i32 %r
}
define dso_local i32 @cmpxchg_i32_aligned_acq_rel_seq_cst(i32 %expected, i32 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i32_aligned_acq_rel_seq_cst:
-; -O0: ldaxr w0, [x2]
-; -O0: cmp w0, w9
-; -O0: stlxr w8, w1, [x2]
-;
-; -O1-LABEL: cmpxchg_i32_aligned_acq_rel_seq_cst:
-; -O1: bl __aarch64_cas4_acq_rel
+; CHECK-LABEL: cmpxchg_i32_aligned_acq_rel_seq_cst:
+; CHECK: bl __aarch64_cas4_acq_rel
%pair = cmpxchg ptr %ptr, i32 %expected, i32 %new acq_rel seq_cst, align 4
%r = extractvalue { i32, i1 } %pair, 0
ret i32 %r
}
define dso_local i32 @cmpxchg_i32_aligned_acq_rel_seq_cst_weak(i32 %expected, i32 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i32_aligned_acq_rel_seq_cst_weak:
-; -O0: ldaxr w0, [x2]
-; -O0: cmp w0, w9
-; -O0: stlxr w8, w1, [x2]
-;
-; -O1-LABEL: cmpxchg_i32_aligned_acq_rel_seq_cst_weak:
-; -O1: bl __aarch64_cas4_acq_rel
+; CHECK-LABEL: cmpxchg_i32_aligned_acq_rel_seq_cst_weak:
+; CHECK: bl __aarch64_cas4_acq_rel
%pair = cmpxchg weak ptr %ptr, i32 %expected, i32 %new acq_rel seq_cst, align 4
%r = extractvalue { i32, i1 } %pair, 0
ret i32 %r
}
define dso_local i32 @cmpxchg_i32_aligned_seq_cst_monotonic(i32 %expected, i32 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i32_aligned_seq_cst_monotonic:
-; -O0: ldaxr w0, [x2]
-; -O0: cmp w0, w9
-; -O0: stlxr w8, w1, [x2]
-;
-; -O1-LABEL: cmpxchg_i32_aligned_seq_cst_monotonic:
-; -O1: bl __aarch64_cas4_acq_rel
+; CHECK-LABEL: cmpxchg_i32_aligned_seq_cst_monotonic:
+; CHECK: bl __aarch64_cas4_acq_rel
%pair = cmpxchg ptr %ptr, i32 %expected, i32 %new seq_cst monotonic, align 4
%r = extractvalue { i32, i1 } %pair, 0
ret i32 %r
}
define dso_local i32 @cmpxchg_i32_aligned_seq_cst_monotonic_weak(i32 %expected, i32 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i32_aligned_seq_cst_monotonic_weak:
-; -O0: ldaxr w0, [x2]
-; -O0: cmp w0, w9
-; -O0: stlxr w8, w1, [x2]
-;
-; -O1-LABEL: cmpxchg_i32_aligned_seq_cst_monotonic_weak:
-; -O1: bl __aarch64_cas4_acq_rel
+; CHECK-LABEL: cmpxchg_i32_aligned_seq_cst_monotonic_weak:
+; CHECK: bl __aarch64_cas4_acq_rel
%pair = cmpxchg weak ptr %ptr, i32 %expected, i32 %new seq_cst monotonic, align 4
%r = extractvalue { i32, i1 } %pair, 0
ret i32 %r
}
define dso_local i32 @cmpxchg_i32_aligned_seq_cst_acquire(i32 %expected, i32 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i32_aligned_seq_cst_acquire:
-; -O0: ldaxr w0, [x2]
-; -O0: cmp w0, w9
-; -O0: stlxr w8, w1, [x2]
-;
-; -O1-LABEL: cmpxchg_i32_aligned_seq_cst_acquire:
-; -O1: bl __aarch64_cas4_acq_rel
+; CHECK-LABEL: cmpxchg_i32_aligned_seq_cst_acquire:
+; CHECK: bl __aarch64_cas4_acq_rel
%pair = cmpxchg ptr %ptr, i32 %expected, i32 %new seq_cst acquire, align 4
%r = extractvalue { i32, i1 } %pair, 0
ret i32 %r
}
define dso_local i32 @cmpxchg_i32_aligned_seq_cst_acquire_weak(i32 %expected, i32 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i32_aligned_seq_cst_acquire_weak:
-; -O0: ldaxr w0, [x2]
-; -O0: cmp w0, w9
-; -O0: stlxr w8, w1, [x2]
-;
-; -O1-LABEL: cmpxchg_i32_aligned_seq_cst_acquire_weak:
-; -O1: bl __aarch64_cas4_acq_rel
+; CHECK-LABEL: cmpxchg_i32_aligned_seq_cst_acquire_weak:
+; CHECK: bl __aarch64_cas4_acq_rel
%pair = cmpxchg weak ptr %ptr, i32 %expected, i32 %new seq_cst acquire, align 4
%r = extractvalue { i32, i1 } %pair, 0
ret i32 %r
}
define dso_local i32 @cmpxchg_i32_aligned_seq_cst_seq_cst(i32 %expected, i32 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i32_aligned_seq_cst_seq_cst:
-; -O0: ldaxr w0, [x2]
-; -O0: cmp w0, w9
-; -O0: stlxr w8, w1, [x2]
-;
-; -O1-LABEL: cmpxchg_i32_aligned_seq_cst_seq_cst:
-; -O1: bl __aarch64_cas4_acq_rel
+; CHECK-LABEL: cmpxchg_i32_aligned_seq_cst_seq_cst:
+; CHECK: bl __aarch64_cas4_acq_rel
%pair = cmpxchg ptr %ptr, i32 %expected, i32 %new seq_cst seq_cst, align 4
%r = extractvalue { i32, i1 } %pair, 0
ret i32 %r
}
define dso_local i32 @cmpxchg_i32_aligned_seq_cst_seq_cst_weak(i32 %expected, i32 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i32_aligned_seq_cst_seq_cst_weak:
-; -O0: ldaxr w0, [x2]
-; -O0: cmp w0, w9
-; -O0: stlxr w8, w1, [x2]
-;
-; -O1-LABEL: cmpxchg_i32_aligned_seq_cst_seq_cst_weak:
-; -O1: bl __aarch64_cas4_acq_rel
+; CHECK-LABEL: cmpxchg_i32_aligned_seq_cst_seq_cst_weak:
+; CHECK: bl __aarch64_cas4_acq_rel
%pair = cmpxchg weak ptr %ptr, i32 %expected, i32 %new seq_cst seq_cst, align 4
%r = extractvalue { i32, i1 } %pair, 0
ret i32 %r
}
define dso_local i64 @cmpxchg_i64_aligned_monotonic_monotonic(i64 %expected, i64 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i64_aligned_monotonic_monotonic:
-; -O0: ldaxr x0, [x2]
-; -O0: cmp x0, x9
-; -O0: stlxr w8, x1, [x2]
-;
-; -O1-LABEL: cmpxchg_i64_aligned_monotonic_monotonic:
-; -O1: bl __aarch64_cas8_relax
+; CHECK-LABEL: cmpxchg_i64_aligned_monotonic_monotonic:
+; CHECK: bl __aarch64_cas8_relax
%pair = cmpxchg ptr %ptr, i64 %expected, i64 %new monotonic monotonic, align 8
%r = extractvalue { i64, i1 } %pair, 0
ret i64 %r
}
define dso_local i64 @cmpxchg_i64_aligned_monotonic_monotonic_weak(i64 %expected, i64 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i64_aligned_monotonic_monotonic_weak:
-; -O0: ldaxr x0, [x2]
-; -O0: cmp x0, x9
-; -O0: stlxr w8, x1, [x2]
-;
-; -O1-LABEL: cmpxchg_i64_aligned_monotonic_monotonic_weak:
-; -O1: bl __aarch64_cas8_relax
+; CHECK-LABEL: cmpxchg_i64_aligned_monotonic_monotonic_weak:
+; CHECK: bl __aarch64_cas8_relax
%pair = cmpxchg weak ptr %ptr, i64 %expected, i64 %new monotonic monotonic, align 8
%r = extractvalue { i64, i1 } %pair, 0
ret i64 %r
}
define dso_local i64 @cmpxchg_i64_aligned_monotonic_acquire(i64 %expected, i64 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i64_aligned_monotonic_acquire:
-; -O0: ldaxr x0, [x2]
-; -O0: cmp x0, x9
-; -O0: stlxr w8, x1, [x2]
-;
-; -O1-LABEL: cmpxchg_i64_aligned_monotonic_acquire:
-; -O1: bl __aarch64_cas8_acq
+; CHECK-LABEL: cmpxchg_i64_aligned_monotonic_acquire:
+; CHECK: bl __aarch64_cas8_acq
%pair = cmpxchg ptr %ptr, i64 %expected, i64 %new monotonic acquire, align 8
%r = extractvalue { i64, i1 } %pair, 0
ret i64 %r
}
define dso_local i64 @cmpxchg_i64_aligned_monotonic_acquire_weak(i64 %expected, i64 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i64_aligned_monotonic_acquire_weak:
-; -O0: ldaxr x0, [x2]
-; -O0: cmp x0, x9
-; -O0: stlxr w8, x1, [x2]
-;
-; -O1-LABEL: cmpxchg_i64_aligned_monotonic_acquire_weak:
-; -O1: bl __aarch64_cas8_acq
+; CHECK-LABEL: cmpxchg_i64_aligned_monotonic_acquire_weak:
+; CHECK: bl __aarch64_cas8_acq
%pair = cmpxchg weak ptr %ptr, i64 %expected, i64 %new monotonic acquire, align 8
%r = extractvalue { i64, i1 } %pair, 0
ret i64 %r
}
define dso_local i64 @cmpxchg_i64_aligned_monotonic_seq_cst(i64 %expected, i64 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i64_aligned_monotonic_seq_cst:
-; -O0: ldaxr x0, [x2]
-; -O0: cmp x0, x9
-; -O0: stlxr w8, x1, [x2]
-;
-; -O1-LABEL: cmpxchg_i64_aligned_monotonic_seq_cst:
-; -O1: bl __aarch64_cas8_acq_rel
+; CHECK-LABEL: cmpxchg_i64_aligned_monotonic_seq_cst:
+; CHECK: bl __aarch64_cas8_acq_rel
%pair = cmpxchg ptr %ptr, i64 %expected, i64 %new monotonic seq_cst, align 8
%r = extractvalue { i64, i1 } %pair, 0
ret i64 %r
}
define dso_local i64 @cmpxchg_i64_aligned_monotonic_seq_cst_weak(i64 %expected, i64 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i64_aligned_monotonic_seq_cst_weak:
-; -O0: ldaxr x0, [x2]
-; -O0: cmp x0, x9
-; -O0: stlxr w8, x1, [x2]
-;
-; -O1-LABEL: cmpxchg_i64_aligned_monotonic_seq_cst_weak:
-; -O1: bl __aarch64_cas8_acq_rel
+; CHECK-LABEL: cmpxchg_i64_aligned_monotonic_seq_cst_weak:
+; CHECK: bl __aarch64_cas8_acq_rel
%pair = cmpxchg weak ptr %ptr, i64 %expected, i64 %new monotonic seq_cst, align 8
%r = extractvalue { i64, i1 } %pair, 0
ret i64 %r
}
define dso_local i64 @cmpxchg_i64_aligned_acquire_monotonic(i64 %expected, i64 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i64_aligned_acquire_monotonic:
-; -O0: ldaxr x0, [x2]
-; -O0: cmp x0, x9
-; -O0: stlxr w8, x1, [x2]
-;
-; -O1-LABEL: cmpxchg_i64_aligned_acquire_monotonic:
-; -O1: bl __aarch64_cas8_acq
+; CHECK-LABEL: cmpxchg_i64_aligned_acquire_monotonic:
+; CHECK: bl __aarch64_cas8_acq
%pair = cmpxchg ptr %ptr, i64 %expected, i64 %new acquire monotonic, align 8
%r = extractvalue { i64, i1 } %pair, 0
ret i64 %r
}
define dso_local i64 @cmpxchg_i64_aligned_acquire_monotonic_weak(i64 %expected, i64 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i64_aligned_acquire_monotonic_weak:
-; -O0: ldaxr x0, [x2]
-; -O0: cmp x0, x9
-; -O0: stlxr w8, x1, [x2]
-;
-; -O1-LABEL: cmpxchg_i64_aligned_acquire_monotonic_weak:
-; -O1: bl __aarch64_cas8_acq
+; CHECK-LABEL: cmpxchg_i64_aligned_acquire_monotonic_weak:
+; CHECK: bl __aarch64_cas8_acq
%pair = cmpxchg weak ptr %ptr, i64 %expected, i64 %new acquire monotonic, align 8
%r = extractvalue { i64, i1 } %pair, 0
ret i64 %r
}
define dso_local i64 @cmpxchg_i64_aligned_acquire_acquire(i64 %expected, i64 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i64_aligned_acquire_acquire:
-; -O0: ldaxr x0, [x2]
-; -O0: cmp x0, x9
-; -O0: stlxr w8, x1, [x2]
-;
-; -O1-LABEL: cmpxchg_i64_aligned_acquire_acquire:
-; -O1: bl __aarch64_cas8_acq
+; CHECK-LABEL: cmpxchg_i64_aligned_acquire_acquire:
+; CHECK: bl __aarch64_cas8_acq
%pair = cmpxchg ptr %ptr, i64 %expected, i64 %new acquire acquire, align 8
%r = extractvalue { i64, i1 } %pair, 0
ret i64 %r
}
define dso_local i64 @cmpxchg_i64_aligned_acquire_acquire_weak(i64 %expected, i64 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i64_aligned_acquire_acquire_weak:
-; -O0: ldaxr x0, [x2]
-; -O0: cmp x0, x9
-; -O0: stlxr w8, x1, [x2]
-;
-; -O1-LABEL: cmpxchg_i64_aligned_acquire_acquire_weak:
-; -O1: bl __aarch64_cas8_acq
+; CHECK-LABEL: cmpxchg_i64_aligned_acquire_acquire_weak:
+; CHECK: bl __aarch64_cas8_acq
%pair = cmpxchg weak ptr %ptr, i64 %expected, i64 %new acquire acquire, align 8
%r = extractvalue { i64, i1 } %pair, 0
ret i64 %r
}
define dso_local i64 @cmpxchg_i64_aligned_acquire_seq_cst(i64 %expected, i64 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i64_aligned_acquire_seq_cst:
-; -O0: ldaxr x0, [x2]
-; -O0: cmp x0, x9
-; -O0: stlxr w8, x1, [x2]
-;
-; -O1-LABEL: cmpxchg_i64_aligned_acquire_seq_cst:
-; -O1: bl __aarch64_cas8_acq_rel
+; CHECK-LABEL: cmpxchg_i64_aligned_acquire_seq_cst:
+; CHECK: bl __aarch64_cas8_acq_rel
%pair = cmpxchg ptr %ptr, i64 %expected, i64 %new acquire seq_cst, align 8
%r = extractvalue { i64, i1 } %pair, 0
ret i64 %r
}
define dso_local i64 @cmpxchg_i64_aligned_acquire_seq_cst_weak(i64 %expected, i64 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i64_aligned_acquire_seq_cst_weak:
-; -O0: ldaxr x0, [x2]
-; -O0: cmp x0, x9
-; -O0: stlxr w8, x1, [x2]
-;
-; -O1-LABEL: cmpxchg_i64_aligned_acquire_seq_cst_weak:
-; -O1: bl __aarch64_cas8_acq_rel
+; CHECK-LABEL: cmpxchg_i64_aligned_acquire_seq_cst_weak:
+; CHECK: bl __aarch64_cas8_acq_rel
%pair = cmpxchg weak ptr %ptr, i64 %expected, i64 %new acquire seq_cst, align 8
%r = extractvalue { i64, i1 } %pair, 0
ret i64 %r
}
define dso_local i64 @cmpxchg_i64_aligned_release_monotonic(i64 %expected, i64 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i64_aligned_release_monotonic:
-; -O0: ldaxr x0, [x2]
-; -O0: cmp x0, x9
-; -O0: stlxr w8, x1, [x2]
-;
-; -O1-LABEL: cmpxchg_i64_aligned_release_monotonic:
-; -O1: bl __aarch64_cas8_rel
+; CHECK-LABEL: cmpxchg_i64_aligned_release_monotonic:
+; CHECK: bl __aarch64_cas8_rel
%pair = cmpxchg ptr %ptr, i64 %expected, i64 %new release monotonic, align 8
%r = extractvalue { i64, i1 } %pair, 0
ret i64 %r
}
define dso_local i64 @cmpxchg_i64_aligned_release_monotonic_weak(i64 %expected, i64 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i64_aligned_release_monotonic_weak:
-; -O0: ldaxr x0, [x2]
-; -O0: cmp x0, x9
-; -O0: stlxr w8, x1, [x2]
-;
-; -O1-LABEL: cmpxchg_i64_aligned_release_monotonic_weak:
-; -O1: bl __aarch64_cas8_rel
+; CHECK-LABEL: cmpxchg_i64_aligned_release_monotonic_weak:
+; CHECK: bl __aarch64_cas8_rel
%pair = cmpxchg weak ptr %ptr, i64 %expected, i64 %new release monotonic, align 8
%r = extractvalue { i64, i1 } %pair, 0
ret i64 %r
}
define dso_local i64 @cmpxchg_i64_aligned_release_acquire(i64 %expected, i64 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i64_aligned_release_acquire:
-; -O0: ldaxr x0, [x2]
-; -O0: cmp x0, x9
-; -O0: stlxr w8, x1, [x2]
-;
-; -O1-LABEL: cmpxchg_i64_aligned_release_acquire:
-; -O1: bl __aarch64_cas8_acq_rel
+; CHECK-LABEL: cmpxchg_i64_aligned_release_acquire:
+; CHECK: bl __aarch64_cas8_acq_rel
%pair = cmpxchg ptr %ptr, i64 %expected, i64 %new release acquire, align 8
%r = extractvalue { i64, i1 } %pair, 0
ret i64 %r
}
define dso_local i64 @cmpxchg_i64_aligned_release_acquire_weak(i64 %expected, i64 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i64_aligned_release_acquire_weak:
-; -O0: ldaxr x0, [x2]
-; -O0: cmp x0, x9
-; -O0: stlxr w8, x1, [x2]
-;
-; -O1-LABEL: cmpxchg_i64_aligned_release_acquire_weak:
-; -O1: bl __aarch64_cas8_acq_rel
+; CHECK-LABEL: cmpxchg_i64_aligned_release_acquire_weak:
+; CHECK: bl __aarch64_cas8_acq_rel
%pair = cmpxchg weak ptr %ptr, i64 %expected, i64 %new release acquire, align 8
%r = extractvalue { i64, i1 } %pair, 0
ret i64 %r
}
define dso_local i64 @cmpxchg_i64_aligned_release_seq_cst(i64 %expected, i64 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i64_aligned_release_seq_cst:
-; -O0: ldaxr x0, [x2]
-; -O0: cmp x0, x9
-; -O0: stlxr w8, x1, [x2]
-;
-; -O1-LABEL: cmpxchg_i64_aligned_release_seq_cst:
-; -O1: bl __aarch64_cas8_acq_rel
+; CHECK-LABEL: cmpxchg_i64_aligned_release_seq_cst:
+; CHECK: bl __aarch64_cas8_acq_rel
%pair = cmpxchg ptr %ptr, i64 %expected, i64 %new release seq_cst, align 8
%r = extractvalue { i64, i1 } %pair, 0
ret i64 %r
}
define dso_local i64 @cmpxchg_i64_aligned_release_seq_cst_weak(i64 %expected, i64 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i64_aligned_release_seq_cst_weak:
-; -O0: ldaxr x0, [x2]
-; -O0: cmp x0, x9
-; -O0: stlxr w8, x1, [x2]
-;
-; -O1-LABEL: cmpxchg_i64_aligned_release_seq_cst_weak:
-; -O1: bl __aarch64_cas8_acq_rel
+; CHECK-LABEL: cmpxchg_i64_aligned_release_seq_cst_weak:
+; CHECK: bl __aarch64_cas8_acq_rel
%pair = cmpxchg weak ptr %ptr, i64 %expected, i64 %new release seq_cst, align 8
%r = extractvalue { i64, i1 } %pair, 0
ret i64 %r
}
define dso_local i64 @cmpxchg_i64_aligned_acq_rel_monotonic(i64 %expected, i64 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i64_aligned_acq_rel_monotonic:
-; -O0: ldaxr x0, [x2]
-; -O0: cmp x0, x9
-; -O0: stlxr w8, x1, [x2]
-;
-; -O1-LABEL: cmpxchg_i64_aligned_acq_rel_monotonic:
-; -O1: bl __aarch64_cas8_acq_rel
+; CHECK-LABEL: cmpxchg_i64_aligned_acq_rel_monotonic:
+; CHECK: bl __aarch64_cas8_acq_rel
%pair = cmpxchg ptr %ptr, i64 %expected, i64 %new acq_rel monotonic, align 8
%r = extractvalue { i64, i1 } %pair, 0
ret i64 %r
}
define dso_local i64 @cmpxchg_i64_aligned_acq_rel_monotonic_weak(i64 %expected, i64 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i64_aligned_acq_rel_monotonic_weak:
-; -O0: ldaxr x0, [x2]
-; -O0: cmp x0, x9
-; -O0: stlxr w8, x1, [x2]
-;
-; -O1-LABEL: cmpxchg_i64_aligned_acq_rel_monotonic_weak:
-; -O1: bl __aarch64_cas8_acq_rel
+; CHECK-LABEL: cmpxchg_i64_aligned_acq_rel_monotonic_weak:
+; CHECK: bl __aarch64_cas8_acq_rel
%pair = cmpxchg weak ptr %ptr, i64 %expected, i64 %new acq_rel monotonic, align 8
%r = extractvalue { i64, i1 } %pair, 0
ret i64 %r
}
define dso_local i64 @cmpxchg_i64_aligned_acq_rel_acquire(i64 %expected, i64 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i64_aligned_acq_rel_acquire:
-; -O0: ldaxr x0, [x2]
-; -O0: cmp x0, x9
-; -O0: stlxr w8, x1, [x2]
-;
-; -O1-LABEL: cmpxchg_i64_aligned_acq_rel_acquire:
-; -O1: bl __aarch64_cas8_acq_rel
+; CHECK-LABEL: cmpxchg_i64_aligned_acq_rel_acquire:
+; CHECK: bl __aarch64_cas8_acq_rel
%pair = cmpxchg ptr %ptr, i64 %expected, i64 %new acq_rel acquire, align 8
%r = extractvalue { i64, i1 } %pair, 0
ret i64 %r
}
define dso_local i64 @cmpxchg_i64_aligned_acq_rel_acquire_weak(i64 %expected, i64 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i64_aligned_acq_rel_acquire_weak:
-; -O0: ldaxr x0, [x2]
-; -O0: cmp x0, x9
-; -O0: stlxr w8, x1, [x2]
-;
-; -O1-LABEL: cmpxchg_i64_aligned_acq_rel_acquire_weak:
-; -O1: bl __aarch64_cas8_acq_rel
+; CHECK-LABEL: cmpxchg_i64_aligned_acq_rel_acquire_weak:
+; CHECK: bl __aarch64_cas8_acq_rel
%pair = cmpxchg weak ptr %ptr, i64 %expected, i64 %new acq_rel acquire, align 8
%r = extractvalue { i64, i1 } %pair, 0
ret i64 %r
}
define dso_local i64 @cmpxchg_i64_aligned_acq_rel_seq_cst(i64 %expected, i64 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i64_aligned_acq_rel_seq_cst:
-; -O0: ldaxr x0, [x2]
-; -O0: cmp x0, x9
-; -O0: stlxr w8, x1, [x2]
-;
-; -O1-LABEL: cmpxchg_i64_aligned_acq_rel_seq_cst:
-; -O1: bl __aarch64_cas8_acq_rel
+; CHECK-LABEL: cmpxchg_i64_aligned_acq_rel_seq_cst:
+; CHECK: bl __aarch64_cas8_acq_rel
%pair = cmpxchg ptr %ptr, i64 %expected, i64 %new acq_rel seq_cst, align 8
%r = extractvalue { i64, i1 } %pair, 0
ret i64 %r
}
define dso_local i64 @cmpxchg_i64_aligned_acq_rel_seq_cst_weak(i64 %expected, i64 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i64_aligned_acq_rel_seq_cst_weak:
-; -O0: ldaxr x0, [x2]
-; -O0: cmp x0, x9
-; -O0: stlxr w8, x1, [x2]
-;
-; -O1-LABEL: cmpxchg_i64_aligned_acq_rel_seq_cst_weak:
-; -O1: bl __aarch64_cas8_acq_rel
+; CHECK-LABEL: cmpxchg_i64_aligned_acq_rel_seq_cst_weak:
+; CHECK: bl __aarch64_cas8_acq_rel
%pair = cmpxchg weak ptr %ptr, i64 %expected, i64 %new acq_rel seq_cst, align 8
%r = extractvalue { i64, i1 } %pair, 0
ret i64 %r
}
define dso_local i64 @cmpxchg_i64_aligned_seq_cst_monotonic(i64 %expected, i64 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i64_aligned_seq_cst_monotonic:
-; -O0: ldaxr x0, [x2]
-; -O0: cmp x0, x9
-; -O0: stlxr w8, x1, [x2]
-;
-; -O1-LABEL: cmpxchg_i64_aligned_seq_cst_monotonic:
-; -O1: bl __aarch64_cas8_acq_rel
+; CHECK-LABEL: cmpxchg_i64_aligned_seq_cst_monotonic:
+; CHECK: bl __aarch64_cas8_acq_rel
%pair = cmpxchg ptr %ptr, i64 %expected, i64 %new seq_cst monotonic, align 8
%r = extractvalue { i64, i1 } %pair, 0
ret i64 %r
}
define dso_local i64 @cmpxchg_i64_aligned_seq_cst_monotonic_weak(i64 %expected, i64 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i64_aligned_seq_cst_monotonic_weak:
-; -O0: ldaxr x0, [x2]
-; -O0: cmp x0, x9
-; -O0: stlxr w8, x1, [x2]
-;
-; -O1-LABEL: cmpxchg_i64_aligned_seq_cst_monotonic_weak:
-; -O1: bl __aarch64_cas8_acq_rel
+; CHECK-LABEL: cmpxchg_i64_aligned_seq_cst_monotonic_weak:
+; CHECK: bl __aarch64_cas8_acq_rel
%pair = cmpxchg weak ptr %ptr, i64 %expected, i64 %new seq_cst monotonic, align 8
%r = extractvalue { i64, i1 } %pair, 0
ret i64 %r
}
define dso_local i64 @cmpxchg_i64_aligned_seq_cst_acquire(i64 %expected, i64 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i64_aligned_seq_cst_acquire:
-; -O0: ldaxr x0, [x2]
-; -O0: cmp x0, x9
-; -O0: stlxr w8, x1, [x2]
-;
-; -O1-LABEL: cmpxchg_i64_aligned_seq_cst_acquire:
-; -O1: bl __aarch64_cas8_acq_rel
+; CHECK-LABEL: cmpxchg_i64_aligned_seq_cst_acquire:
+; CHECK: bl __aarch64_cas8_acq_rel
%pair = cmpxchg ptr %ptr, i64 %expected, i64 %new seq_cst acquire, align 8
%r = extractvalue { i64, i1 } %pair, 0
ret i64 %r
}
define dso_local i64 @cmpxchg_i64_aligned_seq_cst_acquire_weak(i64 %expected, i64 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i64_aligned_seq_cst_acquire_weak:
-; -O0: ldaxr x0, [x2]
-; -O0: cmp x0, x9
-; -O0: stlxr w8, x1, [x2]
-;
-; -O1-LABEL: cmpxchg_i64_aligned_seq_cst_acquire_weak:
-; -O1: bl __aarch64_cas8_acq_rel
+; CHECK-LABEL: cmpxchg_i64_aligned_seq_cst_acquire_weak:
+; CHECK: bl __aarch64_cas8_acq_rel
%pair = cmpxchg weak ptr %ptr, i64 %expected, i64 %new seq_cst acquire, align 8
%r = extractvalue { i64, i1 } %pair, 0
ret i64 %r
}
define dso_local i64 @cmpxchg_i64_aligned_seq_cst_seq_cst(i64 %expected, i64 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i64_aligned_seq_cst_seq_cst:
-; -O0: ldaxr x0, [x2]
-; -O0: cmp x0, x9
-; -O0: stlxr w8, x1, [x2]
-;
-; -O1-LABEL: cmpxchg_i64_aligned_seq_cst_seq_cst:
-; -O1: bl __aarch64_cas8_acq_rel
+; CHECK-LABEL: cmpxchg_i64_aligned_seq_cst_seq_cst:
+; CHECK: bl __aarch64_cas8_acq_rel
%pair = cmpxchg ptr %ptr, i64 %expected, i64 %new seq_cst seq_cst, align 8
%r = extractvalue { i64, i1 } %pair, 0
ret i64 %r
}
define dso_local i64 @cmpxchg_i64_aligned_seq_cst_seq_cst_weak(i64 %expected, i64 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i64_aligned_seq_cst_seq_cst_weak:
-; -O0: ldaxr x0, [x2]
-; -O0: cmp x0, x9
-; -O0: stlxr w8, x1, [x2]
-;
-; -O1-LABEL: cmpxchg_i64_aligned_seq_cst_seq_cst_weak:
-; -O1: bl __aarch64_cas8_acq_rel
+; CHECK-LABEL: cmpxchg_i64_aligned_seq_cst_seq_cst_weak:
+; CHECK: bl __aarch64_cas8_acq_rel
%pair = cmpxchg weak ptr %ptr, i64 %expected, i64 %new seq_cst seq_cst, align 8
%r = extractvalue { i64, i1 } %pair, 0
ret i64 %r
}
define dso_local i128 @cmpxchg_i128_aligned_monotonic_monotonic(i128 %expected, i128 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i128_aligned_monotonic_monotonic:
-; -O0: ldxp x0, x1, [x4]
-; -O0: cmp x0, x9
-; -O0: cmp x1, x10
-; -O0: stxp w8, x2, x3, [x4]
-; -O0: stxp w8, x0, x1, [x4]
-;
-; -O1-LABEL: cmpxchg_i128_aligned_monotonic_monotonic:
-; -O1: bl __aarch64_cas16_relax
+; CHECK-LABEL: cmpxchg_i128_aligned_monotonic_monotonic:
+; CHECK: bl __aarch64_cas16_relax
%pair = cmpxchg ptr %ptr, i128 %expected, i128 %new monotonic monotonic, align 16
%r = extractvalue { i128, i1 } %pair, 0
ret i128 %r
}
define dso_local i128 @cmpxchg_i128_aligned_monotonic_monotonic_weak(i128 %expected, i128 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i128_aligned_monotonic_monotonic_weak:
-; -O0: ldxp x0, x1, [x4]
-; -O0: cmp x0, x9
-; -O0: cmp x1, x10
-; -O0: stxp w8, x2, x3, [x4]
-; -O0: stxp w8, x0, x1, [x4]
-;
-; -O1-LABEL: cmpxchg_i128_aligned_monotonic_monotonic_weak:
-; -O1: bl __aarch64_cas16_relax
+; CHECK-LABEL: cmpxchg_i128_aligned_monotonic_monotonic_weak:
+; CHECK: bl __aarch64_cas16_relax
%pair = cmpxchg weak ptr %ptr, i128 %expected, i128 %new monotonic monotonic, align 16
%r = extractvalue { i128, i1 } %pair, 0
ret i128 %r
}
define dso_local i128 @cmpxchg_i128_aligned_monotonic_acquire(i128 %expected, i128 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i128_aligned_monotonic_acquire:
-; -O0: ldaxp x0, x1, [x4]
-; -O0: cmp x0, x9
-; -O0: cmp x1, x10
-; -O0: stxp w8, x2, x3, [x4]
-; -O0: stxp w8, x0, x1, [x4]
-;
-; -O1-LABEL: cmpxchg_i128_aligned_monotonic_acquire:
-; -O1: bl __aarch64_cas16_acq
+; CHECK-LABEL: cmpxchg_i128_aligned_monotonic_acquire:
+; CHECK: bl __aarch64_cas16_acq
%pair = cmpxchg ptr %ptr, i128 %expected, i128 %new monotonic acquire, align 16
%r = extractvalue { i128, i1 } %pair, 0
ret i128 %r
}
define dso_local i128 @cmpxchg_i128_aligned_monotonic_acquire_weak(i128 %expected, i128 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i128_aligned_monotonic_acquire_weak:
-; -O0: ldaxp x0, x1, [x4]
-; -O0: cmp x0, x9
-; -O0: cmp x1, x10
-; -O0: stxp w8, x2, x3, [x4]
-; -O0: stxp w8, x0, x1, [x4]
-;
-; -O1-LABEL: cmpxchg_i128_aligned_monotonic_acquire_weak:
-; -O1: bl __aarch64_cas16_acq
+; CHECK-LABEL: cmpxchg_i128_aligned_monotonic_acquire_weak:
+; CHECK: bl __aarch64_cas16_acq
%pair = cmpxchg weak ptr %ptr, i128 %expected, i128 %new monotonic acquire, align 16
%r = extractvalue { i128, i1 } %pair, 0
ret i128 %r
}
define dso_local i128 @cmpxchg_i128_aligned_monotonic_seq_cst(i128 %expected, i128 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i128_aligned_monotonic_seq_cst:
-; -O0: ldaxp x0, x1, [x4]
-; -O0: cmp x0, x9
-; -O0: cmp x1, x10
-; -O0: stlxp w8, x2, x3, [x4]
-; -O0: stlxp w8, x0, x1, [x4]
-;
-; -O1-LABEL: cmpxchg_i128_aligned_monotonic_seq_cst:
-; -O1: bl __aarch64_cas16_acq_rel
+; CHECK-LABEL: cmpxchg_i128_aligned_monotonic_seq_cst:
+; CHECK: bl __aarch64_cas16_acq_rel
%pair = cmpxchg ptr %ptr, i128 %expected, i128 %new monotonic seq_cst, align 16
%r = extractvalue { i128, i1 } %pair, 0
ret i128 %r
}
define dso_local i128 @cmpxchg_i128_aligned_monotonic_seq_cst_weak(i128 %expected, i128 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i128_aligned_monotonic_seq_cst_weak:
-; -O0: ldaxp x0, x1, [x4]
-; -O0: cmp x0, x9
-; -O0: cmp x1, x10
-; -O0: stlxp w8, x2, x3, [x4]
-; -O0: stlxp w8, x0, x1, [x4]
-;
-; -O1-LABEL: cmpxchg_i128_aligned_monotonic_seq_cst_weak:
-; -O1: bl __aarch64_cas16_acq_rel
+; CHECK-LABEL: cmpxchg_i128_aligned_monotonic_seq_cst_weak:
+; CHECK: bl __aarch64_cas16_acq_rel
%pair = cmpxchg weak ptr %ptr, i128 %expected, i128 %new monotonic seq_cst, align 16
%r = extractvalue { i128, i1 } %pair, 0
ret i128 %r
}
define dso_local i128 @cmpxchg_i128_aligned_acquire_monotonic(i128 %expected, i128 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i128_aligned_acquire_monotonic:
-; -O0: ldaxp x0, x1, [x4]
-; -O0: cmp x0, x9
-; -O0: cmp x1, x10
-; -O0: stxp w8, x2, x3, [x4]
-; -O0: stxp w8, x0, x1, [x4]
-;
-; -O1-LABEL: cmpxchg_i128_aligned_acquire_monotonic:
-; -O1: bl __aarch64_cas16_acq
+; CHECK-LABEL: cmpxchg_i128_aligned_acquire_monotonic:
+; CHECK: bl __aarch64_cas16_acq
%pair = cmpxchg ptr %ptr, i128 %expected, i128 %new acquire monotonic, align 16
%r = extractvalue { i128, i1 } %pair, 0
ret i128 %r
}
define dso_local i128 @cmpxchg_i128_aligned_acquire_monotonic_weak(i128 %expected, i128 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i128_aligned_acquire_monotonic_weak:
-; -O0: ldaxp x0, x1, [x4]
-; -O0: cmp x0, x9
-; -O0: cmp x1, x10
-; -O0: stxp w8, x2, x3, [x4]
-; -O0: stxp w8, x0, x1, [x4]
-;
-; -O1-LABEL: cmpxchg_i128_aligned_acquire_monotonic_weak:
-; -O1: bl __aarch64_cas16_acq
+; CHECK-LABEL: cmpxchg_i128_aligned_acquire_monotonic_weak:
+; CHECK: bl __aarch64_cas16_acq
%pair = cmpxchg weak ptr %ptr, i128 %expected, i128 %new acquire monotonic, align 16
%r = extractvalue { i128, i1 } %pair, 0
ret i128 %r
}
define dso_local i128 @cmpxchg_i128_aligned_acquire_acquire(i128 %expected, i128 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i128_aligned_acquire_acquire:
-; -O0: ldaxp x0, x1, [x4]
-; -O0: cmp x0, x9
-; -O0: cmp x1, x10
-; -O0: stxp w8, x2, x3, [x4]
-; -O0: stxp w8, x0, x1, [x4]
-;
-; -O1-LABEL: cmpxchg_i128_aligned_acquire_acquire:
-; -O1: bl __aarch64_cas16_acq
+; CHECK-LABEL: cmpxchg_i128_aligned_acquire_acquire:
+; CHECK: bl __aarch64_cas16_acq
%pair = cmpxchg ptr %ptr, i128 %expected, i128 %new acquire acquire, align 16
%r = extractvalue { i128, i1 } %pair, 0
ret i128 %r
}
define dso_local i128 @cmpxchg_i128_aligned_acquire_acquire_weak(i128 %expected, i128 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i128_aligned_acquire_acquire_weak:
-; -O0: ldaxp x0, x1, [x4]
-; -O0: cmp x0, x9
-; -O0: cmp x1, x10
-; -O0: stxp w8, x2, x3, [x4]
-; -O0: stxp w8, x0, x1, [x4]
-;
-; -O1-LABEL: cmpxchg_i128_aligned_acquire_acquire_weak:
-; -O1: bl __aarch64_cas16_acq
+; CHECK-LABEL: cmpxchg_i128_aligned_acquire_acquire_weak:
+; CHECK: bl __aarch64_cas16_acq
%pair = cmpxchg weak ptr %ptr, i128 %expected, i128 %new acquire acquire, align 16
%r = extractvalue { i128, i1 } %pair, 0
ret i128 %r
}
define dso_local i128 @cmpxchg_i128_aligned_acquire_seq_cst(i128 %expected, i128 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i128_aligned_acquire_seq_cst:
-; -O0: ldaxp x0, x1, [x4]
-; -O0: cmp x0, x9
-; -O0: cmp x1, x10
-; -O0: stlxp w8, x2, x3, [x4]
-; -O0: stlxp w8, x0, x1, [x4]
-;
-; -O1-LABEL: cmpxchg_i128_aligned_acquire_seq_cst:
-; -O1: bl __aarch64_cas16_acq_rel
+; CHECK-LABEL: cmpxchg_i128_aligned_acquire_seq_cst:
+; CHECK: bl __aarch64_cas16_acq_rel
%pair = cmpxchg ptr %ptr, i128 %expected, i128 %new acquire seq_cst, align 16
%r = extractvalue { i128, i1 } %pair, 0
ret i128 %r
}
define dso_local i128 @cmpxchg_i128_aligned_acquire_seq_cst_weak(i128 %expected, i128 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i128_aligned_acquire_seq_cst_weak:
-; -O0: ldaxp x0, x1, [x4]
-; -O0: cmp x0, x9
-; -O0: cmp x1, x10
-; -O0: stlxp w8, x2, x3, [x4]
-; -O0: stlxp w8, x0, x1, [x4]
-;
-; -O1-LABEL: cmpxchg_i128_aligned_acquire_seq_cst_weak:
-; -O1: bl __aarch64_cas16_acq_rel
+; CHECK-LABEL: cmpxchg_i128_aligned_acquire_seq_cst_weak:
+; CHECK: bl __aarch64_cas16_acq_rel
%pair = cmpxchg weak ptr %ptr, i128 %expected, i128 %new acquire seq_cst, align 16
%r = extractvalue { i128, i1 } %pair, 0
ret i128 %r
}
define dso_local i128 @cmpxchg_i128_aligned_release_monotonic(i128 %expected, i128 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i128_aligned_release_monotonic:
-; -O0: ldxp x0, x1, [x4]
-; -O0: cmp x0, x9
-; -O0: cmp x1, x10
-; -O0: stlxp w8, x2, x3, [x4]
-; -O0: stlxp w8, x0, x1, [x4]
-;
-; -O1-LABEL: cmpxchg_i128_aligned_release_monotonic:
-; -O1: bl __aarch64_cas16_rel
+; CHECK-LABEL: cmpxchg_i128_aligned_release_monotonic:
+; CHECK: bl __aarch64_cas16_rel
%pair = cmpxchg ptr %ptr, i128 %expected, i128 %new release monotonic, align 16
%r = extractvalue { i128, i1 } %pair, 0
ret i128 %r
}
define dso_local i128 @cmpxchg_i128_aligned_release_monotonic_weak(i128 %expected, i128 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i128_aligned_release_monotonic_weak:
-; -O0: ldxp x0, x1, [x4]
-; -O0: cmp x0, x9
-; -O0: cmp x1, x10
-; -O0: stlxp w8, x2, x3, [x4]
-; -O0: stlxp w8, x0, x1, [x4]
-;
-; -O1-LABEL: cmpxchg_i128_aligned_release_monotonic_weak:
-; -O1: bl __aarch64_cas16_rel
+; CHECK-LABEL: cmpxchg_i128_aligned_release_monotonic_weak:
+; CHECK: bl __aarch64_cas16_rel
%pair = cmpxchg weak ptr %ptr, i128 %expected, i128 %new release monotonic, align 16
%r = extractvalue { i128, i1 } %pair, 0
ret i128 %r
}
define dso_local i128 @cmpxchg_i128_aligned_release_acquire(i128 %expected, i128 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i128_aligned_release_acquire:
-; -O0: ldaxp x0, x1, [x4]
-; -O0: cmp x0, x9
-; -O0: cmp x1, x10
-; -O0: stlxp w8, x2, x3, [x4]
-; -O0: stlxp w8, x0, x1, [x4]
-;
-; -O1-LABEL: cmpxchg_i128_aligned_release_acquire:
-; -O1: bl __aarch64_cas16_acq_rel
+; CHECK-LABEL: cmpxchg_i128_aligned_release_acquire:
+; CHECK: bl __aarch64_cas16_acq_rel
%pair = cmpxchg ptr %ptr, i128 %expected, i128 %new release acquire, align 16
%r = extractvalue { i128, i1 } %pair, 0
ret i128 %r
}
define dso_local i128 @cmpxchg_i128_aligned_release_acquire_weak(i128 %expected, i128 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i128_aligned_release_acquire_weak:
-; -O0: ldaxp x0, x1, [x4]
-; -O0: cmp x0, x9
-; -O0: cmp x1, x10
-; -O0: stlxp w8, x2, x3, [x4]
-; -O0: stlxp w8, x0, x1, [x4]
-;
-; -O1-LABEL: cmpxchg_i128_aligned_release_acquire_weak:
-; -O1: bl __aarch64_cas16_acq_rel
+; CHECK-LABEL: cmpxchg_i128_aligned_release_acquire_weak:
+; CHECK: bl __aarch64_cas16_acq_rel
%pair = cmpxchg weak ptr %ptr, i128 %expected, i128 %new release acquire, align 16
%r = extractvalue { i128, i1 } %pair, 0
ret i128 %r
}
define dso_local i128 @cmpxchg_i128_aligned_release_seq_cst(i128 %expected, i128 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i128_aligned_release_seq_cst:
-; -O0: ldaxp x0, x1, [x4]
-; -O0: cmp x0, x9
-; -O0: cmp x1, x10
-; -O0: stlxp w8, x2, x3, [x4]
-; -O0: stlxp w8, x0, x1, [x4]
-;
-; -O1-LABEL: cmpxchg_i128_aligned_release_seq_cst:
-; -O1: bl __aarch64_cas16_acq_rel
+; CHECK-LABEL: cmpxchg_i128_aligned_release_seq_cst:
+; CHECK: bl __aarch64_cas16_acq_rel
%pair = cmpxchg ptr %ptr, i128 %expected, i128 %new release seq_cst, align 16
%r = extractvalue { i128, i1 } %pair, 0
ret i128 %r
}
define dso_local i128 @cmpxchg_i128_aligned_release_seq_cst_weak(i128 %expected, i128 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i128_aligned_release_seq_cst_weak:
-; -O0: ldaxp x0, x1, [x4]
-; -O0: cmp x0, x9
-; -O0: cmp x1, x10
-; -O0: stlxp w8, x2, x3, [x4]
-; -O0: stlxp w8, x0, x1, [x4]
-;
-; -O1-LABEL: cmpxchg_i128_aligned_release_seq_cst_weak:
-; -O1: bl __aarch64_cas16_acq_rel
+; CHECK-LABEL: cmpxchg_i128_aligned_release_seq_cst_weak:
+; CHECK: bl __aarch64_cas16_acq_rel
%pair = cmpxchg weak ptr %ptr, i128 %expected, i128 %new release seq_cst, align 16
%r = extractvalue { i128, i1 } %pair, 0
ret i128 %r
}
define dso_local i128 @cmpxchg_i128_aligned_acq_rel_monotonic(i128 %expected, i128 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i128_aligned_acq_rel_monotonic:
-; -O0: ldaxp x0, x1, [x4]
-; -O0: cmp x0, x9
-; -O0: cmp x1, x10
-; -O0: stlxp w8, x2, x3, [x4]
-; -O0: stlxp w8, x0, x1, [x4]
-;
-; -O1-LABEL: cmpxchg_i128_aligned_acq_rel_monotonic:
-; -O1: bl __aarch64_cas16_acq_rel
+; CHECK-LABEL: cmpxchg_i128_aligned_acq_rel_monotonic:
+; CHECK: bl __aarch64_cas16_acq_rel
%pair = cmpxchg ptr %ptr, i128 %expected, i128 %new acq_rel monotonic, align 16
%r = extractvalue { i128, i1 } %pair, 0
ret i128 %r
}
define dso_local i128 @cmpxchg_i128_aligned_acq_rel_monotonic_weak(i128 %expected, i128 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i128_aligned_acq_rel_monotonic_weak:
-; -O0: ldaxp x0, x1, [x4]
-; -O0: cmp x0, x9
-; -O0: cmp x1, x10
-; -O0: stlxp w8, x2, x3, [x4]
-; -O0: stlxp w8, x0, x1, [x4]
-;
-; -O1-LABEL: cmpxchg_i128_aligned_acq_rel_monotonic_weak:
-; -O1: bl __aarch64_cas16_acq_rel
+; CHECK-LABEL: cmpxchg_i128_aligned_acq_rel_monotonic_weak:
+; CHECK: bl __aarch64_cas16_acq_rel
%pair = cmpxchg weak ptr %ptr, i128 %expected, i128 %new acq_rel monotonic, align 16
%r = extractvalue { i128, i1 } %pair, 0
ret i128 %r
}
define dso_local i128 @cmpxchg_i128_aligned_acq_rel_acquire(i128 %expected, i128 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i128_aligned_acq_rel_acquire:
-; -O0: ldaxp x0, x1, [x4]
-; -O0: cmp x0, x9
-; -O0: cmp x1, x10
-; -O0: stlxp w8, x2, x3, [x4]
-; -O0: stlxp w8, x0, x1, [x4]
-;
-; -O1-LABEL: cmpxchg_i128_aligned_acq_rel_acquire:
-; -O1: bl __aarch64_cas16_acq_rel
+; CHECK-LABEL: cmpxchg_i128_aligned_acq_rel_acquire:
+; CHECK: bl __aarch64_cas16_acq_rel
%pair = cmpxchg ptr %ptr, i128 %expected, i128 %new acq_rel acquire, align 16
%r = extractvalue { i128, i1 } %pair, 0
ret i128 %r
}
define dso_local i128 @cmpxchg_i128_aligned_acq_rel_acquire_weak(i128 %expected, i128 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i128_aligned_acq_rel_acquire_weak:
-; -O0: ldaxp x0, x1, [x4]
-; -O0: cmp x0, x9
-; -O0: cmp x1, x10
-; -O0: stlxp w8, x2, x3, [x4]
-; -O0: stlxp w8, x0, x1, [x4]
-;
-; -O1-LABEL: cmpxchg_i128_aligned_acq_rel_acquire_weak:
-; -O1: bl __aarch64_cas16_acq_rel
+; CHECK-LABEL: cmpxchg_i128_aligned_acq_rel_acquire_weak:
+; CHECK: bl __aarch64_cas16_acq_rel
%pair = cmpxchg weak ptr %ptr, i128 %expected, i128 %new acq_rel acquire, align 16
%r = extractvalue { i128, i1 } %pair, 0
ret i128 %r
}
define dso_local i128 @cmpxchg_i128_aligned_acq_rel_seq_cst(i128 %expected, i128 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i128_aligned_acq_rel_seq_cst:
-; -O0: ldaxp x0, x1, [x4]
-; -O0: cmp x0, x9
-; -O0: cmp x1, x10
-; -O0: stlxp w8, x2, x3, [x4]
-; -O0: stlxp w8, x0, x1, [x4]
-;
-; -O1-LABEL: cmpxchg_i128_aligned_acq_rel_seq_cst:
-; -O1: bl __aarch64_cas16_acq_rel
+; CHECK-LABEL: cmpxchg_i128_aligned_acq_rel_seq_cst:
+; CHECK: bl __aarch64_cas16_acq_rel
%pair = cmpxchg ptr %ptr, i128 %expected, i128 %new acq_rel seq_cst, align 16
%r = extractvalue { i128, i1 } %pair, 0
ret i128 %r
}
define dso_local i128 @cmpxchg_i128_aligned_acq_rel_seq_cst_weak(i128 %expected, i128 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i128_aligned_acq_rel_seq_cst_weak:
-; -O0: ldaxp x0, x1, [x4]
-; -O0: cmp x0, x9
-; -O0: cmp x1, x10
-; -O0: stlxp w8, x2, x3, [x4]
-; -O0: stlxp w8, x0, x1, [x4]
-;
-; -O1-LABEL: cmpxchg_i128_aligned_acq_rel_seq_cst_weak:
-; -O1: bl __aarch64_cas16_acq_rel
+; CHECK-LABEL: cmpxchg_i128_aligned_acq_rel_seq_cst_weak:
+; CHECK: bl __aarch64_cas16_acq_rel
%pair = cmpxchg weak ptr %ptr, i128 %expected, i128 %new acq_rel seq_cst, align 16
%r = extractvalue { i128, i1 } %pair, 0
ret i128 %r
}
define dso_local i128 @cmpxchg_i128_aligned_seq_cst_monotonic(i128 %expected, i128 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i128_aligned_seq_cst_monotonic:
-; -O0: ldaxp x0, x1, [x4]
-; -O0: cmp x0, x9
-; -O0: cmp x1, x10
-; -O0: stlxp w8, x2, x3, [x4]
-; -O0: stlxp w8, x0, x1, [x4]
-;
-; -O1-LABEL: cmpxchg_i128_aligned_seq_cst_monotonic:
-; -O1: bl __aarch64_cas16_acq_rel
+; CHECK-LABEL: cmpxchg_i128_aligned_seq_cst_monotonic:
+; CHECK: bl __aarch64_cas16_acq_rel
%pair = cmpxchg ptr %ptr, i128 %expected, i128 %new seq_cst monotonic, align 16
%r = extractvalue { i128, i1 } %pair, 0
ret i128 %r
}
define dso_local i128 @cmpxchg_i128_aligned_seq_cst_monotonic_weak(i128 %expected, i128 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i128_aligned_seq_cst_monotonic_weak:
-; -O0: ldaxp x0, x1, [x4]
-; -O0: cmp x0, x9
-; -O0: cmp x1, x10
-; -O0: stlxp w8, x2, x3, [x4]
-; -O0: stlxp w8, x0, x1, [x4]
-;
-; -O1-LABEL: cmpxchg_i128_aligned_seq_cst_monotonic_weak:
-; -O1: bl __aarch64_cas16_acq_rel
+; CHECK-LABEL: cmpxchg_i128_aligned_seq_cst_monotonic_weak:
+; CHECK: bl __aarch64_cas16_acq_rel
%pair = cmpxchg weak ptr %ptr, i128 %expected, i128 %new seq_cst monotonic, align 16
%r = extractvalue { i128, i1 } %pair, 0
ret i128 %r
}
define dso_local i128 @cmpxchg_i128_aligned_seq_cst_acquire(i128 %expected, i128 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i128_aligned_seq_cst_acquire:
-; -O0: ldaxp x0, x1, [x4]
-; -O0: cmp x0, x9
-; -O0: cmp x1, x10
-; -O0: stlxp w8, x2, x3, [x4]
-; -O0: stlxp w8, x0, x1, [x4]
-;
-; -O1-LABEL: cmpxchg_i128_aligned_seq_cst_acquire:
-; -O1: bl __aarch64_cas16_acq_rel
+; CHECK-LABEL: cmpxchg_i128_aligned_seq_cst_acquire:
+; CHECK: bl __aarch64_cas16_acq_rel
%pair = cmpxchg ptr %ptr, i128 %expected, i128 %new seq_cst acquire, align 16
%r = extractvalue { i128, i1 } %pair, 0
ret i128 %r
}
define dso_local i128 @cmpxchg_i128_aligned_seq_cst_acquire_weak(i128 %expected, i128 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i128_aligned_seq_cst_acquire_weak:
-; -O0: ldaxp x0, x1, [x4]
-; -O0: cmp x0, x9
-; -O0: cmp x1, x10
-; -O0: stlxp w8, x2, x3, [x4]
-; -O0: stlxp w8, x0, x1, [x4]
-;
-; -O1-LABEL: cmpxchg_i128_aligned_seq_cst_acquire_weak:
-; -O1: bl __aarch64_cas16_acq_rel
+; CHECK-LABEL: cmpxchg_i128_aligned_seq_cst_acquire_weak:
+; CHECK: bl __aarch64_cas16_acq_rel
%pair = cmpxchg weak ptr %ptr, i128 %expected, i128 %new seq_cst acquire, align 16
%r = extractvalue { i128, i1 } %pair, 0
ret i128 %r
}
define dso_local i128 @cmpxchg_i128_aligned_seq_cst_seq_cst(i128 %expected, i128 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i128_aligned_seq_cst_seq_cst:
-; -O0: ldaxp x0, x1, [x4]
-; -O0: cmp x0, x9
-; -O0: cmp x1, x10
-; -O0: stlxp w8, x2, x3, [x4]
-; -O0: stlxp w8, x0, x1, [x4]
-;
-; -O1-LABEL: cmpxchg_i128_aligned_seq_cst_seq_cst:
-; -O1: bl __aarch64_cas16_acq_rel
+; CHECK-LABEL: cmpxchg_i128_aligned_seq_cst_seq_cst:
+; CHECK: bl __aarch64_cas16_acq_rel
%pair = cmpxchg ptr %ptr, i128 %expected, i128 %new seq_cst seq_cst, align 16
%r = extractvalue { i128, i1 } %pair, 0
ret i128 %r
}
define dso_local i128 @cmpxchg_i128_aligned_seq_cst_seq_cst_weak(i128 %expected, i128 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i128_aligned_seq_cst_seq_cst_weak:
-; -O0: ldaxp x0, x1, [x4]
-; -O0: cmp x0, x9
-; -O0: cmp x1, x10
-; -O0: stlxp w8, x2, x3, [x4]
-; -O0: stlxp w8, x0, x1, [x4]
-;
-; -O1-LABEL: cmpxchg_i128_aligned_seq_cst_seq_cst_weak:
-; -O1: bl __aarch64_cas16_acq_rel
+; CHECK-LABEL: cmpxchg_i128_aligned_seq_cst_seq_cst_weak:
+; CHECK: bl __aarch64_cas16_acq_rel
%pair = cmpxchg weak ptr %ptr, i128 %expected, i128 %new seq_cst seq_cst, align 16
%r = extractvalue { i128, i1 } %pair, 0
ret i128 %r
}
define dso_local i8 @cmpxchg_i8_unaligned_monotonic_monotonic(i8 %expected, i8 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i8_unaligned_monotonic_monotonic:
-; -O0: ldaxrb w0, [x2]
-; -O0: cmp w0, w9, uxtb
-; -O0: stlxrb w8, w1, [x2]
-;
-; -O1-LABEL: cmpxchg_i8_unaligned_monotonic_monotonic:
-; -O1: bl __aarch64_cas1_relax
+; CHECK-LABEL: cmpxchg_i8_unaligned_monotonic_monotonic:
+; CHECK: bl __aarch64_cas1_relax
%pair = cmpxchg ptr %ptr, i8 %expected, i8 %new monotonic monotonic, align 1
%r = extractvalue { i8, i1 } %pair, 0
ret i8 %r
}
define dso_local i8 @cmpxchg_i8_unaligned_monotonic_monotonic_weak(i8 %expected, i8 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i8_unaligned_monotonic_monotonic_weak:
-; -O0: ldaxrb w0, [x2]
-; -O0: cmp w0, w9, uxtb
-; -O0: stlxrb w8, w1, [x2]
-;
-; -O1-LABEL: cmpxchg_i8_unaligned_monotonic_monotonic_weak:
-; -O1: bl __aarch64_cas1_relax
+; CHECK-LABEL: cmpxchg_i8_unaligned_monotonic_monotonic_weak:
+; CHECK: bl __aarch64_cas1_relax
%pair = cmpxchg weak ptr %ptr, i8 %expected, i8 %new monotonic monotonic, align 1
%r = extractvalue { i8, i1 } %pair, 0
ret i8 %r
}
define dso_local i8 @cmpxchg_i8_unaligned_monotonic_acquire(i8 %expected, i8 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i8_unaligned_monotonic_acquire:
-; -O0: ldaxrb w0, [x2]
-; -O0: cmp w0, w9, uxtb
-; -O0: stlxrb w8, w1, [x2]
-;
-; -O1-LABEL: cmpxchg_i8_unaligned_monotonic_acquire:
-; -O1: bl __aarch64_cas1_acq
+; CHECK-LABEL: cmpxchg_i8_unaligned_monotonic_acquire:
+; CHECK: bl __aarch64_cas1_acq
%pair = cmpxchg ptr %ptr, i8 %expected, i8 %new monotonic acquire, align 1
%r = extractvalue { i8, i1 } %pair, 0
ret i8 %r
}
define dso_local i8 @cmpxchg_i8_unaligned_monotonic_acquire_weak(i8 %expected, i8 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i8_unaligned_monotonic_acquire_weak:
-; -O0: ldaxrb w0, [x2]
-; -O0: cmp w0, w9, uxtb
-; -O0: stlxrb w8, w1, [x2]
-;
-; -O1-LABEL: cmpxchg_i8_unaligned_monotonic_acquire_weak:
-; -O1: bl __aarch64_cas1_acq
+; CHECK-LABEL: cmpxchg_i8_unaligned_monotonic_acquire_weak:
+; CHECK: bl __aarch64_cas1_acq
%pair = cmpxchg weak ptr %ptr, i8 %expected, i8 %new monotonic acquire, align 1
%r = extractvalue { i8, i1 } %pair, 0
ret i8 %r
}
define dso_local i8 @cmpxchg_i8_unaligned_monotonic_seq_cst(i8 %expected, i8 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i8_unaligned_monotonic_seq_cst:
-; -O0: ldaxrb w0, [x2]
-; -O0: cmp w0, w9, uxtb
-; -O0: stlxrb w8, w1, [x2]
-;
-; -O1-LABEL: cmpxchg_i8_unaligned_monotonic_seq_cst:
-; -O1: bl __aarch64_cas1_acq_rel
+; CHECK-LABEL: cmpxchg_i8_unaligned_monotonic_seq_cst:
+; CHECK: bl __aarch64_cas1_acq_rel
%pair = cmpxchg ptr %ptr, i8 %expected, i8 %new monotonic seq_cst, align 1
%r = extractvalue { i8, i1 } %pair, 0
ret i8 %r
}
define dso_local i8 @cmpxchg_i8_unaligned_monotonic_seq_cst_weak(i8 %expected, i8 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i8_unaligned_monotonic_seq_cst_weak:
-; -O0: ldaxrb w0, [x2]
-; -O0: cmp w0, w9, uxtb
-; -O0: stlxrb w8, w1, [x2]
-;
-; -O1-LABEL: cmpxchg_i8_unaligned_monotonic_seq_cst_weak:
-; -O1: bl __aarch64_cas1_acq_rel
+; CHECK-LABEL: cmpxchg_i8_unaligned_monotonic_seq_cst_weak:
+; CHECK: bl __aarch64_cas1_acq_rel
%pair = cmpxchg weak ptr %ptr, i8 %expected, i8 %new monotonic seq_cst, align 1
%r = extractvalue { i8, i1 } %pair, 0
ret i8 %r
}
define dso_local i8 @cmpxchg_i8_unaligned_acquire_monotonic(i8 %expected, i8 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i8_unaligned_acquire_monotonic:
-; -O0: ldaxrb w0, [x2]
-; -O0: cmp w0, w9, uxtb
-; -O0: stlxrb w8, w1, [x2]
-;
-; -O1-LABEL: cmpxchg_i8_unaligned_acquire_monotonic:
-; -O1: bl __aarch64_cas1_acq
+; CHECK-LABEL: cmpxchg_i8_unaligned_acquire_monotonic:
+; CHECK: bl __aarch64_cas1_acq
%pair = cmpxchg ptr %ptr, i8 %expected, i8 %new acquire monotonic, align 1
%r = extractvalue { i8, i1 } %pair, 0
ret i8 %r
}
define dso_local i8 @cmpxchg_i8_unaligned_acquire_monotonic_weak(i8 %expected, i8 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i8_unaligned_acquire_monotonic_weak:
-; -O0: ldaxrb w0, [x2]
-; -O0: cmp w0, w9, uxtb
-; -O0: stlxrb w8, w1, [x2]
-;
-; -O1-LABEL: cmpxchg_i8_unaligned_acquire_monotonic_weak:
-; -O1: bl __aarch64_cas1_acq
+; CHECK-LABEL: cmpxchg_i8_unaligned_acquire_monotonic_weak:
+; CHECK: bl __aarch64_cas1_acq
%pair = cmpxchg weak ptr %ptr, i8 %expected, i8 %new acquire monotonic, align 1
%r = extractvalue { i8, i1 } %pair, 0
ret i8 %r
}
define dso_local i8 @cmpxchg_i8_unaligned_acquire_acquire(i8 %expected, i8 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i8_unaligned_acquire_acquire:
-; -O0: ldaxrb w0, [x2]
-; -O0: cmp w0, w9, uxtb
-; -O0: stlxrb w8, w1, [x2]
-;
-; -O1-LABEL: cmpxchg_i8_unaligned_acquire_acquire:
-; -O1: bl __aarch64_cas1_acq
+; CHECK-LABEL: cmpxchg_i8_unaligned_acquire_acquire:
+; CHECK: bl __aarch64_cas1_acq
%pair = cmpxchg ptr %ptr, i8 %expected, i8 %new acquire acquire, align 1
%r = extractvalue { i8, i1 } %pair, 0
ret i8 %r
}
define dso_local i8 @cmpxchg_i8_unaligned_acquire_acquire_weak(i8 %expected, i8 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i8_unaligned_acquire_acquire_weak:
-; -O0: ldaxrb w0, [x2]
-; -O0: cmp w0, w9, uxtb
-; -O0: stlxrb w8, w1, [x2]
-;
-; -O1-LABEL: cmpxchg_i8_unaligned_acquire_acquire_weak:
-; -O1: bl __aarch64_cas1_acq
+; CHECK-LABEL: cmpxchg_i8_unaligned_acquire_acquire_weak:
+; CHECK: bl __aarch64_cas1_acq
%pair = cmpxchg weak ptr %ptr, i8 %expected, i8 %new acquire acquire, align 1
%r = extractvalue { i8, i1 } %pair, 0
ret i8 %r
}
define dso_local i8 @cmpxchg_i8_unaligned_acquire_seq_cst(i8 %expected, i8 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i8_unaligned_acquire_seq_cst:
-; -O0: ldaxrb w0, [x2]
-; -O0: cmp w0, w9, uxtb
-; -O0: stlxrb w8, w1, [x2]
-;
-; -O1-LABEL: cmpxchg_i8_unaligned_acquire_seq_cst:
-; -O1: bl __aarch64_cas1_acq_rel
+; CHECK-LABEL: cmpxchg_i8_unaligned_acquire_seq_cst:
+; CHECK: bl __aarch64_cas1_acq_rel
%pair = cmpxchg ptr %ptr, i8 %expected, i8 %new acquire seq_cst, align 1
%r = extractvalue { i8, i1 } %pair, 0
ret i8 %r
}
define dso_local i8 @cmpxchg_i8_unaligned_acquire_seq_cst_weak(i8 %expected, i8 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i8_unaligned_acquire_seq_cst_weak:
-; -O0: ldaxrb w0, [x2]
-; -O0: cmp w0, w9, uxtb
-; -O0: stlxrb w8, w1, [x2]
-;
-; -O1-LABEL: cmpxchg_i8_unaligned_acquire_seq_cst_weak:
-; -O1: bl __aarch64_cas1_acq_rel
+; CHECK-LABEL: cmpxchg_i8_unaligned_acquire_seq_cst_weak:
+; CHECK: bl __aarch64_cas1_acq_rel
%pair = cmpxchg weak ptr %ptr, i8 %expected, i8 %new acquire seq_cst, align 1
%r = extractvalue { i8, i1 } %pair, 0
ret i8 %r
}
define dso_local i8 @cmpxchg_i8_unaligned_release_monotonic(i8 %expected, i8 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i8_unaligned_release_monotonic:
-; -O0: ldaxrb w0, [x2]
-; -O0: cmp w0, w9, uxtb
-; -O0: stlxrb w8, w1, [x2]
-;
-; -O1-LABEL: cmpxchg_i8_unaligned_release_monotonic:
-; -O1: bl __aarch64_cas1_rel
+; CHECK-LABEL: cmpxchg_i8_unaligned_release_monotonic:
+; CHECK: bl __aarch64_cas1_rel
%pair = cmpxchg ptr %ptr, i8 %expected, i8 %new release monotonic, align 1
%r = extractvalue { i8, i1 } %pair, 0
ret i8 %r
}
define dso_local i8 @cmpxchg_i8_unaligned_release_monotonic_weak(i8 %expected, i8 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i8_unaligned_release_monotonic_weak:
-; -O0: ldaxrb w0, [x2]
-; -O0: cmp w0, w9, uxtb
-; -O0: stlxrb w8, w1, [x2]
-;
-; -O1-LABEL: cmpxchg_i8_unaligned_release_monotonic_weak:
-; -O1: bl __aarch64_cas1_rel
+; CHECK-LABEL: cmpxchg_i8_unaligned_release_monotonic_weak:
+; CHECK: bl __aarch64_cas1_rel
%pair = cmpxchg weak ptr %ptr, i8 %expected, i8 %new release monotonic, align 1
%r = extractvalue { i8, i1 } %pair, 0
ret i8 %r
}
define dso_local i8 @cmpxchg_i8_unaligned_release_acquire(i8 %expected, i8 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i8_unaligned_release_acquire:
-; -O0: ldaxrb w0, [x2]
-; -O0: cmp w0, w9, uxtb
-; -O0: stlxrb w8, w1, [x2]
-;
-; -O1-LABEL: cmpxchg_i8_unaligned_release_acquire:
-; -O1: bl __aarch64_cas1_acq_rel
+; CHECK-LABEL: cmpxchg_i8_unaligned_release_acquire:
+; CHECK: bl __aarch64_cas1_acq_rel
%pair = cmpxchg ptr %ptr, i8 %expected, i8 %new release acquire, align 1
%r = extractvalue { i8, i1 } %pair, 0
ret i8 %r
}
define dso_local i8 @cmpxchg_i8_unaligned_release_acquire_weak(i8 %expected, i8 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i8_unaligned_release_acquire_weak:
-; -O0: ldaxrb w0, [x2]
-; -O0: cmp w0, w9, uxtb
-; -O0: stlxrb w8, w1, [x2]
-;
-; -O1-LABEL: cmpxchg_i8_unaligned_release_acquire_weak:
-; -O1: bl __aarch64_cas1_acq_rel
+; CHECK-LABEL: cmpxchg_i8_unaligned_release_acquire_weak:
+; CHECK: bl __aarch64_cas1_acq_rel
%pair = cmpxchg weak ptr %ptr, i8 %expected, i8 %new release acquire, align 1
%r = extractvalue { i8, i1 } %pair, 0
ret i8 %r
}
define dso_local i8 @cmpxchg_i8_unaligned_release_seq_cst(i8 %expected, i8 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i8_unaligned_release_seq_cst:
-; -O0: ldaxrb w0, [x2]
-; -O0: cmp w0, w9, uxtb
-; -O0: stlxrb w8, w1, [x2]
-;
-; -O1-LABEL: cmpxchg_i8_unaligned_release_seq_cst:
-; -O1: bl __aarch64_cas1_acq_rel
+; CHECK-LABEL: cmpxchg_i8_unaligned_release_seq_cst:
+; CHECK: bl __aarch64_cas1_acq_rel
%pair = cmpxchg ptr %ptr, i8 %expected, i8 %new release seq_cst, align 1
%r = extractvalue { i8, i1 } %pair, 0
ret i8 %r
}
define dso_local i8 @cmpxchg_i8_unaligned_release_seq_cst_weak(i8 %expected, i8 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i8_unaligned_release_seq_cst_weak:
-; -O0: ldaxrb w0, [x2]
-; -O0: cmp w0, w9, uxtb
-; -O0: stlxrb w8, w1, [x2]
-;
-; -O1-LABEL: cmpxchg_i8_unaligned_release_seq_cst_weak:
-; -O1: bl __aarch64_cas1_acq_rel
+; CHECK-LABEL: cmpxchg_i8_unaligned_release_seq_cst_weak:
+; CHECK: bl __aarch64_cas1_acq_rel
%pair = cmpxchg weak ptr %ptr, i8 %expected, i8 %new release seq_cst, align 1
%r = extractvalue { i8, i1 } %pair, 0
ret i8 %r
}
define dso_local i8 @cmpxchg_i8_unaligned_acq_rel_monotonic(i8 %expected, i8 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i8_unaligned_acq_rel_monotonic:
-; -O0: ldaxrb w0, [x2]
-; -O0: cmp w0, w9, uxtb
-; -O0: stlxrb w8, w1, [x2]
-;
-; -O1-LABEL: cmpxchg_i8_unaligned_acq_rel_monotonic:
-; -O1: bl __aarch64_cas1_acq_rel
+; CHECK-LABEL: cmpxchg_i8_unaligned_acq_rel_monotonic:
+; CHECK: bl __aarch64_cas1_acq_rel
%pair = cmpxchg ptr %ptr, i8 %expected, i8 %new acq_rel monotonic, align 1
%r = extractvalue { i8, i1 } %pair, 0
ret i8 %r
}
define dso_local i8 @cmpxchg_i8_unaligned_acq_rel_monotonic_weak(i8 %expected, i8 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i8_unaligned_acq_rel_monotonic_weak:
-; -O0: ldaxrb w0, [x2]
-; -O0: cmp w0, w9, uxtb
-; -O0: stlxrb w8, w1, [x2]
-;
-; -O1-LABEL: cmpxchg_i8_unaligned_acq_rel_monotonic_weak:
-; -O1: bl __aarch64_cas1_acq_rel
+; CHECK-LABEL: cmpxchg_i8_unaligned_acq_rel_monotonic_weak:
+; CHECK: bl __aarch64_cas1_acq_rel
%pair = cmpxchg weak ptr %ptr, i8 %expected, i8 %new acq_rel monotonic, align 1
%r = extractvalue { i8, i1 } %pair, 0
ret i8 %r
}
define dso_local i8 @cmpxchg_i8_unaligned_acq_rel_acquire(i8 %expected, i8 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i8_unaligned_acq_rel_acquire:
-; -O0: ldaxrb w0, [x2]
-; -O0: cmp w0, w9, uxtb
-; -O0: stlxrb w8, w1, [x2]
-;
-; -O1-LABEL: cmpxchg_i8_unaligned_acq_rel_acquire:
-; -O1: bl __aarch64_cas1_acq_rel
+; CHECK-LABEL: cmpxchg_i8_unaligned_acq_rel_acquire:
+; CHECK: bl __aarch64_cas1_acq_rel
%pair = cmpxchg ptr %ptr, i8 %expected, i8 %new acq_rel acquire, align 1
%r = extractvalue { i8, i1 } %pair, 0
ret i8 %r
}
define dso_local i8 @cmpxchg_i8_unaligned_acq_rel_acquire_weak(i8 %expected, i8 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i8_unaligned_acq_rel_acquire_weak:
-; -O0: ldaxrb w0, [x2]
-; -O0: cmp w0, w9, uxtb
-; -O0: stlxrb w8, w1, [x2]
-;
-; -O1-LABEL: cmpxchg_i8_unaligned_acq_rel_acquire_weak:
-; -O1: bl __aarch64_cas1_acq_rel
+; CHECK-LABEL: cmpxchg_i8_unaligned_acq_rel_acquire_weak:
+; CHECK: bl __aarch64_cas1_acq_rel
%pair = cmpxchg weak ptr %ptr, i8 %expected, i8 %new acq_rel acquire, align 1
%r = extractvalue { i8, i1 } %pair, 0
ret i8 %r
}
define dso_local i8 @cmpxchg_i8_unaligned_acq_rel_seq_cst(i8 %expected, i8 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i8_unaligned_acq_rel_seq_cst:
-; -O0: ldaxrb w0, [x2]
-; -O0: cmp w0, w9, uxtb
-; -O0: stlxrb w8, w1, [x2]
-;
-; -O1-LABEL: cmpxchg_i8_unaligned_acq_rel_seq_cst:
-; -O1: bl __aarch64_cas1_acq_rel
+; CHECK-LABEL: cmpxchg_i8_unaligned_acq_rel_seq_cst:
+; CHECK: bl __aarch64_cas1_acq_rel
%pair = cmpxchg ptr %ptr, i8 %expected, i8 %new acq_rel seq_cst, align 1
%r = extractvalue { i8, i1 } %pair, 0
ret i8 %r
}
define dso_local i8 @cmpxchg_i8_unaligned_acq_rel_seq_cst_weak(i8 %expected, i8 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i8_unaligned_acq_rel_seq_cst_weak:
-; -O0: ldaxrb w0, [x2]
-; -O0: cmp w0, w9, uxtb
-; -O0: stlxrb w8, w1, [x2]
-;
-; -O1-LABEL: cmpxchg_i8_unaligned_acq_rel_seq_cst_weak:
-; -O1: bl __aarch64_cas1_acq_rel
+; CHECK-LABEL: cmpxchg_i8_unaligned_acq_rel_seq_cst_weak:
+; CHECK: bl __aarch64_cas1_acq_rel
%pair = cmpxchg weak ptr %ptr, i8 %expected, i8 %new acq_rel seq_cst, align 1
%r = extractvalue { i8, i1 } %pair, 0
ret i8 %r
}
define dso_local i8 @cmpxchg_i8_unaligned_seq_cst_monotonic(i8 %expected, i8 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i8_unaligned_seq_cst_monotonic:
-; -O0: ldaxrb w0, [x2]
-; -O0: cmp w0, w9, uxtb
-; -O0: stlxrb w8, w1, [x2]
-;
-; -O1-LABEL: cmpxchg_i8_unaligned_seq_cst_monotonic:
-; -O1: bl __aarch64_cas1_acq_rel
+; CHECK-LABEL: cmpxchg_i8_unaligned_seq_cst_monotonic:
+; CHECK: bl __aarch64_cas1_acq_rel
%pair = cmpxchg ptr %ptr, i8 %expected, i8 %new seq_cst monotonic, align 1
%r = extractvalue { i8, i1 } %pair, 0
ret i8 %r
}
define dso_local i8 @cmpxchg_i8_unaligned_seq_cst_monotonic_weak(i8 %expected, i8 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i8_unaligned_seq_cst_monotonic_weak:
-; -O0: ldaxrb w0, [x2]
-; -O0: cmp w0, w9, uxtb
-; -O0: stlxrb w8, w1, [x2]
-;
-; -O1-LABEL: cmpxchg_i8_unaligned_seq_cst_monotonic_weak:
-; -O1: bl __aarch64_cas1_acq_rel
+; CHECK-LABEL: cmpxchg_i8_unaligned_seq_cst_monotonic_weak:
+; CHECK: bl __aarch64_cas1_acq_rel
%pair = cmpxchg weak ptr %ptr, i8 %expected, i8 %new seq_cst monotonic, align 1
%r = extractvalue { i8, i1 } %pair, 0
ret i8 %r
}
define dso_local i8 @cmpxchg_i8_unaligned_seq_cst_acquire(i8 %expected, i8 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i8_unaligned_seq_cst_acquire:
-; -O0: ldaxrb w0, [x2]
-; -O0: cmp w0, w9, uxtb
-; -O0: stlxrb w8, w1, [x2]
-;
-; -O1-LABEL: cmpxchg_i8_unaligned_seq_cst_acquire:
-; -O1: bl __aarch64_cas1_acq_rel
+; CHECK-LABEL: cmpxchg_i8_unaligned_seq_cst_acquire:
+; CHECK: bl __aarch64_cas1_acq_rel
%pair = cmpxchg ptr %ptr, i8 %expected, i8 %new seq_cst acquire, align 1
%r = extractvalue { i8, i1 } %pair, 0
ret i8 %r
}
define dso_local i8 @cmpxchg_i8_unaligned_seq_cst_acquire_weak(i8 %expected, i8 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i8_unaligned_seq_cst_acquire_weak:
-; -O0: ldaxrb w0, [x2]
-; -O0: cmp w0, w9, uxtb
-; -O0: stlxrb w8, w1, [x2]
-;
-; -O1-LABEL: cmpxchg_i8_unaligned_seq_cst_acquire_weak:
-; -O1: bl __aarch64_cas1_acq_rel
+; CHECK-LABEL: cmpxchg_i8_unaligned_seq_cst_acquire_weak:
+; CHECK: bl __aarch64_cas1_acq_rel
%pair = cmpxchg weak ptr %ptr, i8 %expected, i8 %new seq_cst acquire, align 1
%r = extractvalue { i8, i1 } %pair, 0
ret i8 %r
}
define dso_local i8 @cmpxchg_i8_unaligned_seq_cst_seq_cst(i8 %expected, i8 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i8_unaligned_seq_cst_seq_cst:
-; -O0: ldaxrb w0, [x2]
-; -O0: cmp w0, w9, uxtb
-; -O0: stlxrb w8, w1, [x2]
-;
-; -O1-LABEL: cmpxchg_i8_unaligned_seq_cst_seq_cst:
-; -O1: bl __aarch64_cas1_acq_rel
+; CHECK-LABEL: cmpxchg_i8_unaligned_seq_cst_seq_cst:
+; CHECK: bl __aarch64_cas1_acq_rel
%pair = cmpxchg ptr %ptr, i8 %expected, i8 %new seq_cst seq_cst, align 1
%r = extractvalue { i8, i1 } %pair, 0
ret i8 %r
}
define dso_local i8 @cmpxchg_i8_unaligned_seq_cst_seq_cst_weak(i8 %expected, i8 %new, ptr %ptr) {
-; -O0-LABEL: cmpxchg_i8_unaligned_seq_cst_seq_cst_weak:
-; -O0: ldaxrb w0, [x2]
-; -O0: cmp w0, w9, uxtb
-; -O0: stlxrb w8, w1, [x2]
-;
-; -O1-LABEL: cmpxchg_i8_unaligned_seq_cst_seq_cst_weak:
-; -O1: bl __aarch64_cas1_acq_rel
+; CHECK-LABEL: cmpxchg_i8_unaligned_seq_cst_seq_cst_weak:
+; CHECK: bl __aarch64_cas1_acq_rel
%pair = cmpxchg weak ptr %ptr, i8 %expected, i8 %new seq_cst seq_cst, align 1
%r = extractvalue { i8, i1 } %pair, 0
ret i8 %r
@@ -3362,3 +2402,6 @@ define dso_local i128 @cmpxchg_i128_unaligned_seq_cst_seq_cst_weak(i128 %expecte
%r = extractvalue { i128, i1 } %pair, 0
ret i128 %r
}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; -O0: {{.*}}
+; -O1: {{.*}}
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic-128.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic-128.ll
index a3d8531f5c7659..1fe63c9be8c629 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic-128.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic-128.ll
@@ -1,8 +1,12 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=arm64-linux-gnu -verify-machineinstrs -global-isel -global-isel-abort=1 | FileCheck %s --check-prefix=CHECK-LLSC-O1
+; RUN: llc < %s -mtriple=arm64-linux-gnu -verify-machineinstrs -mattr=+outline-atomics -global-isel -global-isel-abort=1 | FileCheck %s --check-prefix=CHECK-OUTLINE-LLSC-O1
; RUN: llc < %s -mtriple=arm64-linux-gnu -verify-machineinstrs -mattr=+lse -global-isel -global-isel-abort=1 | FileCheck %s --check-prefix=CHECK-CAS-O1
+; RUN: llc < %s -mtriple=arm64-linux-gnu -verify-machineinstrs -mattr=+lse,+outline-atomics -global-isel -global-isel-abort=1 | FileCheck %s --check-prefix=CHECK-CAS-O1
; RUN: llc < %s -mtriple=arm64-linux-gnu -verify-machineinstrs -O0 -global-isel -global-isel-abort=1 | FileCheck %s --check-prefix=CHECK-LLSC-O0
+; RUN: llc < %s -mtriple=arm64-linux-gnu -verify-machineinstrs -O0 -mattr=+outline-atomics -global-isel -global-isel-abort=1 | FileCheck %s --check-prefix=CHECK-OUTLINE-LLSC-O0
; RUN: llc < %s -mtriple=arm64-linux-gnu -verify-machineinstrs -O0 -mattr=+lse -global-isel -global-isel-abort=1 | FileCheck %s --check-prefix=CHECK-CAS-O0
+; RUN: llc < %s -mtriple=arm64-linux-gnu -verify-machineinstrs -O0 -mattr=+lse,+outline-atomics -global-isel -global-isel-abort=1 | FileCheck %s --check-prefix=CHECK-CAS-O0
@var = global i128 0
define void @val_compare_and_swap(ptr %p, i128 %oldval, i128 %newval) {
@@ -28,6 +32,25 @@ define void @val_compare_and_swap(ptr %p, i128 %oldval, i128 %newval) {
; CHECK-LLSC-O1-NEXT: str q0, [x0]
; CHECK-LLSC-O1-NEXT: ret
;
+; CHECK-OUTLINE-LLSC-O1-LABEL: val_compare_and_swap:
+; CHECK-OUTLINE-LLSC-O1: // %bb.0:
+; CHECK-OUTLINE-LLSC-O1-NEXT: stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
+; CHECK-OUTLINE-LLSC-O1-NEXT: .cfi_def_cfa_offset 16
+; CHECK-OUTLINE-LLSC-O1-NEXT: .cfi_offset w19, -8
+; CHECK-OUTLINE-LLSC-O1-NEXT: .cfi_offset w30, -16
+; CHECK-OUTLINE-LLSC-O1-NEXT: mov x19, x0
+; CHECK-OUTLINE-LLSC-O1-NEXT: mov x0, x2
+; CHECK-OUTLINE-LLSC-O1-NEXT: mov x1, x3
+; CHECK-OUTLINE-LLSC-O1-NEXT: mov x2, x4
+; CHECK-OUTLINE-LLSC-O1-NEXT: mov x3, x5
+; CHECK-OUTLINE-LLSC-O1-NEXT: mov x4, x19
+; CHECK-OUTLINE-LLSC-O1-NEXT: bl __aarch64_cas16_acq
+; CHECK-OUTLINE-LLSC-O1-NEXT: mov v0.d[0], x0
+; CHECK-OUTLINE-LLSC-O1-NEXT: mov v0.d[1], x1
+; CHECK-OUTLINE-LLSC-O1-NEXT: str q0, [x19]
+; CHECK-OUTLINE-LLSC-O1-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload
+; CHECK-OUTLINE-LLSC-O1-NEXT: ret
+;
; CHECK-CAS-O1-LABEL: val_compare_and_swap:
; CHECK-CAS-O1: // %bb.0:
; CHECK-CAS-O1-NEXT: // kill: def $x2 killed $x2 killed $x2_x3 def $x2_x3
@@ -63,6 +86,29 @@ define void @val_compare_and_swap(ptr %p, i128 %oldval, i128 %newval) {
; CHECK-LLSC-O0-NEXT: str q0, [x0]
; CHECK-LLSC-O0-NEXT: ret
;
+; CHECK-OUTLINE-LLSC-O0-LABEL: val_compare_and_swap:
+; CHECK-OUTLINE-LLSC-O0: // %bb.0:
+; CHECK-OUTLINE-LLSC-O0-NEXT: sub sp, sp, #32
+; CHECK-OUTLINE-LLSC-O0-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-OUTLINE-LLSC-O0-NEXT: .cfi_def_cfa_offset 32
+; CHECK-OUTLINE-LLSC-O0-NEXT: .cfi_offset w30, -16
+; CHECK-OUTLINE-LLSC-O0-NEXT: str x0, [sp, #8] // 8-byte Folded Spill
+; CHECK-OUTLINE-LLSC-O0-NEXT: mov x0, x2
+; CHECK-OUTLINE-LLSC-O0-NEXT: mov x1, x3
+; CHECK-OUTLINE-LLSC-O0-NEXT: mov x2, x4
+; CHECK-OUTLINE-LLSC-O0-NEXT: ldr x4, [sp, #8] // 8-byte Folded Reload
+; CHECK-OUTLINE-LLSC-O0-NEXT: mov x3, x5
+; CHECK-OUTLINE-LLSC-O0-NEXT: bl __aarch64_cas16_acq
+; CHECK-OUTLINE-LLSC-O0-NEXT: mov x8, x0
+; CHECK-OUTLINE-LLSC-O0-NEXT: ldr x0, [sp, #8] // 8-byte Folded Reload
+; CHECK-OUTLINE-LLSC-O0-NEXT: // implicit-def: $q0
+; CHECK-OUTLINE-LLSC-O0-NEXT: mov v0.d[0], x8
+; CHECK-OUTLINE-LLSC-O0-NEXT: mov v0.d[1], x1
+; CHECK-OUTLINE-LLSC-O0-NEXT: str q0, [x0]
+; CHECK-OUTLINE-LLSC-O0-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-OUTLINE-LLSC-O0-NEXT: add sp, sp, #32
+; CHECK-OUTLINE-LLSC-O0-NEXT: ret
+;
; CHECK-CAS-O0-LABEL: val_compare_and_swap:
; CHECK-CAS-O0: // %bb.0:
; CHECK-CAS-O0-NEXT: sub sp, sp, #16
@@ -113,6 +159,25 @@ define void @val_compare_and_swap_monotonic_seqcst(ptr %p, i128 %oldval, i128 %n
; CHECK-LLSC-O1-NEXT: str q0, [x0]
; CHECK-LLSC-O1-NEXT: ret
;
+; CHECK-OUTLINE-LLSC-O1-LABEL: val_compare_and_swap_monotonic_seqcst:
+; CHECK-OUTLINE-LLSC-O1: // %bb.0:
+; CHECK-OUTLINE-LLSC-O1-NEXT: stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
+; CHECK-OUTLINE-LLSC-O1-NEXT: .cfi_def_cfa_offset 16
+; CHECK-OUTLINE-LLSC-O1-NEXT: .cfi_offset w19, -8
+; CHECK-OUTLINE-LLSC-O1-NEXT: .cfi_offset w30, -16
+; CHECK-OUTLINE-LLSC-O1-NEXT: mov x19, x0
+; CHECK-OUTLINE-LLSC-O1-NEXT: mov x0, x2
+; CHECK-OUTLINE-LLSC-O1-NEXT: mov x1, x3
+; CHECK-OUTLINE-LLSC-O1-NEXT: mov x2, x4
+; CHECK-OUTLINE-LLSC-O1-NEXT: mov x3, x5
+; CHECK-OUTLINE-LLSC-O1-NEXT: mov x4, x19
+; CHECK-OUTLINE-LLSC-O1-NEXT: bl __aarch64_cas16_acq_rel
+; CHECK-OUTLINE-LLSC-O1-NEXT: mov v0.d[0], x0
+; CHECK-OUTLINE-LLSC-O1-NEXT: mov v0.d[1], x1
+; CHECK-OUTLINE-LLSC-O1-NEXT: str q0, [x19]
+; CHECK-OUTLINE-LLSC-O1-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload
+; CHECK-OUTLINE-LLSC-O1-NEXT: ret
+;
; CHECK-CAS-O1-LABEL: val_compare_and_swap_monotonic_seqcst:
; CHECK-CAS-O1: // %bb.0:
; CHECK-CAS-O1-NEXT: // kill: def $x2 killed $x2 killed $x2_x3 def $x2_x3
@@ -148,6 +213,29 @@ define void @val_compare_and_swap_monotonic_seqcst(ptr %p, i128 %oldval, i128 %n
; CHECK-LLSC-O0-NEXT: str q0, [x0]
; CHECK-LLSC-O0-NEXT: ret
;
+; CHECK-OUTLINE-LLSC-O0-LABEL: val_compare_and_swap_monotonic_seqcst:
+; CHECK-OUTLINE-LLSC-O0: // %bb.0:
+; CHECK-OUTLINE-LLSC-O0-NEXT: sub sp, sp, #32
+; CHECK-OUTLINE-LLSC-O0-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-OUTLINE-LLSC-O0-NEXT: .cfi_def_cfa_offset 32
+; CHECK-OUTLINE-LLSC-O0-NEXT: .cfi_offset w30, -16
+; CHECK-OUTLINE-LLSC-O0-NEXT: str x0, [sp, #8] // 8-byte Folded Spill
+; CHECK-OUTLINE-LLSC-O0-NEXT: mov x0, x2
+; CHECK-OUTLINE-LLSC-O0-NEXT: mov x1, x3
+; CHECK-OUTLINE-LLSC-O0-NEXT: mov x2, x4
+; CHECK-OUTLINE-LLSC-O0-NEXT: ldr x4, [sp, #8] // 8-byte Folded Reload
+; CHECK-OUTLINE-LLSC-O0-NEXT: mov x3, x5
+; CHECK-OUTLINE-LLSC-O0-NEXT: bl __aarch64_cas16_acq_rel
+; CHECK-OUTLINE-LLSC-O0-NEXT: mov x8, x0
+; CHECK-OUTLINE-LLSC-O0-NEXT: ldr x0, [sp, #8] // 8-byte Folded Reload
+; CHECK-OUTLINE-LLSC-O0-NEXT: // implicit-def: $q0
+; CHECK-OUTLINE-LLSC-O0-NEXT: mov v0.d[0], x8
+; CHECK-OUTLINE-LLSC-O0-NEXT: mov v0.d[1], x1
+; CHECK-OUTLINE-LLSC-O0-NEXT: str q0, [x0]
+; CHECK-OUTLINE-LLSC-O0-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-OUTLINE-LLSC-O0-NEXT: add sp, sp, #32
+; CHECK-OUTLINE-LLSC-O0-NEXT: ret
+;
; CHECK-CAS-O0-LABEL: val_compare_and_swap_monotonic_seqcst:
; CHECK-CAS-O0: // %bb.0:
; CHECK-CAS-O0-NEXT: sub sp, sp, #16
@@ -198,6 +286,25 @@ define void @val_compare_and_swap_release_acquire(ptr %p, i128 %oldval, i128 %ne
; CHECK-LLSC-O1-NEXT: str q0, [x0]
; CHECK-LLSC-O1-NEXT: ret
;
+; CHECK-OUTLINE-LLSC-O1-LABEL: val_compare_and_swap_release_acquire:
+; CHECK-OUTLINE-LLSC-O1: // %bb.0:
+; CHECK-OUTLINE-LLSC-O1-NEXT: stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
+; CHECK-OUTLINE-LLSC-O1-NEXT: .cfi_def_cfa_offset 16
+; CHECK-OUTLINE-LLSC-O1-NEXT: .cfi_offset w19, -8
+; CHECK-OUTLINE-LLSC-O1-NEXT: .cfi_offset w30, -16
+; CHECK-OUTLINE-LLSC-O1-NEXT: mov x19, x0
+; CHECK-OUTLINE-LLSC-O1-NEXT: mov x0, x2
+; CHECK-OUTLINE-LLSC-O1-NEXT: mov x1, x3
+; CHECK-OUTLINE-LLSC-O1-NEXT: mov x2, x4
+; CHECK-OUTLINE-LLSC-O1-NEXT: mov x3, x5
+; CHECK-OUTLINE-LLSC-O1-NEXT: mov x4, x19
+; CHECK-OUTLINE-LLSC-O1-NEXT: bl __aarch64_cas16_acq_rel
+; CHECK-OUTLINE-LLSC-O1-NEXT: mov v0.d[0], x0
+; CHECK-OUTLINE-LLSC-O1-NEXT: mov v0.d[1], x1
+; CHECK-OUTLINE-LLSC-O1-NEXT: str q0, [x19]
+; CHECK-OUTLINE-LLSC-O1-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload
+; CHECK-OUTLINE-LLSC-O1-NEXT: ret
+;
; CHECK-CAS-O1-LABEL: val_compare_and_swap_release_acquire:
; CHECK-CAS-O1: // %bb.0:
; CHECK-CAS-O1-NEXT: // kill: def $x2 killed $x2 killed $x2_x3 def $x2_x3
@@ -233,6 +340,29 @@ define void @val_compare_and_swap_release_acquire(ptr %p, i128 %oldval, i128 %ne
; CHECK-LLSC-O0-NEXT: str q0, [x0]
; CHECK-LLSC-O0-NEXT: ret
;
+; CHECK-OUTLINE-LLSC-O0-LABEL: val_compare_and_swap_release_acquire:
+; CHECK-OUTLINE-LLSC-O0: // %bb.0:
+; CHECK-OUTLINE-LLSC-O0-NEXT: sub sp, sp, #32
+; CHECK-OUTLINE-LLSC-O0-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-OUTLINE-LLSC-O0-NEXT: .cfi_def_cfa_offset 32
+; CHECK-OUTLINE-LLSC-O0-NEXT: .cfi_offset w30, -16
+; CHECK-OUTLINE-LLSC-O0-NEXT: str x0, [sp, #8] // 8-byte Folded Spill
+; CHECK-OUTLINE-LLSC-O0-NEXT: mov x0, x2
+; CHECK-OUTLINE-LLSC-O0-NEXT: mov x1, x3
+; CHECK-OUTLINE-LLSC-O0-NEXT: mov x2, x4
+; CHECK-OUTLINE-LLSC-O0-NEXT: ldr x4, [sp, #8] // 8-byte Folded Reload
+; CHECK-OUTLINE-LLSC-O0-NEXT: mov x3, x5
+; CHECK-OUTLINE-LLSC-O0-NEXT: bl __aarch64_cas16_acq_rel
+; CHECK-OUTLINE-LLSC-O0-NEXT: mov x8, x0
+; CHECK-OUTLINE-LLSC-O0-NEXT: ldr x0, [sp, #8] // 8-byte Folded Reload
+; CHECK-OUTLINE-LLSC-O0-NEXT: // implicit-def: $q0
+; CHECK-OUTLINE-LLSC-O0-NEXT: mov v0.d[0], x8
+; CHECK-OUTLINE-LLSC-O0-NEXT: mov v0.d[1], x1
+; CHECK-OUTLINE-LLSC-O0-NEXT: str q0, [x0]
+; CHECK-OUTLINE-LLSC-O0-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-OUTLINE-LLSC-O0-NEXT: add sp, sp, #32
+; CHECK-OUTLINE-LLSC-O0-NEXT: ret
+;
; CHECK-CAS-O0-LABEL: val_compare_and_swap_release_acquire:
; CHECK-CAS-O0: // %bb.0:
; CHECK-CAS-O0-NEXT: sub sp, sp, #16
@@ -283,6 +413,25 @@ define void @val_compare_and_swap_monotonic(ptr %p, i128 %oldval, i128 %newval)
; CHECK-LLSC-O1-NEXT: str q0, [x0]
; CHECK-LLSC-O1-NEXT: ret
;
+; CHECK-OUTLINE-LLSC-O1-LABEL: val_compare_and_swap_monotonic:
+; CHECK-OUTLINE-LLSC-O1: // %bb.0:
+; CHECK-OUTLINE-LLSC-O1-NEXT: stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
+; CHECK-OUTLINE-LLSC-O1-NEXT: .cfi_def_cfa_offset 16
+; CHECK-OUTLINE-LLSC-O1-NEXT: .cfi_offset w19, -8
+; CHECK-OUTLINE-LLSC-O1-NEXT: .cfi_offset w30, -16
+; CHECK-OUTLINE-LLSC-O1-NEXT: mov x19, x0
+; CHECK-OUTLINE-LLSC-O1-NEXT: mov x0, x2
+; CHECK-OUTLINE-LLSC-O1-NEXT: mov x1, x3
+; CHECK-OUTLINE-LLSC-O1-NEXT: mov x2, x4
+; CHECK-OUTLINE-LLSC-O1-NEXT: mov x3, x5
+; CHECK-OUTLINE-LLSC-O1-NEXT: mov x4, x19
+; CHECK-OUTLINE-LLSC-O1-NEXT: bl __aarch64_cas16_acq_rel
+; CHECK-OUTLINE-LLSC-O1-NEXT: mov v0.d[0], x0
+; CHECK-OUTLINE-LLSC-O1-NEXT: mov v0.d[1], x1
+; CHECK-OUTLINE-LLSC-O1-NEXT: str q0, [x19]
+; CHECK-OUTLINE-LLSC-O1-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload
+; CHECK-OUTLINE-LLSC-O1-NEXT: ret
+;
; CHECK-CAS-O1-LABEL: val_compare_and_swap_monotonic:
; CHECK-CAS-O1: // %bb.0:
; CHECK-CAS-O1-NEXT: // kill: def $x2 killed $x2 killed $x2_x3 def $x2_x3
@@ -318,6 +467,29 @@ define void @val_compare_and_swap_monotonic(ptr %p, i128 %oldval, i128 %newval)
; CHECK-LLSC-O0-NEXT: str q0, [x0]
; CHECK-LLSC-O0-NEXT: ret
;
+; CHECK-OUTLINE-LLSC-O0-LABEL: val_compare_and_swap_monotonic:
+; CHECK-OUTLINE-LLSC-O0: // %bb.0:
+; CHECK-OUTLINE-LLSC-O0-NEXT: sub sp, sp, #32
+; CHECK-OUTLINE-LLSC-O0-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-OUTLINE-LLSC-O0-NEXT: .cfi_def_cfa_offset 32
+; CHECK-OUTLINE-LLSC-O0-NEXT: .cfi_offset w30, -16
+; CHECK-OUTLINE-LLSC-O0-NEXT: str x0, [sp, #8] // 8-byte Folded Spill
+; CHECK-OUTLINE-LLSC-O0-NEXT: mov x0, x2
+; CHECK-OUTLINE-LLSC-O0-NEXT: mov x1, x3
+; CHECK-OUTLINE-LLSC-O0-NEXT: mov x2, x4
+; CHECK-OUTLINE-LLSC-O0-NEXT: ldr x4, [sp, #8] // 8-byte Folded Reload
+; CHECK-OUTLINE-LLSC-O0-NEXT: mov x3, x5
+; CHECK-OUTLINE-LLSC-O0-NEXT: bl __aarch64_cas16_acq_rel
+; CHECK-OUTLINE-LLSC-O0-NEXT: mov x8, x0
+; CHECK-OUTLINE-LLSC-O0-NEXT: ldr x0, [sp, #8] // 8-byte Folded Reload
+; CHECK-OUTLINE-LLSC-O0-NEXT: // implicit-def: $q0
+; CHECK-OUTLINE-LLSC-O0-NEXT: mov v0.d[0], x8
+; CHECK-OUTLINE-LLSC-O0-NEXT: mov v0.d[1], x1
+; CHECK-OUTLINE-LLSC-O0-NEXT: str q0, [x0]
+; CHECK-OUTLINE-LLSC-O0-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-OUTLINE-LLSC-O0-NEXT: add sp, sp, #32
+; CHECK-OUTLINE-LLSC-O0-NEXT: ret
+;
; CHECK-CAS-O0-LABEL: val_compare_and_swap_monotonic:
; CHECK-CAS-O0: // %bb.0:
; CHECK-CAS-O0-NEXT: sub sp, sp, #16
@@ -358,6 +530,19 @@ define void @atomic_load_relaxed(i64, i64, ptr %p, ptr %p2) {
; CHECK-LLSC-O1-NEXT: str q0, [x3]
; CHECK-LLSC-O1-NEXT: ret
;
+; CHECK-OUTLINE-LLSC-O1-LABEL: atomic_load_relaxed:
+; CHECK-OUTLINE-LLSC-O1: // %bb.0:
+; CHECK-OUTLINE-LLSC-O1-NEXT: .LBB4_1: // %atomicrmw.start
+; CHECK-OUTLINE-LLSC-O1-NEXT: // =>This Inner Loop Header: Depth=1
+; CHECK-OUTLINE-LLSC-O1-NEXT: ldxp x9, x8, [x2]
+; CHECK-OUTLINE-LLSC-O1-NEXT: stxp w10, x9, x8, [x2]
+; CHECK-OUTLINE-LLSC-O1-NEXT: cbnz w10, .LBB4_1
+; CHECK-OUTLINE-LLSC-O1-NEXT: // %bb.2: // %atomicrmw.end
+; CHECK-OUTLINE-LLSC-O1-NEXT: mov v0.d[0], x9
+; CHECK-OUTLINE-LLSC-O1-NEXT: mov v0.d[1], x8
+; CHECK-OUTLINE-LLSC-O1-NEXT: str q0, [x3]
+; CHECK-OUTLINE-LLSC-O1-NEXT: ret
+;
; CHECK-CAS-O1-LABEL: atomic_load_relaxed:
; CHECK-CAS-O1: // %bb.0:
; CHECK-CAS-O1-NEXT: mov x0, xzr
@@ -392,6 +577,28 @@ define void @atomic_load_relaxed(i64, i64, ptr %p, ptr %p2) {
; CHECK-LLSC-O0-NEXT: str q0, [x3]
; CHECK-LLSC-O0-NEXT: ret
;
+; CHECK-OUTLINE-LLSC-O0-LABEL: atomic_load_relaxed:
+; CHECK-OUTLINE-LLSC-O0: // %bb.0:
+; CHECK-OUTLINE-LLSC-O0-NEXT: sub sp, sp, #32
+; CHECK-OUTLINE-LLSC-O0-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-OUTLINE-LLSC-O0-NEXT: .cfi_def_cfa_offset 32
+; CHECK-OUTLINE-LLSC-O0-NEXT: .cfi_offset w30, -16
+; CHECK-OUTLINE-LLSC-O0-NEXT: mov x4, x2
+; CHECK-OUTLINE-LLSC-O0-NEXT: str x3, [sp, #8] // 8-byte Folded Spill
+; CHECK-OUTLINE-LLSC-O0-NEXT: mov x3, xzr
+; CHECK-OUTLINE-LLSC-O0-NEXT: mov x0, x3
+; CHECK-OUTLINE-LLSC-O0-NEXT: mov x1, x3
+; CHECK-OUTLINE-LLSC-O0-NEXT: mov x2, x3
+; CHECK-OUTLINE-LLSC-O0-NEXT: bl __aarch64_cas16_relax
+; CHECK-OUTLINE-LLSC-O0-NEXT: ldr x3, [sp, #8] // 8-byte Folded Reload
+; CHECK-OUTLINE-LLSC-O0-NEXT: // implicit-def: $q0
+; CHECK-OUTLINE-LLSC-O0-NEXT: mov v0.d[0], x0
+; CHECK-OUTLINE-LLSC-O0-NEXT: mov v0.d[1], x1
+; CHECK-OUTLINE-LLSC-O0-NEXT: str q0, [x3]
+; CHECK-OUTLINE-LLSC-O0-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-OUTLINE-LLSC-O0-NEXT: add sp, sp, #32
+; CHECK-OUTLINE-LLSC-O0-NEXT: ret
+;
; CHECK-CAS-O0-LABEL: atomic_load_relaxed:
; CHECK-CAS-O0: // %bb.0:
; CHECK-CAS-O0-NEXT: mov x8, xzr
@@ -434,6 +641,21 @@ define i128 @val_compare_and_swap_return(ptr %p, i128 %oldval, i128 %newval) {
; CHECK-LLSC-O1-NEXT: mov x0, x8
; CHECK-LLSC-O1-NEXT: ret
;
+; CHECK-OUTLINE-LLSC-O1-LABEL: val_compare_and_swap_return:
+; CHECK-OUTLINE-LLSC-O1: // %bb.0:
+; CHECK-OUTLINE-LLSC-O1-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-OUTLINE-LLSC-O1-NEXT: .cfi_def_cfa_offset 16
+; CHECK-OUTLINE-LLSC-O1-NEXT: .cfi_offset w30, -16
+; CHECK-OUTLINE-LLSC-O1-NEXT: mov x6, x0
+; CHECK-OUTLINE-LLSC-O1-NEXT: mov x0, x2
+; CHECK-OUTLINE-LLSC-O1-NEXT: mov x1, x3
+; CHECK-OUTLINE-LLSC-O1-NEXT: mov x2, x4
+; CHECK-OUTLINE-LLSC-O1-NEXT: mov x3, x5
+; CHECK-OUTLINE-LLSC-O1-NEXT: mov x4, x6
+; CHECK-OUTLINE-LLSC-O1-NEXT: bl __aarch64_cas16_acq
+; CHECK-OUTLINE-LLSC-O1-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-OUTLINE-LLSC-O1-NEXT: ret
+;
; CHECK-CAS-O1-LABEL: val_compare_and_swap_return:
; CHECK-CAS-O1: // %bb.0:
; CHECK-CAS-O1-NEXT: // kill: def $x2 killed $x2 killed $x2_x3 def $x2_x3
@@ -465,6 +687,23 @@ define i128 @val_compare_and_swap_return(ptr %p, i128 %oldval, i128 %newval) {
; CHECK-LLSC-O0-NEXT: .LBB5_4:
; CHECK-LLSC-O0-NEXT: ret
;
+; CHECK-OUTLINE-LLSC-O0-LABEL: val_compare_and_swap_return:
+; CHECK-OUTLINE-LLSC-O0: // %bb.0:
+; CHECK-OUTLINE-LLSC-O0-NEXT: sub sp, sp, #32
+; CHECK-OUTLINE-LLSC-O0-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-OUTLINE-LLSC-O0-NEXT: .cfi_def_cfa_offset 32
+; CHECK-OUTLINE-LLSC-O0-NEXT: .cfi_offset w30, -16
+; CHECK-OUTLINE-LLSC-O0-NEXT: str x0, [sp, #8] // 8-byte Folded Spill
+; CHECK-OUTLINE-LLSC-O0-NEXT: mov x0, x2
+; CHECK-OUTLINE-LLSC-O0-NEXT: mov x1, x3
+; CHECK-OUTLINE-LLSC-O0-NEXT: mov x2, x4
+; CHECK-OUTLINE-LLSC-O0-NEXT: ldr x4, [sp, #8] // 8-byte Folded Reload
+; CHECK-OUTLINE-LLSC-O0-NEXT: mov x3, x5
+; CHECK-OUTLINE-LLSC-O0-NEXT: bl __aarch64_cas16_acq
+; CHECK-OUTLINE-LLSC-O0-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-OUTLINE-LLSC-O0-NEXT: add sp, sp, #32
+; CHECK-OUTLINE-LLSC-O0-NEXT: ret
+;
; CHECK-CAS-O0-LABEL: val_compare_and_swap_return:
; CHECK-CAS-O0: // %bb.0:
; CHECK-CAS-O0-NEXT: mov x8, x0
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll
index d03647f8b294ef..dd516e4a1e6c74 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll
@@ -1,8 +1,12 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=arm64-apple-ios -global-isel -global-isel-abort=1 -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK-NOLSE,CHECK-NOLSE-O1
+; RUN: llc < %s -mtriple=arm64-apple-ios -mattr=+outline-atomics -global-isel -global-isel-abort=1 -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK-OUTLINE,CHECK-OUTLINE-O1
; RUN: llc < %s -mtriple=arm64-apple-ios -global-isel -global-isel-abort=1 -O0 -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK-NOLSE,CHECK-NOLSE-O0
+; RUN: llc < %s -mtriple=arm64-apple-ios -mattr=+outline-atomics -global-isel -global-isel-abort=1 -O0 -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK-OUTLINE,CHECK-OUTLINE-O0
; RUN: llc < %s -mtriple=arm64-apple-ios -global-isel -global-isel-abort=1 -mcpu=apple-a13 -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK-LSE-O1
+; RUN: llc < %s -mtriple=arm64-apple-ios -mattr=+outline-atomics -global-isel -global-isel-abort=1 -mcpu=apple-a13 -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK-LSE-O1
; RUN: llc < %s -mtriple=arm64-apple-ios -global-isel -global-isel-abort=1 -mcpu=apple-a13 -O0 -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK-LSE-O0
+; RUN: llc < %s -mtriple=arm64-apple-ios -mattr=+outline-atomics -global-isel -global-isel-abort=1 -mcpu=apple-a13 -O0 -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK-LSE-O0
define i32 @val_compare_and_swap(ptr %p, i32 %cmp, i32 %new) #0 {
; CHECK-NOLSE-O1-LABEL: val_compare_and_swap:
@@ -24,6 +28,17 @@ define i32 @val_compare_and_swap(ptr %p, i32 %cmp, i32 %new) #0 {
; CHECK-NOLSE-O1-NEXT: mov w0, w8
; CHECK-NOLSE-O1-NEXT: ret
;
+; CHECK-OUTLINE-O1-LABEL: val_compare_and_swap:
+; CHECK-OUTLINE-O1: ; %bb.0:
+; CHECK-OUTLINE-O1-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
+; CHECK-OUTLINE-O1-NEXT: mov x3, x0
+; CHECK-OUTLINE-O1-NEXT: mov w0, w1
+; CHECK-OUTLINE-O1-NEXT: mov w1, w2
+; CHECK-OUTLINE-O1-NEXT: mov x2, x3
+; CHECK-OUTLINE-O1-NEXT: bl ___aarch64_cas4_acq
+; CHECK-OUTLINE-O1-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
+; CHECK-OUTLINE-O1-NEXT: ret
+;
; CHECK-NOLSE-O0-LABEL: val_compare_and_swap:
; CHECK-NOLSE-O0: ; %bb.0:
; CHECK-NOLSE-O0-NEXT: mov x9, x0
@@ -37,6 +52,19 @@ define i32 @val_compare_and_swap(ptr %p, i32 %cmp, i32 %new) #0 {
; CHECK-NOLSE-O0-NEXT: LBB0_3:
; CHECK-NOLSE-O0-NEXT: ret
;
+; CHECK-OUTLINE-O0-LABEL: val_compare_and_swap:
+; CHECK-OUTLINE-O0: ; %bb.0:
+; CHECK-OUTLINE-O0-NEXT: sub sp, sp, #32
+; CHECK-OUTLINE-O0-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #8] ; 8-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: mov w0, w1
+; CHECK-OUTLINE-O0-NEXT: mov w1, w2
+; CHECK-OUTLINE-O0-NEXT: ldr x2, [sp, #8] ; 8-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: bl ___aarch64_cas4_acq
+; CHECK-OUTLINE-O0-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: add sp, sp, #32
+; CHECK-OUTLINE-O0-NEXT: ret
+;
; CHECK-LSE-O1-LABEL: val_compare_and_swap:
; CHECK-LSE-O1: ; %bb.0:
; CHECK-LSE-O1-NEXT: casa w1, w2, [x0]
@@ -75,6 +103,18 @@ define i32 @val_compare_and_swap_from_load(ptr %p, i32 %cmp, ptr %pnew) #0 {
; CHECK-NOLSE-O1-NEXT: mov w0, w8
; CHECK-NOLSE-O1-NEXT: ret
;
+; CHECK-OUTLINE-O1-LABEL: val_compare_and_swap_from_load:
+; CHECK-OUTLINE-O1: ; %bb.0:
+; CHECK-OUTLINE-O1-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
+; CHECK-OUTLINE-O1-NEXT: ldr w8, [x2]
+; CHECK-OUTLINE-O1-NEXT: mov x3, x0
+; CHECK-OUTLINE-O1-NEXT: mov w0, w1
+; CHECK-OUTLINE-O1-NEXT: mov x2, x3
+; CHECK-OUTLINE-O1-NEXT: mov w1, w8
+; CHECK-OUTLINE-O1-NEXT: bl ___aarch64_cas4_acq
+; CHECK-OUTLINE-O1-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
+; CHECK-OUTLINE-O1-NEXT: ret
+;
; CHECK-NOLSE-O0-LABEL: val_compare_and_swap_from_load:
; CHECK-NOLSE-O0: ; %bb.0:
; CHECK-NOLSE-O0-NEXT: mov x9, x0
@@ -89,6 +129,20 @@ define i32 @val_compare_and_swap_from_load(ptr %p, i32 %cmp, ptr %pnew) #0 {
; CHECK-NOLSE-O0-NEXT: LBB1_3:
; CHECK-NOLSE-O0-NEXT: ret
;
+; CHECK-OUTLINE-O0-LABEL: val_compare_and_swap_from_load:
+; CHECK-OUTLINE-O0: ; %bb.0:
+; CHECK-OUTLINE-O0-NEXT: sub sp, sp, #32
+; CHECK-OUTLINE-O0-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #8] ; 8-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: mov w0, w1
+; CHECK-OUTLINE-O0-NEXT: mov x8, x2
+; CHECK-OUTLINE-O0-NEXT: ldr x2, [sp, #8] ; 8-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: ldr w1, [x8]
+; CHECK-OUTLINE-O0-NEXT: bl ___aarch64_cas4_acq
+; CHECK-OUTLINE-O0-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: add sp, sp, #32
+; CHECK-OUTLINE-O0-NEXT: ret
+;
; CHECK-LSE-O1-LABEL: val_compare_and_swap_from_load:
; CHECK-LSE-O1: ; %bb.0:
; CHECK-LSE-O1-NEXT: ldr w8, [x2]
@@ -129,6 +183,17 @@ define i32 @val_compare_and_swap_rel(ptr %p, i32 %cmp, i32 %new) #0 {
; CHECK-NOLSE-O1-NEXT: mov w0, w8
; CHECK-NOLSE-O1-NEXT: ret
;
+; CHECK-OUTLINE-O1-LABEL: val_compare_and_swap_rel:
+; CHECK-OUTLINE-O1: ; %bb.0:
+; CHECK-OUTLINE-O1-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
+; CHECK-OUTLINE-O1-NEXT: mov x3, x0
+; CHECK-OUTLINE-O1-NEXT: mov w0, w1
+; CHECK-OUTLINE-O1-NEXT: mov w1, w2
+; CHECK-OUTLINE-O1-NEXT: mov x2, x3
+; CHECK-OUTLINE-O1-NEXT: bl ___aarch64_cas4_acq_rel
+; CHECK-OUTLINE-O1-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
+; CHECK-OUTLINE-O1-NEXT: ret
+;
; CHECK-NOLSE-O0-LABEL: val_compare_and_swap_rel:
; CHECK-NOLSE-O0: ; %bb.0:
; CHECK-NOLSE-O0-NEXT: mov x9, x0
@@ -142,6 +207,19 @@ define i32 @val_compare_and_swap_rel(ptr %p, i32 %cmp, i32 %new) #0 {
; CHECK-NOLSE-O0-NEXT: LBB2_3:
; CHECK-NOLSE-O0-NEXT: ret
;
+; CHECK-OUTLINE-O0-LABEL: val_compare_and_swap_rel:
+; CHECK-OUTLINE-O0: ; %bb.0:
+; CHECK-OUTLINE-O0-NEXT: sub sp, sp, #32
+; CHECK-OUTLINE-O0-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #8] ; 8-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: mov w0, w1
+; CHECK-OUTLINE-O0-NEXT: mov w1, w2
+; CHECK-OUTLINE-O0-NEXT: ldr x2, [sp, #8] ; 8-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: bl ___aarch64_cas4_acq_rel
+; CHECK-OUTLINE-O0-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: add sp, sp, #32
+; CHECK-OUTLINE-O0-NEXT: ret
+;
; CHECK-LSE-O1-LABEL: val_compare_and_swap_rel:
; CHECK-LSE-O1: ; %bb.0:
; CHECK-LSE-O1-NEXT: casal w1, w2, [x0]
@@ -179,6 +257,17 @@ define i64 @val_compare_and_swap_64(ptr %p, i64 %cmp, i64 %new) #0 {
; CHECK-NOLSE-O1-NEXT: mov x0, x8
; CHECK-NOLSE-O1-NEXT: ret
;
+; CHECK-OUTLINE-O1-LABEL: val_compare_and_swap_64:
+; CHECK-OUTLINE-O1: ; %bb.0:
+; CHECK-OUTLINE-O1-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
+; CHECK-OUTLINE-O1-NEXT: mov x3, x0
+; CHECK-OUTLINE-O1-NEXT: mov x0, x1
+; CHECK-OUTLINE-O1-NEXT: mov x1, x2
+; CHECK-OUTLINE-O1-NEXT: mov x2, x3
+; CHECK-OUTLINE-O1-NEXT: bl ___aarch64_cas8_relax
+; CHECK-OUTLINE-O1-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
+; CHECK-OUTLINE-O1-NEXT: ret
+;
; CHECK-NOLSE-O0-LABEL: val_compare_and_swap_64:
; CHECK-NOLSE-O0: ; %bb.0:
; CHECK-NOLSE-O0-NEXT: mov x9, x0
@@ -192,6 +281,19 @@ define i64 @val_compare_and_swap_64(ptr %p, i64 %cmp, i64 %new) #0 {
; CHECK-NOLSE-O0-NEXT: LBB3_3:
; CHECK-NOLSE-O0-NEXT: ret
;
+; CHECK-OUTLINE-O0-LABEL: val_compare_and_swap_64:
+; CHECK-OUTLINE-O0: ; %bb.0:
+; CHECK-OUTLINE-O0-NEXT: sub sp, sp, #32
+; CHECK-OUTLINE-O0-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #8] ; 8-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: mov x0, x1
+; CHECK-OUTLINE-O0-NEXT: mov x1, x2
+; CHECK-OUTLINE-O0-NEXT: ldr x2, [sp, #8] ; 8-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: bl ___aarch64_cas8_relax
+; CHECK-OUTLINE-O0-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: add sp, sp, #32
+; CHECK-OUTLINE-O0-NEXT: ret
+;
; CHECK-LSE-O1-LABEL: val_compare_and_swap_64:
; CHECK-LSE-O1: ; %bb.0:
; CHECK-LSE-O1-NEXT: cas x1, x2, [x0]
@@ -229,6 +331,17 @@ define i64 @val_compare_and_swap_64_monotonic_seqcst(ptr %p, i64 %cmp, i64 %new)
; CHECK-NOLSE-O1-NEXT: mov x0, x8
; CHECK-NOLSE-O1-NEXT: ret
;
+; CHECK-OUTLINE-O1-LABEL: val_compare_and_swap_64_monotonic_seqcst:
+; CHECK-OUTLINE-O1: ; %bb.0:
+; CHECK-OUTLINE-O1-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
+; CHECK-OUTLINE-O1-NEXT: mov x3, x0
+; CHECK-OUTLINE-O1-NEXT: mov x0, x1
+; CHECK-OUTLINE-O1-NEXT: mov x1, x2
+; CHECK-OUTLINE-O1-NEXT: mov x2, x3
+; CHECK-OUTLINE-O1-NEXT: bl ___aarch64_cas8_acq_rel
+; CHECK-OUTLINE-O1-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
+; CHECK-OUTLINE-O1-NEXT: ret
+;
; CHECK-NOLSE-O0-LABEL: val_compare_and_swap_64_monotonic_seqcst:
; CHECK-NOLSE-O0: ; %bb.0:
; CHECK-NOLSE-O0-NEXT: mov x9, x0
@@ -242,6 +355,19 @@ define i64 @val_compare_and_swap_64_monotonic_seqcst(ptr %p, i64 %cmp, i64 %new)
; CHECK-NOLSE-O0-NEXT: LBB4_3:
; CHECK-NOLSE-O0-NEXT: ret
;
+; CHECK-OUTLINE-O0-LABEL: val_compare_and_swap_64_monotonic_seqcst:
+; CHECK-OUTLINE-O0: ; %bb.0:
+; CHECK-OUTLINE-O0-NEXT: sub sp, sp, #32
+; CHECK-OUTLINE-O0-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #8] ; 8-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: mov x0, x1
+; CHECK-OUTLINE-O0-NEXT: mov x1, x2
+; CHECK-OUTLINE-O0-NEXT: ldr x2, [sp, #8] ; 8-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: bl ___aarch64_cas8_acq_rel
+; CHECK-OUTLINE-O0-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: add sp, sp, #32
+; CHECK-OUTLINE-O0-NEXT: ret
+;
; CHECK-LSE-O1-LABEL: val_compare_and_swap_64_monotonic_seqcst:
; CHECK-LSE-O1: ; %bb.0:
; CHECK-LSE-O1-NEXT: casal x1, x2, [x0]
@@ -279,6 +405,17 @@ define i64 @val_compare_and_swap_64_release_acquire(ptr %p, i64 %cmp, i64 %new)
; CHECK-NOLSE-O1-NEXT: mov x0, x8
; CHECK-NOLSE-O1-NEXT: ret
;
+; CHECK-OUTLINE-O1-LABEL: val_compare_and_swap_64_release_acquire:
+; CHECK-OUTLINE-O1: ; %bb.0:
+; CHECK-OUTLINE-O1-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
+; CHECK-OUTLINE-O1-NEXT: mov x3, x0
+; CHECK-OUTLINE-O1-NEXT: mov x0, x1
+; CHECK-OUTLINE-O1-NEXT: mov x1, x2
+; CHECK-OUTLINE-O1-NEXT: mov x2, x3
+; CHECK-OUTLINE-O1-NEXT: bl ___aarch64_cas8_acq_rel
+; CHECK-OUTLINE-O1-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
+; CHECK-OUTLINE-O1-NEXT: ret
+;
; CHECK-NOLSE-O0-LABEL: val_compare_and_swap_64_release_acquire:
; CHECK-NOLSE-O0: ; %bb.0:
; CHECK-NOLSE-O0-NEXT: mov x9, x0
@@ -292,6 +429,19 @@ define i64 @val_compare_and_swap_64_release_acquire(ptr %p, i64 %cmp, i64 %new)
; CHECK-NOLSE-O0-NEXT: LBB5_3:
; CHECK-NOLSE-O0-NEXT: ret
;
+; CHECK-OUTLINE-O0-LABEL: val_compare_and_swap_64_release_acquire:
+; CHECK-OUTLINE-O0: ; %bb.0:
+; CHECK-OUTLINE-O0-NEXT: sub sp, sp, #32
+; CHECK-OUTLINE-O0-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #8] ; 8-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: mov x0, x1
+; CHECK-OUTLINE-O0-NEXT: mov x1, x2
+; CHECK-OUTLINE-O0-NEXT: ldr x2, [sp, #8] ; 8-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: bl ___aarch64_cas8_acq_rel
+; CHECK-OUTLINE-O0-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: add sp, sp, #32
+; CHECK-OUTLINE-O0-NEXT: ret
+;
; CHECK-LSE-O1-LABEL: val_compare_and_swap_64_release_acquire:
; CHECK-LSE-O1: ; %bb.0:
; CHECK-LSE-O1-NEXT: casal x1, x2, [x0]
@@ -323,6 +473,19 @@ define i32 @fetch_and_nand(ptr %p) #0 {
; CHECK-NOLSE-O1-NEXT: mov w0, w8
; CHECK-NOLSE-O1-NEXT: ret
;
+; CHECK-OUTLINE-O1-LABEL: fetch_and_nand:
+; CHECK-OUTLINE-O1: ; %bb.0:
+; CHECK-OUTLINE-O1-NEXT: LBB6_1: ; %atomicrmw.start
+; CHECK-OUTLINE-O1-NEXT: ; =>This Inner Loop Header: Depth=1
+; CHECK-OUTLINE-O1-NEXT: ldxr w8, [x0]
+; CHECK-OUTLINE-O1-NEXT: and w9, w8, #0x7
+; CHECK-OUTLINE-O1-NEXT: mvn w9, w9
+; CHECK-OUTLINE-O1-NEXT: stlxr w10, w9, [x0]
+; CHECK-OUTLINE-O1-NEXT: cbnz w10, LBB6_1
+; CHECK-OUTLINE-O1-NEXT: ; %bb.2: ; %atomicrmw.end
+; CHECK-OUTLINE-O1-NEXT: mov w0, w8
+; CHECK-OUTLINE-O1-NEXT: ret
+;
; CHECK-NOLSE-O0-LABEL: fetch_and_nand:
; CHECK-NOLSE-O0: ; %bb.0:
; CHECK-NOLSE-O0-NEXT: sub sp, sp, #32
@@ -360,6 +523,35 @@ define i32 @fetch_and_nand(ptr %p) #0 {
; CHECK-NOLSE-O0-NEXT: add sp, sp, #32
; CHECK-NOLSE-O0-NEXT: ret
;
+; CHECK-OUTLINE-O0-LABEL: fetch_and_nand:
+; CHECK-OUTLINE-O0: ; %bb.0:
+; CHECK-OUTLINE-O0-NEXT: sub sp, sp, #48
+; CHECK-OUTLINE-O0-NEXT: stp x29, x30, [sp, #32] ; 16-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #16] ; 8-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: ldr w0, [x0]
+; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #28] ; 4-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: b LBB6_1
+; CHECK-OUTLINE-O0-NEXT: LBB6_1: ; %atomicrmw.start
+; CHECK-OUTLINE-O0-NEXT: ; =>This Inner Loop Header: Depth=1
+; CHECK-OUTLINE-O0-NEXT: ldr w0, [sp, #28] ; 4-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: ldr x2, [sp, #16] ; 8-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #8] ; 4-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: and w8, w0, #0x7
+; CHECK-OUTLINE-O0-NEXT: mvn w1, w8
+; CHECK-OUTLINE-O0-NEXT: bl ___aarch64_cas4_rel
+; CHECK-OUTLINE-O0-NEXT: ldr w8, [sp, #8] ; 4-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #12] ; 4-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: subs w8, w0, w8
+; CHECK-OUTLINE-O0-NEXT: cset w8, eq
+; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #28] ; 4-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: tbz w8, #0, LBB6_1
+; CHECK-OUTLINE-O0-NEXT: b LBB6_2
+; CHECK-OUTLINE-O0-NEXT: LBB6_2: ; %atomicrmw.end
+; CHECK-OUTLINE-O0-NEXT: ldr w0, [sp, #12] ; 4-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: ldp x29, x30, [sp, #32] ; 16-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: add sp, sp, #48
+; CHECK-OUTLINE-O0-NEXT: ret
+;
; CHECK-LSE-O1-LABEL: fetch_and_nand:
; CHECK-LSE-O1: ; %bb.0:
; CHECK-LSE-O1-NEXT: mov x8, x0
@@ -418,6 +610,19 @@ define i64 @fetch_and_nand_64(ptr %p) #0 {
; CHECK-NOLSE-O1-NEXT: mov x0, x8
; CHECK-NOLSE-O1-NEXT: ret
;
+; CHECK-OUTLINE-O1-LABEL: fetch_and_nand_64:
+; CHECK-OUTLINE-O1: ; %bb.0:
+; CHECK-OUTLINE-O1-NEXT: LBB7_1: ; %atomicrmw.start
+; CHECK-OUTLINE-O1-NEXT: ; =>This Inner Loop Header: Depth=1
+; CHECK-OUTLINE-O1-NEXT: ldaxr x8, [x0]
+; CHECK-OUTLINE-O1-NEXT: and x9, x8, #0x7
+; CHECK-OUTLINE-O1-NEXT: mvn x9, x9
+; CHECK-OUTLINE-O1-NEXT: stlxr w10, x9, [x0]
+; CHECK-OUTLINE-O1-NEXT: cbnz w10, LBB7_1
+; CHECK-OUTLINE-O1-NEXT: ; %bb.2: ; %atomicrmw.end
+; CHECK-OUTLINE-O1-NEXT: mov x0, x8
+; CHECK-OUTLINE-O1-NEXT: ret
+;
; CHECK-NOLSE-O0-LABEL: fetch_and_nand_64:
; CHECK-NOLSE-O0: ; %bb.0:
; CHECK-NOLSE-O0-NEXT: sub sp, sp, #32
@@ -455,6 +660,35 @@ define i64 @fetch_and_nand_64(ptr %p) #0 {
; CHECK-NOLSE-O0-NEXT: add sp, sp, #32
; CHECK-NOLSE-O0-NEXT: ret
;
+; CHECK-OUTLINE-O0-LABEL: fetch_and_nand_64:
+; CHECK-OUTLINE-O0: ; %bb.0:
+; CHECK-OUTLINE-O0-NEXT: sub sp, sp, #48
+; CHECK-OUTLINE-O0-NEXT: stp x29, x30, [sp, #32] ; 16-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #16] ; 8-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: ldr x0, [x0]
+; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #24] ; 8-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: b LBB7_1
+; CHECK-OUTLINE-O0-NEXT: LBB7_1: ; %atomicrmw.start
+; CHECK-OUTLINE-O0-NEXT: ; =>This Inner Loop Header: Depth=1
+; CHECK-OUTLINE-O0-NEXT: ldr x0, [sp, #24] ; 8-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: ldr x2, [sp, #16] ; 8-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: str x0, [sp] ; 8-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: and x8, x0, #0x7
+; CHECK-OUTLINE-O0-NEXT: mvn x1, x8
+; CHECK-OUTLINE-O0-NEXT: bl ___aarch64_cas8_acq_rel
+; CHECK-OUTLINE-O0-NEXT: ldr x8, [sp] ; 8-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #8] ; 8-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: subs x8, x0, x8
+; CHECK-OUTLINE-O0-NEXT: cset w8, eq
+; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #24] ; 8-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: tbz w8, #0, LBB7_1
+; CHECK-OUTLINE-O0-NEXT: b LBB7_2
+; CHECK-OUTLINE-O0-NEXT: LBB7_2: ; %atomicrmw.end
+; CHECK-OUTLINE-O0-NEXT: ldr x0, [sp, #8] ; 8-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: ldp x29, x30, [sp, #32] ; 16-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: add sp, sp, #48
+; CHECK-OUTLINE-O0-NEXT: ret
+;
; CHECK-LSE-O1-LABEL: fetch_and_nand_64:
; CHECK-LSE-O1: ; %bb.0:
; CHECK-LSE-O1-NEXT: mov x8, x0
@@ -513,6 +747,15 @@ define i32 @fetch_and_or(ptr %p) #0 {
; CHECK-NOLSE-O1-NEXT: mov w0, w8
; CHECK-NOLSE-O1-NEXT: ret
;
+; CHECK-OUTLINE-LABEL: fetch_and_or:
+; CHECK-OUTLINE: ; %bb.0:
+; CHECK-OUTLINE-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
+; CHECK-OUTLINE-NEXT: mov x1, x0
+; CHECK-OUTLINE-NEXT: mov w0, #5 ; =0x5
+; CHECK-OUTLINE-NEXT: bl ___aarch64_ldset4_acq_rel
+; CHECK-OUTLINE-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
+; CHECK-OUTLINE-NEXT: ret
+;
; CHECK-NOLSE-O0-LABEL: fetch_and_or:
; CHECK-NOLSE-O0: ; %bb.0:
; CHECK-NOLSE-O0-NEXT: sub sp, sp, #32
@@ -578,6 +821,15 @@ define i64 @fetch_and_or_64(ptr %p) #0 {
; CHECK-NOLSE-O1-NEXT: mov x0, x8
; CHECK-NOLSE-O1-NEXT: ret
;
+; CHECK-OUTLINE-O1-LABEL: fetch_and_or_64:
+; CHECK-OUTLINE-O1: ; %bb.0:
+; CHECK-OUTLINE-O1-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
+; CHECK-OUTLINE-O1-NEXT: mov x1, x0
+; CHECK-OUTLINE-O1-NEXT: mov w0, #7 ; =0x7
+; CHECK-OUTLINE-O1-NEXT: bl ___aarch64_ldset8_relax
+; CHECK-OUTLINE-O1-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
+; CHECK-OUTLINE-O1-NEXT: ret
+;
; CHECK-NOLSE-O0-LABEL: fetch_and_or_64:
; CHECK-NOLSE-O0: ; %bb.0:
; CHECK-NOLSE-O0-NEXT: sub sp, sp, #32
@@ -614,6 +866,16 @@ define i64 @fetch_and_or_64(ptr %p) #0 {
; CHECK-NOLSE-O0-NEXT: add sp, sp, #32
; CHECK-NOLSE-O0-NEXT: ret
;
+; CHECK-OUTLINE-O0-LABEL: fetch_and_or_64:
+; CHECK-OUTLINE-O0: ; %bb.0:
+; CHECK-OUTLINE-O0-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: mov x1, x0
+; CHECK-OUTLINE-O0-NEXT: mov w8, #7 ; =0x7
+; CHECK-OUTLINE-O0-NEXT: mov w0, w8
+; CHECK-OUTLINE-O0-NEXT: bl ___aarch64_ldset8_relax
+; CHECK-OUTLINE-O0-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: ret
+;
; CHECK-LSE-O1-LABEL: fetch_and_or_64:
; CHECK-LSE-O1: ; %bb.0:
; CHECK-LSE-O1-NEXT: mov w8, #7 ; =0x7
@@ -636,6 +898,11 @@ define void @acquire_fence() #0 {
; CHECK-NOLSE-NEXT: dmb ishld
; CHECK-NOLSE-NEXT: ret
;
+; CHECK-OUTLINE-LABEL: acquire_fence:
+; CHECK-OUTLINE: ; %bb.0:
+; CHECK-OUTLINE-NEXT: dmb ishld
+; CHECK-OUTLINE-NEXT: ret
+;
; CHECK-LSE-O1-LABEL: acquire_fence:
; CHECK-LSE-O1: ; %bb.0:
; CHECK-LSE-O1-NEXT: dmb ishld
@@ -655,6 +922,11 @@ define void @release_fence() #0 {
; CHECK-NOLSE-NEXT: dmb ish
; CHECK-NOLSE-NEXT: ret
;
+; CHECK-OUTLINE-LABEL: release_fence:
+; CHECK-OUTLINE: ; %bb.0:
+; CHECK-OUTLINE-NEXT: dmb ish
+; CHECK-OUTLINE-NEXT: ret
+;
; CHECK-LSE-O1-LABEL: release_fence:
; CHECK-LSE-O1: ; %bb.0:
; CHECK-LSE-O1-NEXT: dmb ish
@@ -674,6 +946,11 @@ define void @seq_cst_fence() #0 {
; CHECK-NOLSE-NEXT: dmb ish
; CHECK-NOLSE-NEXT: ret
;
+; CHECK-OUTLINE-LABEL: seq_cst_fence:
+; CHECK-OUTLINE: ; %bb.0:
+; CHECK-OUTLINE-NEXT: dmb ish
+; CHECK-OUTLINE-NEXT: ret
+;
; CHECK-LSE-O1-LABEL: seq_cst_fence:
; CHECK-LSE-O1: ; %bb.0:
; CHECK-LSE-O1-NEXT: dmb ish
@@ -693,6 +970,11 @@ define i32 @atomic_load(ptr %p) #0 {
; CHECK-NOLSE-NEXT: ldar w0, [x0]
; CHECK-NOLSE-NEXT: ret
;
+; CHECK-OUTLINE-LABEL: atomic_load:
+; CHECK-OUTLINE: ; %bb.0:
+; CHECK-OUTLINE-NEXT: ldar w0, [x0]
+; CHECK-OUTLINE-NEXT: ret
+;
; CHECK-LSE-O1-LABEL: atomic_load:
; CHECK-LSE-O1: ; %bb.0:
; CHECK-LSE-O1-NEXT: ldar w0, [x0]
@@ -719,6 +1001,18 @@ define i8 @atomic_load_relaxed_8(ptr %p, i32 %off32) #0 {
; CHECK-NOLSE-O1-NEXT: add w0, w8, w9
; CHECK-NOLSE-O1-NEXT: ret
;
+; CHECK-OUTLINE-O1-LABEL: atomic_load_relaxed_8:
+; CHECK-OUTLINE-O1: ; %bb.0:
+; CHECK-OUTLINE-O1-NEXT: ldrb w8, [x0, #4095]
+; CHECK-OUTLINE-O1-NEXT: ldrb w9, [x0, w1, sxtw]
+; CHECK-OUTLINE-O1-NEXT: add x11, x0, #291, lsl #12 ; =1191936
+; CHECK-OUTLINE-O1-NEXT: ldurb w10, [x0, #-256]
+; CHECK-OUTLINE-O1-NEXT: add w8, w8, w9
+; CHECK-OUTLINE-O1-NEXT: ldrb w9, [x11]
+; CHECK-OUTLINE-O1-NEXT: add w8, w8, w10
+; CHECK-OUTLINE-O1-NEXT: add w0, w8, w9
+; CHECK-OUTLINE-O1-NEXT: ret
+;
; CHECK-NOLSE-O0-LABEL: atomic_load_relaxed_8:
; CHECK-NOLSE-O0: ; %bb.0:
; CHECK-NOLSE-O0-NEXT: ldrb w9, [x0, #4095]
@@ -733,6 +1027,20 @@ define i8 @atomic_load_relaxed_8(ptr %p, i32 %off32) #0 {
; CHECK-NOLSE-O0-NEXT: add w0, w8, w9, uxtb
; CHECK-NOLSE-O0-NEXT: ret
;
+; CHECK-OUTLINE-O0-LABEL: atomic_load_relaxed_8:
+; CHECK-OUTLINE-O0: ; %bb.0:
+; CHECK-OUTLINE-O0-NEXT: ldrb w9, [x0, #4095]
+; CHECK-OUTLINE-O0-NEXT: add x8, x0, w1, sxtw
+; CHECK-OUTLINE-O0-NEXT: ldrb w8, [x8]
+; CHECK-OUTLINE-O0-NEXT: add w8, w8, w9, uxtb
+; CHECK-OUTLINE-O0-NEXT: subs x9, x0, #256
+; CHECK-OUTLINE-O0-NEXT: ldrb w9, [x9]
+; CHECK-OUTLINE-O0-NEXT: add w8, w8, w9, uxtb
+; CHECK-OUTLINE-O0-NEXT: add x9, x0, #291, lsl #12 ; =1191936
+; CHECK-OUTLINE-O0-NEXT: ldrb w9, [x9]
+; CHECK-OUTLINE-O0-NEXT: add w0, w8, w9, uxtb
+; CHECK-OUTLINE-O0-NEXT: ret
+;
; CHECK-LSE-O1-LABEL: atomic_load_relaxed_8:
; CHECK-LSE-O1: ; %bb.0:
; CHECK-LSE-O1-NEXT: ldrb w8, [x0, #4095]
@@ -789,6 +1097,18 @@ define i16 @atomic_load_relaxed_16(ptr %p, i32 %off32) #0 {
; CHECK-NOLSE-O1-NEXT: add w0, w8, w9
; CHECK-NOLSE-O1-NEXT: ret
;
+; CHECK-OUTLINE-O1-LABEL: atomic_load_relaxed_16:
+; CHECK-OUTLINE-O1: ; %bb.0:
+; CHECK-OUTLINE-O1-NEXT: ldrh w8, [x0, #8190]
+; CHECK-OUTLINE-O1-NEXT: ldrh w9, [x0, w1, sxtw #1]
+; CHECK-OUTLINE-O1-NEXT: add x11, x0, #291, lsl #12 ; =1191936
+; CHECK-OUTLINE-O1-NEXT: ldurh w10, [x0, #-256]
+; CHECK-OUTLINE-O1-NEXT: add w8, w8, w9
+; CHECK-OUTLINE-O1-NEXT: ldrh w9, [x11]
+; CHECK-OUTLINE-O1-NEXT: add w8, w8, w10
+; CHECK-OUTLINE-O1-NEXT: add w0, w8, w9
+; CHECK-OUTLINE-O1-NEXT: ret
+;
; CHECK-NOLSE-O0-LABEL: atomic_load_relaxed_16:
; CHECK-NOLSE-O0: ; %bb.0:
; CHECK-NOLSE-O0-NEXT: ldrh w9, [x0, #8190]
@@ -803,6 +1123,20 @@ define i16 @atomic_load_relaxed_16(ptr %p, i32 %off32) #0 {
; CHECK-NOLSE-O0-NEXT: add w0, w8, w9, uxth
; CHECK-NOLSE-O0-NEXT: ret
;
+; CHECK-OUTLINE-O0-LABEL: atomic_load_relaxed_16:
+; CHECK-OUTLINE-O0: ; %bb.0:
+; CHECK-OUTLINE-O0-NEXT: ldrh w9, [x0, #8190]
+; CHECK-OUTLINE-O0-NEXT: add x8, x0, w1, sxtw #1
+; CHECK-OUTLINE-O0-NEXT: ldrh w8, [x8]
+; CHECK-OUTLINE-O0-NEXT: add w8, w8, w9, uxth
+; CHECK-OUTLINE-O0-NEXT: subs x9, x0, #256
+; CHECK-OUTLINE-O0-NEXT: ldrh w9, [x9]
+; CHECK-OUTLINE-O0-NEXT: add w8, w8, w9, uxth
+; CHECK-OUTLINE-O0-NEXT: add x9, x0, #291, lsl #12 ; =1191936
+; CHECK-OUTLINE-O0-NEXT: ldrh w9, [x9]
+; CHECK-OUTLINE-O0-NEXT: add w0, w8, w9, uxth
+; CHECK-OUTLINE-O0-NEXT: ret
+;
; CHECK-LSE-O1-LABEL: atomic_load_relaxed_16:
; CHECK-LSE-O1: ; %bb.0:
; CHECK-LSE-O1-NEXT: ldrh w8, [x0, #8190]
@@ -859,6 +1193,18 @@ define i32 @atomic_load_relaxed_32(ptr %p, i32 %off32) #0 {
; CHECK-NOLSE-O1-NEXT: add w0, w8, w9
; CHECK-NOLSE-O1-NEXT: ret
;
+; CHECK-OUTLINE-O1-LABEL: atomic_load_relaxed_32:
+; CHECK-OUTLINE-O1: ; %bb.0:
+; CHECK-OUTLINE-O1-NEXT: ldr w8, [x0, #16380]
+; CHECK-OUTLINE-O1-NEXT: ldr w9, [x0, w1, sxtw #2]
+; CHECK-OUTLINE-O1-NEXT: add x11, x0, #291, lsl #12 ; =1191936
+; CHECK-OUTLINE-O1-NEXT: ldur w10, [x0, #-256]
+; CHECK-OUTLINE-O1-NEXT: add w8, w8, w9
+; CHECK-OUTLINE-O1-NEXT: ldr w9, [x11]
+; CHECK-OUTLINE-O1-NEXT: add w8, w8, w10
+; CHECK-OUTLINE-O1-NEXT: add w0, w8, w9
+; CHECK-OUTLINE-O1-NEXT: ret
+;
; CHECK-NOLSE-O0-LABEL: atomic_load_relaxed_32:
; CHECK-NOLSE-O0: ; %bb.0:
; CHECK-NOLSE-O0-NEXT: ldr w8, [x0, #16380]
@@ -871,6 +1217,18 @@ define i32 @atomic_load_relaxed_32(ptr %p, i32 %off32) #0 {
; CHECK-NOLSE-O0-NEXT: add w0, w8, w9
; CHECK-NOLSE-O0-NEXT: ret
;
+; CHECK-OUTLINE-O0-LABEL: atomic_load_relaxed_32:
+; CHECK-OUTLINE-O0: ; %bb.0:
+; CHECK-OUTLINE-O0-NEXT: ldr w8, [x0, #16380]
+; CHECK-OUTLINE-O0-NEXT: ldr w9, [x0, w1, sxtw #2]
+; CHECK-OUTLINE-O0-NEXT: add w8, w8, w9
+; CHECK-OUTLINE-O0-NEXT: ldur w9, [x0, #-256]
+; CHECK-OUTLINE-O0-NEXT: add w8, w8, w9
+; CHECK-OUTLINE-O0-NEXT: add x9, x0, #291, lsl #12 ; =1191936
+; CHECK-OUTLINE-O0-NEXT: ldr w9, [x9]
+; CHECK-OUTLINE-O0-NEXT: add w0, w8, w9
+; CHECK-OUTLINE-O0-NEXT: ret
+;
; CHECK-LSE-O1-LABEL: atomic_load_relaxed_32:
; CHECK-LSE-O1: ; %bb.0:
; CHECK-LSE-O1-NEXT: ldr w8, [x0, #16380]
@@ -925,6 +1283,18 @@ define i64 @atomic_load_relaxed_64(ptr %p, i32 %off32) #0 {
; CHECK-NOLSE-O1-NEXT: add x0, x8, x9
; CHECK-NOLSE-O1-NEXT: ret
;
+; CHECK-OUTLINE-O1-LABEL: atomic_load_relaxed_64:
+; CHECK-OUTLINE-O1: ; %bb.0:
+; CHECK-OUTLINE-O1-NEXT: ldr x8, [x0, #32760]
+; CHECK-OUTLINE-O1-NEXT: ldr x9, [x0, w1, sxtw #3]
+; CHECK-OUTLINE-O1-NEXT: add x11, x0, #291, lsl #12 ; =1191936
+; CHECK-OUTLINE-O1-NEXT: ldur x10, [x0, #-256]
+; CHECK-OUTLINE-O1-NEXT: add x8, x8, x9
+; CHECK-OUTLINE-O1-NEXT: ldr x9, [x11]
+; CHECK-OUTLINE-O1-NEXT: add x8, x8, x10
+; CHECK-OUTLINE-O1-NEXT: add x0, x8, x9
+; CHECK-OUTLINE-O1-NEXT: ret
+;
; CHECK-NOLSE-O0-LABEL: atomic_load_relaxed_64:
; CHECK-NOLSE-O0: ; %bb.0:
; CHECK-NOLSE-O0-NEXT: ldr x8, [x0, #32760]
@@ -937,6 +1307,18 @@ define i64 @atomic_load_relaxed_64(ptr %p, i32 %off32) #0 {
; CHECK-NOLSE-O0-NEXT: add x0, x8, x9
; CHECK-NOLSE-O0-NEXT: ret
;
+; CHECK-OUTLINE-O0-LABEL: atomic_load_relaxed_64:
+; CHECK-OUTLINE-O0: ; %bb.0:
+; CHECK-OUTLINE-O0-NEXT: ldr x8, [x0, #32760]
+; CHECK-OUTLINE-O0-NEXT: ldr x9, [x0, w1, sxtw #3]
+; CHECK-OUTLINE-O0-NEXT: add x8, x8, x9
+; CHECK-OUTLINE-O0-NEXT: ldur x9, [x0, #-256]
+; CHECK-OUTLINE-O0-NEXT: add x8, x8, x9
+; CHECK-OUTLINE-O0-NEXT: add x9, x0, #291, lsl #12 ; =1191936
+; CHECK-OUTLINE-O0-NEXT: ldr x9, [x9]
+; CHECK-OUTLINE-O0-NEXT: add x0, x8, x9
+; CHECK-OUTLINE-O0-NEXT: ret
+;
; CHECK-LSE-O1-LABEL: atomic_load_relaxed_64:
; CHECK-LSE-O1: ; %bb.0:
; CHECK-LSE-O1-NEXT: ldr x8, [x0, #32760]
@@ -986,6 +1368,12 @@ define void @atomc_store(ptr %p) #0 {
; CHECK-NOLSE-NEXT: stlr w8, [x0]
; CHECK-NOLSE-NEXT: ret
;
+; CHECK-OUTLINE-LABEL: atomc_store:
+; CHECK-OUTLINE: ; %bb.0:
+; CHECK-OUTLINE-NEXT: mov w8, #4 ; =0x4
+; CHECK-OUTLINE-NEXT: stlr w8, [x0]
+; CHECK-OUTLINE-NEXT: ret
+;
; CHECK-LSE-O1-LABEL: atomc_store:
; CHECK-LSE-O1: ; %bb.0:
; CHECK-LSE-O1-NEXT: mov w8, #4 ; =0x4
@@ -1011,6 +1399,15 @@ define void @atomic_store_relaxed_8(ptr %p, i32 %off32, i8 %val) #0 {
; CHECK-NOLSE-O1-NEXT: strb w2, [x8]
; CHECK-NOLSE-O1-NEXT: ret
;
+; CHECK-OUTLINE-O1-LABEL: atomic_store_relaxed_8:
+; CHECK-OUTLINE-O1: ; %bb.0:
+; CHECK-OUTLINE-O1-NEXT: add x8, x0, #291, lsl #12 ; =1191936
+; CHECK-OUTLINE-O1-NEXT: strb w2, [x0, #4095]
+; CHECK-OUTLINE-O1-NEXT: strb w2, [x0, w1, sxtw]
+; CHECK-OUTLINE-O1-NEXT: sturb w2, [x0, #-256]
+; CHECK-OUTLINE-O1-NEXT: strb w2, [x8]
+; CHECK-OUTLINE-O1-NEXT: ret
+;
; CHECK-NOLSE-O0-LABEL: atomic_store_relaxed_8:
; CHECK-NOLSE-O0: ; %bb.0:
; CHECK-NOLSE-O0-NEXT: strb w2, [x0, #4095]
@@ -1020,6 +1417,15 @@ define void @atomic_store_relaxed_8(ptr %p, i32 %off32, i8 %val) #0 {
; CHECK-NOLSE-O0-NEXT: strb w2, [x8]
; CHECK-NOLSE-O0-NEXT: ret
;
+; CHECK-OUTLINE-O0-LABEL: atomic_store_relaxed_8:
+; CHECK-OUTLINE-O0: ; %bb.0:
+; CHECK-OUTLINE-O0-NEXT: strb w2, [x0, #4095]
+; CHECK-OUTLINE-O0-NEXT: strb w2, [x0, w1, sxtw]
+; CHECK-OUTLINE-O0-NEXT: sturb w2, [x0, #-256]
+; CHECK-OUTLINE-O0-NEXT: add x8, x0, #291, lsl #12 ; =1191936
+; CHECK-OUTLINE-O0-NEXT: strb w2, [x8]
+; CHECK-OUTLINE-O0-NEXT: ret
+;
; CHECK-LSE-O1-LABEL: atomic_store_relaxed_8:
; CHECK-LSE-O1: ; %bb.0:
; CHECK-LSE-O1-NEXT: strb w2, [x0, #4095]
@@ -1062,6 +1468,15 @@ define void @atomic_store_relaxed_16(ptr %p, i32 %off32, i16 %val) #0 {
; CHECK-NOLSE-O1-NEXT: strh w2, [x8]
; CHECK-NOLSE-O1-NEXT: ret
;
+; CHECK-OUTLINE-O1-LABEL: atomic_store_relaxed_16:
+; CHECK-OUTLINE-O1: ; %bb.0:
+; CHECK-OUTLINE-O1-NEXT: add x8, x0, #291, lsl #12 ; =1191936
+; CHECK-OUTLINE-O1-NEXT: strh w2, [x0, #8190]
+; CHECK-OUTLINE-O1-NEXT: strh w2, [x0, w1, sxtw #1]
+; CHECK-OUTLINE-O1-NEXT: sturh w2, [x0, #-256]
+; CHECK-OUTLINE-O1-NEXT: strh w2, [x8]
+; CHECK-OUTLINE-O1-NEXT: ret
+;
; CHECK-NOLSE-O0-LABEL: atomic_store_relaxed_16:
; CHECK-NOLSE-O0: ; %bb.0:
; CHECK-NOLSE-O0-NEXT: strh w2, [x0, #8190]
@@ -1071,6 +1486,15 @@ define void @atomic_store_relaxed_16(ptr %p, i32 %off32, i16 %val) #0 {
; CHECK-NOLSE-O0-NEXT: strh w2, [x8]
; CHECK-NOLSE-O0-NEXT: ret
;
+; CHECK-OUTLINE-O0-LABEL: atomic_store_relaxed_16:
+; CHECK-OUTLINE-O0: ; %bb.0:
+; CHECK-OUTLINE-O0-NEXT: strh w2, [x0, #8190]
+; CHECK-OUTLINE-O0-NEXT: strh w2, [x0, w1, sxtw #1]
+; CHECK-OUTLINE-O0-NEXT: sturh w2, [x0, #-256]
+; CHECK-OUTLINE-O0-NEXT: add x8, x0, #291, lsl #12 ; =1191936
+; CHECK-OUTLINE-O0-NEXT: strh w2, [x8]
+; CHECK-OUTLINE-O0-NEXT: ret
+;
; CHECK-LSE-O1-LABEL: atomic_store_relaxed_16:
; CHECK-LSE-O1: ; %bb.0:
; CHECK-LSE-O1-NEXT: strh w2, [x0, #8190]
@@ -1113,6 +1537,15 @@ define void @atomic_store_relaxed_32(ptr %p, i32 %off32, i32 %val) #0 {
; CHECK-NOLSE-O1-NEXT: str w2, [x8]
; CHECK-NOLSE-O1-NEXT: ret
;
+; CHECK-OUTLINE-O1-LABEL: atomic_store_relaxed_32:
+; CHECK-OUTLINE-O1: ; %bb.0:
+; CHECK-OUTLINE-O1-NEXT: add x8, x0, #291, lsl #12 ; =1191936
+; CHECK-OUTLINE-O1-NEXT: str w2, [x0, #16380]
+; CHECK-OUTLINE-O1-NEXT: str w2, [x0, w1, sxtw #2]
+; CHECK-OUTLINE-O1-NEXT: stur w2, [x0, #-256]
+; CHECK-OUTLINE-O1-NEXT: str w2, [x8]
+; CHECK-OUTLINE-O1-NEXT: ret
+;
; CHECK-NOLSE-O0-LABEL: atomic_store_relaxed_32:
; CHECK-NOLSE-O0: ; %bb.0:
; CHECK-NOLSE-O0-NEXT: str w2, [x0, #16380]
@@ -1122,6 +1555,15 @@ define void @atomic_store_relaxed_32(ptr %p, i32 %off32, i32 %val) #0 {
; CHECK-NOLSE-O0-NEXT: str w2, [x8]
; CHECK-NOLSE-O0-NEXT: ret
;
+; CHECK-OUTLINE-O0-LABEL: atomic_store_relaxed_32:
+; CHECK-OUTLINE-O0: ; %bb.0:
+; CHECK-OUTLINE-O0-NEXT: str w2, [x0, #16380]
+; CHECK-OUTLINE-O0-NEXT: str w2, [x0, w1, sxtw #2]
+; CHECK-OUTLINE-O0-NEXT: stur w2, [x0, #-256]
+; CHECK-OUTLINE-O0-NEXT: add x8, x0, #291, lsl #12 ; =1191936
+; CHECK-OUTLINE-O0-NEXT: str w2, [x8]
+; CHECK-OUTLINE-O0-NEXT: ret
+;
; CHECK-LSE-O1-LABEL: atomic_store_relaxed_32:
; CHECK-LSE-O1: ; %bb.0:
; CHECK-LSE-O1-NEXT: str w2, [x0, #16380]
@@ -1164,6 +1606,15 @@ define void @atomic_store_relaxed_64(ptr %p, i32 %off32, i64 %val) #0 {
; CHECK-NOLSE-O1-NEXT: str x2, [x8]
; CHECK-NOLSE-O1-NEXT: ret
;
+; CHECK-OUTLINE-O1-LABEL: atomic_store_relaxed_64:
+; CHECK-OUTLINE-O1: ; %bb.0:
+; CHECK-OUTLINE-O1-NEXT: add x8, x0, #291, lsl #12 ; =1191936
+; CHECK-OUTLINE-O1-NEXT: str x2, [x0, #32760]
+; CHECK-OUTLINE-O1-NEXT: str x2, [x0, w1, sxtw #3]
+; CHECK-OUTLINE-O1-NEXT: stur x2, [x0, #-256]
+; CHECK-OUTLINE-O1-NEXT: str x2, [x8]
+; CHECK-OUTLINE-O1-NEXT: ret
+;
; CHECK-NOLSE-O0-LABEL: atomic_store_relaxed_64:
; CHECK-NOLSE-O0: ; %bb.0:
; CHECK-NOLSE-O0-NEXT: str x2, [x0, #32760]
@@ -1173,6 +1624,15 @@ define void @atomic_store_relaxed_64(ptr %p, i32 %off32, i64 %val) #0 {
; CHECK-NOLSE-O0-NEXT: str x2, [x8]
; CHECK-NOLSE-O0-NEXT: ret
;
+; CHECK-OUTLINE-O0-LABEL: atomic_store_relaxed_64:
+; CHECK-OUTLINE-O0: ; %bb.0:
+; CHECK-OUTLINE-O0-NEXT: str x2, [x0, #32760]
+; CHECK-OUTLINE-O0-NEXT: str x2, [x0, w1, sxtw #3]
+; CHECK-OUTLINE-O0-NEXT: stur x2, [x0, #-256]
+; CHECK-OUTLINE-O0-NEXT: add x8, x0, #291, lsl #12 ; =1191936
+; CHECK-OUTLINE-O0-NEXT: str x2, [x8]
+; CHECK-OUTLINE-O0-NEXT: ret
+;
; CHECK-LSE-O1-LABEL: atomic_store_relaxed_64:
; CHECK-LSE-O1: ; %bb.0:
; CHECK-LSE-O1-NEXT: str x2, [x0, #32760]
@@ -1213,6 +1673,13 @@ define i32 @load_zext(ptr %p8, ptr %p16) {
; CHECK-NOLSE-O1-NEXT: add w0, w9, w8, uxtb
; CHECK-NOLSE-O1-NEXT: ret
;
+; CHECK-OUTLINE-O1-LABEL: load_zext:
+; CHECK-OUTLINE-O1: ; %bb.0:
+; CHECK-OUTLINE-O1-NEXT: ldarb w8, [x0]
+; CHECK-OUTLINE-O1-NEXT: ldrh w9, [x1]
+; CHECK-OUTLINE-O1-NEXT: add w0, w9, w8, uxtb
+; CHECK-OUTLINE-O1-NEXT: ret
+;
; CHECK-NOLSE-O0-LABEL: load_zext:
; CHECK-NOLSE-O0: ; %bb.0:
; CHECK-NOLSE-O0-NEXT: ldarb w9, [x0]
@@ -1220,6 +1687,13 @@ define i32 @load_zext(ptr %p8, ptr %p16) {
; CHECK-NOLSE-O0-NEXT: add w0, w8, w9, uxtb
; CHECK-NOLSE-O0-NEXT: ret
;
+; CHECK-OUTLINE-O0-LABEL: load_zext:
+; CHECK-OUTLINE-O0: ; %bb.0:
+; CHECK-OUTLINE-O0-NEXT: ldarb w9, [x0]
+; CHECK-OUTLINE-O0-NEXT: ldrh w8, [x1]
+; CHECK-OUTLINE-O0-NEXT: add w0, w8, w9, uxtb
+; CHECK-OUTLINE-O0-NEXT: ret
+;
; CHECK-LSE-O1-LABEL: load_zext:
; CHECK-LSE-O1: ; %bb.0:
; CHECK-LSE-O1-NEXT: ldaprb w8, [x0]
@@ -1250,6 +1724,12 @@ define { i32, i64 } @load_acq(ptr %p32, ptr %p64) {
; CHECK-NOLSE-NEXT: ldar x1, [x1]
; CHECK-NOLSE-NEXT: ret
;
+; CHECK-OUTLINE-LABEL: load_acq:
+; CHECK-OUTLINE: ; %bb.0:
+; CHECK-OUTLINE-NEXT: ldar w0, [x0]
+; CHECK-OUTLINE-NEXT: ldar x1, [x1]
+; CHECK-OUTLINE-NEXT: ret
+;
; CHECK-LSE-O1-LABEL: load_acq:
; CHECK-LSE-O1: ; %bb.0:
; CHECK-LSE-O1-NEXT: ldar w0, [x0]
@@ -1279,6 +1759,14 @@ define i32 @load_sext(ptr %p8, ptr %p16) {
; CHECK-NOLSE-O1-NEXT: add w0, w9, w8, sxtb
; CHECK-NOLSE-O1-NEXT: ret
;
+; CHECK-OUTLINE-O1-LABEL: load_sext:
+; CHECK-OUTLINE-O1: ; %bb.0:
+; CHECK-OUTLINE-O1-NEXT: ldarb w8, [x0]
+; CHECK-OUTLINE-O1-NEXT: ldrh w9, [x1]
+; CHECK-OUTLINE-O1-NEXT: sxth w9, w9
+; CHECK-OUTLINE-O1-NEXT: add w0, w9, w8, sxtb
+; CHECK-OUTLINE-O1-NEXT: ret
+;
; CHECK-NOLSE-O0-LABEL: load_sext:
; CHECK-NOLSE-O0: ; %bb.0:
; CHECK-NOLSE-O0-NEXT: ldarb w9, [x0]
@@ -1287,6 +1775,14 @@ define i32 @load_sext(ptr %p8, ptr %p16) {
; CHECK-NOLSE-O0-NEXT: add w0, w8, w9, sxtb
; CHECK-NOLSE-O0-NEXT: ret
;
+; CHECK-OUTLINE-O0-LABEL: load_sext:
+; CHECK-OUTLINE-O0: ; %bb.0:
+; CHECK-OUTLINE-O0-NEXT: ldarb w9, [x0]
+; CHECK-OUTLINE-O0-NEXT: ldrh w8, [x1]
+; CHECK-OUTLINE-O0-NEXT: sxth w8, w8
+; CHECK-OUTLINE-O0-NEXT: add w0, w8, w9, sxtb
+; CHECK-OUTLINE-O0-NEXT: ret
+;
; CHECK-LSE-O1-LABEL: load_sext:
; CHECK-LSE-O1: ; %bb.0:
; CHECK-LSE-O1-NEXT: ldaprb w8, [x0]
@@ -1319,6 +1815,12 @@ define void @store_trunc(i32 %val, ptr %p8, ptr %p16) {
; CHECK-NOLSE-NEXT: strh w0, [x2]
; CHECK-NOLSE-NEXT: ret
;
+; CHECK-OUTLINE-LABEL: store_trunc:
+; CHECK-OUTLINE: ; %bb.0:
+; CHECK-OUTLINE-NEXT: stlrb w0, [x1]
+; CHECK-OUTLINE-NEXT: strh w0, [x2]
+; CHECK-OUTLINE-NEXT: ret
+;
; CHECK-LSE-O1-LABEL: store_trunc:
; CHECK-LSE-O1: ; %bb.0:
; CHECK-LSE-O1-NEXT: stlrb w0, [x1]
@@ -1352,6 +1854,19 @@ define i8 @atomicrmw_add_i8(ptr %ptr, i8 %rhs) {
; CHECK-NOLSE-O1-NEXT: mov w0, w8
; CHECK-NOLSE-O1-NEXT: ret
;
+; CHECK-OUTLINE-O1-LABEL: atomicrmw_add_i8:
+; CHECK-OUTLINE-O1: ; %bb.0:
+; CHECK-OUTLINE-O1-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
+; CHECK-OUTLINE-O1-NEXT: .cfi_def_cfa_offset 16
+; CHECK-OUTLINE-O1-NEXT: .cfi_offset w30, -8
+; CHECK-OUTLINE-O1-NEXT: .cfi_offset w29, -16
+; CHECK-OUTLINE-O1-NEXT: mov x2, x0
+; CHECK-OUTLINE-O1-NEXT: mov w0, w1
+; CHECK-OUTLINE-O1-NEXT: mov x1, x2
+; CHECK-OUTLINE-O1-NEXT: bl ___aarch64_ldadd1_acq_rel
+; CHECK-OUTLINE-O1-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
+; CHECK-OUTLINE-O1-NEXT: ret
+;
; CHECK-NOLSE-O0-LABEL: atomicrmw_add_i8:
; CHECK-NOLSE-O0: ; %bb.0:
; CHECK-NOLSE-O0-NEXT: sub sp, sp, #32
@@ -1392,6 +1907,21 @@ define i8 @atomicrmw_add_i8(ptr %ptr, i8 %rhs) {
; CHECK-NOLSE-O0-NEXT: add sp, sp, #32
; CHECK-NOLSE-O0-NEXT: ret
;
+; CHECK-OUTLINE-O0-LABEL: atomicrmw_add_i8:
+; CHECK-OUTLINE-O0: ; %bb.0:
+; CHECK-OUTLINE-O0-NEXT: sub sp, sp, #32
+; CHECK-OUTLINE-O0-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: .cfi_def_cfa_offset 32
+; CHECK-OUTLINE-O0-NEXT: .cfi_offset w30, -8
+; CHECK-OUTLINE-O0-NEXT: .cfi_offset w29, -16
+; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #8] ; 8-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: mov w0, w1
+; CHECK-OUTLINE-O0-NEXT: ldr x1, [sp, #8] ; 8-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: bl ___aarch64_ldadd1_acq_rel
+; CHECK-OUTLINE-O0-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: add sp, sp, #32
+; CHECK-OUTLINE-O0-NEXT: ret
+;
; CHECK-LSE-O1-LABEL: atomicrmw_add_i8:
; CHECK-LSE-O1: ; %bb.0:
; CHECK-LSE-O1-NEXT: ldaddalb w1, w0, [x0]
@@ -1418,6 +1948,19 @@ define i8 @atomicrmw_xchg_i8(ptr %ptr, i8 %rhs) {
; CHECK-NOLSE-O1-NEXT: mov w0, w8
; CHECK-NOLSE-O1-NEXT: ret
;
+; CHECK-OUTLINE-O1-LABEL: atomicrmw_xchg_i8:
+; CHECK-OUTLINE-O1: ; %bb.0:
+; CHECK-OUTLINE-O1-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
+; CHECK-OUTLINE-O1-NEXT: .cfi_def_cfa_offset 16
+; CHECK-OUTLINE-O1-NEXT: .cfi_offset w30, -8
+; CHECK-OUTLINE-O1-NEXT: .cfi_offset w29, -16
+; CHECK-OUTLINE-O1-NEXT: mov x2, x0
+; CHECK-OUTLINE-O1-NEXT: mov w0, w1
+; CHECK-OUTLINE-O1-NEXT: mov x1, x2
+; CHECK-OUTLINE-O1-NEXT: bl ___aarch64_swp1_relax
+; CHECK-OUTLINE-O1-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
+; CHECK-OUTLINE-O1-NEXT: ret
+;
; CHECK-NOLSE-O0-LABEL: atomicrmw_xchg_i8:
; CHECK-NOLSE-O0: ; %bb.0:
; CHECK-NOLSE-O0-NEXT: sub sp, sp, #32
@@ -1457,6 +2000,21 @@ define i8 @atomicrmw_xchg_i8(ptr %ptr, i8 %rhs) {
; CHECK-NOLSE-O0-NEXT: add sp, sp, #32
; CHECK-NOLSE-O0-NEXT: ret
;
+; CHECK-OUTLINE-O0-LABEL: atomicrmw_xchg_i8:
+; CHECK-OUTLINE-O0: ; %bb.0:
+; CHECK-OUTLINE-O0-NEXT: sub sp, sp, #32
+; CHECK-OUTLINE-O0-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: .cfi_def_cfa_offset 32
+; CHECK-OUTLINE-O0-NEXT: .cfi_offset w30, -8
+; CHECK-OUTLINE-O0-NEXT: .cfi_offset w29, -16
+; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #8] ; 8-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: mov w0, w1
+; CHECK-OUTLINE-O0-NEXT: ldr x1, [sp, #8] ; 8-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: bl ___aarch64_swp1_relax
+; CHECK-OUTLINE-O0-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: add sp, sp, #32
+; CHECK-OUTLINE-O0-NEXT: ret
+;
; CHECK-LSE-O1-LABEL: atomicrmw_xchg_i8:
; CHECK-LSE-O1: ; %bb.0:
; CHECK-LSE-O1-NEXT: swpb w1, w0, [x0]
@@ -1483,6 +2041,19 @@ define i8 @atomicrmw_sub_i8(ptr %ptr, i8 %rhs) {
; CHECK-NOLSE-O1-NEXT: mov w0, w8
; CHECK-NOLSE-O1-NEXT: ret
;
+; CHECK-OUTLINE-O1-LABEL: atomicrmw_sub_i8:
+; CHECK-OUTLINE-O1: ; %bb.0:
+; CHECK-OUTLINE-O1-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
+; CHECK-OUTLINE-O1-NEXT: .cfi_def_cfa_offset 16
+; CHECK-OUTLINE-O1-NEXT: .cfi_offset w30, -8
+; CHECK-OUTLINE-O1-NEXT: .cfi_offset w29, -16
+; CHECK-OUTLINE-O1-NEXT: mov x2, x0
+; CHECK-OUTLINE-O1-NEXT: neg w0, w1
+; CHECK-OUTLINE-O1-NEXT: mov x1, x2
+; CHECK-OUTLINE-O1-NEXT: bl ___aarch64_ldadd1_acq
+; CHECK-OUTLINE-O1-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
+; CHECK-OUTLINE-O1-NEXT: ret
+;
; CHECK-NOLSE-O0-LABEL: atomicrmw_sub_i8:
; CHECK-NOLSE-O0: ; %bb.0:
; CHECK-NOLSE-O0-NEXT: sub sp, sp, #32
@@ -1523,6 +2094,23 @@ define i8 @atomicrmw_sub_i8(ptr %ptr, i8 %rhs) {
; CHECK-NOLSE-O0-NEXT: add sp, sp, #32
; CHECK-NOLSE-O0-NEXT: ret
;
+; CHECK-OUTLINE-O0-LABEL: atomicrmw_sub_i8:
+; CHECK-OUTLINE-O0: ; %bb.0:
+; CHECK-OUTLINE-O0-NEXT: sub sp, sp, #32
+; CHECK-OUTLINE-O0-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: .cfi_def_cfa_offset 32
+; CHECK-OUTLINE-O0-NEXT: .cfi_offset w30, -8
+; CHECK-OUTLINE-O0-NEXT: .cfi_offset w29, -16
+; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #8] ; 8-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: mov w9, w1
+; CHECK-OUTLINE-O0-NEXT: ldr x1, [sp, #8] ; 8-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: mov w8, wzr
+; CHECK-OUTLINE-O0-NEXT: subs w0, w8, w9
+; CHECK-OUTLINE-O0-NEXT: bl ___aarch64_ldadd1_acq
+; CHECK-OUTLINE-O0-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: add sp, sp, #32
+; CHECK-OUTLINE-O0-NEXT: ret
+;
; CHECK-LSE-O1-LABEL: atomicrmw_sub_i8:
; CHECK-LSE-O1: ; %bb.0:
; CHECK-LSE-O1-NEXT: neg w8, w1
@@ -1551,6 +2139,20 @@ define i8 @atomicrmw_and_i8(ptr %ptr, i8 %rhs) {
; CHECK-NOLSE-O1-NEXT: mov w0, w8
; CHECK-NOLSE-O1-NEXT: ret
;
+; CHECK-OUTLINE-O1-LABEL: atomicrmw_and_i8:
+; CHECK-OUTLINE-O1: ; %bb.0:
+; CHECK-OUTLINE-O1-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
+; CHECK-OUTLINE-O1-NEXT: .cfi_def_cfa_offset 16
+; CHECK-OUTLINE-O1-NEXT: .cfi_offset w30, -8
+; CHECK-OUTLINE-O1-NEXT: .cfi_offset w29, -16
+; CHECK-OUTLINE-O1-NEXT: mov x2, x0
+; CHECK-OUTLINE-O1-NEXT: mov w8, #-1 ; =0xffffffff
+; CHECK-OUTLINE-O1-NEXT: eor w0, w8, w1
+; CHECK-OUTLINE-O1-NEXT: mov x1, x2
+; CHECK-OUTLINE-O1-NEXT: bl ___aarch64_ldclr1_rel
+; CHECK-OUTLINE-O1-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
+; CHECK-OUTLINE-O1-NEXT: ret
+;
; CHECK-NOLSE-O0-LABEL: atomicrmw_and_i8:
; CHECK-NOLSE-O0: ; %bb.0:
; CHECK-NOLSE-O0-NEXT: sub sp, sp, #32
@@ -1591,6 +2193,23 @@ define i8 @atomicrmw_and_i8(ptr %ptr, i8 %rhs) {
; CHECK-NOLSE-O0-NEXT: add sp, sp, #32
; CHECK-NOLSE-O0-NEXT: ret
;
+; CHECK-OUTLINE-O0-LABEL: atomicrmw_and_i8:
+; CHECK-OUTLINE-O0: ; %bb.0:
+; CHECK-OUTLINE-O0-NEXT: sub sp, sp, #32
+; CHECK-OUTLINE-O0-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: .cfi_def_cfa_offset 32
+; CHECK-OUTLINE-O0-NEXT: .cfi_offset w30, -8
+; CHECK-OUTLINE-O0-NEXT: .cfi_offset w29, -16
+; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #8] ; 8-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: mov w9, w1
+; CHECK-OUTLINE-O0-NEXT: ldr x1, [sp, #8] ; 8-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: mov w8, #-1 ; =0xffffffff
+; CHECK-OUTLINE-O0-NEXT: eor w0, w8, w9
+; CHECK-OUTLINE-O0-NEXT: bl ___aarch64_ldclr1_rel
+; CHECK-OUTLINE-O0-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: add sp, sp, #32
+; CHECK-OUTLINE-O0-NEXT: ret
+;
; CHECK-LSE-O1-LABEL: atomicrmw_and_i8:
; CHECK-LSE-O1: ; %bb.0:
; CHECK-LSE-O1-NEXT: mvn w8, w1
@@ -1619,6 +2238,19 @@ define i8 @atomicrmw_or_i8(ptr %ptr, i8 %rhs) {
; CHECK-NOLSE-O1-NEXT: mov w0, w8
; CHECK-NOLSE-O1-NEXT: ret
;
+; CHECK-OUTLINE-O1-LABEL: atomicrmw_or_i8:
+; CHECK-OUTLINE-O1: ; %bb.0:
+; CHECK-OUTLINE-O1-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
+; CHECK-OUTLINE-O1-NEXT: .cfi_def_cfa_offset 16
+; CHECK-OUTLINE-O1-NEXT: .cfi_offset w30, -8
+; CHECK-OUTLINE-O1-NEXT: .cfi_offset w29, -16
+; CHECK-OUTLINE-O1-NEXT: mov x2, x0
+; CHECK-OUTLINE-O1-NEXT: mov w0, w1
+; CHECK-OUTLINE-O1-NEXT: mov x1, x2
+; CHECK-OUTLINE-O1-NEXT: bl ___aarch64_ldset1_acq_rel
+; CHECK-OUTLINE-O1-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
+; CHECK-OUTLINE-O1-NEXT: ret
+;
; CHECK-NOLSE-O0-LABEL: atomicrmw_or_i8:
; CHECK-NOLSE-O0: ; %bb.0:
; CHECK-NOLSE-O0-NEXT: sub sp, sp, #32
@@ -1659,6 +2291,21 @@ define i8 @atomicrmw_or_i8(ptr %ptr, i8 %rhs) {
; CHECK-NOLSE-O0-NEXT: add sp, sp, #32
; CHECK-NOLSE-O0-NEXT: ret
;
+; CHECK-OUTLINE-O0-LABEL: atomicrmw_or_i8:
+; CHECK-OUTLINE-O0: ; %bb.0:
+; CHECK-OUTLINE-O0-NEXT: sub sp, sp, #32
+; CHECK-OUTLINE-O0-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: .cfi_def_cfa_offset 32
+; CHECK-OUTLINE-O0-NEXT: .cfi_offset w30, -8
+; CHECK-OUTLINE-O0-NEXT: .cfi_offset w29, -16
+; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #8] ; 8-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: mov w0, w1
+; CHECK-OUTLINE-O0-NEXT: ldr x1, [sp, #8] ; 8-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: bl ___aarch64_ldset1_acq_rel
+; CHECK-OUTLINE-O0-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: add sp, sp, #32
+; CHECK-OUTLINE-O0-NEXT: ret
+;
; CHECK-LSE-O1-LABEL: atomicrmw_or_i8:
; CHECK-LSE-O1: ; %bb.0:
; CHECK-LSE-O1-NEXT: ldsetalb w1, w0, [x0]
@@ -1685,6 +2332,19 @@ define i8 @atomicrmw_xor_i8(ptr %ptr, i8 %rhs) {
; CHECK-NOLSE-O1-NEXT: mov w0, w8
; CHECK-NOLSE-O1-NEXT: ret
;
+; CHECK-OUTLINE-O1-LABEL: atomicrmw_xor_i8:
+; CHECK-OUTLINE-O1: ; %bb.0:
+; CHECK-OUTLINE-O1-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
+; CHECK-OUTLINE-O1-NEXT: .cfi_def_cfa_offset 16
+; CHECK-OUTLINE-O1-NEXT: .cfi_offset w30, -8
+; CHECK-OUTLINE-O1-NEXT: .cfi_offset w29, -16
+; CHECK-OUTLINE-O1-NEXT: mov x2, x0
+; CHECK-OUTLINE-O1-NEXT: mov w0, w1
+; CHECK-OUTLINE-O1-NEXT: mov x1, x2
+; CHECK-OUTLINE-O1-NEXT: bl ___aarch64_ldeor1_relax
+; CHECK-OUTLINE-O1-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
+; CHECK-OUTLINE-O1-NEXT: ret
+;
; CHECK-NOLSE-O0-LABEL: atomicrmw_xor_i8:
; CHECK-NOLSE-O0: ; %bb.0:
; CHECK-NOLSE-O0-NEXT: sub sp, sp, #32
@@ -1725,6 +2385,21 @@ define i8 @atomicrmw_xor_i8(ptr %ptr, i8 %rhs) {
; CHECK-NOLSE-O0-NEXT: add sp, sp, #32
; CHECK-NOLSE-O0-NEXT: ret
;
+; CHECK-OUTLINE-O0-LABEL: atomicrmw_xor_i8:
+; CHECK-OUTLINE-O0: ; %bb.0:
+; CHECK-OUTLINE-O0-NEXT: sub sp, sp, #32
+; CHECK-OUTLINE-O0-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: .cfi_def_cfa_offset 32
+; CHECK-OUTLINE-O0-NEXT: .cfi_offset w30, -8
+; CHECK-OUTLINE-O0-NEXT: .cfi_offset w29, -16
+; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #8] ; 8-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: mov w0, w1
+; CHECK-OUTLINE-O0-NEXT: ldr x1, [sp, #8] ; 8-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: bl ___aarch64_ldeor1_relax
+; CHECK-OUTLINE-O0-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: add sp, sp, #32
+; CHECK-OUTLINE-O0-NEXT: ret
+;
; CHECK-LSE-O1-LABEL: atomicrmw_xor_i8:
; CHECK-LSE-O1: ; %bb.0:
; CHECK-LSE-O1-NEXT: ldeorb w1, w0, [x0]
@@ -1753,6 +2428,20 @@ define i8 @atomicrmw_min_i8(ptr %ptr, i8 %rhs) {
; CHECK-NOLSE-O1-NEXT: mov w0, w8
; CHECK-NOLSE-O1-NEXT: ret
;
+; CHECK-OUTLINE-O1-LABEL: atomicrmw_min_i8:
+; CHECK-OUTLINE-O1: ; %bb.0:
+; CHECK-OUTLINE-O1-NEXT: LBB33_1: ; %atomicrmw.start
+; CHECK-OUTLINE-O1-NEXT: ; =>This Inner Loop Header: Depth=1
+; CHECK-OUTLINE-O1-NEXT: ldaxrb w8, [x0]
+; CHECK-OUTLINE-O1-NEXT: sxtb w9, w8
+; CHECK-OUTLINE-O1-NEXT: cmp w9, w1, sxtb
+; CHECK-OUTLINE-O1-NEXT: csel w9, w8, w1, le
+; CHECK-OUTLINE-O1-NEXT: stxrb w10, w9, [x0]
+; CHECK-OUTLINE-O1-NEXT: cbnz w10, LBB33_1
+; CHECK-OUTLINE-O1-NEXT: ; %bb.2: ; %atomicrmw.end
+; CHECK-OUTLINE-O1-NEXT: mov w0, w8
+; CHECK-OUTLINE-O1-NEXT: ret
+;
; CHECK-NOLSE-O0-LABEL: atomicrmw_min_i8:
; CHECK-NOLSE-O0: ; %bb.0:
; CHECK-NOLSE-O0-NEXT: sub sp, sp, #32
@@ -1795,6 +2484,42 @@ define i8 @atomicrmw_min_i8(ptr %ptr, i8 %rhs) {
; CHECK-NOLSE-O0-NEXT: add sp, sp, #32
; CHECK-NOLSE-O0-NEXT: ret
;
+; CHECK-OUTLINE-O0-LABEL: atomicrmw_min_i8:
+; CHECK-OUTLINE-O0: ; %bb.0:
+; CHECK-OUTLINE-O0-NEXT: sub sp, sp, #48
+; CHECK-OUTLINE-O0-NEXT: stp x29, x30, [sp, #32] ; 16-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: .cfi_def_cfa_offset 48
+; CHECK-OUTLINE-O0-NEXT: .cfi_offset w30, -8
+; CHECK-OUTLINE-O0-NEXT: .cfi_offset w29, -16
+; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #16] ; 8-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: str w1, [sp, #24] ; 4-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: ldrb w0, [x0]
+; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #28] ; 4-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: b LBB33_1
+; CHECK-OUTLINE-O0-NEXT: LBB33_1: ; %atomicrmw.start
+; CHECK-OUTLINE-O0-NEXT: ; =>This Inner Loop Header: Depth=1
+; CHECK-OUTLINE-O0-NEXT: ldr w0, [sp, #28] ; 4-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: ldr x2, [sp, #16] ; 8-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: ldr w8, [sp, #24] ; 4-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #8] ; 4-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: sxtb w9, w0
+; CHECK-OUTLINE-O0-NEXT: subs w9, w9, w8, sxtb
+; CHECK-OUTLINE-O0-NEXT: csel w1, w0, w8, le
+; CHECK-OUTLINE-O0-NEXT: bl ___aarch64_cas1_acq
+; CHECK-OUTLINE-O0-NEXT: ldr w9, [sp, #8] ; 4-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #12] ; 4-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: and w8, w0, #0xff
+; CHECK-OUTLINE-O0-NEXT: subs w8, w8, w9, uxtb
+; CHECK-OUTLINE-O0-NEXT: cset w8, eq
+; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #28] ; 4-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: tbz w8, #0, LBB33_1
+; CHECK-OUTLINE-O0-NEXT: b LBB33_2
+; CHECK-OUTLINE-O0-NEXT: LBB33_2: ; %atomicrmw.end
+; CHECK-OUTLINE-O0-NEXT: ldr w0, [sp, #12] ; 4-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: ldp x29, x30, [sp, #32] ; 16-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: add sp, sp, #48
+; CHECK-OUTLINE-O0-NEXT: ret
+;
; CHECK-LSE-O1-LABEL: atomicrmw_min_i8:
; CHECK-LSE-O1: ; %bb.0:
; CHECK-LSE-O1-NEXT: ldsminab w1, w0, [x0]
@@ -1823,6 +2548,20 @@ define i8 @atomicrmw_max_i8(ptr %ptr, i8 %rhs) {
; CHECK-NOLSE-O1-NEXT: mov w0, w8
; CHECK-NOLSE-O1-NEXT: ret
;
+; CHECK-OUTLINE-O1-LABEL: atomicrmw_max_i8:
+; CHECK-OUTLINE-O1: ; %bb.0:
+; CHECK-OUTLINE-O1-NEXT: LBB34_1: ; %atomicrmw.start
+; CHECK-OUTLINE-O1-NEXT: ; =>This Inner Loop Header: Depth=1
+; CHECK-OUTLINE-O1-NEXT: ldxrb w8, [x0]
+; CHECK-OUTLINE-O1-NEXT: sxtb w9, w8
+; CHECK-OUTLINE-O1-NEXT: cmp w9, w1, sxtb
+; CHECK-OUTLINE-O1-NEXT: csel w9, w8, w1, gt
+; CHECK-OUTLINE-O1-NEXT: stlxrb w10, w9, [x0]
+; CHECK-OUTLINE-O1-NEXT: cbnz w10, LBB34_1
+; CHECK-OUTLINE-O1-NEXT: ; %bb.2: ; %atomicrmw.end
+; CHECK-OUTLINE-O1-NEXT: mov w0, w8
+; CHECK-OUTLINE-O1-NEXT: ret
+;
; CHECK-NOLSE-O0-LABEL: atomicrmw_max_i8:
; CHECK-NOLSE-O0: ; %bb.0:
; CHECK-NOLSE-O0-NEXT: sub sp, sp, #32
@@ -1865,6 +2604,42 @@ define i8 @atomicrmw_max_i8(ptr %ptr, i8 %rhs) {
; CHECK-NOLSE-O0-NEXT: add sp, sp, #32
; CHECK-NOLSE-O0-NEXT: ret
;
+; CHECK-OUTLINE-O0-LABEL: atomicrmw_max_i8:
+; CHECK-OUTLINE-O0: ; %bb.0:
+; CHECK-OUTLINE-O0-NEXT: sub sp, sp, #48
+; CHECK-OUTLINE-O0-NEXT: stp x29, x30, [sp, #32] ; 16-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: .cfi_def_cfa_offset 48
+; CHECK-OUTLINE-O0-NEXT: .cfi_offset w30, -8
+; CHECK-OUTLINE-O0-NEXT: .cfi_offset w29, -16
+; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #16] ; 8-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: str w1, [sp, #24] ; 4-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: ldrb w0, [x0]
+; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #28] ; 4-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: b LBB34_1
+; CHECK-OUTLINE-O0-NEXT: LBB34_1: ; %atomicrmw.start
+; CHECK-OUTLINE-O0-NEXT: ; =>This Inner Loop Header: Depth=1
+; CHECK-OUTLINE-O0-NEXT: ldr w0, [sp, #28] ; 4-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: ldr x2, [sp, #16] ; 8-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: ldr w8, [sp, #24] ; 4-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #8] ; 4-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: sxtb w9, w0
+; CHECK-OUTLINE-O0-NEXT: subs w9, w9, w8, sxtb
+; CHECK-OUTLINE-O0-NEXT: csel w1, w0, w8, gt
+; CHECK-OUTLINE-O0-NEXT: bl ___aarch64_cas1_rel
+; CHECK-OUTLINE-O0-NEXT: ldr w9, [sp, #8] ; 4-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #12] ; 4-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: and w8, w0, #0xff
+; CHECK-OUTLINE-O0-NEXT: subs w8, w8, w9, uxtb
+; CHECK-OUTLINE-O0-NEXT: cset w8, eq
+; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #28] ; 4-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: tbz w8, #0, LBB34_1
+; CHECK-OUTLINE-O0-NEXT: b LBB34_2
+; CHECK-OUTLINE-O0-NEXT: LBB34_2: ; %atomicrmw.end
+; CHECK-OUTLINE-O0-NEXT: ldr w0, [sp, #12] ; 4-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: ldp x29, x30, [sp, #32] ; 16-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: add sp, sp, #48
+; CHECK-OUTLINE-O0-NEXT: ret
+;
; CHECK-LSE-O1-LABEL: atomicrmw_max_i8:
; CHECK-LSE-O1: ; %bb.0:
; CHECK-LSE-O1-NEXT: ldsmaxlb w1, w0, [x0]
@@ -1894,6 +2669,21 @@ define i8 @atomicrmw_umin_i8(ptr %ptr, i8 %rhs) {
; CHECK-NOLSE-O1-NEXT: mov w0, w8
; CHECK-NOLSE-O1-NEXT: ret
;
+; CHECK-OUTLINE-O1-LABEL: atomicrmw_umin_i8:
+; CHECK-OUTLINE-O1: ; %bb.0:
+; CHECK-OUTLINE-O1-NEXT: and w9, w1, #0xff
+; CHECK-OUTLINE-O1-NEXT: LBB35_1: ; %atomicrmw.start
+; CHECK-OUTLINE-O1-NEXT: ; =>This Inner Loop Header: Depth=1
+; CHECK-OUTLINE-O1-NEXT: ldaxrb w8, [x0]
+; CHECK-OUTLINE-O1-NEXT: and w10, w8, #0xff
+; CHECK-OUTLINE-O1-NEXT: cmp w10, w9
+; CHECK-OUTLINE-O1-NEXT: csel w10, w10, w9, ls
+; CHECK-OUTLINE-O1-NEXT: stlxrb w11, w10, [x0]
+; CHECK-OUTLINE-O1-NEXT: cbnz w11, LBB35_1
+; CHECK-OUTLINE-O1-NEXT: ; %bb.2: ; %atomicrmw.end
+; CHECK-OUTLINE-O1-NEXT: mov w0, w8
+; CHECK-OUTLINE-O1-NEXT: ret
+;
; CHECK-NOLSE-O0-LABEL: atomicrmw_umin_i8:
; CHECK-NOLSE-O0: ; %bb.0:
; CHECK-NOLSE-O0-NEXT: sub sp, sp, #32
@@ -1936,6 +2726,42 @@ define i8 @atomicrmw_umin_i8(ptr %ptr, i8 %rhs) {
; CHECK-NOLSE-O0-NEXT: add sp, sp, #32
; CHECK-NOLSE-O0-NEXT: ret
;
+; CHECK-OUTLINE-O0-LABEL: atomicrmw_umin_i8:
+; CHECK-OUTLINE-O0: ; %bb.0:
+; CHECK-OUTLINE-O0-NEXT: sub sp, sp, #48
+; CHECK-OUTLINE-O0-NEXT: stp x29, x30, [sp, #32] ; 16-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: .cfi_def_cfa_offset 48
+; CHECK-OUTLINE-O0-NEXT: .cfi_offset w30, -8
+; CHECK-OUTLINE-O0-NEXT: .cfi_offset w29, -16
+; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #16] ; 8-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: str w1, [sp, #24] ; 4-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: ldrb w0, [x0]
+; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #28] ; 4-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: b LBB35_1
+; CHECK-OUTLINE-O0-NEXT: LBB35_1: ; %atomicrmw.start
+; CHECK-OUTLINE-O0-NEXT: ; =>This Inner Loop Header: Depth=1
+; CHECK-OUTLINE-O0-NEXT: ldr w0, [sp, #28] ; 4-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: ldr x2, [sp, #16] ; 8-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: ldr w8, [sp, #24] ; 4-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #8] ; 4-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: and w9, w0, #0xff
+; CHECK-OUTLINE-O0-NEXT: subs w9, w9, w8, uxtb
+; CHECK-OUTLINE-O0-NEXT: csel w1, w0, w8, ls
+; CHECK-OUTLINE-O0-NEXT: bl ___aarch64_cas1_acq_rel
+; CHECK-OUTLINE-O0-NEXT: ldr w9, [sp, #8] ; 4-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #12] ; 4-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: and w8, w0, #0xff
+; CHECK-OUTLINE-O0-NEXT: subs w8, w8, w9, uxtb
+; CHECK-OUTLINE-O0-NEXT: cset w8, eq
+; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #28] ; 4-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: tbz w8, #0, LBB35_1
+; CHECK-OUTLINE-O0-NEXT: b LBB35_2
+; CHECK-OUTLINE-O0-NEXT: LBB35_2: ; %atomicrmw.end
+; CHECK-OUTLINE-O0-NEXT: ldr w0, [sp, #12] ; 4-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: ldp x29, x30, [sp, #32] ; 16-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: add sp, sp, #48
+; CHECK-OUTLINE-O0-NEXT: ret
+;
; CHECK-LSE-O1-LABEL: atomicrmw_umin_i8:
; CHECK-LSE-O1: ; %bb.0:
; CHECK-LSE-O1-NEXT: lduminalb w1, w0, [x0]
@@ -1965,6 +2791,21 @@ define i8 @atomicrmw_umax_i8(ptr %ptr, i8 %rhs) {
; CHECK-NOLSE-O1-NEXT: mov w0, w8
; CHECK-NOLSE-O1-NEXT: ret
;
+; CHECK-OUTLINE-O1-LABEL: atomicrmw_umax_i8:
+; CHECK-OUTLINE-O1: ; %bb.0:
+; CHECK-OUTLINE-O1-NEXT: and w9, w1, #0xff
+; CHECK-OUTLINE-O1-NEXT: LBB36_1: ; %atomicrmw.start
+; CHECK-OUTLINE-O1-NEXT: ; =>This Inner Loop Header: Depth=1
+; CHECK-OUTLINE-O1-NEXT: ldxrb w8, [x0]
+; CHECK-OUTLINE-O1-NEXT: and w10, w8, #0xff
+; CHECK-OUTLINE-O1-NEXT: cmp w10, w9
+; CHECK-OUTLINE-O1-NEXT: csel w10, w10, w9, hi
+; CHECK-OUTLINE-O1-NEXT: stxrb w11, w10, [x0]
+; CHECK-OUTLINE-O1-NEXT: cbnz w11, LBB36_1
+; CHECK-OUTLINE-O1-NEXT: ; %bb.2: ; %atomicrmw.end
+; CHECK-OUTLINE-O1-NEXT: mov w0, w8
+; CHECK-OUTLINE-O1-NEXT: ret
+;
; CHECK-NOLSE-O0-LABEL: atomicrmw_umax_i8:
; CHECK-NOLSE-O0: ; %bb.0:
; CHECK-NOLSE-O0-NEXT: sub sp, sp, #32
@@ -2007,6 +2848,42 @@ define i8 @atomicrmw_umax_i8(ptr %ptr, i8 %rhs) {
; CHECK-NOLSE-O0-NEXT: add sp, sp, #32
; CHECK-NOLSE-O0-NEXT: ret
;
+; CHECK-OUTLINE-O0-LABEL: atomicrmw_umax_i8:
+; CHECK-OUTLINE-O0: ; %bb.0:
+; CHECK-OUTLINE-O0-NEXT: sub sp, sp, #48
+; CHECK-OUTLINE-O0-NEXT: stp x29, x30, [sp, #32] ; 16-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: .cfi_def_cfa_offset 48
+; CHECK-OUTLINE-O0-NEXT: .cfi_offset w30, -8
+; CHECK-OUTLINE-O0-NEXT: .cfi_offset w29, -16
+; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #16] ; 8-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: str w1, [sp, #24] ; 4-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: ldrb w0, [x0]
+; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #28] ; 4-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: b LBB36_1
+; CHECK-OUTLINE-O0-NEXT: LBB36_1: ; %atomicrmw.start
+; CHECK-OUTLINE-O0-NEXT: ; =>This Inner Loop Header: Depth=1
+; CHECK-OUTLINE-O0-NEXT: ldr w0, [sp, #28] ; 4-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: ldr x2, [sp, #16] ; 8-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: ldr w8, [sp, #24] ; 4-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #8] ; 4-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: and w9, w0, #0xff
+; CHECK-OUTLINE-O0-NEXT: subs w9, w9, w8, uxtb
+; CHECK-OUTLINE-O0-NEXT: csel w1, w0, w8, hi
+; CHECK-OUTLINE-O0-NEXT: bl ___aarch64_cas1_relax
+; CHECK-OUTLINE-O0-NEXT: ldr w9, [sp, #8] ; 4-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #12] ; 4-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: and w8, w0, #0xff
+; CHECK-OUTLINE-O0-NEXT: subs w8, w8, w9, uxtb
+; CHECK-OUTLINE-O0-NEXT: cset w8, eq
+; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #28] ; 4-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: tbz w8, #0, LBB36_1
+; CHECK-OUTLINE-O0-NEXT: b LBB36_2
+; CHECK-OUTLINE-O0-NEXT: LBB36_2: ; %atomicrmw.end
+; CHECK-OUTLINE-O0-NEXT: ldr w0, [sp, #12] ; 4-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: ldp x29, x30, [sp, #32] ; 16-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: add sp, sp, #48
+; CHECK-OUTLINE-O0-NEXT: ret
+;
; CHECK-LSE-O1-LABEL: atomicrmw_umax_i8:
; CHECK-LSE-O1: ; %bb.0:
; CHECK-LSE-O1-NEXT: ldumaxb w1, w0, [x0]
@@ -2033,6 +2910,19 @@ define i16 @atomicrmw_add_i16(ptr %ptr, i16 %rhs) {
; CHECK-NOLSE-O1-NEXT: mov w0, w8
; CHECK-NOLSE-O1-NEXT: ret
;
+; CHECK-OUTLINE-O1-LABEL: atomicrmw_add_i16:
+; CHECK-OUTLINE-O1: ; %bb.0:
+; CHECK-OUTLINE-O1-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
+; CHECK-OUTLINE-O1-NEXT: .cfi_def_cfa_offset 16
+; CHECK-OUTLINE-O1-NEXT: .cfi_offset w30, -8
+; CHECK-OUTLINE-O1-NEXT: .cfi_offset w29, -16
+; CHECK-OUTLINE-O1-NEXT: mov x2, x0
+; CHECK-OUTLINE-O1-NEXT: mov w0, w1
+; CHECK-OUTLINE-O1-NEXT: mov x1, x2
+; CHECK-OUTLINE-O1-NEXT: bl ___aarch64_ldadd2_acq_rel
+; CHECK-OUTLINE-O1-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
+; CHECK-OUTLINE-O1-NEXT: ret
+;
; CHECK-NOLSE-O0-LABEL: atomicrmw_add_i16:
; CHECK-NOLSE-O0: ; %bb.0:
; CHECK-NOLSE-O0-NEXT: sub sp, sp, #32
@@ -2073,6 +2963,21 @@ define i16 @atomicrmw_add_i16(ptr %ptr, i16 %rhs) {
; CHECK-NOLSE-O0-NEXT: add sp, sp, #32
; CHECK-NOLSE-O0-NEXT: ret
;
+; CHECK-OUTLINE-O0-LABEL: atomicrmw_add_i16:
+; CHECK-OUTLINE-O0: ; %bb.0:
+; CHECK-OUTLINE-O0-NEXT: sub sp, sp, #32
+; CHECK-OUTLINE-O0-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: .cfi_def_cfa_offset 32
+; CHECK-OUTLINE-O0-NEXT: .cfi_offset w30, -8
+; CHECK-OUTLINE-O0-NEXT: .cfi_offset w29, -16
+; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #8] ; 8-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: mov w0, w1
+; CHECK-OUTLINE-O0-NEXT: ldr x1, [sp, #8] ; 8-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: bl ___aarch64_ldadd2_acq_rel
+; CHECK-OUTLINE-O0-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: add sp, sp, #32
+; CHECK-OUTLINE-O0-NEXT: ret
+;
; CHECK-LSE-O1-LABEL: atomicrmw_add_i16:
; CHECK-LSE-O1: ; %bb.0:
; CHECK-LSE-O1-NEXT: ldaddalh w1, w0, [x0]
@@ -2099,6 +3004,19 @@ define i16 @atomicrmw_xchg_i16(ptr %ptr, i16 %rhs) {
; CHECK-NOLSE-O1-NEXT: mov w0, w8
; CHECK-NOLSE-O1-NEXT: ret
;
+; CHECK-OUTLINE-O1-LABEL: atomicrmw_xchg_i16:
+; CHECK-OUTLINE-O1: ; %bb.0:
+; CHECK-OUTLINE-O1-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
+; CHECK-OUTLINE-O1-NEXT: .cfi_def_cfa_offset 16
+; CHECK-OUTLINE-O1-NEXT: .cfi_offset w30, -8
+; CHECK-OUTLINE-O1-NEXT: .cfi_offset w29, -16
+; CHECK-OUTLINE-O1-NEXT: mov x2, x0
+; CHECK-OUTLINE-O1-NEXT: mov w0, w1
+; CHECK-OUTLINE-O1-NEXT: mov x1, x2
+; CHECK-OUTLINE-O1-NEXT: bl ___aarch64_swp2_relax
+; CHECK-OUTLINE-O1-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
+; CHECK-OUTLINE-O1-NEXT: ret
+;
; CHECK-NOLSE-O0-LABEL: atomicrmw_xchg_i16:
; CHECK-NOLSE-O0: ; %bb.0:
; CHECK-NOLSE-O0-NEXT: sub sp, sp, #32
@@ -2138,6 +3056,21 @@ define i16 @atomicrmw_xchg_i16(ptr %ptr, i16 %rhs) {
; CHECK-NOLSE-O0-NEXT: add sp, sp, #32
; CHECK-NOLSE-O0-NEXT: ret
;
+; CHECK-OUTLINE-O0-LABEL: atomicrmw_xchg_i16:
+; CHECK-OUTLINE-O0: ; %bb.0:
+; CHECK-OUTLINE-O0-NEXT: sub sp, sp, #32
+; CHECK-OUTLINE-O0-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: .cfi_def_cfa_offset 32
+; CHECK-OUTLINE-O0-NEXT: .cfi_offset w30, -8
+; CHECK-OUTLINE-O0-NEXT: .cfi_offset w29, -16
+; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #8] ; 8-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: mov w0, w1
+; CHECK-OUTLINE-O0-NEXT: ldr x1, [sp, #8] ; 8-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: bl ___aarch64_swp2_relax
+; CHECK-OUTLINE-O0-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: add sp, sp, #32
+; CHECK-OUTLINE-O0-NEXT: ret
+;
; CHECK-LSE-O1-LABEL: atomicrmw_xchg_i16:
; CHECK-LSE-O1: ; %bb.0:
; CHECK-LSE-O1-NEXT: swph w1, w0, [x0]
@@ -2164,6 +3097,19 @@ define i16 @atomicrmw_sub_i16(ptr %ptr, i16 %rhs) {
; CHECK-NOLSE-O1-NEXT: mov w0, w8
; CHECK-NOLSE-O1-NEXT: ret
;
+; CHECK-OUTLINE-O1-LABEL: atomicrmw_sub_i16:
+; CHECK-OUTLINE-O1: ; %bb.0:
+; CHECK-OUTLINE-O1-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
+; CHECK-OUTLINE-O1-NEXT: .cfi_def_cfa_offset 16
+; CHECK-OUTLINE-O1-NEXT: .cfi_offset w30, -8
+; CHECK-OUTLINE-O1-NEXT: .cfi_offset w29, -16
+; CHECK-OUTLINE-O1-NEXT: mov x2, x0
+; CHECK-OUTLINE-O1-NEXT: neg w0, w1
+; CHECK-OUTLINE-O1-NEXT: mov x1, x2
+; CHECK-OUTLINE-O1-NEXT: bl ___aarch64_ldadd2_acq
+; CHECK-OUTLINE-O1-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
+; CHECK-OUTLINE-O1-NEXT: ret
+;
; CHECK-NOLSE-O0-LABEL: atomicrmw_sub_i16:
; CHECK-NOLSE-O0: ; %bb.0:
; CHECK-NOLSE-O0-NEXT: sub sp, sp, #32
@@ -2204,6 +3150,23 @@ define i16 @atomicrmw_sub_i16(ptr %ptr, i16 %rhs) {
; CHECK-NOLSE-O0-NEXT: add sp, sp, #32
; CHECK-NOLSE-O0-NEXT: ret
;
+; CHECK-OUTLINE-O0-LABEL: atomicrmw_sub_i16:
+; CHECK-OUTLINE-O0: ; %bb.0:
+; CHECK-OUTLINE-O0-NEXT: sub sp, sp, #32
+; CHECK-OUTLINE-O0-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: .cfi_def_cfa_offset 32
+; CHECK-OUTLINE-O0-NEXT: .cfi_offset w30, -8
+; CHECK-OUTLINE-O0-NEXT: .cfi_offset w29, -16
+; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #8] ; 8-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: mov w9, w1
+; CHECK-OUTLINE-O0-NEXT: ldr x1, [sp, #8] ; 8-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: mov w8, wzr
+; CHECK-OUTLINE-O0-NEXT: subs w0, w8, w9
+; CHECK-OUTLINE-O0-NEXT: bl ___aarch64_ldadd2_acq
+; CHECK-OUTLINE-O0-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: add sp, sp, #32
+; CHECK-OUTLINE-O0-NEXT: ret
+;
; CHECK-LSE-O1-LABEL: atomicrmw_sub_i16:
; CHECK-LSE-O1: ; %bb.0:
; CHECK-LSE-O1-NEXT: neg w8, w1
@@ -2232,6 +3195,20 @@ define i16 @atomicrmw_and_i16(ptr %ptr, i16 %rhs) {
; CHECK-NOLSE-O1-NEXT: mov w0, w8
; CHECK-NOLSE-O1-NEXT: ret
;
+; CHECK-OUTLINE-O1-LABEL: atomicrmw_and_i16:
+; CHECK-OUTLINE-O1: ; %bb.0:
+; CHECK-OUTLINE-O1-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
+; CHECK-OUTLINE-O1-NEXT: .cfi_def_cfa_offset 16
+; CHECK-OUTLINE-O1-NEXT: .cfi_offset w30, -8
+; CHECK-OUTLINE-O1-NEXT: .cfi_offset w29, -16
+; CHECK-OUTLINE-O1-NEXT: mov x2, x0
+; CHECK-OUTLINE-O1-NEXT: mov w8, #-1 ; =0xffffffff
+; CHECK-OUTLINE-O1-NEXT: eor w0, w8, w1
+; CHECK-OUTLINE-O1-NEXT: mov x1, x2
+; CHECK-OUTLINE-O1-NEXT: bl ___aarch64_ldclr2_rel
+; CHECK-OUTLINE-O1-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
+; CHECK-OUTLINE-O1-NEXT: ret
+;
; CHECK-NOLSE-O0-LABEL: atomicrmw_and_i16:
; CHECK-NOLSE-O0: ; %bb.0:
; CHECK-NOLSE-O0-NEXT: sub sp, sp, #32
@@ -2272,6 +3249,23 @@ define i16 @atomicrmw_and_i16(ptr %ptr, i16 %rhs) {
; CHECK-NOLSE-O0-NEXT: add sp, sp, #32
; CHECK-NOLSE-O0-NEXT: ret
;
+; CHECK-OUTLINE-O0-LABEL: atomicrmw_and_i16:
+; CHECK-OUTLINE-O0: ; %bb.0:
+; CHECK-OUTLINE-O0-NEXT: sub sp, sp, #32
+; CHECK-OUTLINE-O0-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: .cfi_def_cfa_offset 32
+; CHECK-OUTLINE-O0-NEXT: .cfi_offset w30, -8
+; CHECK-OUTLINE-O0-NEXT: .cfi_offset w29, -16
+; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #8] ; 8-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: mov w9, w1
+; CHECK-OUTLINE-O0-NEXT: ldr x1, [sp, #8] ; 8-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: mov w8, #-1 ; =0xffffffff
+; CHECK-OUTLINE-O0-NEXT: eor w0, w8, w9
+; CHECK-OUTLINE-O0-NEXT: bl ___aarch64_ldclr2_rel
+; CHECK-OUTLINE-O0-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: add sp, sp, #32
+; CHECK-OUTLINE-O0-NEXT: ret
+;
; CHECK-LSE-O1-LABEL: atomicrmw_and_i16:
; CHECK-LSE-O1: ; %bb.0:
; CHECK-LSE-O1-NEXT: mvn w8, w1
@@ -2300,6 +3294,19 @@ define i16 @atomicrmw_or_i16(ptr %ptr, i16 %rhs) {
; CHECK-NOLSE-O1-NEXT: mov w0, w8
; CHECK-NOLSE-O1-NEXT: ret
;
+; CHECK-OUTLINE-O1-LABEL: atomicrmw_or_i16:
+; CHECK-OUTLINE-O1: ; %bb.0:
+; CHECK-OUTLINE-O1-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
+; CHECK-OUTLINE-O1-NEXT: .cfi_def_cfa_offset 16
+; CHECK-OUTLINE-O1-NEXT: .cfi_offset w30, -8
+; CHECK-OUTLINE-O1-NEXT: .cfi_offset w29, -16
+; CHECK-OUTLINE-O1-NEXT: mov x2, x0
+; CHECK-OUTLINE-O1-NEXT: mov w0, w1
+; CHECK-OUTLINE-O1-NEXT: mov x1, x2
+; CHECK-OUTLINE-O1-NEXT: bl ___aarch64_ldset2_acq_rel
+; CHECK-OUTLINE-O1-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
+; CHECK-OUTLINE-O1-NEXT: ret
+;
; CHECK-NOLSE-O0-LABEL: atomicrmw_or_i16:
; CHECK-NOLSE-O0: ; %bb.0:
; CHECK-NOLSE-O0-NEXT: sub sp, sp, #32
@@ -2340,6 +3347,21 @@ define i16 @atomicrmw_or_i16(ptr %ptr, i16 %rhs) {
; CHECK-NOLSE-O0-NEXT: add sp, sp, #32
; CHECK-NOLSE-O0-NEXT: ret
;
+; CHECK-OUTLINE-O0-LABEL: atomicrmw_or_i16:
+; CHECK-OUTLINE-O0: ; %bb.0:
+; CHECK-OUTLINE-O0-NEXT: sub sp, sp, #32
+; CHECK-OUTLINE-O0-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: .cfi_def_cfa_offset 32
+; CHECK-OUTLINE-O0-NEXT: .cfi_offset w30, -8
+; CHECK-OUTLINE-O0-NEXT: .cfi_offset w29, -16
+; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #8] ; 8-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: mov w0, w1
+; CHECK-OUTLINE-O0-NEXT: ldr x1, [sp, #8] ; 8-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: bl ___aarch64_ldset2_acq_rel
+; CHECK-OUTLINE-O0-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: add sp, sp, #32
+; CHECK-OUTLINE-O0-NEXT: ret
+;
; CHECK-LSE-O1-LABEL: atomicrmw_or_i16:
; CHECK-LSE-O1: ; %bb.0:
; CHECK-LSE-O1-NEXT: ldsetalh w1, w0, [x0]
@@ -2366,6 +3388,19 @@ define i16 @atomicrmw_xor_i16(ptr %ptr, i16 %rhs) {
; CHECK-NOLSE-O1-NEXT: mov w0, w8
; CHECK-NOLSE-O1-NEXT: ret
;
+; CHECK-OUTLINE-O1-LABEL: atomicrmw_xor_i16:
+; CHECK-OUTLINE-O1: ; %bb.0:
+; CHECK-OUTLINE-O1-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
+; CHECK-OUTLINE-O1-NEXT: .cfi_def_cfa_offset 16
+; CHECK-OUTLINE-O1-NEXT: .cfi_offset w30, -8
+; CHECK-OUTLINE-O1-NEXT: .cfi_offset w29, -16
+; CHECK-OUTLINE-O1-NEXT: mov x2, x0
+; CHECK-OUTLINE-O1-NEXT: mov w0, w1
+; CHECK-OUTLINE-O1-NEXT: mov x1, x2
+; CHECK-OUTLINE-O1-NEXT: bl ___aarch64_ldeor2_relax
+; CHECK-OUTLINE-O1-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
+; CHECK-OUTLINE-O1-NEXT: ret
+;
; CHECK-NOLSE-O0-LABEL: atomicrmw_xor_i16:
; CHECK-NOLSE-O0: ; %bb.0:
; CHECK-NOLSE-O0-NEXT: sub sp, sp, #32
@@ -2406,6 +3441,21 @@ define i16 @atomicrmw_xor_i16(ptr %ptr, i16 %rhs) {
; CHECK-NOLSE-O0-NEXT: add sp, sp, #32
; CHECK-NOLSE-O0-NEXT: ret
;
+; CHECK-OUTLINE-O0-LABEL: atomicrmw_xor_i16:
+; CHECK-OUTLINE-O0: ; %bb.0:
+; CHECK-OUTLINE-O0-NEXT: sub sp, sp, #32
+; CHECK-OUTLINE-O0-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: .cfi_def_cfa_offset 32
+; CHECK-OUTLINE-O0-NEXT: .cfi_offset w30, -8
+; CHECK-OUTLINE-O0-NEXT: .cfi_offset w29, -16
+; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #8] ; 8-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: mov w0, w1
+; CHECK-OUTLINE-O0-NEXT: ldr x1, [sp, #8] ; 8-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: bl ___aarch64_ldeor2_relax
+; CHECK-OUTLINE-O0-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: add sp, sp, #32
+; CHECK-OUTLINE-O0-NEXT: ret
+;
; CHECK-LSE-O1-LABEL: atomicrmw_xor_i16:
; CHECK-LSE-O1: ; %bb.0:
; CHECK-LSE-O1-NEXT: ldeorh w1, w0, [x0]
@@ -2434,6 +3484,20 @@ define i16 @atomicrmw_min_i16(ptr %ptr, i16 %rhs) {
; CHECK-NOLSE-O1-NEXT: mov w0, w8
; CHECK-NOLSE-O1-NEXT: ret
;
+; CHECK-OUTLINE-O1-LABEL: atomicrmw_min_i16:
+; CHECK-OUTLINE-O1: ; %bb.0:
+; CHECK-OUTLINE-O1-NEXT: LBB43_1: ; %atomicrmw.start
+; CHECK-OUTLINE-O1-NEXT: ; =>This Inner Loop Header: Depth=1
+; CHECK-OUTLINE-O1-NEXT: ldaxrh w8, [x0]
+; CHECK-OUTLINE-O1-NEXT: sxth w9, w8
+; CHECK-OUTLINE-O1-NEXT: cmp w9, w1, sxth
+; CHECK-OUTLINE-O1-NEXT: csel w9, w8, w1, le
+; CHECK-OUTLINE-O1-NEXT: stxrh w10, w9, [x0]
+; CHECK-OUTLINE-O1-NEXT: cbnz w10, LBB43_1
+; CHECK-OUTLINE-O1-NEXT: ; %bb.2: ; %atomicrmw.end
+; CHECK-OUTLINE-O1-NEXT: mov w0, w8
+; CHECK-OUTLINE-O1-NEXT: ret
+;
; CHECK-NOLSE-O0-LABEL: atomicrmw_min_i16:
; CHECK-NOLSE-O0: ; %bb.0:
; CHECK-NOLSE-O0-NEXT: sub sp, sp, #32
@@ -2476,6 +3540,42 @@ define i16 @atomicrmw_min_i16(ptr %ptr, i16 %rhs) {
; CHECK-NOLSE-O0-NEXT: add sp, sp, #32
; CHECK-NOLSE-O0-NEXT: ret
;
+; CHECK-OUTLINE-O0-LABEL: atomicrmw_min_i16:
+; CHECK-OUTLINE-O0: ; %bb.0:
+; CHECK-OUTLINE-O0-NEXT: sub sp, sp, #48
+; CHECK-OUTLINE-O0-NEXT: stp x29, x30, [sp, #32] ; 16-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: .cfi_def_cfa_offset 48
+; CHECK-OUTLINE-O0-NEXT: .cfi_offset w30, -8
+; CHECK-OUTLINE-O0-NEXT: .cfi_offset w29, -16
+; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #16] ; 8-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: str w1, [sp, #24] ; 4-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: ldrh w0, [x0]
+; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #28] ; 4-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: b LBB43_1
+; CHECK-OUTLINE-O0-NEXT: LBB43_1: ; %atomicrmw.start
+; CHECK-OUTLINE-O0-NEXT: ; =>This Inner Loop Header: Depth=1
+; CHECK-OUTLINE-O0-NEXT: ldr w0, [sp, #28] ; 4-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: ldr x2, [sp, #16] ; 8-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: ldr w8, [sp, #24] ; 4-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #8] ; 4-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: sxth w9, w0
+; CHECK-OUTLINE-O0-NEXT: subs w9, w9, w8, sxth
+; CHECK-OUTLINE-O0-NEXT: csel w1, w0, w8, le
+; CHECK-OUTLINE-O0-NEXT: bl ___aarch64_cas2_acq
+; CHECK-OUTLINE-O0-NEXT: ldr w8, [sp, #8] ; 4-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #12] ; 4-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: uxth w8, w8
+; CHECK-OUTLINE-O0-NEXT: subs w8, w8, w0, uxth
+; CHECK-OUTLINE-O0-NEXT: cset w8, eq
+; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #28] ; 4-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: tbz w8, #0, LBB43_1
+; CHECK-OUTLINE-O0-NEXT: b LBB43_2
+; CHECK-OUTLINE-O0-NEXT: LBB43_2: ; %atomicrmw.end
+; CHECK-OUTLINE-O0-NEXT: ldr w0, [sp, #12] ; 4-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: ldp x29, x30, [sp, #32] ; 16-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: add sp, sp, #48
+; CHECK-OUTLINE-O0-NEXT: ret
+;
; CHECK-LSE-O1-LABEL: atomicrmw_min_i16:
; CHECK-LSE-O1: ; %bb.0:
; CHECK-LSE-O1-NEXT: ldsminah w1, w0, [x0]
@@ -2504,6 +3604,20 @@ define i16 @atomicrmw_max_i16(ptr %ptr, i16 %rhs) {
; CHECK-NOLSE-O1-NEXT: mov w0, w8
; CHECK-NOLSE-O1-NEXT: ret
;
+; CHECK-OUTLINE-O1-LABEL: atomicrmw_max_i16:
+; CHECK-OUTLINE-O1: ; %bb.0:
+; CHECK-OUTLINE-O1-NEXT: LBB44_1: ; %atomicrmw.start
+; CHECK-OUTLINE-O1-NEXT: ; =>This Inner Loop Header: Depth=1
+; CHECK-OUTLINE-O1-NEXT: ldxrh w8, [x0]
+; CHECK-OUTLINE-O1-NEXT: sxth w9, w8
+; CHECK-OUTLINE-O1-NEXT: cmp w9, w1, sxth
+; CHECK-OUTLINE-O1-NEXT: csel w9, w8, w1, gt
+; CHECK-OUTLINE-O1-NEXT: stlxrh w10, w9, [x0]
+; CHECK-OUTLINE-O1-NEXT: cbnz w10, LBB44_1
+; CHECK-OUTLINE-O1-NEXT: ; %bb.2: ; %atomicrmw.end
+; CHECK-OUTLINE-O1-NEXT: mov w0, w8
+; CHECK-OUTLINE-O1-NEXT: ret
+;
; CHECK-NOLSE-O0-LABEL: atomicrmw_max_i16:
; CHECK-NOLSE-O0: ; %bb.0:
; CHECK-NOLSE-O0-NEXT: sub sp, sp, #32
@@ -2546,6 +3660,42 @@ define i16 @atomicrmw_max_i16(ptr %ptr, i16 %rhs) {
; CHECK-NOLSE-O0-NEXT: add sp, sp, #32
; CHECK-NOLSE-O0-NEXT: ret
;
+; CHECK-OUTLINE-O0-LABEL: atomicrmw_max_i16:
+; CHECK-OUTLINE-O0: ; %bb.0:
+; CHECK-OUTLINE-O0-NEXT: sub sp, sp, #48
+; CHECK-OUTLINE-O0-NEXT: stp x29, x30, [sp, #32] ; 16-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: .cfi_def_cfa_offset 48
+; CHECK-OUTLINE-O0-NEXT: .cfi_offset w30, -8
+; CHECK-OUTLINE-O0-NEXT: .cfi_offset w29, -16
+; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #16] ; 8-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: str w1, [sp, #24] ; 4-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: ldrh w0, [x0]
+; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #28] ; 4-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: b LBB44_1
+; CHECK-OUTLINE-O0-NEXT: LBB44_1: ; %atomicrmw.start
+; CHECK-OUTLINE-O0-NEXT: ; =>This Inner Loop Header: Depth=1
+; CHECK-OUTLINE-O0-NEXT: ldr w0, [sp, #28] ; 4-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: ldr x2, [sp, #16] ; 8-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: ldr w8, [sp, #24] ; 4-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #8] ; 4-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: sxth w9, w0
+; CHECK-OUTLINE-O0-NEXT: subs w9, w9, w8, sxth
+; CHECK-OUTLINE-O0-NEXT: csel w1, w0, w8, gt
+; CHECK-OUTLINE-O0-NEXT: bl ___aarch64_cas2_rel
+; CHECK-OUTLINE-O0-NEXT: ldr w8, [sp, #8] ; 4-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #12] ; 4-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: uxth w8, w8
+; CHECK-OUTLINE-O0-NEXT: subs w8, w8, w0, uxth
+; CHECK-OUTLINE-O0-NEXT: cset w8, eq
+; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #28] ; 4-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: tbz w8, #0, LBB44_1
+; CHECK-OUTLINE-O0-NEXT: b LBB44_2
+; CHECK-OUTLINE-O0-NEXT: LBB44_2: ; %atomicrmw.end
+; CHECK-OUTLINE-O0-NEXT: ldr w0, [sp, #12] ; 4-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: ldp x29, x30, [sp, #32] ; 16-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: add sp, sp, #48
+; CHECK-OUTLINE-O0-NEXT: ret
+;
; CHECK-LSE-O1-LABEL: atomicrmw_max_i16:
; CHECK-LSE-O1: ; %bb.0:
; CHECK-LSE-O1-NEXT: ldsmaxlh w1, w0, [x0]
@@ -2575,6 +3725,21 @@ define i16 @atomicrmw_umin_i16(ptr %ptr, i16 %rhs) {
; CHECK-NOLSE-O1-NEXT: mov w0, w8
; CHECK-NOLSE-O1-NEXT: ret
;
+; CHECK-OUTLINE-O1-LABEL: atomicrmw_umin_i16:
+; CHECK-OUTLINE-O1: ; %bb.0:
+; CHECK-OUTLINE-O1-NEXT: and w9, w1, #0xffff
+; CHECK-OUTLINE-O1-NEXT: LBB45_1: ; %atomicrmw.start
+; CHECK-OUTLINE-O1-NEXT: ; =>This Inner Loop Header: Depth=1
+; CHECK-OUTLINE-O1-NEXT: ldaxrh w8, [x0]
+; CHECK-OUTLINE-O1-NEXT: and w10, w8, #0xffff
+; CHECK-OUTLINE-O1-NEXT: cmp w10, w9
+; CHECK-OUTLINE-O1-NEXT: csel w10, w10, w9, ls
+; CHECK-OUTLINE-O1-NEXT: stlxrh w11, w10, [x0]
+; CHECK-OUTLINE-O1-NEXT: cbnz w11, LBB45_1
+; CHECK-OUTLINE-O1-NEXT: ; %bb.2: ; %atomicrmw.end
+; CHECK-OUTLINE-O1-NEXT: mov w0, w8
+; CHECK-OUTLINE-O1-NEXT: ret
+;
; CHECK-NOLSE-O0-LABEL: atomicrmw_umin_i16:
; CHECK-NOLSE-O0: ; %bb.0:
; CHECK-NOLSE-O0-NEXT: sub sp, sp, #32
@@ -2617,6 +3782,42 @@ define i16 @atomicrmw_umin_i16(ptr %ptr, i16 %rhs) {
; CHECK-NOLSE-O0-NEXT: add sp, sp, #32
; CHECK-NOLSE-O0-NEXT: ret
;
+; CHECK-OUTLINE-O0-LABEL: atomicrmw_umin_i16:
+; CHECK-OUTLINE-O0: ; %bb.0:
+; CHECK-OUTLINE-O0-NEXT: sub sp, sp, #48
+; CHECK-OUTLINE-O0-NEXT: stp x29, x30, [sp, #32] ; 16-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: .cfi_def_cfa_offset 48
+; CHECK-OUTLINE-O0-NEXT: .cfi_offset w30, -8
+; CHECK-OUTLINE-O0-NEXT: .cfi_offset w29, -16
+; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #16] ; 8-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: str w1, [sp, #24] ; 4-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: ldrh w0, [x0]
+; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #28] ; 4-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: b LBB45_1
+; CHECK-OUTLINE-O0-NEXT: LBB45_1: ; %atomicrmw.start
+; CHECK-OUTLINE-O0-NEXT: ; =>This Inner Loop Header: Depth=1
+; CHECK-OUTLINE-O0-NEXT: ldr w0, [sp, #28] ; 4-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: ldr x2, [sp, #16] ; 8-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: ldr w8, [sp, #24] ; 4-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #8] ; 4-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: uxth w9, w0
+; CHECK-OUTLINE-O0-NEXT: subs w9, w9, w8, uxth
+; CHECK-OUTLINE-O0-NEXT: csel w1, w0, w8, ls
+; CHECK-OUTLINE-O0-NEXT: bl ___aarch64_cas2_acq_rel
+; CHECK-OUTLINE-O0-NEXT: ldr w8, [sp, #8] ; 4-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #12] ; 4-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: uxth w8, w8
+; CHECK-OUTLINE-O0-NEXT: subs w8, w8, w0, uxth
+; CHECK-OUTLINE-O0-NEXT: cset w8, eq
+; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #28] ; 4-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: tbz w8, #0, LBB45_1
+; CHECK-OUTLINE-O0-NEXT: b LBB45_2
+; CHECK-OUTLINE-O0-NEXT: LBB45_2: ; %atomicrmw.end
+; CHECK-OUTLINE-O0-NEXT: ldr w0, [sp, #12] ; 4-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: ldp x29, x30, [sp, #32] ; 16-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: add sp, sp, #48
+; CHECK-OUTLINE-O0-NEXT: ret
+;
; CHECK-LSE-O1-LABEL: atomicrmw_umin_i16:
; CHECK-LSE-O1: ; %bb.0:
; CHECK-LSE-O1-NEXT: lduminalh w1, w0, [x0]
@@ -2646,6 +3847,21 @@ define i16 @atomicrmw_umax_i16(ptr %ptr, i16 %rhs) {
; CHECK-NOLSE-O1-NEXT: mov w0, w8
; CHECK-NOLSE-O1-NEXT: ret
;
+; CHECK-OUTLINE-O1-LABEL: atomicrmw_umax_i16:
+; CHECK-OUTLINE-O1: ; %bb.0:
+; CHECK-OUTLINE-O1-NEXT: and w9, w1, #0xffff
+; CHECK-OUTLINE-O1-NEXT: LBB46_1: ; %atomicrmw.start
+; CHECK-OUTLINE-O1-NEXT: ; =>This Inner Loop Header: Depth=1
+; CHECK-OUTLINE-O1-NEXT: ldxrh w8, [x0]
+; CHECK-OUTLINE-O1-NEXT: and w10, w8, #0xffff
+; CHECK-OUTLINE-O1-NEXT: cmp w10, w9
+; CHECK-OUTLINE-O1-NEXT: csel w10, w10, w9, hi
+; CHECK-OUTLINE-O1-NEXT: stxrh w11, w10, [x0]
+; CHECK-OUTLINE-O1-NEXT: cbnz w11, LBB46_1
+; CHECK-OUTLINE-O1-NEXT: ; %bb.2: ; %atomicrmw.end
+; CHECK-OUTLINE-O1-NEXT: mov w0, w8
+; CHECK-OUTLINE-O1-NEXT: ret
+;
; CHECK-NOLSE-O0-LABEL: atomicrmw_umax_i16:
; CHECK-NOLSE-O0: ; %bb.0:
; CHECK-NOLSE-O0-NEXT: sub sp, sp, #32
@@ -2688,6 +3904,42 @@ define i16 @atomicrmw_umax_i16(ptr %ptr, i16 %rhs) {
; CHECK-NOLSE-O0-NEXT: add sp, sp, #32
; CHECK-NOLSE-O0-NEXT: ret
;
+; CHECK-OUTLINE-O0-LABEL: atomicrmw_umax_i16:
+; CHECK-OUTLINE-O0: ; %bb.0:
+; CHECK-OUTLINE-O0-NEXT: sub sp, sp, #48
+; CHECK-OUTLINE-O0-NEXT: stp x29, x30, [sp, #32] ; 16-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: .cfi_def_cfa_offset 48
+; CHECK-OUTLINE-O0-NEXT: .cfi_offset w30, -8
+; CHECK-OUTLINE-O0-NEXT: .cfi_offset w29, -16
+; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #16] ; 8-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: str w1, [sp, #24] ; 4-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: ldrh w0, [x0]
+; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #28] ; 4-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: b LBB46_1
+; CHECK-OUTLINE-O0-NEXT: LBB46_1: ; %atomicrmw.start
+; CHECK-OUTLINE-O0-NEXT: ; =>This Inner Loop Header: Depth=1
+; CHECK-OUTLINE-O0-NEXT: ldr w0, [sp, #28] ; 4-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: ldr x2, [sp, #16] ; 8-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: ldr w8, [sp, #24] ; 4-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #8] ; 4-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: uxth w9, w0
+; CHECK-OUTLINE-O0-NEXT: subs w9, w9, w8, uxth
+; CHECK-OUTLINE-O0-NEXT: csel w1, w0, w8, hi
+; CHECK-OUTLINE-O0-NEXT: bl ___aarch64_cas2_relax
+; CHECK-OUTLINE-O0-NEXT: ldr w8, [sp, #8] ; 4-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #12] ; 4-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: uxth w8, w8
+; CHECK-OUTLINE-O0-NEXT: subs w8, w8, w0, uxth
+; CHECK-OUTLINE-O0-NEXT: cset w8, eq
+; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #28] ; 4-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: tbz w8, #0, LBB46_1
+; CHECK-OUTLINE-O0-NEXT: b LBB46_2
+; CHECK-OUTLINE-O0-NEXT: LBB46_2: ; %atomicrmw.end
+; CHECK-OUTLINE-O0-NEXT: ldr w0, [sp, #12] ; 4-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: ldp x29, x30, [sp, #32] ; 16-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: add sp, sp, #48
+; CHECK-OUTLINE-O0-NEXT: ret
+;
; CHECK-LSE-O1-LABEL: atomicrmw_umax_i16:
; CHECK-LSE-O1: ; %bb.0:
; CHECK-LSE-O1-NEXT: ldumaxh w1, w0, [x0]
@@ -2726,6 +3978,28 @@ define { i8, i1 } @cmpxchg_i8(ptr %ptr, i8 %desired, i8 %new) {
; CHECK-NOLSE-O1-NEXT: ; kill: def $w0 killed $w0 killed $x0
; CHECK-NOLSE-O1-NEXT: ret
;
+; CHECK-OUTLINE-O1-LABEL: cmpxchg_i8:
+; CHECK-OUTLINE-O1: ; %bb.0:
+; CHECK-OUTLINE-O1-NEXT: stp x20, x19, [sp, #-32]! ; 16-byte Folded Spill
+; CHECK-OUTLINE-O1-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill
+; CHECK-OUTLINE-O1-NEXT: .cfi_def_cfa_offset 32
+; CHECK-OUTLINE-O1-NEXT: .cfi_offset w30, -8
+; CHECK-OUTLINE-O1-NEXT: .cfi_offset w29, -16
+; CHECK-OUTLINE-O1-NEXT: .cfi_offset w19, -24
+; CHECK-OUTLINE-O1-NEXT: .cfi_offset w20, -32
+; CHECK-OUTLINE-O1-NEXT: mov x3, x0
+; CHECK-OUTLINE-O1-NEXT: mov w19, w1
+; CHECK-OUTLINE-O1-NEXT: mov w1, w2
+; CHECK-OUTLINE-O1-NEXT: mov w0, w19
+; CHECK-OUTLINE-O1-NEXT: mov x2, x3
+; CHECK-OUTLINE-O1-NEXT: bl ___aarch64_cas1_relax
+; CHECK-OUTLINE-O1-NEXT: and w8, w0, #0xff
+; CHECK-OUTLINE-O1-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
+; CHECK-OUTLINE-O1-NEXT: cmp w8, w19, uxtb
+; CHECK-OUTLINE-O1-NEXT: cset w1, eq
+; CHECK-OUTLINE-O1-NEXT: ldp x20, x19, [sp], #32 ; 16-byte Folded Reload
+; CHECK-OUTLINE-O1-NEXT: ret
+;
; CHECK-NOLSE-O0-LABEL: cmpxchg_i8:
; CHECK-NOLSE-O0: ; %bb.0:
; CHECK-NOLSE-O0-NEXT: mov x9, x0
@@ -2742,6 +4016,27 @@ define { i8, i1 } @cmpxchg_i8(ptr %ptr, i8 %desired, i8 %new) {
; CHECK-NOLSE-O0-NEXT: cset w1, eq
; CHECK-NOLSE-O0-NEXT: ret
;
+; CHECK-OUTLINE-O0-LABEL: cmpxchg_i8:
+; CHECK-OUTLINE-O0: ; %bb.0:
+; CHECK-OUTLINE-O0-NEXT: sub sp, sp, #32
+; CHECK-OUTLINE-O0-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: .cfi_def_cfa_offset 32
+; CHECK-OUTLINE-O0-NEXT: .cfi_offset w30, -8
+; CHECK-OUTLINE-O0-NEXT: .cfi_offset w29, -16
+; CHECK-OUTLINE-O0-NEXT: str x0, [sp] ; 8-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: mov w0, w1
+; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #12] ; 4-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: mov w1, w2
+; CHECK-OUTLINE-O0-NEXT: ldr x2, [sp] ; 8-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: bl ___aarch64_cas1_relax
+; CHECK-OUTLINE-O0-NEXT: ldr w1, [sp, #12] ; 4-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: and w8, w0, #0xff
+; CHECK-OUTLINE-O0-NEXT: subs w8, w8, w1, uxtb
+; CHECK-OUTLINE-O0-NEXT: cset w1, eq
+; CHECK-OUTLINE-O0-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: add sp, sp, #32
+; CHECK-OUTLINE-O0-NEXT: ret
+;
; CHECK-LSE-O1-LABEL: cmpxchg_i8:
; CHECK-LSE-O1: ; %bb.0:
; CHECK-LSE-O1-NEXT: mov x8, x1
@@ -2790,6 +4085,28 @@ define { i16, i1 } @cmpxchg_i16(ptr %ptr, i16 %desired, i16 %new) {
; CHECK-NOLSE-O1-NEXT: ; kill: def $w0 killed $w0 killed $x0
; CHECK-NOLSE-O1-NEXT: ret
;
+; CHECK-OUTLINE-O1-LABEL: cmpxchg_i16:
+; CHECK-OUTLINE-O1: ; %bb.0:
+; CHECK-OUTLINE-O1-NEXT: stp x20, x19, [sp, #-32]! ; 16-byte Folded Spill
+; CHECK-OUTLINE-O1-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill
+; CHECK-OUTLINE-O1-NEXT: .cfi_def_cfa_offset 32
+; CHECK-OUTLINE-O1-NEXT: .cfi_offset w30, -8
+; CHECK-OUTLINE-O1-NEXT: .cfi_offset w29, -16
+; CHECK-OUTLINE-O1-NEXT: .cfi_offset w19, -24
+; CHECK-OUTLINE-O1-NEXT: .cfi_offset w20, -32
+; CHECK-OUTLINE-O1-NEXT: mov x3, x0
+; CHECK-OUTLINE-O1-NEXT: mov w19, w1
+; CHECK-OUTLINE-O1-NEXT: mov w1, w2
+; CHECK-OUTLINE-O1-NEXT: mov w0, w19
+; CHECK-OUTLINE-O1-NEXT: mov x2, x3
+; CHECK-OUTLINE-O1-NEXT: bl ___aarch64_cas2_relax
+; CHECK-OUTLINE-O1-NEXT: and w8, w0, #0xffff
+; CHECK-OUTLINE-O1-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
+; CHECK-OUTLINE-O1-NEXT: cmp w8, w19, uxth
+; CHECK-OUTLINE-O1-NEXT: cset w1, eq
+; CHECK-OUTLINE-O1-NEXT: ldp x20, x19, [sp], #32 ; 16-byte Folded Reload
+; CHECK-OUTLINE-O1-NEXT: ret
+;
; CHECK-NOLSE-O0-LABEL: cmpxchg_i16:
; CHECK-NOLSE-O0: ; %bb.0:
; CHECK-NOLSE-O0-NEXT: mov x9, x0
@@ -2806,6 +4123,27 @@ define { i16, i1 } @cmpxchg_i16(ptr %ptr, i16 %desired, i16 %new) {
; CHECK-NOLSE-O0-NEXT: cset w1, eq
; CHECK-NOLSE-O0-NEXT: ret
;
+; CHECK-OUTLINE-O0-LABEL: cmpxchg_i16:
+; CHECK-OUTLINE-O0: ; %bb.0:
+; CHECK-OUTLINE-O0-NEXT: sub sp, sp, #32
+; CHECK-OUTLINE-O0-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: .cfi_def_cfa_offset 32
+; CHECK-OUTLINE-O0-NEXT: .cfi_offset w30, -8
+; CHECK-OUTLINE-O0-NEXT: .cfi_offset w29, -16
+; CHECK-OUTLINE-O0-NEXT: str x0, [sp] ; 8-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: mov w0, w1
+; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #12] ; 4-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: mov w1, w2
+; CHECK-OUTLINE-O0-NEXT: ldr x2, [sp] ; 8-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: bl ___aarch64_cas2_relax
+; CHECK-OUTLINE-O0-NEXT: ldr w1, [sp, #12] ; 4-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: and w8, w0, #0xffff
+; CHECK-OUTLINE-O0-NEXT: subs w8, w8, w1, uxth
+; CHECK-OUTLINE-O0-NEXT: cset w1, eq
+; CHECK-OUTLINE-O0-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: add sp, sp, #32
+; CHECK-OUTLINE-O0-NEXT: ret
+;
; CHECK-LSE-O1-LABEL: cmpxchg_i16:
; CHECK-LSE-O1: ; %bb.0:
; CHECK-LSE-O1-NEXT: mov x8, x1
@@ -2836,6 +4174,12 @@ define internal double @bitcast_to_double(ptr %ptr) {
; CHECK-NOLSE-NEXT: fmov d0, x8
; CHECK-NOLSE-NEXT: ret
;
+; CHECK-OUTLINE-LABEL: bitcast_to_double:
+; CHECK-OUTLINE: ; %bb.0:
+; CHECK-OUTLINE-NEXT: ldar x8, [x0]
+; CHECK-OUTLINE-NEXT: fmov d0, x8
+; CHECK-OUTLINE-NEXT: ret
+;
; CHECK-LSE-O1-LABEL: bitcast_to_double:
; CHECK-LSE-O1: ; %bb.0:
; CHECK-LSE-O1-NEXT: ldar x8, [x0]
@@ -2859,6 +4203,12 @@ define internal float @bitcast_to_float(ptr %ptr) {
; CHECK-NOLSE-NEXT: fmov s0, w8
; CHECK-NOLSE-NEXT: ret
;
+; CHECK-OUTLINE-LABEL: bitcast_to_float:
+; CHECK-OUTLINE: ; %bb.0:
+; CHECK-OUTLINE-NEXT: ldar w8, [x0]
+; CHECK-OUTLINE-NEXT: fmov s0, w8
+; CHECK-OUTLINE-NEXT: ret
+;
; CHECK-LSE-O1-LABEL: bitcast_to_float:
; CHECK-LSE-O1: ; %bb.0:
; CHECK-LSE-O1-NEXT: ldar w8, [x0]
@@ -2883,6 +4233,13 @@ define internal half @bitcast_to_half(ptr %ptr) {
; CHECK-NOLSE-NEXT: ; kill: def $h0 killed $h0 killed $s0
; CHECK-NOLSE-NEXT: ret
;
+; CHECK-OUTLINE-LABEL: bitcast_to_half:
+; CHECK-OUTLINE: ; %bb.0:
+; CHECK-OUTLINE-NEXT: ldarh w8, [x0]
+; CHECK-OUTLINE-NEXT: fmov s0, w8
+; CHECK-OUTLINE-NEXT: ; kill: def $h0 killed $h0 killed $s0
+; CHECK-OUTLINE-NEXT: ret
+;
; CHECK-LSE-O1-LABEL: bitcast_to_half:
; CHECK-LSE-O1: ; %bb.0:
; CHECK-LSE-O1-NEXT: ldarh w8, [x0]
@@ -2907,6 +4264,11 @@ define internal ptr @inttoptr(ptr %ptr) {
; CHECK-NOLSE-NEXT: ldar x0, [x0]
; CHECK-NOLSE-NEXT: ret
;
+; CHECK-OUTLINE-LABEL: inttoptr:
+; CHECK-OUTLINE: ; %bb.0:
+; CHECK-OUTLINE-NEXT: ldar x0, [x0]
+; CHECK-OUTLINE-NEXT: ret
+;
; CHECK-LSE-O1-LABEL: inttoptr:
; CHECK-LSE-O1: ; %bb.0:
; CHECK-LSE-O1-NEXT: ldar x0, [x0]
@@ -2927,6 +4289,11 @@ define internal ptr @load_ptr(ptr %ptr) {
; CHECK-NOLSE-NEXT: ldar x0, [x0]
; CHECK-NOLSE-NEXT: ret
;
+; CHECK-OUTLINE-LABEL: load_ptr:
+; CHECK-OUTLINE: ; %bb.0:
+; CHECK-OUTLINE-NEXT: ldar x0, [x0]
+; CHECK-OUTLINE-NEXT: ret
+;
; CHECK-LSE-O1-LABEL: load_ptr:
; CHECK-LSE-O1: ; %bb.0:
; CHECK-LSE-O1-NEXT: ldar x0, [x0]
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
index ae15e74a43277a..5f412d20196c20 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
@@ -203,7 +203,6 @@
# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: G_ATOMICRMW_SUB (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
-# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}}
# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: G_ATOMICRMW_AND (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
@@ -226,7 +225,6 @@
# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: G_ATOMICRMW_MIN (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
-# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}}
# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: G_ATOMICRMW_UMAX (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
>From 454c46aa5566fe86b8e08e66836473220096f85e Mon Sep 17 00:00:00 2001
From: Thomas Preud'homme <thomas.preudhomme at arm.com>
Date: Wed, 6 Dec 2023 13:32:27 +0000
Subject: [PATCH 2/6] Move ATOMICRMW_SUB to ATOMICRMW_ADD conversion to
libcall()
---
.../CodeGen/GlobalISel/LegalizerHelper.cpp | 21 +++++++++----------
.../AArch64/GISel/AArch64LegalizerInfo.cpp | 9 +-------
.../GlobalISel/legalizer-info-validation.mir | 1 +
3 files changed, 12 insertions(+), 19 deletions(-)
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 186937e597c5bc..683b614192f457 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -824,6 +824,10 @@ getOutlineAtomicLibcall(unsigned Opc, AtomicOrdering Order, uint64_t MemSize) {
const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDADD)};
return LC[ModeN][ModelN];
}
+ case TargetOpcode::G_ATOMICRMW_SUB: {
+ const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDADD)};
+ return LC[ModeN][ModelN];
+ }
case TargetOpcode::G_ATOMICRMW_AND: {
const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDCLR)};
return LC[ModeN][ModelN];
@@ -879,6 +883,7 @@ createAtomicLibcall(MachineIRBuilder &MIRBuilder, MachineInstr &MI) {
}
case TargetOpcode::G_ATOMICRMW_XCHG:
case TargetOpcode::G_ATOMICRMW_ADD:
+ case TargetOpcode::G_ATOMICRMW_SUB:
case TargetOpcode::G_ATOMICRMW_AND:
case TargetOpcode::G_ATOMICRMW_OR:
case TargetOpcode::G_ATOMICRMW_XOR: {
@@ -889,6 +894,10 @@ createAtomicLibcall(MachineIRBuilder &MIRBuilder, MachineInstr &MI) {
Register Tmp = MRI.createGenericVirtualRegister(ValLLT);
MIRBuilder.buildXor(Tmp, MIRBuilder.buildConstant(ValLLT, -1), Val);
Val = Tmp;
+ } else if (Opc == TargetOpcode::G_ATOMICRMW_SUB) {
+ Register Tmp = MRI.createGenericVirtualRegister(ValLLT);
+ MIRBuilder.buildSub(Tmp, MIRBuilder.buildConstant(ValLLT, 0), Val);
+ Val = Tmp;
}
Args.push_back({Val, IntegerType::get(Ctx, ValLLT.getSizeInBits()), 0});
Args.push_back({Mem, PointerType::get(Ctx, MemLLT.getAddressSpace()), 0});
@@ -1182,6 +1191,7 @@ LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) {
}
case TargetOpcode::G_ATOMICRMW_XCHG:
case TargetOpcode::G_ATOMICRMW_ADD:
+ case TargetOpcode::G_ATOMICRMW_SUB:
case TargetOpcode::G_ATOMICRMW_AND:
case TargetOpcode::G_ATOMICRMW_OR:
case TargetOpcode::G_ATOMICRMW_XOR:
@@ -3965,17 +3975,6 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
return lowerTRUNC(MI);
GISEL_VECREDUCE_CASES_NONSEQ
return lowerVectorReduction(MI);
- case G_ATOMICRMW_SUB: {
- auto Val = MI.getOperand(2).getReg();
- LLT ValLLT = MRI.getType(Val);
- Register Tmp = MRI.createGenericVirtualRegister(ValLLT);
- MIRBuilder.buildSub(Tmp, MIRBuilder.buildConstant(ValLLT, 0), Val);
- auto [Ret, Mem] = MI.getFirst2Regs();
- auto &MMO = cast<GMemOperation>(MI).getMMO();
- MIRBuilder.buildAtomicRMWAdd(Ret, Mem, Tmp, MMO);
- MI.eraseFromParent();
- return Legalized;
- }
}
}
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 7fce3e501db57c..cca06a2510c4f6 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -768,7 +768,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
.legalIf(all(typeInSet(0, {s32, s64}), typeIs(1, p0)));
getActionDefinitionsBuilder({G_ATOMICRMW_XCHG, G_ATOMICRMW_ADD,
- G_ATOMICRMW_AND, G_ATOMICRMW_OR,
+ G_ATOMICRMW_SUB, G_ATOMICRMW_AND, G_ATOMICRMW_OR,
G_ATOMICRMW_XOR})
.libcallIf([&ST](const LegalityQuery &Query) {
return ST.outlineAtomics() && !ST.hasLSE();
@@ -776,13 +776,6 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
.clampScalar(0, s32, s64)
.legalIf(all(typeInSet(0, {s32, s64}), typeIs(1, p0)));
- getActionDefinitionsBuilder(G_ATOMICRMW_SUB)
- .lowerIf([&ST](const LegalityQuery &Query) {
- return ST.outlineAtomics() && !ST.hasLSE();
- })
- .clampScalar(0, s32, s64)
- .legalIf(all(typeInSet(0, {s32, s64}), typeIs(1, p0)));
-
// [U]Min/[U]Max RWM atomics are used in __sync_fetch_ libcalls so far.
// Don't outline them unless
// (1) high level <atomic> support approved:
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
index 5f412d20196c20..a0c13e3a82f774 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
@@ -203,6 +203,7 @@
# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: G_ATOMICRMW_SUB (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
+# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}}
# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: G_ATOMICRMW_AND (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
>From 845d86ce320de79583107f5c312545ee771278c7 Mon Sep 17 00:00:00 2001
From: Thomas Preud'homme <thomas.preudhomme at arm.com>
Date: Thu, 7 Dec 2023 19:36:49 +0000
Subject: [PATCH 3/6] Put legal first, then custom
---
.../llvm/CodeGen/GlobalISel/LegalizerInfo.h | 5 ++++
.../AArch64/GISel/AArch64LegalizerInfo.cpp | 28 ++++++++++---------
2 files changed, 20 insertions(+), 13 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h
index e51a3ec9400543..6a59cd8f10f877 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h
@@ -223,6 +223,11 @@ struct TypePairAndMemDesc {
}
};
+/// True iff P is false.
+template <typename Predicate> Predicate predNot(Predicate P) {
+ return [=](const LegalityQuery &Query) { return !P(Query); };
+}
+
/// True iff P0 and P1 are true.
template<typename Predicate>
Predicate all(Predicate P0, Predicate P1) {
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index cca06a2510c4f6..8a0ff402336e14 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -757,24 +757,26 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
.lowerIf(
all(typeInSet(0, {s8, s16, s32, s64, s128}), typeIs(2, p0)));
+ LegalityPredicate UseOutlineAtomics = [&ST](const LegalityQuery &Query) {
+ return ST.outlineAtomics() && !ST.hasLSE();
+ };
+
getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG)
- .libcallIf([&ST](const LegalityQuery &Query) {
- return ST.outlineAtomics() && !ST.hasLSE();
- })
- .customIf([](const LegalityQuery &Query) {
- return Query.Types[0].getSizeInBits() == 128;
- })
- .clampScalar(0, s32, s64)
- .legalIf(all(typeInSet(0, {s32, s64}), typeIs(1, p0)));
+ .legalIf(all(typeInSet(0, {s32, s64}), typeIs(1, p0),
+ predNot(UseOutlineAtomics)))
+ .customIf(all(typeIs(0, s128), predNot(UseOutlineAtomics)))
+ .libcallIf(all(typeInSet(0, {s8, s16, s32, s64, s128}), typeIs(1, p0),
+ UseOutlineAtomics))
+ .clampScalar(0, s32, s64);
getActionDefinitionsBuilder({G_ATOMICRMW_XCHG, G_ATOMICRMW_ADD,
G_ATOMICRMW_SUB, G_ATOMICRMW_AND, G_ATOMICRMW_OR,
G_ATOMICRMW_XOR})
- .libcallIf([&ST](const LegalityQuery &Query) {
- return ST.outlineAtomics() && !ST.hasLSE();
- })
- .clampScalar(0, s32, s64)
- .legalIf(all(typeInSet(0, {s32, s64}), typeIs(1, p0)));
+ .legalIf(all(typeInSet(0, {s32, s64}), typeIs(1, p0),
+ predNot(UseOutlineAtomics)))
+ .libcallIf(all(typeInSet(0, {s8, s16, s32, s64}), typeIs(1, p0),
+ UseOutlineAtomics))
+ .clampScalar(0, s32, s64);
// [U]Min/[U]Max RWM atomics are used in __sync_fetch_ libcalls so far.
// Don't outline them unless
>From f8488aa6a11701492394a6d887edb0e4a5d87891 Mon Sep 17 00:00:00 2001
From: Thomas Preud'homme <thomas.preudhomme at arm.com>
Date: Fri, 15 Dec 2023 11:58:41 +0000
Subject: [PATCH 4/6] Share outline atomic libcall selection.
---
llvm/include/llvm/CodeGen/RuntimeLibcalls.h | 6 ++
.../CodeGen/GlobalISel/LegalizerHelper.cpp | 70 +++++--------------
llvm/lib/CodeGen/TargetLoweringBase.cpp | 41 +++++++----
3 files changed, 49 insertions(+), 68 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/RuntimeLibcalls.h b/llvm/include/llvm/CodeGen/RuntimeLibcalls.h
index 66642068151073..3a407c4a4d9406 100644
--- a/llvm/include/llvm/CodeGen/RuntimeLibcalls.h
+++ b/llvm/include/llvm/CodeGen/RuntimeLibcalls.h
@@ -82,6 +82,12 @@ namespace RTLIB {
/// UNKNOWN_LIBCALL if there is none.
Libcall getSYNC(unsigned Opc, MVT VT);
+ /// Return the outline atomics libcall selected from the table LC for the
+ /// given atomic ordering and access size in bytes, or UNKNOWN_LIBCALL if
+ /// there is none.
+ Libcall getOutlineAtomicHelper(const Libcall (&LC)[5][4],
+ AtomicOrdering Order, uint64_t MemSize);
+
/// Return the outline atomics value for the given opcode, atomic ordering
/// and type, or UNKNOWN_LIBCALL if there is none.
Libcall getOUTLINE_ATOMIC(unsigned Opc, AtomicOrdering Order, MVT VT);
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 683b614192f457..ea5e212e6b21d5 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -25,6 +25,7 @@
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
@@ -765,46 +766,15 @@ llvm::createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
return LegalizerHelper::Legalized;
}
-static RTLIB::Libcall
-getOutlineAtomicLibcall(unsigned Opc, AtomicOrdering Order, uint64_t MemSize) {
- unsigned ModeN, ModelN;
- switch (MemSize) {
- case 1:
- ModeN = 0;
- break;
- case 2:
- ModeN = 1;
- break;
- case 4:
- ModeN = 2;
- break;
- case 8:
- ModeN = 3;
- break;
- case 16:
- ModeN = 4;
- break;
- default:
- return RTLIB::UNKNOWN_LIBCALL;
- }
-
- switch (Order) {
- case AtomicOrdering::Monotonic:
- ModelN = 0;
- break;
- case AtomicOrdering::Acquire:
- ModelN = 1;
- break;
- case AtomicOrdering::Release:
- ModelN = 2;
- break;
- case AtomicOrdering::AcquireRelease:
- case AtomicOrdering::SequentiallyConsistent:
- ModelN = 3;
- break;
- default:
+static RTLIB::Libcall getOutlineAtomicLibcall(MachineInstr &MI) {
+ unsigned Opc = MI.getOpcode();
+ auto &AtomicMI = cast<GMemOperation>(MI);
+ auto &MMO = AtomicMI.getMMO();
+ auto Ordering = MMO.getMergedOrdering();
+ LLT MemType = MMO.getMemoryType();
+ uint64_t MemSize = MemType.getSizeInBytes();
+ if (!MemType.isScalar())
return RTLIB::UNKNOWN_LIBCALL;
- }
#define LCALLS(A, B) \
{ A##B##_RELAX, A##B##_ACQ, A##B##_REL, A##B##_ACQ_REL }
@@ -814,31 +784,28 @@ getOutlineAtomicLibcall(unsigned Opc, AtomicOrdering Order, uint64_t MemSize) {
case TargetOpcode::G_ATOMIC_CMPXCHG:
case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_CAS)};
- return LC[ModeN][ModelN];
+ return getOutlineAtomicHelper(LC, Ordering, MemSize);
}
case TargetOpcode::G_ATOMICRMW_XCHG: {
const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_SWP)};
- return LC[ModeN][ModelN];
- }
- case TargetOpcode::G_ATOMICRMW_ADD: {
- const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDADD)};
- return LC[ModeN][ModelN];
+ return getOutlineAtomicHelper(LC, Ordering, MemSize);
}
+ case TargetOpcode::G_ATOMICRMW_ADD:
case TargetOpcode::G_ATOMICRMW_SUB: {
const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDADD)};
- return LC[ModeN][ModelN];
+ return getOutlineAtomicHelper(LC, Ordering, MemSize);
}
case TargetOpcode::G_ATOMICRMW_AND: {
const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDCLR)};
- return LC[ModeN][ModelN];
+ return getOutlineAtomicHelper(LC, Ordering, MemSize);
}
case TargetOpcode::G_ATOMICRMW_OR: {
const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDSET)};
- return LC[ModeN][ModelN];
+ return getOutlineAtomicHelper(LC, Ordering, MemSize);
}
case TargetOpcode::G_ATOMICRMW_XOR: {
const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDEOR)};
- return LC[ModeN][ModelN];
+ return getOutlineAtomicHelper(LC, Ordering, MemSize);
}
default:
return RTLIB::UNKNOWN_LIBCALL;
@@ -909,10 +876,7 @@ createAtomicLibcall(MachineIRBuilder &MIRBuilder, MachineInstr &MI) {
auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
- auto &AtomicMI = cast<GMemOperation>(MI);
- auto Ordering = AtomicMI.getMMO().getMergedOrdering();
- uint64_t MemSize = AtomicMI.getMemSize();
- RTLIB::Libcall RTLibcall = getOutlineAtomicLibcall(Opc, Ordering, MemSize);
+ RTLIB::Libcall RTLibcall = getOutlineAtomicLibcall(MI);
const char *Name = TLI.getLibcallName(RTLibcall);
// Unsupported libcall on the target.
diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp
index 2648c16bcd8d90..acbbfd9ddaf52d 100644
--- a/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -520,27 +520,28 @@ RTLIB::Libcall RTLIB::getFREXP(EVT RetVT) {
FREXP_PPCF128);
}
-RTLIB::Libcall RTLIB::getOUTLINE_ATOMIC(unsigned Opc, AtomicOrdering Order,
- MVT VT) {
+RTLIB::Libcall RTLIB::getOutlineAtomicHelper(const Libcall (&LC)[5][4],
+ AtomicOrdering Order,
+ uint64_t MemSize) {
unsigned ModeN, ModelN;
- switch (VT.SimpleTy) {
- case MVT::i8:
+ switch (MemSize) {
+ case 1:
ModeN = 0;
break;
- case MVT::i16:
+ case 2:
ModeN = 1;
break;
- case MVT::i32:
+ case 4:
ModeN = 2;
break;
- case MVT::i64:
+ case 8:
ModeN = 3;
break;
- case MVT::i128:
+ case 16:
ModeN = 4;
break;
default:
- return UNKNOWN_LIBCALL;
+ return RTLIB::UNKNOWN_LIBCALL;
}
switch (Order) {
@@ -561,6 +562,16 @@ RTLIB::Libcall RTLIB::getOUTLINE_ATOMIC(unsigned Opc, AtomicOrdering Order,
return UNKNOWN_LIBCALL;
}
+ return LC[ModeN][ModelN];
+}
+
+RTLIB::Libcall RTLIB::getOUTLINE_ATOMIC(unsigned Opc, AtomicOrdering Order,
+ MVT VT) {
+ unsigned ModeN, ModelN;
+ if (!VT.isScalarInteger())
+ return UNKNOWN_LIBCALL;
+ uint64_t MemSize = VT.getScalarSizeInBits() / 8;
+
#define LCALLS(A, B) \
{ A##B##_RELAX, A##B##_ACQ, A##B##_REL, A##B##_ACQ_REL }
#define LCALL5(A) \
@@ -568,27 +579,27 @@ RTLIB::Libcall RTLIB::getOUTLINE_ATOMIC(unsigned Opc, AtomicOrdering Order,
switch (Opc) {
case ISD::ATOMIC_CMP_SWAP: {
const Libcall LC[5][4] = {LCALL5(OUTLINE_ATOMIC_CAS)};
- return LC[ModeN][ModelN];
+ return getOutlineAtomicHelper(LC, Order, MemSize);
}
case ISD::ATOMIC_SWAP: {
const Libcall LC[5][4] = {LCALL5(OUTLINE_ATOMIC_SWP)};
- return LC[ModeN][ModelN];
+ return getOutlineAtomicHelper(LC, Order, MemSize);
}
case ISD::ATOMIC_LOAD_ADD: {
const Libcall LC[5][4] = {LCALL5(OUTLINE_ATOMIC_LDADD)};
- return LC[ModeN][ModelN];
+ return getOutlineAtomicHelper(LC, Order, MemSize);
}
case ISD::ATOMIC_LOAD_OR: {
const Libcall LC[5][4] = {LCALL5(OUTLINE_ATOMIC_LDSET)};
- return LC[ModeN][ModelN];
+ return getOutlineAtomicHelper(LC, Order, MemSize);
}
case ISD::ATOMIC_LOAD_CLR: {
const Libcall LC[5][4] = {LCALL5(OUTLINE_ATOMIC_LDCLR)};
- return LC[ModeN][ModelN];
+ return getOutlineAtomicHelper(LC, Order, MemSize);
}
case ISD::ATOMIC_LOAD_XOR: {
const Libcall LC[5][4] = {LCALL5(OUTLINE_ATOMIC_LDEOR)};
- return LC[ModeN][ModelN];
+ return getOutlineAtomicHelper(LC, Order, MemSize);
}
default:
return UNKNOWN_LIBCALL;
>From 8bc9d0f5ba0f2653222d492a95f7c46f7407186d Mon Sep 17 00:00:00 2001
From: Thomas Preud'homme <thomas.preudhomme at arm.com>
Date: Fri, 15 Dec 2023 13:50:59 +0000
Subject: [PATCH 5/6] Add 32- and 64-bit atomic tests for GlobalISel
---
.../AArch64/GlobalISel/arm64-atomic.ll | 2281 ++++++++++++++++-
1 file changed, 2261 insertions(+), 20 deletions(-)
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll
index dd516e4a1e6c74..986c36426fb539 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll
@@ -3953,26 +3953,2069 @@ define i16 @atomicrmw_umax_i16(ptr %ptr, i16 %rhs) {
ret i16 %res
}
+define i32 @atomicrmw_add_i32(ptr %ptr, i32 %rhs) {
+; CHECK-NOLSE-O1-LABEL: atomicrmw_add_i32:
+; CHECK-NOLSE-O1: ; %bb.0:
+; CHECK-NOLSE-O1-NEXT: LBB47_1: ; %atomicrmw.start
+; CHECK-NOLSE-O1-NEXT: ; =>This Inner Loop Header: Depth=1
+; CHECK-NOLSE-O1-NEXT: ldaxr w8, [x0]
+; CHECK-NOLSE-O1-NEXT: add w9, w8, w1
+; CHECK-NOLSE-O1-NEXT: stlxr w10, w9, [x0]
+; CHECK-NOLSE-O1-NEXT: cbnz w10, LBB47_1
+; CHECK-NOLSE-O1-NEXT: ; %bb.2: ; %atomicrmw.end
+; CHECK-NOLSE-O1-NEXT: mov w0, w8
+; CHECK-NOLSE-O1-NEXT: ret
+;
+; CHECK-OUTLINE-O1-LABEL: atomicrmw_add_i32:
+; CHECK-OUTLINE-O1: ; %bb.0:
+; CHECK-OUTLINE-O1-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
+; CHECK-OUTLINE-O1-NEXT: .cfi_def_cfa_offset 16
+; CHECK-OUTLINE-O1-NEXT: .cfi_offset w30, -8
+; CHECK-OUTLINE-O1-NEXT: .cfi_offset w29, -16
+; CHECK-OUTLINE-O1-NEXT: mov x2, x0
+; CHECK-OUTLINE-O1-NEXT: mov w0, w1
+; CHECK-OUTLINE-O1-NEXT: mov x1, x2
+; CHECK-OUTLINE-O1-NEXT: bl ___aarch64_ldadd4_acq_rel
+; CHECK-OUTLINE-O1-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
+; CHECK-OUTLINE-O1-NEXT: ret
+;
+; CHECK-NOLSE-O0-LABEL: atomicrmw_add_i32:
+; CHECK-NOLSE-O0: ; %bb.0:
+; CHECK-NOLSE-O0-NEXT: sub sp, sp, #32
+; CHECK-NOLSE-O0-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NOLSE-O0-NEXT: str x0, [sp, #16] ; 8-byte Folded Spill
+; CHECK-NOLSE-O0-NEXT: str w1, [sp, #24] ; 4-byte Folded Spill
+; CHECK-NOLSE-O0-NEXT: ldr w8, [x0]
+; CHECK-NOLSE-O0-NEXT: str w8, [sp, #28] ; 4-byte Folded Spill
+; CHECK-NOLSE-O0-NEXT: b LBB47_1
+; CHECK-NOLSE-O0-NEXT: LBB47_1: ; %atomicrmw.start
+; CHECK-NOLSE-O0-NEXT: ; =>This Loop Header: Depth=1
+; CHECK-NOLSE-O0-NEXT: ; Child Loop BB47_2 Depth 2
+; CHECK-NOLSE-O0-NEXT: ldr w8, [sp, #28] ; 4-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT: ldr x11, [sp, #16] ; 8-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT: ldr w9, [sp, #24] ; 4-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT: add w12, w8, w9
+; CHECK-NOLSE-O0-NEXT: LBB47_2: ; %atomicrmw.start
+; CHECK-NOLSE-O0-NEXT: ; Parent Loop BB47_1 Depth=1
+; CHECK-NOLSE-O0-NEXT: ; => This Inner Loop Header: Depth=2
+; CHECK-NOLSE-O0-NEXT: ldaxr w9, [x11]
+; CHECK-NOLSE-O0-NEXT: cmp w9, w8
+; CHECK-NOLSE-O0-NEXT: b.ne LBB47_4
+; CHECK-NOLSE-O0-NEXT: ; %bb.3: ; %atomicrmw.start
+; CHECK-NOLSE-O0-NEXT: ; in Loop: Header=BB47_2 Depth=2
+; CHECK-NOLSE-O0-NEXT: stlxr w10, w12, [x11]
+; CHECK-NOLSE-O0-NEXT: cbnz w10, LBB47_2
+; CHECK-NOLSE-O0-NEXT: LBB47_4: ; %atomicrmw.start
+; CHECK-NOLSE-O0-NEXT: ; in Loop: Header=BB47_1 Depth=1
+; CHECK-NOLSE-O0-NEXT: str w9, [sp, #12] ; 4-byte Folded Spill
+; CHECK-NOLSE-O0-NEXT: subs w8, w9, w8
+; CHECK-NOLSE-O0-NEXT: cset w8, eq
+; CHECK-NOLSE-O0-NEXT: str w9, [sp, #28] ; 4-byte Folded Spill
+; CHECK-NOLSE-O0-NEXT: tbz w8, #0, LBB47_1
+; CHECK-NOLSE-O0-NEXT: b LBB47_5
+; CHECK-NOLSE-O0-NEXT: LBB47_5: ; %atomicrmw.end
+; CHECK-NOLSE-O0-NEXT: ldr w0, [sp, #12] ; 4-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT: add sp, sp, #32
+; CHECK-NOLSE-O0-NEXT: ret
+;
+; CHECK-OUTLINE-O0-LABEL: atomicrmw_add_i32:
+; CHECK-OUTLINE-O0: ; %bb.0:
+; CHECK-OUTLINE-O0-NEXT: sub sp, sp, #32
+; CHECK-OUTLINE-O0-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: .cfi_def_cfa_offset 32
+; CHECK-OUTLINE-O0-NEXT: .cfi_offset w30, -8
+; CHECK-OUTLINE-O0-NEXT: .cfi_offset w29, -16
+; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #8] ; 8-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: mov w0, w1
+; CHECK-OUTLINE-O0-NEXT: ldr x1, [sp, #8] ; 8-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: bl ___aarch64_ldadd4_acq_rel
+; CHECK-OUTLINE-O0-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: add sp, sp, #32
+; CHECK-OUTLINE-O0-NEXT: ret
+;
+; CHECK-LSE-O1-LABEL: atomicrmw_add_i32:
+; CHECK-LSE-O1: ; %bb.0:
+; CHECK-LSE-O1-NEXT: ldaddal w1, w0, [x0]
+; CHECK-LSE-O1-NEXT: ret
+;
+; CHECK-LSE-O0-LABEL: atomicrmw_add_i32:
+; CHECK-LSE-O0: ; %bb.0:
+; CHECK-LSE-O0-NEXT: ldaddal w1, w0, [x0]
+; CHECK-LSE-O0-NEXT: ret
+ %res = atomicrmw add ptr %ptr, i32 %rhs seq_cst
+ ret i32 %res
+}
+
+define i32 @atomicrmw_xchg_i32(ptr %ptr, i32 %rhs) {
+; CHECK-NOLSE-O1-LABEL: atomicrmw_xchg_i32:
+; CHECK-NOLSE-O1: ; %bb.0:
+; CHECK-NOLSE-O1-NEXT: mov x8, x0
+; CHECK-NOLSE-O1-NEXT: LBB48_1: ; %atomicrmw.start
+; CHECK-NOLSE-O1-NEXT: ; =>This Inner Loop Header: Depth=1
+; CHECK-NOLSE-O1-NEXT: ldxr w0, [x8]
+; CHECK-NOLSE-O1-NEXT: stxr w9, w1, [x8]
+; CHECK-NOLSE-O1-NEXT: cbnz w9, LBB48_1
+; CHECK-NOLSE-O1-NEXT: ; %bb.2: ; %atomicrmw.end
+; CHECK-NOLSE-O1-NEXT: ; kill: def $w0 killed $w0 killed $x0
+; CHECK-NOLSE-O1-NEXT: ret
+;
+; CHECK-OUTLINE-O1-LABEL: atomicrmw_xchg_i32:
+; CHECK-OUTLINE-O1: ; %bb.0:
+; CHECK-OUTLINE-O1-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
+; CHECK-OUTLINE-O1-NEXT: .cfi_def_cfa_offset 16
+; CHECK-OUTLINE-O1-NEXT: .cfi_offset w30, -8
+; CHECK-OUTLINE-O1-NEXT: .cfi_offset w29, -16
+; CHECK-OUTLINE-O1-NEXT: mov x2, x0
+; CHECK-OUTLINE-O1-NEXT: mov w0, w1
+; CHECK-OUTLINE-O1-NEXT: mov x1, x2
+; CHECK-OUTLINE-O1-NEXT: bl ___aarch64_swp4_relax
+; CHECK-OUTLINE-O1-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
+; CHECK-OUTLINE-O1-NEXT: ret
+;
+; CHECK-NOLSE-O0-LABEL: atomicrmw_xchg_i32:
+; CHECK-NOLSE-O0: ; %bb.0:
+; CHECK-NOLSE-O0-NEXT: sub sp, sp, #32
+; CHECK-NOLSE-O0-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NOLSE-O0-NEXT: str x0, [sp, #16] ; 8-byte Folded Spill
+; CHECK-NOLSE-O0-NEXT: str w1, [sp, #24] ; 4-byte Folded Spill
+; CHECK-NOLSE-O0-NEXT: ldr w8, [x0]
+; CHECK-NOLSE-O0-NEXT: str w8, [sp, #28] ; 4-byte Folded Spill
+; CHECK-NOLSE-O0-NEXT: b LBB48_1
+; CHECK-NOLSE-O0-NEXT: LBB48_1: ; %atomicrmw.start
+; CHECK-NOLSE-O0-NEXT: ; =>This Loop Header: Depth=1
+; CHECK-NOLSE-O0-NEXT: ; Child Loop BB48_2 Depth 2
+; CHECK-NOLSE-O0-NEXT: ldr w8, [sp, #28] ; 4-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT: ldr x11, [sp, #16] ; 8-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT: ldr w12, [sp, #24] ; 4-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT: LBB48_2: ; %atomicrmw.start
+; CHECK-NOLSE-O0-NEXT: ; Parent Loop BB48_1 Depth=1
+; CHECK-NOLSE-O0-NEXT: ; => This Inner Loop Header: Depth=2
+; CHECK-NOLSE-O0-NEXT: ldaxr w9, [x11]
+; CHECK-NOLSE-O0-NEXT: cmp w9, w8
+; CHECK-NOLSE-O0-NEXT: b.ne LBB48_4
+; CHECK-NOLSE-O0-NEXT: ; %bb.3: ; %atomicrmw.start
+; CHECK-NOLSE-O0-NEXT: ; in Loop: Header=BB48_2 Depth=2
+; CHECK-NOLSE-O0-NEXT: stlxr w10, w12, [x11]
+; CHECK-NOLSE-O0-NEXT: cbnz w10, LBB48_2
+; CHECK-NOLSE-O0-NEXT: LBB48_4: ; %atomicrmw.start
+; CHECK-NOLSE-O0-NEXT: ; in Loop: Header=BB48_1 Depth=1
+; CHECK-NOLSE-O0-NEXT: str w9, [sp, #12] ; 4-byte Folded Spill
+; CHECK-NOLSE-O0-NEXT: subs w8, w9, w8
+; CHECK-NOLSE-O0-NEXT: cset w8, eq
+; CHECK-NOLSE-O0-NEXT: str w9, [sp, #28] ; 4-byte Folded Spill
+; CHECK-NOLSE-O0-NEXT: tbz w8, #0, LBB48_1
+; CHECK-NOLSE-O0-NEXT: b LBB48_5
+; CHECK-NOLSE-O0-NEXT: LBB48_5: ; %atomicrmw.end
+; CHECK-NOLSE-O0-NEXT: ldr w0, [sp, #12] ; 4-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT: add sp, sp, #32
+; CHECK-NOLSE-O0-NEXT: ret
+;
+; CHECK-OUTLINE-O0-LABEL: atomicrmw_xchg_i32:
+; CHECK-OUTLINE-O0: ; %bb.0:
+; CHECK-OUTLINE-O0-NEXT: sub sp, sp, #32
+; CHECK-OUTLINE-O0-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: .cfi_def_cfa_offset 32
+; CHECK-OUTLINE-O0-NEXT: .cfi_offset w30, -8
+; CHECK-OUTLINE-O0-NEXT: .cfi_offset w29, -16
+; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #8] ; 8-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: mov w0, w1
+; CHECK-OUTLINE-O0-NEXT: ldr x1, [sp, #8] ; 8-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: bl ___aarch64_swp4_relax
+; CHECK-OUTLINE-O0-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: add sp, sp, #32
+; CHECK-OUTLINE-O0-NEXT: ret
+;
+; CHECK-LSE-O1-LABEL: atomicrmw_xchg_i32:
+; CHECK-LSE-O1: ; %bb.0:
+; CHECK-LSE-O1-NEXT: swp w1, w0, [x0]
+; CHECK-LSE-O1-NEXT: ret
+;
+; CHECK-LSE-O0-LABEL: atomicrmw_xchg_i32:
+; CHECK-LSE-O0: ; %bb.0:
+; CHECK-LSE-O0-NEXT: swp w1, w0, [x0]
+; CHECK-LSE-O0-NEXT: ret
+ %res = atomicrmw xchg ptr %ptr, i32 %rhs monotonic
+ ret i32 %res
+}
+
+define i32 @atomicrmw_sub_i32(ptr %ptr, i32 %rhs) {
+; CHECK-NOLSE-O1-LABEL: atomicrmw_sub_i32:
+; CHECK-NOLSE-O1: ; %bb.0:
+; CHECK-NOLSE-O1-NEXT: LBB49_1: ; %atomicrmw.start
+; CHECK-NOLSE-O1-NEXT: ; =>This Inner Loop Header: Depth=1
+; CHECK-NOLSE-O1-NEXT: ldaxr w8, [x0]
+; CHECK-NOLSE-O1-NEXT: sub w9, w8, w1
+; CHECK-NOLSE-O1-NEXT: stxr w10, w9, [x0]
+; CHECK-NOLSE-O1-NEXT: cbnz w10, LBB49_1
+; CHECK-NOLSE-O1-NEXT: ; %bb.2: ; %atomicrmw.end
+; CHECK-NOLSE-O1-NEXT: mov w0, w8
+; CHECK-NOLSE-O1-NEXT: ret
+;
+; CHECK-OUTLINE-O1-LABEL: atomicrmw_sub_i32:
+; CHECK-OUTLINE-O1: ; %bb.0:
+; CHECK-OUTLINE-O1-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
+; CHECK-OUTLINE-O1-NEXT: .cfi_def_cfa_offset 16
+; CHECK-OUTLINE-O1-NEXT: .cfi_offset w30, -8
+; CHECK-OUTLINE-O1-NEXT: .cfi_offset w29, -16
+; CHECK-OUTLINE-O1-NEXT: mov x2, x0
+; CHECK-OUTLINE-O1-NEXT: neg w0, w1
+; CHECK-OUTLINE-O1-NEXT: mov x1, x2
+; CHECK-OUTLINE-O1-NEXT: bl ___aarch64_ldadd4_acq
+; CHECK-OUTLINE-O1-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
+; CHECK-OUTLINE-O1-NEXT: ret
+;
+; CHECK-NOLSE-O0-LABEL: atomicrmw_sub_i32:
+; CHECK-NOLSE-O0: ; %bb.0:
+; CHECK-NOLSE-O0-NEXT: sub sp, sp, #32
+; CHECK-NOLSE-O0-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NOLSE-O0-NEXT: str x0, [sp, #16] ; 8-byte Folded Spill
+; CHECK-NOLSE-O0-NEXT: str w1, [sp, #24] ; 4-byte Folded Spill
+; CHECK-NOLSE-O0-NEXT: ldr w8, [x0]
+; CHECK-NOLSE-O0-NEXT: str w8, [sp, #28] ; 4-byte Folded Spill
+; CHECK-NOLSE-O0-NEXT: b LBB49_1
+; CHECK-NOLSE-O0-NEXT: LBB49_1: ; %atomicrmw.start
+; CHECK-NOLSE-O0-NEXT: ; =>This Loop Header: Depth=1
+; CHECK-NOLSE-O0-NEXT: ; Child Loop BB49_2 Depth 2
+; CHECK-NOLSE-O0-NEXT: ldr w8, [sp, #28] ; 4-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT: ldr x11, [sp, #16] ; 8-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT: ldr w9, [sp, #24] ; 4-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT: subs w12, w8, w9
+; CHECK-NOLSE-O0-NEXT: LBB49_2: ; %atomicrmw.start
+; CHECK-NOLSE-O0-NEXT: ; Parent Loop BB49_1 Depth=1
+; CHECK-NOLSE-O0-NEXT: ; => This Inner Loop Header: Depth=2
+; CHECK-NOLSE-O0-NEXT: ldaxr w9, [x11]
+; CHECK-NOLSE-O0-NEXT: cmp w9, w8
+; CHECK-NOLSE-O0-NEXT: b.ne LBB49_4
+; CHECK-NOLSE-O0-NEXT: ; %bb.3: ; %atomicrmw.start
+; CHECK-NOLSE-O0-NEXT: ; in Loop: Header=BB49_2 Depth=2
+; CHECK-NOLSE-O0-NEXT: stlxr w10, w12, [x11]
+; CHECK-NOLSE-O0-NEXT: cbnz w10, LBB49_2
+; CHECK-NOLSE-O0-NEXT: LBB49_4: ; %atomicrmw.start
+; CHECK-NOLSE-O0-NEXT: ; in Loop: Header=BB49_1 Depth=1
+; CHECK-NOLSE-O0-NEXT: str w9, [sp, #12] ; 4-byte Folded Spill
+; CHECK-NOLSE-O0-NEXT: subs w8, w9, w8
+; CHECK-NOLSE-O0-NEXT: cset w8, eq
+; CHECK-NOLSE-O0-NEXT: str w9, [sp, #28] ; 4-byte Folded Spill
+; CHECK-NOLSE-O0-NEXT: tbz w8, #0, LBB49_1
+; CHECK-NOLSE-O0-NEXT: b LBB49_5
+; CHECK-NOLSE-O0-NEXT: LBB49_5: ; %atomicrmw.end
+; CHECK-NOLSE-O0-NEXT: ldr w0, [sp, #12] ; 4-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT: add sp, sp, #32
+; CHECK-NOLSE-O0-NEXT: ret
+;
+; CHECK-OUTLINE-O0-LABEL: atomicrmw_sub_i32:
+; CHECK-OUTLINE-O0: ; %bb.0:
+; CHECK-OUTLINE-O0-NEXT: sub sp, sp, #32
+; CHECK-OUTLINE-O0-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: .cfi_def_cfa_offset 32
+; CHECK-OUTLINE-O0-NEXT: .cfi_offset w30, -8
+; CHECK-OUTLINE-O0-NEXT: .cfi_offset w29, -16
+; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #8] ; 8-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: mov w9, w1
+; CHECK-OUTLINE-O0-NEXT: ldr x1, [sp, #8] ; 8-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: mov w8, wzr
+; CHECK-OUTLINE-O0-NEXT: subs w0, w8, w9
+; CHECK-OUTLINE-O0-NEXT: bl ___aarch64_ldadd4_acq
+; CHECK-OUTLINE-O0-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: add sp, sp, #32
+; CHECK-OUTLINE-O0-NEXT: ret
+;
+; CHECK-LSE-O1-LABEL: atomicrmw_sub_i32:
+; CHECK-LSE-O1: ; %bb.0:
+; CHECK-LSE-O1-NEXT: neg w8, w1
+; CHECK-LSE-O1-NEXT: ldadda w8, w0, [x0]
+; CHECK-LSE-O1-NEXT: ret
+;
+; CHECK-LSE-O0-LABEL: atomicrmw_sub_i32:
+; CHECK-LSE-O0: ; %bb.0:
+; CHECK-LSE-O0-NEXT: neg w8, w1
+; CHECK-LSE-O0-NEXT: ldadda w8, w0, [x0]
+; CHECK-LSE-O0-NEXT: ret
+ %res = atomicrmw sub ptr %ptr, i32 %rhs acquire
+ ret i32 %res
+}
+
+define i32 @atomicrmw_and_i32(ptr %ptr, i32 %rhs) {
+; CHECK-NOLSE-O1-LABEL: atomicrmw_and_i32:
+; CHECK-NOLSE-O1: ; %bb.0:
+; CHECK-NOLSE-O1-NEXT: LBB50_1: ; %atomicrmw.start
+; CHECK-NOLSE-O1-NEXT: ; =>This Inner Loop Header: Depth=1
+; CHECK-NOLSE-O1-NEXT: ldxr w8, [x0]
+; CHECK-NOLSE-O1-NEXT: and w9, w8, w1
+; CHECK-NOLSE-O1-NEXT: stlxr w10, w9, [x0]
+; CHECK-NOLSE-O1-NEXT: cbnz w10, LBB50_1
+; CHECK-NOLSE-O1-NEXT: ; %bb.2: ; %atomicrmw.end
+; CHECK-NOLSE-O1-NEXT: mov w0, w8
+; CHECK-NOLSE-O1-NEXT: ret
+;
+; CHECK-OUTLINE-O1-LABEL: atomicrmw_and_i32:
+; CHECK-OUTLINE-O1: ; %bb.0:
+; CHECK-OUTLINE-O1-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
+; CHECK-OUTLINE-O1-NEXT: .cfi_def_cfa_offset 16
+; CHECK-OUTLINE-O1-NEXT: .cfi_offset w30, -8
+; CHECK-OUTLINE-O1-NEXT: .cfi_offset w29, -16
+; CHECK-OUTLINE-O1-NEXT: mov x2, x0
+; CHECK-OUTLINE-O1-NEXT: mov w8, #-1 ; =0xffffffff
+; CHECK-OUTLINE-O1-NEXT: eor w0, w8, w1
+; CHECK-OUTLINE-O1-NEXT: mov x1, x2
+; CHECK-OUTLINE-O1-NEXT: bl ___aarch64_ldclr4_rel
+; CHECK-OUTLINE-O1-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
+; CHECK-OUTLINE-O1-NEXT: ret
+;
+; CHECK-NOLSE-O0-LABEL: atomicrmw_and_i32:
+; CHECK-NOLSE-O0: ; %bb.0:
+; CHECK-NOLSE-O0-NEXT: sub sp, sp, #32
+; CHECK-NOLSE-O0-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NOLSE-O0-NEXT: str x0, [sp, #16] ; 8-byte Folded Spill
+; CHECK-NOLSE-O0-NEXT: str w1, [sp, #24] ; 4-byte Folded Spill
+; CHECK-NOLSE-O0-NEXT: ldr w8, [x0]
+; CHECK-NOLSE-O0-NEXT: str w8, [sp, #28] ; 4-byte Folded Spill
+; CHECK-NOLSE-O0-NEXT: b LBB50_1
+; CHECK-NOLSE-O0-NEXT: LBB50_1: ; %atomicrmw.start
+; CHECK-NOLSE-O0-NEXT: ; =>This Loop Header: Depth=1
+; CHECK-NOLSE-O0-NEXT: ; Child Loop BB50_2 Depth 2
+; CHECK-NOLSE-O0-NEXT: ldr w8, [sp, #28] ; 4-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT: ldr x11, [sp, #16] ; 8-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT: ldr w9, [sp, #24] ; 4-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT: and w12, w8, w9
+; CHECK-NOLSE-O0-NEXT: LBB50_2: ; %atomicrmw.start
+; CHECK-NOLSE-O0-NEXT: ; Parent Loop BB50_1 Depth=1
+; CHECK-NOLSE-O0-NEXT: ; => This Inner Loop Header: Depth=2
+; CHECK-NOLSE-O0-NEXT: ldaxr w9, [x11]
+; CHECK-NOLSE-O0-NEXT: cmp w9, w8
+; CHECK-NOLSE-O0-NEXT: b.ne LBB50_4
+; CHECK-NOLSE-O0-NEXT: ; %bb.3: ; %atomicrmw.start
+; CHECK-NOLSE-O0-NEXT: ; in Loop: Header=BB50_2 Depth=2
+; CHECK-NOLSE-O0-NEXT: stlxr w10, w12, [x11]
+; CHECK-NOLSE-O0-NEXT: cbnz w10, LBB50_2
+; CHECK-NOLSE-O0-NEXT: LBB50_4: ; %atomicrmw.start
+; CHECK-NOLSE-O0-NEXT: ; in Loop: Header=BB50_1 Depth=1
+; CHECK-NOLSE-O0-NEXT: str w9, [sp, #12] ; 4-byte Folded Spill
+; CHECK-NOLSE-O0-NEXT: subs w8, w9, w8
+; CHECK-NOLSE-O0-NEXT: cset w8, eq
+; CHECK-NOLSE-O0-NEXT: str w9, [sp, #28] ; 4-byte Folded Spill
+; CHECK-NOLSE-O0-NEXT: tbz w8, #0, LBB50_1
+; CHECK-NOLSE-O0-NEXT: b LBB50_5
+; CHECK-NOLSE-O0-NEXT: LBB50_5: ; %atomicrmw.end
+; CHECK-NOLSE-O0-NEXT: ldr w0, [sp, #12] ; 4-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT: add sp, sp, #32
+; CHECK-NOLSE-O0-NEXT: ret
+;
+; CHECK-OUTLINE-O0-LABEL: atomicrmw_and_i32:
+; CHECK-OUTLINE-O0: ; %bb.0:
+; CHECK-OUTLINE-O0-NEXT: sub sp, sp, #32
+; CHECK-OUTLINE-O0-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: .cfi_def_cfa_offset 32
+; CHECK-OUTLINE-O0-NEXT: .cfi_offset w30, -8
+; CHECK-OUTLINE-O0-NEXT: .cfi_offset w29, -16
+; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #8] ; 8-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: mov w9, w1
+; CHECK-OUTLINE-O0-NEXT: ldr x1, [sp, #8] ; 8-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: mov w8, #-1 ; =0xffffffff
+; CHECK-OUTLINE-O0-NEXT: eor w0, w8, w9
+; CHECK-OUTLINE-O0-NEXT: bl ___aarch64_ldclr4_rel
+; CHECK-OUTLINE-O0-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: add sp, sp, #32
+; CHECK-OUTLINE-O0-NEXT: ret
+;
+; CHECK-LSE-O1-LABEL: atomicrmw_and_i32:
+; CHECK-LSE-O1: ; %bb.0:
+; CHECK-LSE-O1-NEXT: mvn w8, w1
+; CHECK-LSE-O1-NEXT: ldclrl w8, w0, [x0]
+; CHECK-LSE-O1-NEXT: ret
+;
+; CHECK-LSE-O0-LABEL: atomicrmw_and_i32:
+; CHECK-LSE-O0: ; %bb.0:
+; CHECK-LSE-O0-NEXT: mvn w8, w1
+; CHECK-LSE-O0-NEXT: ldclrl w8, w0, [x0]
+; CHECK-LSE-O0-NEXT: ret
+ %res = atomicrmw and ptr %ptr, i32 %rhs release
+ ret i32 %res
+}
+
+define i32 @atomicrmw_or_i32(ptr %ptr, i32 %rhs) { ; i32 atomic OR of %rhs into the word at %ptr, seq_cst ordering
+; CHECK-NOLSE-O1-LABEL: atomicrmw_or_i32:
+; CHECK-NOLSE-O1: ; %bb.0:
+; CHECK-NOLSE-O1-NEXT: LBB51_1: ; %atomicrmw.start
+; CHECK-NOLSE-O1-NEXT: ; =>This Inner Loop Header: Depth=1
+; CHECK-NOLSE-O1-NEXT: ldaxr w8, [x0]
+; CHECK-NOLSE-O1-NEXT: orr w9, w8, w1
+; CHECK-NOLSE-O1-NEXT: stlxr w10, w9, [x0]
+; CHECK-NOLSE-O1-NEXT: cbnz w10, LBB51_1
+; CHECK-NOLSE-O1-NEXT: ; %bb.2: ; %atomicrmw.end
+; CHECK-NOLSE-O1-NEXT: mov w0, w8
+; CHECK-NOLSE-O1-NEXT: ret
+;
+; CHECK-OUTLINE-O1-LABEL: atomicrmw_or_i32:
+; CHECK-OUTLINE-O1: ; %bb.0:
+; CHECK-OUTLINE-O1-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
+; CHECK-OUTLINE-O1-NEXT: .cfi_def_cfa_offset 16
+; CHECK-OUTLINE-O1-NEXT: .cfi_offset w30, -8
+; CHECK-OUTLINE-O1-NEXT: .cfi_offset w29, -16
+; CHECK-OUTLINE-O1-NEXT: mov x2, x0
+; CHECK-OUTLINE-O1-NEXT: mov w0, w1
+; CHECK-OUTLINE-O1-NEXT: mov x1, x2
+; CHECK-OUTLINE-O1-NEXT: bl ___aarch64_ldset4_acq_rel
+; CHECK-OUTLINE-O1-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
+; CHECK-OUTLINE-O1-NEXT: ret
+;
+; CHECK-NOLSE-O0-LABEL: atomicrmw_or_i32:
+; CHECK-NOLSE-O0: ; %bb.0:
+; CHECK-NOLSE-O0-NEXT: sub sp, sp, #32
+; CHECK-NOLSE-O0-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NOLSE-O0-NEXT: str x0, [sp, #16] ; 8-byte Folded Spill
+; CHECK-NOLSE-O0-NEXT: str w1, [sp, #24] ; 4-byte Folded Spill
+; CHECK-NOLSE-O0-NEXT: ldr w8, [x0]
+; CHECK-NOLSE-O0-NEXT: str w8, [sp, #28] ; 4-byte Folded Spill
+; CHECK-NOLSE-O0-NEXT: b LBB51_1
+; CHECK-NOLSE-O0-NEXT: LBB51_1: ; %atomicrmw.start
+; CHECK-NOLSE-O0-NEXT: ; =>This Loop Header: Depth=1
+; CHECK-NOLSE-O0-NEXT: ; Child Loop BB51_2 Depth 2
+; CHECK-NOLSE-O0-NEXT: ldr w8, [sp, #28] ; 4-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT: ldr x11, [sp, #16] ; 8-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT: ldr w9, [sp, #24] ; 4-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT: orr w12, w8, w9
+; CHECK-NOLSE-O0-NEXT: LBB51_2: ; %atomicrmw.start
+; CHECK-NOLSE-O0-NEXT: ; Parent Loop BB51_1 Depth=1
+; CHECK-NOLSE-O0-NEXT: ; => This Inner Loop Header: Depth=2
+; CHECK-NOLSE-O0-NEXT: ldaxr w9, [x11]
+; CHECK-NOLSE-O0-NEXT: cmp w9, w8
+; CHECK-NOLSE-O0-NEXT: b.ne LBB51_4
+; CHECK-NOLSE-O0-NEXT: ; %bb.3: ; %atomicrmw.start
+; CHECK-NOLSE-O0-NEXT: ; in Loop: Header=BB51_2 Depth=2
+; CHECK-NOLSE-O0-NEXT: stlxr w10, w12, [x11]
+; CHECK-NOLSE-O0-NEXT: cbnz w10, LBB51_2
+; CHECK-NOLSE-O0-NEXT: LBB51_4: ; %atomicrmw.start
+; CHECK-NOLSE-O0-NEXT: ; in Loop: Header=BB51_1 Depth=1
+; CHECK-NOLSE-O0-NEXT: str w9, [sp, #12] ; 4-byte Folded Spill
+; CHECK-NOLSE-O0-NEXT: subs w8, w9, w8
+; CHECK-NOLSE-O0-NEXT: cset w8, eq
+; CHECK-NOLSE-O0-NEXT: str w9, [sp, #28] ; 4-byte Folded Spill
+; CHECK-NOLSE-O0-NEXT: tbz w8, #0, LBB51_1
+; CHECK-NOLSE-O0-NEXT: b LBB51_5
+; CHECK-NOLSE-O0-NEXT: LBB51_5: ; %atomicrmw.end
+; CHECK-NOLSE-O0-NEXT: ldr w0, [sp, #12] ; 4-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT: add sp, sp, #32
+; CHECK-NOLSE-O0-NEXT: ret
+;
+; CHECK-OUTLINE-O0-LABEL: atomicrmw_or_i32:
+; CHECK-OUTLINE-O0: ; %bb.0:
+; CHECK-OUTLINE-O0-NEXT: sub sp, sp, #32
+; CHECK-OUTLINE-O0-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: .cfi_def_cfa_offset 32
+; CHECK-OUTLINE-O0-NEXT: .cfi_offset w30, -8
+; CHECK-OUTLINE-O0-NEXT: .cfi_offset w29, -16
+; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #8] ; 8-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: mov w0, w1
+; CHECK-OUTLINE-O0-NEXT: ldr x1, [sp, #8] ; 8-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: bl ___aarch64_ldset4_acq_rel
+; CHECK-OUTLINE-O0-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: add sp, sp, #32
+; CHECK-OUTLINE-O0-NEXT: ret
+;
+; CHECK-LSE-O1-LABEL: atomicrmw_or_i32:
+; CHECK-LSE-O1: ; %bb.0:
+; CHECK-LSE-O1-NEXT: ldsetal w1, w0, [x0]
+; CHECK-LSE-O1-NEXT: ret
+;
+; CHECK-LSE-O0-LABEL: atomicrmw_or_i32:
+; CHECK-LSE-O0: ; %bb.0:
+; CHECK-LSE-O0-NEXT: ldsetal w1, w0, [x0]
+; CHECK-LSE-O0-NEXT: ret
+ %res = atomicrmw or ptr %ptr, i32 %rhs seq_cst ; expectations above: ldaxr/stlxr loops under the NOLSE prefixes, a call to ___aarch64_ldset4_acq_rel under the OUTLINE prefixes, a single ldsetal under the LSE prefixes
+ ret i32 %res ; the atomicrmw result is the function's return value
+}
+
+define i32 @atomicrmw_xor_i32(ptr %ptr, i32 %rhs) { ; i32 atomic XOR of %rhs into the word at %ptr, monotonic ordering
+; CHECK-NOLSE-O1-LABEL: atomicrmw_xor_i32:
+; CHECK-NOLSE-O1: ; %bb.0:
+; CHECK-NOLSE-O1-NEXT: LBB52_1: ; %atomicrmw.start
+; CHECK-NOLSE-O1-NEXT: ; =>This Inner Loop Header: Depth=1
+; CHECK-NOLSE-O1-NEXT: ldxr w8, [x0]
+; CHECK-NOLSE-O1-NEXT: eor w9, w8, w1
+; CHECK-NOLSE-O1-NEXT: stxr w10, w9, [x0]
+; CHECK-NOLSE-O1-NEXT: cbnz w10, LBB52_1
+; CHECK-NOLSE-O1-NEXT: ; %bb.2: ; %atomicrmw.end
+; CHECK-NOLSE-O1-NEXT: mov w0, w8
+; CHECK-NOLSE-O1-NEXT: ret
+;
+; CHECK-OUTLINE-O1-LABEL: atomicrmw_xor_i32:
+; CHECK-OUTLINE-O1: ; %bb.0:
+; CHECK-OUTLINE-O1-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
+; CHECK-OUTLINE-O1-NEXT: .cfi_def_cfa_offset 16
+; CHECK-OUTLINE-O1-NEXT: .cfi_offset w30, -8
+; CHECK-OUTLINE-O1-NEXT: .cfi_offset w29, -16
+; CHECK-OUTLINE-O1-NEXT: mov x2, x0
+; CHECK-OUTLINE-O1-NEXT: mov w0, w1
+; CHECK-OUTLINE-O1-NEXT: mov x1, x2
+; CHECK-OUTLINE-O1-NEXT: bl ___aarch64_ldeor4_relax
+; CHECK-OUTLINE-O1-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
+; CHECK-OUTLINE-O1-NEXT: ret
+;
+; CHECK-NOLSE-O0-LABEL: atomicrmw_xor_i32:
+; CHECK-NOLSE-O0: ; %bb.0:
+; CHECK-NOLSE-O0-NEXT: sub sp, sp, #32
+; CHECK-NOLSE-O0-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NOLSE-O0-NEXT: str x0, [sp, #16] ; 8-byte Folded Spill
+; CHECK-NOLSE-O0-NEXT: str w1, [sp, #24] ; 4-byte Folded Spill
+; CHECK-NOLSE-O0-NEXT: ldr w8, [x0]
+; CHECK-NOLSE-O0-NEXT: str w8, [sp, #28] ; 4-byte Folded Spill
+; CHECK-NOLSE-O0-NEXT: b LBB52_1
+; CHECK-NOLSE-O0-NEXT: LBB52_1: ; %atomicrmw.start
+; CHECK-NOLSE-O0-NEXT: ; =>This Loop Header: Depth=1
+; CHECK-NOLSE-O0-NEXT: ; Child Loop BB52_2 Depth 2
+; CHECK-NOLSE-O0-NEXT: ldr w8, [sp, #28] ; 4-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT: ldr x11, [sp, #16] ; 8-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT: ldr w9, [sp, #24] ; 4-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT: eor w12, w8, w9
+; CHECK-NOLSE-O0-NEXT: LBB52_2: ; %atomicrmw.start
+; CHECK-NOLSE-O0-NEXT: ; Parent Loop BB52_1 Depth=1
+; CHECK-NOLSE-O0-NEXT: ; => This Inner Loop Header: Depth=2
+; CHECK-NOLSE-O0-NEXT: ldaxr w9, [x11]
+; CHECK-NOLSE-O0-NEXT: cmp w9, w8
+; CHECK-NOLSE-O0-NEXT: b.ne LBB52_4
+; CHECK-NOLSE-O0-NEXT: ; %bb.3: ; %atomicrmw.start
+; CHECK-NOLSE-O0-NEXT: ; in Loop: Header=BB52_2 Depth=2
+; CHECK-NOLSE-O0-NEXT: stlxr w10, w12, [x11]
+; CHECK-NOLSE-O0-NEXT: cbnz w10, LBB52_2
+; CHECK-NOLSE-O0-NEXT: LBB52_4: ; %atomicrmw.start
+; CHECK-NOLSE-O0-NEXT: ; in Loop: Header=BB52_1 Depth=1
+; CHECK-NOLSE-O0-NEXT: str w9, [sp, #12] ; 4-byte Folded Spill
+; CHECK-NOLSE-O0-NEXT: subs w8, w9, w8
+; CHECK-NOLSE-O0-NEXT: cset w8, eq
+; CHECK-NOLSE-O0-NEXT: str w9, [sp, #28] ; 4-byte Folded Spill
+; CHECK-NOLSE-O0-NEXT: tbz w8, #0, LBB52_1
+; CHECK-NOLSE-O0-NEXT: b LBB52_5
+; CHECK-NOLSE-O0-NEXT: LBB52_5: ; %atomicrmw.end
+; CHECK-NOLSE-O0-NEXT: ldr w0, [sp, #12] ; 4-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT: add sp, sp, #32
+; CHECK-NOLSE-O0-NEXT: ret
+;
+; CHECK-OUTLINE-O0-LABEL: atomicrmw_xor_i32:
+; CHECK-OUTLINE-O0: ; %bb.0:
+; CHECK-OUTLINE-O0-NEXT: sub sp, sp, #32
+; CHECK-OUTLINE-O0-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: .cfi_def_cfa_offset 32
+; CHECK-OUTLINE-O0-NEXT: .cfi_offset w30, -8
+; CHECK-OUTLINE-O0-NEXT: .cfi_offset w29, -16
+; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #8] ; 8-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: mov w0, w1
+; CHECK-OUTLINE-O0-NEXT: ldr x1, [sp, #8] ; 8-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: bl ___aarch64_ldeor4_relax
+; CHECK-OUTLINE-O0-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: add sp, sp, #32
+; CHECK-OUTLINE-O0-NEXT: ret
+;
+; CHECK-LSE-O1-LABEL: atomicrmw_xor_i32:
+; CHECK-LSE-O1: ; %bb.0:
+; CHECK-LSE-O1-NEXT: ldeor w1, w0, [x0]
+; CHECK-LSE-O1-NEXT: ret
+;
+; CHECK-LSE-O0-LABEL: atomicrmw_xor_i32:
+; CHECK-LSE-O0: ; %bb.0:
+; CHECK-LSE-O0-NEXT: ldeor w1, w0, [x0]
+; CHECK-LSE-O0-NEXT: ret
+ %res = atomicrmw xor ptr %ptr, i32 %rhs monotonic ; expectations above: exclusive load/store loops under the NOLSE prefixes (ldxr/stxr at O1), a call to ___aarch64_ldeor4_relax under the OUTLINE prefixes, a single ldeor under the LSE prefixes
+ ret i32 %res ; the atomicrmw result is the function's return value
+}
+
+define i32 @atomicrmw_min_i32(ptr %ptr, i32 %rhs) { ; i32 atomic signed-min of %rhs into the word at %ptr, acquire ordering
+; CHECK-NOLSE-O1-LABEL: atomicrmw_min_i32:
+; CHECK-NOLSE-O1: ; %bb.0:
+; CHECK-NOLSE-O1-NEXT: LBB53_1: ; %atomicrmw.start
+; CHECK-NOLSE-O1-NEXT: ; =>This Inner Loop Header: Depth=1
+; CHECK-NOLSE-O1-NEXT: ldaxr w8, [x0]
+; CHECK-NOLSE-O1-NEXT: cmp w8, w1
+; CHECK-NOLSE-O1-NEXT: csel w9, w8, w1, le
+; CHECK-NOLSE-O1-NEXT: stxr w10, w9, [x0]
+; CHECK-NOLSE-O1-NEXT: cbnz w10, LBB53_1
+; CHECK-NOLSE-O1-NEXT: ; %bb.2: ; %atomicrmw.end
+; CHECK-NOLSE-O1-NEXT: mov w0, w8
+; CHECK-NOLSE-O1-NEXT: ret
+;
+; CHECK-OUTLINE-O1-LABEL: atomicrmw_min_i32:
+; CHECK-OUTLINE-O1: ; %bb.0:
+; CHECK-OUTLINE-O1-NEXT: LBB53_1: ; %atomicrmw.start
+; CHECK-OUTLINE-O1-NEXT: ; =>This Inner Loop Header: Depth=1
+; CHECK-OUTLINE-O1-NEXT: ldaxr w8, [x0]
+; CHECK-OUTLINE-O1-NEXT: cmp w8, w1
+; CHECK-OUTLINE-O1-NEXT: csel w9, w8, w1, le
+; CHECK-OUTLINE-O1-NEXT: stxr w10, w9, [x0]
+; CHECK-OUTLINE-O1-NEXT: cbnz w10, LBB53_1
+; CHECK-OUTLINE-O1-NEXT: ; %bb.2: ; %atomicrmw.end
+; CHECK-OUTLINE-O1-NEXT: mov w0, w8
+; CHECK-OUTLINE-O1-NEXT: ret
+;
+; CHECK-NOLSE-O0-LABEL: atomicrmw_min_i32:
+; CHECK-NOLSE-O0: ; %bb.0:
+; CHECK-NOLSE-O0-NEXT: sub sp, sp, #32
+; CHECK-NOLSE-O0-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NOLSE-O0-NEXT: str x0, [sp, #16] ; 8-byte Folded Spill
+; CHECK-NOLSE-O0-NEXT: str w1, [sp, #24] ; 4-byte Folded Spill
+; CHECK-NOLSE-O0-NEXT: ldr w8, [x0]
+; CHECK-NOLSE-O0-NEXT: str w8, [sp, #28] ; 4-byte Folded Spill
+; CHECK-NOLSE-O0-NEXT: b LBB53_1
+; CHECK-NOLSE-O0-NEXT: LBB53_1: ; %atomicrmw.start
+; CHECK-NOLSE-O0-NEXT: ; =>This Loop Header: Depth=1
+; CHECK-NOLSE-O0-NEXT: ; Child Loop BB53_2 Depth 2
+; CHECK-NOLSE-O0-NEXT: ldr w8, [sp, #28] ; 4-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT: ldr x11, [sp, #16] ; 8-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT: ldr w9, [sp, #24] ; 4-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT: subs w10, w8, w9
+; CHECK-NOLSE-O0-NEXT: csel w12, w8, w9, le
+; CHECK-NOLSE-O0-NEXT: LBB53_2: ; %atomicrmw.start
+; CHECK-NOLSE-O0-NEXT: ; Parent Loop BB53_1 Depth=1
+; CHECK-NOLSE-O0-NEXT: ; => This Inner Loop Header: Depth=2
+; CHECK-NOLSE-O0-NEXT: ldaxr w9, [x11]
+; CHECK-NOLSE-O0-NEXT: cmp w9, w8
+; CHECK-NOLSE-O0-NEXT: b.ne LBB53_4
+; CHECK-NOLSE-O0-NEXT: ; %bb.3: ; %atomicrmw.start
+; CHECK-NOLSE-O0-NEXT: ; in Loop: Header=BB53_2 Depth=2
+; CHECK-NOLSE-O0-NEXT: stlxr w10, w12, [x11]
+; CHECK-NOLSE-O0-NEXT: cbnz w10, LBB53_2
+; CHECK-NOLSE-O0-NEXT: LBB53_4: ; %atomicrmw.start
+; CHECK-NOLSE-O0-NEXT: ; in Loop: Header=BB53_1 Depth=1
+; CHECK-NOLSE-O0-NEXT: str w9, [sp, #12] ; 4-byte Folded Spill
+; CHECK-NOLSE-O0-NEXT: subs w8, w9, w8
+; CHECK-NOLSE-O0-NEXT: cset w8, eq
+; CHECK-NOLSE-O0-NEXT: str w9, [sp, #28] ; 4-byte Folded Spill
+; CHECK-NOLSE-O0-NEXT: tbz w8, #0, LBB53_1
+; CHECK-NOLSE-O0-NEXT: b LBB53_5
+; CHECK-NOLSE-O0-NEXT: LBB53_5: ; %atomicrmw.end
+; CHECK-NOLSE-O0-NEXT: ldr w0, [sp, #12] ; 4-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT: add sp, sp, #32
+; CHECK-NOLSE-O0-NEXT: ret
+;
+; CHECK-OUTLINE-O0-LABEL: atomicrmw_min_i32:
+; CHECK-OUTLINE-O0: ; %bb.0:
+; CHECK-OUTLINE-O0-NEXT: sub sp, sp, #48
+; CHECK-OUTLINE-O0-NEXT: stp x29, x30, [sp, #32] ; 16-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: .cfi_def_cfa_offset 48
+; CHECK-OUTLINE-O0-NEXT: .cfi_offset w30, -8
+; CHECK-OUTLINE-O0-NEXT: .cfi_offset w29, -16
+; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #16] ; 8-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: str w1, [sp, #24] ; 4-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: ldr w0, [x0]
+; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #28] ; 4-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: b LBB53_1
+; CHECK-OUTLINE-O0-NEXT: LBB53_1: ; %atomicrmw.start
+; CHECK-OUTLINE-O0-NEXT: ; =>This Inner Loop Header: Depth=1
+; CHECK-OUTLINE-O0-NEXT: ldr w0, [sp, #28] ; 4-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: ldr x2, [sp, #16] ; 8-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: ldr w8, [sp, #24] ; 4-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #8] ; 4-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: subs w9, w0, w8
+; CHECK-OUTLINE-O0-NEXT: csel w1, w0, w8, le
+; CHECK-OUTLINE-O0-NEXT: bl ___aarch64_cas4_acq
+; CHECK-OUTLINE-O0-NEXT: ldr w8, [sp, #8] ; 4-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #12] ; 4-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: subs w8, w0, w8
+; CHECK-OUTLINE-O0-NEXT: cset w8, eq
+; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #28] ; 4-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: tbz w8, #0, LBB53_1
+; CHECK-OUTLINE-O0-NEXT: b LBB53_2
+; CHECK-OUTLINE-O0-NEXT: LBB53_2: ; %atomicrmw.end
+; CHECK-OUTLINE-O0-NEXT: ldr w0, [sp, #12] ; 4-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: ldp x29, x30, [sp, #32] ; 16-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: add sp, sp, #48
+; CHECK-OUTLINE-O0-NEXT: ret
+;
+; CHECK-LSE-O1-LABEL: atomicrmw_min_i32:
+; CHECK-LSE-O1: ; %bb.0:
+; CHECK-LSE-O1-NEXT: ldsmina w1, w0, [x0]
+; CHECK-LSE-O1-NEXT: ret
+;
+; CHECK-LSE-O0-LABEL: atomicrmw_min_i32:
+; CHECK-LSE-O0: ; %bb.0:
+; CHECK-LSE-O0-NEXT: ldsmina w1, w0, [x0]
+; CHECK-LSE-O0-NEXT: ret
+ %res = atomicrmw min ptr %ptr, i32 %rhs acquire ; expectations above: a single ldsmina under the LSE prefixes; the OUTLINE O1 output matches the NOLSE O1 ldaxr/stxr loop, while OUTLINE O0 loops on a call to ___aarch64_cas4_acq
+ ret i32 %res ; the atomicrmw result is the function's return value
+}
+
+define i32 @atomicrmw_max_i32(ptr %ptr, i32 %rhs) { ; i32 atomic signed-max of %rhs into the word at %ptr, release ordering
+; CHECK-NOLSE-O1-LABEL: atomicrmw_max_i32:
+; CHECK-NOLSE-O1: ; %bb.0:
+; CHECK-NOLSE-O1-NEXT: LBB54_1: ; %atomicrmw.start
+; CHECK-NOLSE-O1-NEXT: ; =>This Inner Loop Header: Depth=1
+; CHECK-NOLSE-O1-NEXT: ldxr w8, [x0]
+; CHECK-NOLSE-O1-NEXT: cmp w8, w1
+; CHECK-NOLSE-O1-NEXT: csel w9, w8, w1, gt
+; CHECK-NOLSE-O1-NEXT: stlxr w10, w9, [x0]
+; CHECK-NOLSE-O1-NEXT: cbnz w10, LBB54_1
+; CHECK-NOLSE-O1-NEXT: ; %bb.2: ; %atomicrmw.end
+; CHECK-NOLSE-O1-NEXT: mov w0, w8
+; CHECK-NOLSE-O1-NEXT: ret
+;
+; CHECK-OUTLINE-O1-LABEL: atomicrmw_max_i32:
+; CHECK-OUTLINE-O1: ; %bb.0:
+; CHECK-OUTLINE-O1-NEXT: LBB54_1: ; %atomicrmw.start
+; CHECK-OUTLINE-O1-NEXT: ; =>This Inner Loop Header: Depth=1
+; CHECK-OUTLINE-O1-NEXT: ldxr w8, [x0]
+; CHECK-OUTLINE-O1-NEXT: cmp w8, w1
+; CHECK-OUTLINE-O1-NEXT: csel w9, w8, w1, gt
+; CHECK-OUTLINE-O1-NEXT: stlxr w10, w9, [x0]
+; CHECK-OUTLINE-O1-NEXT: cbnz w10, LBB54_1
+; CHECK-OUTLINE-O1-NEXT: ; %bb.2: ; %atomicrmw.end
+; CHECK-OUTLINE-O1-NEXT: mov w0, w8
+; CHECK-OUTLINE-O1-NEXT: ret
+;
+; CHECK-NOLSE-O0-LABEL: atomicrmw_max_i32:
+; CHECK-NOLSE-O0: ; %bb.0:
+; CHECK-NOLSE-O0-NEXT: sub sp, sp, #32
+; CHECK-NOLSE-O0-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NOLSE-O0-NEXT: str x0, [sp, #16] ; 8-byte Folded Spill
+; CHECK-NOLSE-O0-NEXT: str w1, [sp, #24] ; 4-byte Folded Spill
+; CHECK-NOLSE-O0-NEXT: ldr w8, [x0]
+; CHECK-NOLSE-O0-NEXT: str w8, [sp, #28] ; 4-byte Folded Spill
+; CHECK-NOLSE-O0-NEXT: b LBB54_1
+; CHECK-NOLSE-O0-NEXT: LBB54_1: ; %atomicrmw.start
+; CHECK-NOLSE-O0-NEXT: ; =>This Loop Header: Depth=1
+; CHECK-NOLSE-O0-NEXT: ; Child Loop BB54_2 Depth 2
+; CHECK-NOLSE-O0-NEXT: ldr w8, [sp, #28] ; 4-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT: ldr x11, [sp, #16] ; 8-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT: ldr w9, [sp, #24] ; 4-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT: subs w10, w8, w9
+; CHECK-NOLSE-O0-NEXT: csel w12, w8, w9, gt
+; CHECK-NOLSE-O0-NEXT: LBB54_2: ; %atomicrmw.start
+; CHECK-NOLSE-O0-NEXT: ; Parent Loop BB54_1 Depth=1
+; CHECK-NOLSE-O0-NEXT: ; => This Inner Loop Header: Depth=2
+; CHECK-NOLSE-O0-NEXT: ldaxr w9, [x11]
+; CHECK-NOLSE-O0-NEXT: cmp w9, w8
+; CHECK-NOLSE-O0-NEXT: b.ne LBB54_4
+; CHECK-NOLSE-O0-NEXT: ; %bb.3: ; %atomicrmw.start
+; CHECK-NOLSE-O0-NEXT: ; in Loop: Header=BB54_2 Depth=2
+; CHECK-NOLSE-O0-NEXT: stlxr w10, w12, [x11]
+; CHECK-NOLSE-O0-NEXT: cbnz w10, LBB54_2
+; CHECK-NOLSE-O0-NEXT: LBB54_4: ; %atomicrmw.start
+; CHECK-NOLSE-O0-NEXT: ; in Loop: Header=BB54_1 Depth=1
+; CHECK-NOLSE-O0-NEXT: str w9, [sp, #12] ; 4-byte Folded Spill
+; CHECK-NOLSE-O0-NEXT: subs w8, w9, w8
+; CHECK-NOLSE-O0-NEXT: cset w8, eq
+; CHECK-NOLSE-O0-NEXT: str w9, [sp, #28] ; 4-byte Folded Spill
+; CHECK-NOLSE-O0-NEXT: tbz w8, #0, LBB54_1
+; CHECK-NOLSE-O0-NEXT: b LBB54_5
+; CHECK-NOLSE-O0-NEXT: LBB54_5: ; %atomicrmw.end
+; CHECK-NOLSE-O0-NEXT: ldr w0, [sp, #12] ; 4-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT: add sp, sp, #32
+; CHECK-NOLSE-O0-NEXT: ret
+;
+; CHECK-OUTLINE-O0-LABEL: atomicrmw_max_i32:
+; CHECK-OUTLINE-O0: ; %bb.0:
+; CHECK-OUTLINE-O0-NEXT: sub sp, sp, #48
+; CHECK-OUTLINE-O0-NEXT: stp x29, x30, [sp, #32] ; 16-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: .cfi_def_cfa_offset 48
+; CHECK-OUTLINE-O0-NEXT: .cfi_offset w30, -8
+; CHECK-OUTLINE-O0-NEXT: .cfi_offset w29, -16
+; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #16] ; 8-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: str w1, [sp, #24] ; 4-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: ldr w0, [x0]
+; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #28] ; 4-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: b LBB54_1
+; CHECK-OUTLINE-O0-NEXT: LBB54_1: ; %atomicrmw.start
+; CHECK-OUTLINE-O0-NEXT: ; =>This Inner Loop Header: Depth=1
+; CHECK-OUTLINE-O0-NEXT: ldr w0, [sp, #28] ; 4-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: ldr x2, [sp, #16] ; 8-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: ldr w8, [sp, #24] ; 4-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #8] ; 4-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: subs w9, w0, w8
+; CHECK-OUTLINE-O0-NEXT: csel w1, w0, w8, gt
+; CHECK-OUTLINE-O0-NEXT: bl ___aarch64_cas4_rel
+; CHECK-OUTLINE-O0-NEXT: ldr w8, [sp, #8] ; 4-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #12] ; 4-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: subs w8, w0, w8
+; CHECK-OUTLINE-O0-NEXT: cset w8, eq
+; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #28] ; 4-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: tbz w8, #0, LBB54_1
+; CHECK-OUTLINE-O0-NEXT: b LBB54_2
+; CHECK-OUTLINE-O0-NEXT: LBB54_2: ; %atomicrmw.end
+; CHECK-OUTLINE-O0-NEXT: ldr w0, [sp, #12] ; 4-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: ldp x29, x30, [sp, #32] ; 16-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: add sp, sp, #48
+; CHECK-OUTLINE-O0-NEXT: ret
+;
+; CHECK-LSE-O1-LABEL: atomicrmw_max_i32:
+; CHECK-LSE-O1: ; %bb.0:
+; CHECK-LSE-O1-NEXT: ldsmaxl w1, w0, [x0]
+; CHECK-LSE-O1-NEXT: ret
+;
+; CHECK-LSE-O0-LABEL: atomicrmw_max_i32:
+; CHECK-LSE-O0: ; %bb.0:
+; CHECK-LSE-O0-NEXT: ldsmaxl w1, w0, [x0]
+; CHECK-LSE-O0-NEXT: ret
+ %res = atomicrmw max ptr %ptr, i32 %rhs release ; expectations above: a single ldsmaxl under the LSE prefixes; the OUTLINE O1 output matches the NOLSE O1 ldxr/stlxr loop, while OUTLINE O0 loops on a call to ___aarch64_cas4_rel
+ ret i32 %res ; the atomicrmw result is the function's return value
+}
+
+define i32 @atomicrmw_umin_i32(ptr %ptr, i32 %rhs) { ; i32 atomic unsigned-min of %rhs into the word at %ptr, seq_cst ordering
+; CHECK-NOLSE-O1-LABEL: atomicrmw_umin_i32:
+; CHECK-NOLSE-O1: ; %bb.0:
+; CHECK-NOLSE-O1-NEXT: LBB55_1: ; %atomicrmw.start
+; CHECK-NOLSE-O1-NEXT: ; =>This Inner Loop Header: Depth=1
+; CHECK-NOLSE-O1-NEXT: ldaxr w8, [x0]
+; CHECK-NOLSE-O1-NEXT: cmp w8, w1
+; CHECK-NOLSE-O1-NEXT: csel w9, w8, w1, ls
+; CHECK-NOLSE-O1-NEXT: stlxr w10, w9, [x0]
+; CHECK-NOLSE-O1-NEXT: cbnz w10, LBB55_1
+; CHECK-NOLSE-O1-NEXT: ; %bb.2: ; %atomicrmw.end
+; CHECK-NOLSE-O1-NEXT: mov w0, w8
+; CHECK-NOLSE-O1-NEXT: ret
+;
+; CHECK-OUTLINE-O1-LABEL: atomicrmw_umin_i32:
+; CHECK-OUTLINE-O1: ; %bb.0:
+; CHECK-OUTLINE-O1-NEXT: LBB55_1: ; %atomicrmw.start
+; CHECK-OUTLINE-O1-NEXT: ; =>This Inner Loop Header: Depth=1
+; CHECK-OUTLINE-O1-NEXT: ldaxr w8, [x0]
+; CHECK-OUTLINE-O1-NEXT: cmp w8, w1
+; CHECK-OUTLINE-O1-NEXT: csel w9, w8, w1, ls
+; CHECK-OUTLINE-O1-NEXT: stlxr w10, w9, [x0]
+; CHECK-OUTLINE-O1-NEXT: cbnz w10, LBB55_1
+; CHECK-OUTLINE-O1-NEXT: ; %bb.2: ; %atomicrmw.end
+; CHECK-OUTLINE-O1-NEXT: mov w0, w8
+; CHECK-OUTLINE-O1-NEXT: ret
+;
+; CHECK-NOLSE-O0-LABEL: atomicrmw_umin_i32:
+; CHECK-NOLSE-O0: ; %bb.0:
+; CHECK-NOLSE-O0-NEXT: sub sp, sp, #32
+; CHECK-NOLSE-O0-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NOLSE-O0-NEXT: str x0, [sp, #16] ; 8-byte Folded Spill
+; CHECK-NOLSE-O0-NEXT: str w1, [sp, #24] ; 4-byte Folded Spill
+; CHECK-NOLSE-O0-NEXT: ldr w8, [x0]
+; CHECK-NOLSE-O0-NEXT: str w8, [sp, #28] ; 4-byte Folded Spill
+; CHECK-NOLSE-O0-NEXT: b LBB55_1
+; CHECK-NOLSE-O0-NEXT: LBB55_1: ; %atomicrmw.start
+; CHECK-NOLSE-O0-NEXT: ; =>This Loop Header: Depth=1
+; CHECK-NOLSE-O0-NEXT: ; Child Loop BB55_2 Depth 2
+; CHECK-NOLSE-O0-NEXT: ldr w8, [sp, #28] ; 4-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT: ldr x11, [sp, #16] ; 8-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT: ldr w9, [sp, #24] ; 4-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT: subs w10, w8, w9
+; CHECK-NOLSE-O0-NEXT: csel w12, w8, w9, ls
+; CHECK-NOLSE-O0-NEXT: LBB55_2: ; %atomicrmw.start
+; CHECK-NOLSE-O0-NEXT: ; Parent Loop BB55_1 Depth=1
+; CHECK-NOLSE-O0-NEXT: ; => This Inner Loop Header: Depth=2
+; CHECK-NOLSE-O0-NEXT: ldaxr w9, [x11]
+; CHECK-NOLSE-O0-NEXT: cmp w9, w8
+; CHECK-NOLSE-O0-NEXT: b.ne LBB55_4
+; CHECK-NOLSE-O0-NEXT: ; %bb.3: ; %atomicrmw.start
+; CHECK-NOLSE-O0-NEXT: ; in Loop: Header=BB55_2 Depth=2
+; CHECK-NOLSE-O0-NEXT: stlxr w10, w12, [x11]
+; CHECK-NOLSE-O0-NEXT: cbnz w10, LBB55_2
+; CHECK-NOLSE-O0-NEXT: LBB55_4: ; %atomicrmw.start
+; CHECK-NOLSE-O0-NEXT: ; in Loop: Header=BB55_1 Depth=1
+; CHECK-NOLSE-O0-NEXT: str w9, [sp, #12] ; 4-byte Folded Spill
+; CHECK-NOLSE-O0-NEXT: subs w8, w9, w8
+; CHECK-NOLSE-O0-NEXT: cset w8, eq
+; CHECK-NOLSE-O0-NEXT: str w9, [sp, #28] ; 4-byte Folded Spill
+; CHECK-NOLSE-O0-NEXT: tbz w8, #0, LBB55_1
+; CHECK-NOLSE-O0-NEXT: b LBB55_5
+; CHECK-NOLSE-O0-NEXT: LBB55_5: ; %atomicrmw.end
+; CHECK-NOLSE-O0-NEXT: ldr w0, [sp, #12] ; 4-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT: add sp, sp, #32
+; CHECK-NOLSE-O0-NEXT: ret
+;
+; CHECK-OUTLINE-O0-LABEL: atomicrmw_umin_i32:
+; CHECK-OUTLINE-O0: ; %bb.0:
+; CHECK-OUTLINE-O0-NEXT: sub sp, sp, #48
+; CHECK-OUTLINE-O0-NEXT: stp x29, x30, [sp, #32] ; 16-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: .cfi_def_cfa_offset 48
+; CHECK-OUTLINE-O0-NEXT: .cfi_offset w30, -8
+; CHECK-OUTLINE-O0-NEXT: .cfi_offset w29, -16
+; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #16] ; 8-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: str w1, [sp, #24] ; 4-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: ldr w0, [x0]
+; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #28] ; 4-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: b LBB55_1
+; CHECK-OUTLINE-O0-NEXT: LBB55_1: ; %atomicrmw.start
+; CHECK-OUTLINE-O0-NEXT: ; =>This Inner Loop Header: Depth=1
+; CHECK-OUTLINE-O0-NEXT: ldr w0, [sp, #28] ; 4-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: ldr x2, [sp, #16] ; 8-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: ldr w8, [sp, #24] ; 4-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #8] ; 4-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: subs w9, w0, w8
+; CHECK-OUTLINE-O0-NEXT: csel w1, w0, w8, ls
+; CHECK-OUTLINE-O0-NEXT: bl ___aarch64_cas4_acq_rel
+; CHECK-OUTLINE-O0-NEXT: ldr w8, [sp, #8] ; 4-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #12] ; 4-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: subs w8, w0, w8
+; CHECK-OUTLINE-O0-NEXT: cset w8, eq
+; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #28] ; 4-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: tbz w8, #0, LBB55_1
+; CHECK-OUTLINE-O0-NEXT: b LBB55_2
+; CHECK-OUTLINE-O0-NEXT: LBB55_2: ; %atomicrmw.end
+; CHECK-OUTLINE-O0-NEXT: ldr w0, [sp, #12] ; 4-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: ldp x29, x30, [sp, #32] ; 16-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: add sp, sp, #48
+; CHECK-OUTLINE-O0-NEXT: ret
+;
+; CHECK-LSE-O1-LABEL: atomicrmw_umin_i32:
+; CHECK-LSE-O1: ; %bb.0:
+; CHECK-LSE-O1-NEXT: lduminal w1, w0, [x0]
+; CHECK-LSE-O1-NEXT: ret
+;
+; CHECK-LSE-O0-LABEL: atomicrmw_umin_i32:
+; CHECK-LSE-O0: ; %bb.0:
+; CHECK-LSE-O0-NEXT: lduminal w1, w0, [x0]
+; CHECK-LSE-O0-NEXT: ret
+ %res = atomicrmw umin ptr %ptr, i32 %rhs seq_cst ; expectations above: a single lduminal under the LSE prefixes; the OUTLINE O1 output matches the NOLSE O1 ldaxr/stlxr loop, while OUTLINE O0 loops on a call to ___aarch64_cas4_acq_rel
+ ret i32 %res ; the atomicrmw result is the function's return value
+}
+
+define i32 @atomicrmw_umax_i32(ptr %ptr, i32 %rhs) { ; i32 atomic unsigned-max of %rhs into the word at %ptr, monotonic ordering
+; CHECK-NOLSE-O1-LABEL: atomicrmw_umax_i32:
+; CHECK-NOLSE-O1: ; %bb.0:
+; CHECK-NOLSE-O1-NEXT: LBB56_1: ; %atomicrmw.start
+; CHECK-NOLSE-O1-NEXT: ; =>This Inner Loop Header: Depth=1
+; CHECK-NOLSE-O1-NEXT: ldxr w8, [x0]
+; CHECK-NOLSE-O1-NEXT: cmp w8, w1
+; CHECK-NOLSE-O1-NEXT: csel w9, w8, w1, hi
+; CHECK-NOLSE-O1-NEXT: stxr w10, w9, [x0]
+; CHECK-NOLSE-O1-NEXT: cbnz w10, LBB56_1
+; CHECK-NOLSE-O1-NEXT: ; %bb.2: ; %atomicrmw.end
+; CHECK-NOLSE-O1-NEXT: mov w0, w8
+; CHECK-NOLSE-O1-NEXT: ret
+;
+; CHECK-OUTLINE-O1-LABEL: atomicrmw_umax_i32:
+; CHECK-OUTLINE-O1: ; %bb.0:
+; CHECK-OUTLINE-O1-NEXT: LBB56_1: ; %atomicrmw.start
+; CHECK-OUTLINE-O1-NEXT: ; =>This Inner Loop Header: Depth=1
+; CHECK-OUTLINE-O1-NEXT: ldxr w8, [x0]
+; CHECK-OUTLINE-O1-NEXT: cmp w8, w1
+; CHECK-OUTLINE-O1-NEXT: csel w9, w8, w1, hi
+; CHECK-OUTLINE-O1-NEXT: stxr w10, w9, [x0]
+; CHECK-OUTLINE-O1-NEXT: cbnz w10, LBB56_1
+; CHECK-OUTLINE-O1-NEXT: ; %bb.2: ; %atomicrmw.end
+; CHECK-OUTLINE-O1-NEXT: mov w0, w8
+; CHECK-OUTLINE-O1-NEXT: ret
+;
+; CHECK-NOLSE-O0-LABEL: atomicrmw_umax_i32:
+; CHECK-NOLSE-O0: ; %bb.0:
+; CHECK-NOLSE-O0-NEXT: sub sp, sp, #32
+; CHECK-NOLSE-O0-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NOLSE-O0-NEXT: str x0, [sp, #16] ; 8-byte Folded Spill
+; CHECK-NOLSE-O0-NEXT: str w1, [sp, #24] ; 4-byte Folded Spill
+; CHECK-NOLSE-O0-NEXT: ldr w8, [x0]
+; CHECK-NOLSE-O0-NEXT: str w8, [sp, #28] ; 4-byte Folded Spill
+; CHECK-NOLSE-O0-NEXT: b LBB56_1
+; CHECK-NOLSE-O0-NEXT: LBB56_1: ; %atomicrmw.start
+; CHECK-NOLSE-O0-NEXT: ; =>This Loop Header: Depth=1
+; CHECK-NOLSE-O0-NEXT: ; Child Loop BB56_2 Depth 2
+; CHECK-NOLSE-O0-NEXT: ldr w8, [sp, #28] ; 4-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT: ldr x11, [sp, #16] ; 8-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT: ldr w9, [sp, #24] ; 4-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT: subs w10, w8, w9
+; CHECK-NOLSE-O0-NEXT: csel w12, w8, w9, hi
+; CHECK-NOLSE-O0-NEXT: LBB56_2: ; %atomicrmw.start
+; CHECK-NOLSE-O0-NEXT: ; Parent Loop BB56_1 Depth=1
+; CHECK-NOLSE-O0-NEXT: ; => This Inner Loop Header: Depth=2
+; CHECK-NOLSE-O0-NEXT: ldaxr w9, [x11]
+; CHECK-NOLSE-O0-NEXT: cmp w9, w8
+; CHECK-NOLSE-O0-NEXT: b.ne LBB56_4
+; CHECK-NOLSE-O0-NEXT: ; %bb.3: ; %atomicrmw.start
+; CHECK-NOLSE-O0-NEXT: ; in Loop: Header=BB56_2 Depth=2
+; CHECK-NOLSE-O0-NEXT: stlxr w10, w12, [x11]
+; CHECK-NOLSE-O0-NEXT: cbnz w10, LBB56_2
+; CHECK-NOLSE-O0-NEXT: LBB56_4: ; %atomicrmw.start
+; CHECK-NOLSE-O0-NEXT: ; in Loop: Header=BB56_1 Depth=1
+; CHECK-NOLSE-O0-NEXT: str w9, [sp, #12] ; 4-byte Folded Spill
+; CHECK-NOLSE-O0-NEXT: subs w8, w9, w8
+; CHECK-NOLSE-O0-NEXT: cset w8, eq
+; CHECK-NOLSE-O0-NEXT: str w9, [sp, #28] ; 4-byte Folded Spill
+; CHECK-NOLSE-O0-NEXT: tbz w8, #0, LBB56_1
+; CHECK-NOLSE-O0-NEXT: b LBB56_5
+; CHECK-NOLSE-O0-NEXT: LBB56_5: ; %atomicrmw.end
+; CHECK-NOLSE-O0-NEXT: ldr w0, [sp, #12] ; 4-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT: add sp, sp, #32
+; CHECK-NOLSE-O0-NEXT: ret
+;
+; CHECK-OUTLINE-O0-LABEL: atomicrmw_umax_i32:
+; CHECK-OUTLINE-O0: ; %bb.0:
+; CHECK-OUTLINE-O0-NEXT: sub sp, sp, #48
+; CHECK-OUTLINE-O0-NEXT: stp x29, x30, [sp, #32] ; 16-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: .cfi_def_cfa_offset 48
+; CHECK-OUTLINE-O0-NEXT: .cfi_offset w30, -8
+; CHECK-OUTLINE-O0-NEXT: .cfi_offset w29, -16
+; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #16] ; 8-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: str w1, [sp, #24] ; 4-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: ldr w0, [x0]
+; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #28] ; 4-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: b LBB56_1
+; CHECK-OUTLINE-O0-NEXT: LBB56_1: ; %atomicrmw.start
+; CHECK-OUTLINE-O0-NEXT: ; =>This Inner Loop Header: Depth=1
+; CHECK-OUTLINE-O0-NEXT: ldr w0, [sp, #28] ; 4-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: ldr x2, [sp, #16] ; 8-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: ldr w8, [sp, #24] ; 4-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #8] ; 4-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: subs w9, w0, w8
+; CHECK-OUTLINE-O0-NEXT: csel w1, w0, w8, hi
+; CHECK-OUTLINE-O0-NEXT: bl ___aarch64_cas4_relax
+; CHECK-OUTLINE-O0-NEXT: ldr w8, [sp, #8] ; 4-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #12] ; 4-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: subs w8, w0, w8
+; CHECK-OUTLINE-O0-NEXT: cset w8, eq
+; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #28] ; 4-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: tbz w8, #0, LBB56_1
+; CHECK-OUTLINE-O0-NEXT: b LBB56_2
+; CHECK-OUTLINE-O0-NEXT: LBB56_2: ; %atomicrmw.end
+; CHECK-OUTLINE-O0-NEXT: ldr w0, [sp, #12] ; 4-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: ldp x29, x30, [sp, #32] ; 16-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: add sp, sp, #48
+; CHECK-OUTLINE-O0-NEXT: ret
+;
+; CHECK-LSE-O1-LABEL: atomicrmw_umax_i32:
+; CHECK-LSE-O1: ; %bb.0:
+; CHECK-LSE-O1-NEXT: ldumax w1, w0, [x0]
+; CHECK-LSE-O1-NEXT: ret
+;
+; CHECK-LSE-O0-LABEL: atomicrmw_umax_i32:
+; CHECK-LSE-O0: ; %bb.0:
+; CHECK-LSE-O0-NEXT: ldumax w1, w0, [x0]
+; CHECK-LSE-O0-NEXT: ret
+ %res = atomicrmw umax ptr %ptr, i32 %rhs monotonic ; expectations above: a single ldumax under the LSE prefixes; the OUTLINE O1 output matches the NOLSE O1 ldxr/stxr loop, while OUTLINE O0 loops on a call to ___aarch64_cas4_relax
+ ret i32 %res ; the atomicrmw result is the function's return value
+}
+
+define i64 @atomicrmw_add_i64(ptr %ptr, i64 %rhs) { ; i64 atomic add of %rhs into the doubleword at %ptr, seq_cst ordering
+; CHECK-NOLSE-O1-LABEL: atomicrmw_add_i64:
+; CHECK-NOLSE-O1: ; %bb.0:
+; CHECK-NOLSE-O1-NEXT: LBB57_1: ; %atomicrmw.start
+; CHECK-NOLSE-O1-NEXT: ; =>This Inner Loop Header: Depth=1
+; CHECK-NOLSE-O1-NEXT: ldaxr x8, [x0]
+; CHECK-NOLSE-O1-NEXT: add x9, x8, x1
+; CHECK-NOLSE-O1-NEXT: stlxr w10, x9, [x0]
+; CHECK-NOLSE-O1-NEXT: cbnz w10, LBB57_1
+; CHECK-NOLSE-O1-NEXT: ; %bb.2: ; %atomicrmw.end
+; CHECK-NOLSE-O1-NEXT: mov x0, x8
+; CHECK-NOLSE-O1-NEXT: ret
+;
+; CHECK-OUTLINE-O1-LABEL: atomicrmw_add_i64:
+; CHECK-OUTLINE-O1: ; %bb.0:
+; CHECK-OUTLINE-O1-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
+; CHECK-OUTLINE-O1-NEXT: .cfi_def_cfa_offset 16
+; CHECK-OUTLINE-O1-NEXT: .cfi_offset w30, -8
+; CHECK-OUTLINE-O1-NEXT: .cfi_offset w29, -16
+; CHECK-OUTLINE-O1-NEXT: mov x2, x0
+; CHECK-OUTLINE-O1-NEXT: mov x0, x1
+; CHECK-OUTLINE-O1-NEXT: mov x1, x2
+; CHECK-OUTLINE-O1-NEXT: bl ___aarch64_ldadd8_acq_rel
+; CHECK-OUTLINE-O1-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
+; CHECK-OUTLINE-O1-NEXT: ret
+;
+; CHECK-NOLSE-O0-LABEL: atomicrmw_add_i64:
+; CHECK-NOLSE-O0: ; %bb.0:
+; CHECK-NOLSE-O0-NEXT: sub sp, sp, #32
+; CHECK-NOLSE-O0-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NOLSE-O0-NEXT: str x0, [sp, #8] ; 8-byte Folded Spill
+; CHECK-NOLSE-O0-NEXT: str x1, [sp, #16] ; 8-byte Folded Spill
+; CHECK-NOLSE-O0-NEXT: ldr x8, [x0]
+; CHECK-NOLSE-O0-NEXT: str x8, [sp, #24] ; 8-byte Folded Spill
+; CHECK-NOLSE-O0-NEXT: b LBB57_1
+; CHECK-NOLSE-O0-NEXT: LBB57_1: ; %atomicrmw.start
+; CHECK-NOLSE-O0-NEXT: ; =>This Loop Header: Depth=1
+; CHECK-NOLSE-O0-NEXT: ; Child Loop BB57_2 Depth 2
+; CHECK-NOLSE-O0-NEXT: ldr x8, [sp, #24] ; 8-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT: ldr x11, [sp, #8] ; 8-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT: ldr x9, [sp, #16] ; 8-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT: add x12, x8, x9
+; CHECK-NOLSE-O0-NEXT: LBB57_2: ; %atomicrmw.start
+; CHECK-NOLSE-O0-NEXT: ; Parent Loop BB57_1 Depth=1
+; CHECK-NOLSE-O0-NEXT: ; => This Inner Loop Header: Depth=2
+; CHECK-NOLSE-O0-NEXT: ldaxr x9, [x11]
+; CHECK-NOLSE-O0-NEXT: cmp x9, x8
+; CHECK-NOLSE-O0-NEXT: b.ne LBB57_4
+; CHECK-NOLSE-O0-NEXT: ; %bb.3: ; %atomicrmw.start
+; CHECK-NOLSE-O0-NEXT: ; in Loop: Header=BB57_2 Depth=2
+; CHECK-NOLSE-O0-NEXT: stlxr w10, x12, [x11]
+; CHECK-NOLSE-O0-NEXT: cbnz w10, LBB57_2
+; CHECK-NOLSE-O0-NEXT: LBB57_4: ; %atomicrmw.start
+; CHECK-NOLSE-O0-NEXT: ; in Loop: Header=BB57_1 Depth=1
+; CHECK-NOLSE-O0-NEXT: str x9, [sp] ; 8-byte Folded Spill
+; CHECK-NOLSE-O0-NEXT: subs x8, x9, x8
+; CHECK-NOLSE-O0-NEXT: cset w8, eq
+; CHECK-NOLSE-O0-NEXT: str x9, [sp, #24] ; 8-byte Folded Spill
+; CHECK-NOLSE-O0-NEXT: tbz w8, #0, LBB57_1
+; CHECK-NOLSE-O0-NEXT: b LBB57_5
+; CHECK-NOLSE-O0-NEXT: LBB57_5: ; %atomicrmw.end
+; CHECK-NOLSE-O0-NEXT: ldr x0, [sp] ; 8-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT: add sp, sp, #32
+; CHECK-NOLSE-O0-NEXT: ret
+;
+; CHECK-OUTLINE-O0-LABEL: atomicrmw_add_i64:
+; CHECK-OUTLINE-O0: ; %bb.0:
+; CHECK-OUTLINE-O0-NEXT: sub sp, sp, #32
+; CHECK-OUTLINE-O0-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: .cfi_def_cfa_offset 32
+; CHECK-OUTLINE-O0-NEXT: .cfi_offset w30, -8
+; CHECK-OUTLINE-O0-NEXT: .cfi_offset w29, -16
+; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #8] ; 8-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: mov x0, x1
+; CHECK-OUTLINE-O0-NEXT: ldr x1, [sp, #8] ; 8-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: bl ___aarch64_ldadd8_acq_rel
+; CHECK-OUTLINE-O0-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: add sp, sp, #32
+; CHECK-OUTLINE-O0-NEXT: ret
+;
+; CHECK-LSE-O1-LABEL: atomicrmw_add_i64:
+; CHECK-LSE-O1: ; %bb.0:
+; CHECK-LSE-O1-NEXT: ldaddal x1, x0, [x0]
+; CHECK-LSE-O1-NEXT: ret
+;
+; CHECK-LSE-O0-LABEL: atomicrmw_add_i64:
+; CHECK-LSE-O0: ; %bb.0:
+; CHECK-LSE-O0-NEXT: ldaddal x1, x0, [x0]
+; CHECK-LSE-O0-NEXT: ret
+ %res = atomicrmw add ptr %ptr, i64 %rhs seq_cst ; expectations above: ldaxr/stlxr loops under the NOLSE prefixes, a call to ___aarch64_ldadd8_acq_rel under the OUTLINE prefixes, a single ldaddal under the LSE prefixes
+ ret i64 %res ; the atomicrmw result is the function's return value
+}
+
+define i64 @atomicrmw_xchg_i64(ptr %ptr, i64 %rhs) {
+; CHECK-NOLSE-O1-LABEL: atomicrmw_xchg_i64:
+; CHECK-NOLSE-O1: ; %bb.0:
+; CHECK-NOLSE-O1-NEXT: LBB58_1: ; %atomicrmw.start
+; CHECK-NOLSE-O1-NEXT: ; =>This Inner Loop Header: Depth=1
+; CHECK-NOLSE-O1-NEXT: ldxr x8, [x0]
+; CHECK-NOLSE-O1-NEXT: stxr w9, x1, [x0]
+; CHECK-NOLSE-O1-NEXT: cbnz w9, LBB58_1
+; CHECK-NOLSE-O1-NEXT: ; %bb.2: ; %atomicrmw.end
+; CHECK-NOLSE-O1-NEXT: mov x0, x8
+; CHECK-NOLSE-O1-NEXT: ret
+;
+; CHECK-OUTLINE-O1-LABEL: atomicrmw_xchg_i64:
+; CHECK-OUTLINE-O1: ; %bb.0:
+; CHECK-OUTLINE-O1-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
+; CHECK-OUTLINE-O1-NEXT: .cfi_def_cfa_offset 16
+; CHECK-OUTLINE-O1-NEXT: .cfi_offset w30, -8
+; CHECK-OUTLINE-O1-NEXT: .cfi_offset w29, -16
+; CHECK-OUTLINE-O1-NEXT: mov x2, x0
+; CHECK-OUTLINE-O1-NEXT: mov x0, x1
+; CHECK-OUTLINE-O1-NEXT: mov x1, x2
+; CHECK-OUTLINE-O1-NEXT: bl ___aarch64_swp8_relax
+; CHECK-OUTLINE-O1-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
+; CHECK-OUTLINE-O1-NEXT: ret
+;
+; CHECK-NOLSE-O0-LABEL: atomicrmw_xchg_i64:
+; CHECK-NOLSE-O0: ; %bb.0:
+; CHECK-NOLSE-O0-NEXT: sub sp, sp, #32
+; CHECK-NOLSE-O0-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NOLSE-O0-NEXT: str x0, [sp, #8] ; 8-byte Folded Spill
+; CHECK-NOLSE-O0-NEXT: str x1, [sp, #16] ; 8-byte Folded Spill
+; CHECK-NOLSE-O0-NEXT: ldr x8, [x0]
+; CHECK-NOLSE-O0-NEXT: str x8, [sp, #24] ; 8-byte Folded Spill
+; CHECK-NOLSE-O0-NEXT: b LBB58_1
+; CHECK-NOLSE-O0-NEXT: LBB58_1: ; %atomicrmw.start
+; CHECK-NOLSE-O0-NEXT: ; =>This Loop Header: Depth=1
+; CHECK-NOLSE-O0-NEXT: ; Child Loop BB58_2 Depth 2
+; CHECK-NOLSE-O0-NEXT: ldr x8, [sp, #24] ; 8-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT: ldr x11, [sp, #8] ; 8-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT: ldr x12, [sp, #16] ; 8-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT: LBB58_2: ; %atomicrmw.start
+; CHECK-NOLSE-O0-NEXT: ; Parent Loop BB58_1 Depth=1
+; CHECK-NOLSE-O0-NEXT: ; => This Inner Loop Header: Depth=2
+; CHECK-NOLSE-O0-NEXT: ldaxr x9, [x11]
+; CHECK-NOLSE-O0-NEXT: cmp x9, x8
+; CHECK-NOLSE-O0-NEXT: b.ne LBB58_4
+; CHECK-NOLSE-O0-NEXT: ; %bb.3: ; %atomicrmw.start
+; CHECK-NOLSE-O0-NEXT: ; in Loop: Header=BB58_2 Depth=2
+; CHECK-NOLSE-O0-NEXT: stlxr w10, x12, [x11]
+; CHECK-NOLSE-O0-NEXT: cbnz w10, LBB58_2
+; CHECK-NOLSE-O0-NEXT: LBB58_4: ; %atomicrmw.start
+; CHECK-NOLSE-O0-NEXT: ; in Loop: Header=BB58_1 Depth=1
+; CHECK-NOLSE-O0-NEXT: str x9, [sp] ; 8-byte Folded Spill
+; CHECK-NOLSE-O0-NEXT: subs x8, x9, x8
+; CHECK-NOLSE-O0-NEXT: cset w8, eq
+; CHECK-NOLSE-O0-NEXT: str x9, [sp, #24] ; 8-byte Folded Spill
+; CHECK-NOLSE-O0-NEXT: tbz w8, #0, LBB58_1
+; CHECK-NOLSE-O0-NEXT: b LBB58_5
+; CHECK-NOLSE-O0-NEXT: LBB58_5: ; %atomicrmw.end
+; CHECK-NOLSE-O0-NEXT: ldr x0, [sp] ; 8-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT: add sp, sp, #32
+; CHECK-NOLSE-O0-NEXT: ret
+;
+; CHECK-OUTLINE-O0-LABEL: atomicrmw_xchg_i64:
+; CHECK-OUTLINE-O0: ; %bb.0:
+; CHECK-OUTLINE-O0-NEXT: sub sp, sp, #32
+; CHECK-OUTLINE-O0-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: .cfi_def_cfa_offset 32
+; CHECK-OUTLINE-O0-NEXT: .cfi_offset w30, -8
+; CHECK-OUTLINE-O0-NEXT: .cfi_offset w29, -16
+; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #8] ; 8-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: mov x0, x1
+; CHECK-OUTLINE-O0-NEXT: ldr x1, [sp, #8] ; 8-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: bl ___aarch64_swp8_relax
+; CHECK-OUTLINE-O0-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: add sp, sp, #32
+; CHECK-OUTLINE-O0-NEXT: ret
+;
+; CHECK-LSE-O1-LABEL: atomicrmw_xchg_i64:
+; CHECK-LSE-O1: ; %bb.0:
+; CHECK-LSE-O1-NEXT: swp x1, x0, [x0]
+; CHECK-LSE-O1-NEXT: ret
+;
+; CHECK-LSE-O0-LABEL: atomicrmw_xchg_i64:
+; CHECK-LSE-O0: ; %bb.0:
+; CHECK-LSE-O0-NEXT: swp x1, x0, [x0]
+; CHECK-LSE-O0-NEXT: ret
+ %res = atomicrmw xchg ptr %ptr, i64 %rhs monotonic
+ ret i64 %res
+}
+
+define i64 @atomicrmw_sub_i64(ptr %ptr, i64 %rhs) {
+; CHECK-NOLSE-O1-LABEL: atomicrmw_sub_i64:
+; CHECK-NOLSE-O1: ; %bb.0:
+; CHECK-NOLSE-O1-NEXT: LBB59_1: ; %atomicrmw.start
+; CHECK-NOLSE-O1-NEXT: ; =>This Inner Loop Header: Depth=1
+; CHECK-NOLSE-O1-NEXT: ldaxr x8, [x0]
+; CHECK-NOLSE-O1-NEXT: sub x9, x8, x1
+; CHECK-NOLSE-O1-NEXT: stxr w10, x9, [x0]
+; CHECK-NOLSE-O1-NEXT: cbnz w10, LBB59_1
+; CHECK-NOLSE-O1-NEXT: ; %bb.2: ; %atomicrmw.end
+; CHECK-NOLSE-O1-NEXT: mov x0, x8
+; CHECK-NOLSE-O1-NEXT: ret
+;
+; CHECK-OUTLINE-O1-LABEL: atomicrmw_sub_i64:
+; CHECK-OUTLINE-O1: ; %bb.0:
+; CHECK-OUTLINE-O1-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
+; CHECK-OUTLINE-O1-NEXT: .cfi_def_cfa_offset 16
+; CHECK-OUTLINE-O1-NEXT: .cfi_offset w30, -8
+; CHECK-OUTLINE-O1-NEXT: .cfi_offset w29, -16
+; CHECK-OUTLINE-O1-NEXT: mov x2, x0
+; CHECK-OUTLINE-O1-NEXT: neg x0, x1
+; CHECK-OUTLINE-O1-NEXT: mov x1, x2
+; CHECK-OUTLINE-O1-NEXT: bl ___aarch64_ldadd8_acq
+; CHECK-OUTLINE-O1-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
+; CHECK-OUTLINE-O1-NEXT: ret
+;
+; CHECK-NOLSE-O0-LABEL: atomicrmw_sub_i64:
+; CHECK-NOLSE-O0: ; %bb.0:
+; CHECK-NOLSE-O0-NEXT: sub sp, sp, #32
+; CHECK-NOLSE-O0-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NOLSE-O0-NEXT: str x0, [sp, #8] ; 8-byte Folded Spill
+; CHECK-NOLSE-O0-NEXT: str x1, [sp, #16] ; 8-byte Folded Spill
+; CHECK-NOLSE-O0-NEXT: ldr x8, [x0]
+; CHECK-NOLSE-O0-NEXT: str x8, [sp, #24] ; 8-byte Folded Spill
+; CHECK-NOLSE-O0-NEXT: b LBB59_1
+; CHECK-NOLSE-O0-NEXT: LBB59_1: ; %atomicrmw.start
+; CHECK-NOLSE-O0-NEXT: ; =>This Loop Header: Depth=1
+; CHECK-NOLSE-O0-NEXT: ; Child Loop BB59_2 Depth 2
+; CHECK-NOLSE-O0-NEXT: ldr x8, [sp, #24] ; 8-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT: ldr x11, [sp, #8] ; 8-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT: ldr x9, [sp, #16] ; 8-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT: subs x12, x8, x9
+; CHECK-NOLSE-O0-NEXT: LBB59_2: ; %atomicrmw.start
+; CHECK-NOLSE-O0-NEXT: ; Parent Loop BB59_1 Depth=1
+; CHECK-NOLSE-O0-NEXT: ; => This Inner Loop Header: Depth=2
+; CHECK-NOLSE-O0-NEXT: ldaxr x9, [x11]
+; CHECK-NOLSE-O0-NEXT: cmp x9, x8
+; CHECK-NOLSE-O0-NEXT: b.ne LBB59_4
+; CHECK-NOLSE-O0-NEXT: ; %bb.3: ; %atomicrmw.start
+; CHECK-NOLSE-O0-NEXT: ; in Loop: Header=BB59_2 Depth=2
+; CHECK-NOLSE-O0-NEXT: stlxr w10, x12, [x11]
+; CHECK-NOLSE-O0-NEXT: cbnz w10, LBB59_2
+; CHECK-NOLSE-O0-NEXT: LBB59_4: ; %atomicrmw.start
+; CHECK-NOLSE-O0-NEXT: ; in Loop: Header=BB59_1 Depth=1
+; CHECK-NOLSE-O0-NEXT: str x9, [sp] ; 8-byte Folded Spill
+; CHECK-NOLSE-O0-NEXT: subs x8, x9, x8
+; CHECK-NOLSE-O0-NEXT: cset w8, eq
+; CHECK-NOLSE-O0-NEXT: str x9, [sp, #24] ; 8-byte Folded Spill
+; CHECK-NOLSE-O0-NEXT: tbz w8, #0, LBB59_1
+; CHECK-NOLSE-O0-NEXT: b LBB59_5
+; CHECK-NOLSE-O0-NEXT: LBB59_5: ; %atomicrmw.end
+; CHECK-NOLSE-O0-NEXT: ldr x0, [sp] ; 8-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT: add sp, sp, #32
+; CHECK-NOLSE-O0-NEXT: ret
+;
+; CHECK-OUTLINE-O0-LABEL: atomicrmw_sub_i64:
+; CHECK-OUTLINE-O0: ; %bb.0:
+; CHECK-OUTLINE-O0-NEXT: sub sp, sp, #32
+; CHECK-OUTLINE-O0-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: .cfi_def_cfa_offset 32
+; CHECK-OUTLINE-O0-NEXT: .cfi_offset w30, -8
+; CHECK-OUTLINE-O0-NEXT: .cfi_offset w29, -16
+; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #8] ; 8-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: mov x9, x1
+; CHECK-OUTLINE-O0-NEXT: ldr x1, [sp, #8] ; 8-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: mov x8, xzr
+; CHECK-OUTLINE-O0-NEXT: subs x0, x8, x9
+; CHECK-OUTLINE-O0-NEXT: bl ___aarch64_ldadd8_acq
+; CHECK-OUTLINE-O0-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: add sp, sp, #32
+; CHECK-OUTLINE-O0-NEXT: ret
+;
+; CHECK-LSE-O1-LABEL: atomicrmw_sub_i64:
+; CHECK-LSE-O1: ; %bb.0:
+; CHECK-LSE-O1-NEXT: neg x8, x1
+; CHECK-LSE-O1-NEXT: ldadda x8, x0, [x0]
+; CHECK-LSE-O1-NEXT: ret
+;
+; CHECK-LSE-O0-LABEL: atomicrmw_sub_i64:
+; CHECK-LSE-O0: ; %bb.0:
+; CHECK-LSE-O0-NEXT: neg x8, x1
+; CHECK-LSE-O0-NEXT: ldadda x8, x0, [x0]
+; CHECK-LSE-O0-NEXT: ret
+ %res = atomicrmw sub ptr %ptr, i64 %rhs acquire
+ ret i64 %res
+}
+
+define i64 @atomicrmw_and_i64(ptr %ptr, i64 %rhs) {
+; CHECK-NOLSE-O1-LABEL: atomicrmw_and_i64:
+; CHECK-NOLSE-O1: ; %bb.0:
+; CHECK-NOLSE-O1-NEXT: LBB60_1: ; %atomicrmw.start
+; CHECK-NOLSE-O1-NEXT: ; =>This Inner Loop Header: Depth=1
+; CHECK-NOLSE-O1-NEXT: ldxr x8, [x0]
+; CHECK-NOLSE-O1-NEXT: and x9, x8, x1
+; CHECK-NOLSE-O1-NEXT: stlxr w10, x9, [x0]
+; CHECK-NOLSE-O1-NEXT: cbnz w10, LBB60_1
+; CHECK-NOLSE-O1-NEXT: ; %bb.2: ; %atomicrmw.end
+; CHECK-NOLSE-O1-NEXT: mov x0, x8
+; CHECK-NOLSE-O1-NEXT: ret
+;
+; CHECK-OUTLINE-O1-LABEL: atomicrmw_and_i64:
+; CHECK-OUTLINE-O1: ; %bb.0:
+; CHECK-OUTLINE-O1-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
+; CHECK-OUTLINE-O1-NEXT: .cfi_def_cfa_offset 16
+; CHECK-OUTLINE-O1-NEXT: .cfi_offset w30, -8
+; CHECK-OUTLINE-O1-NEXT: .cfi_offset w29, -16
+; CHECK-OUTLINE-O1-NEXT: mov x2, x0
+; CHECK-OUTLINE-O1-NEXT: mov x8, #-1 ; =0xffffffffffffffff
+; CHECK-OUTLINE-O1-NEXT: eor x0, x8, x1
+; CHECK-OUTLINE-O1-NEXT: mov x1, x2
+; CHECK-OUTLINE-O1-NEXT: bl ___aarch64_ldclr8_rel
+; CHECK-OUTLINE-O1-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
+; CHECK-OUTLINE-O1-NEXT: ret
+;
+; CHECK-NOLSE-O0-LABEL: atomicrmw_and_i64:
+; CHECK-NOLSE-O0: ; %bb.0:
+; CHECK-NOLSE-O0-NEXT: sub sp, sp, #32
+; CHECK-NOLSE-O0-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NOLSE-O0-NEXT: str x0, [sp, #8] ; 8-byte Folded Spill
+; CHECK-NOLSE-O0-NEXT: str x1, [sp, #16] ; 8-byte Folded Spill
+; CHECK-NOLSE-O0-NEXT: ldr x8, [x0]
+; CHECK-NOLSE-O0-NEXT: str x8, [sp, #24] ; 8-byte Folded Spill
+; CHECK-NOLSE-O0-NEXT: b LBB60_1
+; CHECK-NOLSE-O0-NEXT: LBB60_1: ; %atomicrmw.start
+; CHECK-NOLSE-O0-NEXT: ; =>This Loop Header: Depth=1
+; CHECK-NOLSE-O0-NEXT: ; Child Loop BB60_2 Depth 2
+; CHECK-NOLSE-O0-NEXT: ldr x8, [sp, #24] ; 8-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT: ldr x11, [sp, #8] ; 8-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT: ldr x9, [sp, #16] ; 8-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT: and x12, x8, x9
+; CHECK-NOLSE-O0-NEXT: LBB60_2: ; %atomicrmw.start
+; CHECK-NOLSE-O0-NEXT: ; Parent Loop BB60_1 Depth=1
+; CHECK-NOLSE-O0-NEXT: ; => This Inner Loop Header: Depth=2
+; CHECK-NOLSE-O0-NEXT: ldaxr x9, [x11]
+; CHECK-NOLSE-O0-NEXT: cmp x9, x8
+; CHECK-NOLSE-O0-NEXT: b.ne LBB60_4
+; CHECK-NOLSE-O0-NEXT: ; %bb.3: ; %atomicrmw.start
+; CHECK-NOLSE-O0-NEXT: ; in Loop: Header=BB60_2 Depth=2
+; CHECK-NOLSE-O0-NEXT: stlxr w10, x12, [x11]
+; CHECK-NOLSE-O0-NEXT: cbnz w10, LBB60_2
+; CHECK-NOLSE-O0-NEXT: LBB60_4: ; %atomicrmw.start
+; CHECK-NOLSE-O0-NEXT: ; in Loop: Header=BB60_1 Depth=1
+; CHECK-NOLSE-O0-NEXT: str x9, [sp] ; 8-byte Folded Spill
+; CHECK-NOLSE-O0-NEXT: subs x8, x9, x8
+; CHECK-NOLSE-O0-NEXT: cset w8, eq
+; CHECK-NOLSE-O0-NEXT: str x9, [sp, #24] ; 8-byte Folded Spill
+; CHECK-NOLSE-O0-NEXT: tbz w8, #0, LBB60_1
+; CHECK-NOLSE-O0-NEXT: b LBB60_5
+; CHECK-NOLSE-O0-NEXT: LBB60_5: ; %atomicrmw.end
+; CHECK-NOLSE-O0-NEXT: ldr x0, [sp] ; 8-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT: add sp, sp, #32
+; CHECK-NOLSE-O0-NEXT: ret
+;
+; CHECK-OUTLINE-O0-LABEL: atomicrmw_and_i64:
+; CHECK-OUTLINE-O0: ; %bb.0:
+; CHECK-OUTLINE-O0-NEXT: sub sp, sp, #32
+; CHECK-OUTLINE-O0-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: .cfi_def_cfa_offset 32
+; CHECK-OUTLINE-O0-NEXT: .cfi_offset w30, -8
+; CHECK-OUTLINE-O0-NEXT: .cfi_offset w29, -16
+; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #8] ; 8-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: mov x9, x1
+; CHECK-OUTLINE-O0-NEXT: ldr x1, [sp, #8] ; 8-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: mov x8, #-1 ; =0xffffffffffffffff
+; CHECK-OUTLINE-O0-NEXT: eor x0, x8, x9
+; CHECK-OUTLINE-O0-NEXT: bl ___aarch64_ldclr8_rel
+; CHECK-OUTLINE-O0-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: add sp, sp, #32
+; CHECK-OUTLINE-O0-NEXT: ret
+;
+; CHECK-LSE-O1-LABEL: atomicrmw_and_i64:
+; CHECK-LSE-O1: ; %bb.0:
+; CHECK-LSE-O1-NEXT: mvn x8, x1
+; CHECK-LSE-O1-NEXT: ldclrl x8, x0, [x0]
+; CHECK-LSE-O1-NEXT: ret
+;
+; CHECK-LSE-O0-LABEL: atomicrmw_and_i64:
+; CHECK-LSE-O0: ; %bb.0:
+; CHECK-LSE-O0-NEXT: mvn x8, x1
+; CHECK-LSE-O0-NEXT: ldclrl x8, x0, [x0]
+; CHECK-LSE-O0-NEXT: ret
+ %res = atomicrmw and ptr %ptr, i64 %rhs release
+ ret i64 %res
+}
+
+define i64 @atomicrmw_or_i64(ptr %ptr, i64 %rhs) {
+; CHECK-NOLSE-O1-LABEL: atomicrmw_or_i64:
+; CHECK-NOLSE-O1: ; %bb.0:
+; CHECK-NOLSE-O1-NEXT: LBB61_1: ; %atomicrmw.start
+; CHECK-NOLSE-O1-NEXT: ; =>This Inner Loop Header: Depth=1
+; CHECK-NOLSE-O1-NEXT: ldaxr x8, [x0]
+; CHECK-NOLSE-O1-NEXT: orr x9, x8, x1
+; CHECK-NOLSE-O1-NEXT: stlxr w10, x9, [x0]
+; CHECK-NOLSE-O1-NEXT: cbnz w10, LBB61_1
+; CHECK-NOLSE-O1-NEXT: ; %bb.2: ; %atomicrmw.end
+; CHECK-NOLSE-O1-NEXT: mov x0, x8
+; CHECK-NOLSE-O1-NEXT: ret
+;
+; CHECK-OUTLINE-O1-LABEL: atomicrmw_or_i64:
+; CHECK-OUTLINE-O1: ; %bb.0:
+; CHECK-OUTLINE-O1-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
+; CHECK-OUTLINE-O1-NEXT: .cfi_def_cfa_offset 16
+; CHECK-OUTLINE-O1-NEXT: .cfi_offset w30, -8
+; CHECK-OUTLINE-O1-NEXT: .cfi_offset w29, -16
+; CHECK-OUTLINE-O1-NEXT: mov x2, x0
+; CHECK-OUTLINE-O1-NEXT: mov x0, x1
+; CHECK-OUTLINE-O1-NEXT: mov x1, x2
+; CHECK-OUTLINE-O1-NEXT: bl ___aarch64_ldset8_acq_rel
+; CHECK-OUTLINE-O1-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
+; CHECK-OUTLINE-O1-NEXT: ret
+;
+; CHECK-NOLSE-O0-LABEL: atomicrmw_or_i64:
+; CHECK-NOLSE-O0: ; %bb.0:
+; CHECK-NOLSE-O0-NEXT: sub sp, sp, #32
+; CHECK-NOLSE-O0-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NOLSE-O0-NEXT: str x0, [sp, #8] ; 8-byte Folded Spill
+; CHECK-NOLSE-O0-NEXT: str x1, [sp, #16] ; 8-byte Folded Spill
+; CHECK-NOLSE-O0-NEXT: ldr x8, [x0]
+; CHECK-NOLSE-O0-NEXT: str x8, [sp, #24] ; 8-byte Folded Spill
+; CHECK-NOLSE-O0-NEXT: b LBB61_1
+; CHECK-NOLSE-O0-NEXT: LBB61_1: ; %atomicrmw.start
+; CHECK-NOLSE-O0-NEXT: ; =>This Loop Header: Depth=1
+; CHECK-NOLSE-O0-NEXT: ; Child Loop BB61_2 Depth 2
+; CHECK-NOLSE-O0-NEXT: ldr x8, [sp, #24] ; 8-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT: ldr x11, [sp, #8] ; 8-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT: ldr x9, [sp, #16] ; 8-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT: orr x12, x8, x9
+; CHECK-NOLSE-O0-NEXT: LBB61_2: ; %atomicrmw.start
+; CHECK-NOLSE-O0-NEXT: ; Parent Loop BB61_1 Depth=1
+; CHECK-NOLSE-O0-NEXT: ; => This Inner Loop Header: Depth=2
+; CHECK-NOLSE-O0-NEXT: ldaxr x9, [x11]
+; CHECK-NOLSE-O0-NEXT: cmp x9, x8
+; CHECK-NOLSE-O0-NEXT: b.ne LBB61_4
+; CHECK-NOLSE-O0-NEXT: ; %bb.3: ; %atomicrmw.start
+; CHECK-NOLSE-O0-NEXT: ; in Loop: Header=BB61_2 Depth=2
+; CHECK-NOLSE-O0-NEXT: stlxr w10, x12, [x11]
+; CHECK-NOLSE-O0-NEXT: cbnz w10, LBB61_2
+; CHECK-NOLSE-O0-NEXT: LBB61_4: ; %atomicrmw.start
+; CHECK-NOLSE-O0-NEXT: ; in Loop: Header=BB61_1 Depth=1
+; CHECK-NOLSE-O0-NEXT: str x9, [sp] ; 8-byte Folded Spill
+; CHECK-NOLSE-O0-NEXT: subs x8, x9, x8
+; CHECK-NOLSE-O0-NEXT: cset w8, eq
+; CHECK-NOLSE-O0-NEXT: str x9, [sp, #24] ; 8-byte Folded Spill
+; CHECK-NOLSE-O0-NEXT: tbz w8, #0, LBB61_1
+; CHECK-NOLSE-O0-NEXT: b LBB61_5
+; CHECK-NOLSE-O0-NEXT: LBB61_5: ; %atomicrmw.end
+; CHECK-NOLSE-O0-NEXT: ldr x0, [sp] ; 8-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT: add sp, sp, #32
+; CHECK-NOLSE-O0-NEXT: ret
+;
+; CHECK-OUTLINE-O0-LABEL: atomicrmw_or_i64:
+; CHECK-OUTLINE-O0: ; %bb.0:
+; CHECK-OUTLINE-O0-NEXT: sub sp, sp, #32
+; CHECK-OUTLINE-O0-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: .cfi_def_cfa_offset 32
+; CHECK-OUTLINE-O0-NEXT: .cfi_offset w30, -8
+; CHECK-OUTLINE-O0-NEXT: .cfi_offset w29, -16
+; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #8] ; 8-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: mov x0, x1
+; CHECK-OUTLINE-O0-NEXT: ldr x1, [sp, #8] ; 8-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: bl ___aarch64_ldset8_acq_rel
+; CHECK-OUTLINE-O0-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: add sp, sp, #32
+; CHECK-OUTLINE-O0-NEXT: ret
+;
+; CHECK-LSE-O1-LABEL: atomicrmw_or_i64:
+; CHECK-LSE-O1: ; %bb.0:
+; CHECK-LSE-O1-NEXT: ldsetal x1, x0, [x0]
+; CHECK-LSE-O1-NEXT: ret
+;
+; CHECK-LSE-O0-LABEL: atomicrmw_or_i64:
+; CHECK-LSE-O0: ; %bb.0:
+; CHECK-LSE-O0-NEXT: ldsetal x1, x0, [x0]
+; CHECK-LSE-O0-NEXT: ret
+ %res = atomicrmw or ptr %ptr, i64 %rhs seq_cst
+ ret i64 %res
+}
+
+define i64 @atomicrmw_xor_i64(ptr %ptr, i64 %rhs) {
+; CHECK-NOLSE-O1-LABEL: atomicrmw_xor_i64:
+; CHECK-NOLSE-O1: ; %bb.0:
+; CHECK-NOLSE-O1-NEXT: LBB62_1: ; %atomicrmw.start
+; CHECK-NOLSE-O1-NEXT: ; =>This Inner Loop Header: Depth=1
+; CHECK-NOLSE-O1-NEXT: ldxr x8, [x0]
+; CHECK-NOLSE-O1-NEXT: eor x9, x8, x1
+; CHECK-NOLSE-O1-NEXT: stxr w10, x9, [x0]
+; CHECK-NOLSE-O1-NEXT: cbnz w10, LBB62_1
+; CHECK-NOLSE-O1-NEXT: ; %bb.2: ; %atomicrmw.end
+; CHECK-NOLSE-O1-NEXT: mov x0, x8
+; CHECK-NOLSE-O1-NEXT: ret
+;
+; CHECK-OUTLINE-O1-LABEL: atomicrmw_xor_i64:
+; CHECK-OUTLINE-O1: ; %bb.0:
+; CHECK-OUTLINE-O1-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
+; CHECK-OUTLINE-O1-NEXT: .cfi_def_cfa_offset 16
+; CHECK-OUTLINE-O1-NEXT: .cfi_offset w30, -8
+; CHECK-OUTLINE-O1-NEXT: .cfi_offset w29, -16
+; CHECK-OUTLINE-O1-NEXT: mov x2, x0
+; CHECK-OUTLINE-O1-NEXT: mov x0, x1
+; CHECK-OUTLINE-O1-NEXT: mov x1, x2
+; CHECK-OUTLINE-O1-NEXT: bl ___aarch64_ldeor8_relax
+; CHECK-OUTLINE-O1-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
+; CHECK-OUTLINE-O1-NEXT: ret
+;
+; CHECK-NOLSE-O0-LABEL: atomicrmw_xor_i64:
+; CHECK-NOLSE-O0: ; %bb.0:
+; CHECK-NOLSE-O0-NEXT: sub sp, sp, #32
+; CHECK-NOLSE-O0-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NOLSE-O0-NEXT: str x0, [sp, #8] ; 8-byte Folded Spill
+; CHECK-NOLSE-O0-NEXT: str x1, [sp, #16] ; 8-byte Folded Spill
+; CHECK-NOLSE-O0-NEXT: ldr x8, [x0]
+; CHECK-NOLSE-O0-NEXT: str x8, [sp, #24] ; 8-byte Folded Spill
+; CHECK-NOLSE-O0-NEXT: b LBB62_1
+; CHECK-NOLSE-O0-NEXT: LBB62_1: ; %atomicrmw.start
+; CHECK-NOLSE-O0-NEXT: ; =>This Loop Header: Depth=1
+; CHECK-NOLSE-O0-NEXT: ; Child Loop BB62_2 Depth 2
+; CHECK-NOLSE-O0-NEXT: ldr x8, [sp, #24] ; 8-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT: ldr x11, [sp, #8] ; 8-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT: ldr x9, [sp, #16] ; 8-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT: eor x12, x8, x9
+; CHECK-NOLSE-O0-NEXT: LBB62_2: ; %atomicrmw.start
+; CHECK-NOLSE-O0-NEXT: ; Parent Loop BB62_1 Depth=1
+; CHECK-NOLSE-O0-NEXT: ; => This Inner Loop Header: Depth=2
+; CHECK-NOLSE-O0-NEXT: ldaxr x9, [x11]
+; CHECK-NOLSE-O0-NEXT: cmp x9, x8
+; CHECK-NOLSE-O0-NEXT: b.ne LBB62_4
+; CHECK-NOLSE-O0-NEXT: ; %bb.3: ; %atomicrmw.start
+; CHECK-NOLSE-O0-NEXT: ; in Loop: Header=BB62_2 Depth=2
+; CHECK-NOLSE-O0-NEXT: stlxr w10, x12, [x11]
+; CHECK-NOLSE-O0-NEXT: cbnz w10, LBB62_2
+; CHECK-NOLSE-O0-NEXT: LBB62_4: ; %atomicrmw.start
+; CHECK-NOLSE-O0-NEXT: ; in Loop: Header=BB62_1 Depth=1
+; CHECK-NOLSE-O0-NEXT: str x9, [sp] ; 8-byte Folded Spill
+; CHECK-NOLSE-O0-NEXT: subs x8, x9, x8
+; CHECK-NOLSE-O0-NEXT: cset w8, eq
+; CHECK-NOLSE-O0-NEXT: str x9, [sp, #24] ; 8-byte Folded Spill
+; CHECK-NOLSE-O0-NEXT: tbz w8, #0, LBB62_1
+; CHECK-NOLSE-O0-NEXT: b LBB62_5
+; CHECK-NOLSE-O0-NEXT: LBB62_5: ; %atomicrmw.end
+; CHECK-NOLSE-O0-NEXT: ldr x0, [sp] ; 8-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT: add sp, sp, #32
+; CHECK-NOLSE-O0-NEXT: ret
+;
+; CHECK-OUTLINE-O0-LABEL: atomicrmw_xor_i64:
+; CHECK-OUTLINE-O0: ; %bb.0:
+; CHECK-OUTLINE-O0-NEXT: sub sp, sp, #32
+; CHECK-OUTLINE-O0-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: .cfi_def_cfa_offset 32
+; CHECK-OUTLINE-O0-NEXT: .cfi_offset w30, -8
+; CHECK-OUTLINE-O0-NEXT: .cfi_offset w29, -16
+; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #8] ; 8-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: mov x0, x1
+; CHECK-OUTLINE-O0-NEXT: ldr x1, [sp, #8] ; 8-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: bl ___aarch64_ldeor8_relax
+; CHECK-OUTLINE-O0-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: add sp, sp, #32
+; CHECK-OUTLINE-O0-NEXT: ret
+;
+; CHECK-LSE-O1-LABEL: atomicrmw_xor_i64:
+; CHECK-LSE-O1: ; %bb.0:
+; CHECK-LSE-O1-NEXT: ldeor x1, x0, [x0]
+; CHECK-LSE-O1-NEXT: ret
+;
+; CHECK-LSE-O0-LABEL: atomicrmw_xor_i64:
+; CHECK-LSE-O0: ; %bb.0:
+; CHECK-LSE-O0-NEXT: ldeor x1, x0, [x0]
+; CHECK-LSE-O0-NEXT: ret
+ %res = atomicrmw xor ptr %ptr, i64 %rhs monotonic
+ ret i64 %res
+}
+
+define i64 @atomicrmw_min_i64(ptr %ptr, i64 %rhs) {
+; CHECK-NOLSE-O1-LABEL: atomicrmw_min_i64:
+; CHECK-NOLSE-O1: ; %bb.0:
+; CHECK-NOLSE-O1-NEXT: LBB63_1: ; %atomicrmw.start
+; CHECK-NOLSE-O1-NEXT: ; =>This Inner Loop Header: Depth=1
+; CHECK-NOLSE-O1-NEXT: ldaxr x8, [x0]
+; CHECK-NOLSE-O1-NEXT: cmp x8, x1
+; CHECK-NOLSE-O1-NEXT: csel x9, x8, x1, le
+; CHECK-NOLSE-O1-NEXT: stxr w10, x9, [x0]
+; CHECK-NOLSE-O1-NEXT: cbnz w10, LBB63_1
+; CHECK-NOLSE-O1-NEXT: ; %bb.2: ; %atomicrmw.end
+; CHECK-NOLSE-O1-NEXT: mov x0, x8
+; CHECK-NOLSE-O1-NEXT: ret
+;
+; CHECK-OUTLINE-O1-LABEL: atomicrmw_min_i64:
+; CHECK-OUTLINE-O1: ; %bb.0:
+; CHECK-OUTLINE-O1-NEXT: LBB63_1: ; %atomicrmw.start
+; CHECK-OUTLINE-O1-NEXT: ; =>This Inner Loop Header: Depth=1
+; CHECK-OUTLINE-O1-NEXT: ldaxr x8, [x0]
+; CHECK-OUTLINE-O1-NEXT: cmp x8, x1
+; CHECK-OUTLINE-O1-NEXT: csel x9, x8, x1, le
+; CHECK-OUTLINE-O1-NEXT: stxr w10, x9, [x0]
+; CHECK-OUTLINE-O1-NEXT: cbnz w10, LBB63_1
+; CHECK-OUTLINE-O1-NEXT: ; %bb.2: ; %atomicrmw.end
+; CHECK-OUTLINE-O1-NEXT: mov x0, x8
+; CHECK-OUTLINE-O1-NEXT: ret
+;
+; CHECK-NOLSE-O0-LABEL: atomicrmw_min_i64:
+; CHECK-NOLSE-O0: ; %bb.0:
+; CHECK-NOLSE-O0-NEXT: sub sp, sp, #32
+; CHECK-NOLSE-O0-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NOLSE-O0-NEXT: str x0, [sp, #8] ; 8-byte Folded Spill
+; CHECK-NOLSE-O0-NEXT: str x1, [sp, #16] ; 8-byte Folded Spill
+; CHECK-NOLSE-O0-NEXT: ldr x8, [x0]
+; CHECK-NOLSE-O0-NEXT: str x8, [sp, #24] ; 8-byte Folded Spill
+; CHECK-NOLSE-O0-NEXT: b LBB63_1
+; CHECK-NOLSE-O0-NEXT: LBB63_1: ; %atomicrmw.start
+; CHECK-NOLSE-O0-NEXT: ; =>This Loop Header: Depth=1
+; CHECK-NOLSE-O0-NEXT: ; Child Loop BB63_2 Depth 2
+; CHECK-NOLSE-O0-NEXT: ldr x8, [sp, #24] ; 8-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT: ldr x11, [sp, #8] ; 8-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT: ldr x9, [sp, #16] ; 8-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT: subs x10, x8, x9
+; CHECK-NOLSE-O0-NEXT: csel x12, x8, x9, le
+; CHECK-NOLSE-O0-NEXT: LBB63_2: ; %atomicrmw.start
+; CHECK-NOLSE-O0-NEXT: ; Parent Loop BB63_1 Depth=1
+; CHECK-NOLSE-O0-NEXT: ; => This Inner Loop Header: Depth=2
+; CHECK-NOLSE-O0-NEXT: ldaxr x9, [x11]
+; CHECK-NOLSE-O0-NEXT: cmp x9, x8
+; CHECK-NOLSE-O0-NEXT: b.ne LBB63_4
+; CHECK-NOLSE-O0-NEXT: ; %bb.3: ; %atomicrmw.start
+; CHECK-NOLSE-O0-NEXT: ; in Loop: Header=BB63_2 Depth=2
+; CHECK-NOLSE-O0-NEXT: stlxr w10, x12, [x11]
+; CHECK-NOLSE-O0-NEXT: cbnz w10, LBB63_2
+; CHECK-NOLSE-O0-NEXT: LBB63_4: ; %atomicrmw.start
+; CHECK-NOLSE-O0-NEXT: ; in Loop: Header=BB63_1 Depth=1
+; CHECK-NOLSE-O0-NEXT: str x9, [sp] ; 8-byte Folded Spill
+; CHECK-NOLSE-O0-NEXT: subs x8, x9, x8
+; CHECK-NOLSE-O0-NEXT: cset w8, eq
+; CHECK-NOLSE-O0-NEXT: str x9, [sp, #24] ; 8-byte Folded Spill
+; CHECK-NOLSE-O0-NEXT: tbz w8, #0, LBB63_1
+; CHECK-NOLSE-O0-NEXT: b LBB63_5
+; CHECK-NOLSE-O0-NEXT: LBB63_5: ; %atomicrmw.end
+; CHECK-NOLSE-O0-NEXT: ldr x0, [sp] ; 8-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT: add sp, sp, #32
+; CHECK-NOLSE-O0-NEXT: ret
+;
+; CHECK-OUTLINE-O0-LABEL: atomicrmw_min_i64:
+; CHECK-OUTLINE-O0: ; %bb.0:
+; CHECK-OUTLINE-O0-NEXT: sub sp, sp, #64
+; CHECK-OUTLINE-O0-NEXT: stp x29, x30, [sp, #48] ; 16-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: .cfi_def_cfa_offset 64
+; CHECK-OUTLINE-O0-NEXT: .cfi_offset w30, -8
+; CHECK-OUTLINE-O0-NEXT: .cfi_offset w29, -16
+; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #24] ; 8-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: str x1, [sp, #32] ; 8-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: ldr x0, [x0]
+; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #40] ; 8-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: b LBB63_1
+; CHECK-OUTLINE-O0-NEXT: LBB63_1: ; %atomicrmw.start
+; CHECK-OUTLINE-O0-NEXT: ; =>This Inner Loop Header: Depth=1
+; CHECK-OUTLINE-O0-NEXT: ldr x0, [sp, #40] ; 8-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: ldr x2, [sp, #24] ; 8-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: ldr x8, [sp, #32] ; 8-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #8] ; 8-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: subs x9, x0, x8
+; CHECK-OUTLINE-O0-NEXT: csel x1, x0, x8, le
+; CHECK-OUTLINE-O0-NEXT: bl ___aarch64_cas8_acq
+; CHECK-OUTLINE-O0-NEXT: ldr x8, [sp, #8] ; 8-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #16] ; 8-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: subs x8, x0, x8
+; CHECK-OUTLINE-O0-NEXT: cset w8, eq
+; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #40] ; 8-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: tbz w8, #0, LBB63_1
+; CHECK-OUTLINE-O0-NEXT: b LBB63_2
+; CHECK-OUTLINE-O0-NEXT: LBB63_2: ; %atomicrmw.end
+; CHECK-OUTLINE-O0-NEXT: ldr x0, [sp, #16] ; 8-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: ldp x29, x30, [sp, #48] ; 16-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: add sp, sp, #64
+; CHECK-OUTLINE-O0-NEXT: ret
+;
+; CHECK-LSE-O1-LABEL: atomicrmw_min_i64:
+; CHECK-LSE-O1: ; %bb.0:
+; CHECK-LSE-O1-NEXT: ldsmina x1, x0, [x0]
+; CHECK-LSE-O1-NEXT: ret
+;
+; CHECK-LSE-O0-LABEL: atomicrmw_min_i64:
+; CHECK-LSE-O0: ; %bb.0:
+; CHECK-LSE-O0-NEXT: ldsmina x1, x0, [x0]
+; CHECK-LSE-O0-NEXT: ret
+ %res = atomicrmw min ptr %ptr, i64 %rhs acquire
+ ret i64 %res
+}
+
+define i64 @atomicrmw_max_i64(ptr %ptr, i64 %rhs) {
+; CHECK-NOLSE-O1-LABEL: atomicrmw_max_i64:
+; CHECK-NOLSE-O1: ; %bb.0:
+; CHECK-NOLSE-O1-NEXT: LBB64_1: ; %atomicrmw.start
+; CHECK-NOLSE-O1-NEXT: ; =>This Inner Loop Header: Depth=1
+; CHECK-NOLSE-O1-NEXT: ldxr x8, [x0]
+; CHECK-NOLSE-O1-NEXT: cmp x8, x1
+; CHECK-NOLSE-O1-NEXT: csel x9, x8, x1, gt
+; CHECK-NOLSE-O1-NEXT: stlxr w10, x9, [x0]
+; CHECK-NOLSE-O1-NEXT: cbnz w10, LBB64_1
+; CHECK-NOLSE-O1-NEXT: ; %bb.2: ; %atomicrmw.end
+; CHECK-NOLSE-O1-NEXT: mov x0, x8
+; CHECK-NOLSE-O1-NEXT: ret
+;
+; CHECK-OUTLINE-O1-LABEL: atomicrmw_max_i64:
+; CHECK-OUTLINE-O1: ; %bb.0:
+; CHECK-OUTLINE-O1-NEXT: LBB64_1: ; %atomicrmw.start
+; CHECK-OUTLINE-O1-NEXT: ; =>This Inner Loop Header: Depth=1
+; CHECK-OUTLINE-O1-NEXT: ldxr x8, [x0]
+; CHECK-OUTLINE-O1-NEXT: cmp x8, x1
+; CHECK-OUTLINE-O1-NEXT: csel x9, x8, x1, gt
+; CHECK-OUTLINE-O1-NEXT: stlxr w10, x9, [x0]
+; CHECK-OUTLINE-O1-NEXT: cbnz w10, LBB64_1
+; CHECK-OUTLINE-O1-NEXT: ; %bb.2: ; %atomicrmw.end
+; CHECK-OUTLINE-O1-NEXT: mov x0, x8
+; CHECK-OUTLINE-O1-NEXT: ret
+;
+; CHECK-NOLSE-O0-LABEL: atomicrmw_max_i64:
+; CHECK-NOLSE-O0: ; %bb.0:
+; CHECK-NOLSE-O0-NEXT: sub sp, sp, #32
+; CHECK-NOLSE-O0-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NOLSE-O0-NEXT: str x0, [sp, #8] ; 8-byte Folded Spill
+; CHECK-NOLSE-O0-NEXT: str x1, [sp, #16] ; 8-byte Folded Spill
+; CHECK-NOLSE-O0-NEXT: ldr x8, [x0]
+; CHECK-NOLSE-O0-NEXT: str x8, [sp, #24] ; 8-byte Folded Spill
+; CHECK-NOLSE-O0-NEXT: b LBB64_1
+; CHECK-NOLSE-O0-NEXT: LBB64_1: ; %atomicrmw.start
+; CHECK-NOLSE-O0-NEXT: ; =>This Loop Header: Depth=1
+; CHECK-NOLSE-O0-NEXT: ; Child Loop BB64_2 Depth 2
+; CHECK-NOLSE-O0-NEXT: ldr x8, [sp, #24] ; 8-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT: ldr x11, [sp, #8] ; 8-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT: ldr x9, [sp, #16] ; 8-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT: subs x10, x8, x9
+; CHECK-NOLSE-O0-NEXT: csel x12, x8, x9, gt
+; CHECK-NOLSE-O0-NEXT: LBB64_2: ; %atomicrmw.start
+; CHECK-NOLSE-O0-NEXT: ; Parent Loop BB64_1 Depth=1
+; CHECK-NOLSE-O0-NEXT: ; => This Inner Loop Header: Depth=2
+; CHECK-NOLSE-O0-NEXT: ldaxr x9, [x11]
+; CHECK-NOLSE-O0-NEXT: cmp x9, x8
+; CHECK-NOLSE-O0-NEXT: b.ne LBB64_4
+; CHECK-NOLSE-O0-NEXT: ; %bb.3: ; %atomicrmw.start
+; CHECK-NOLSE-O0-NEXT: ; in Loop: Header=BB64_2 Depth=2
+; CHECK-NOLSE-O0-NEXT: stlxr w10, x12, [x11]
+; CHECK-NOLSE-O0-NEXT: cbnz w10, LBB64_2
+; CHECK-NOLSE-O0-NEXT: LBB64_4: ; %atomicrmw.start
+; CHECK-NOLSE-O0-NEXT: ; in Loop: Header=BB64_1 Depth=1
+; CHECK-NOLSE-O0-NEXT: str x9, [sp] ; 8-byte Folded Spill
+; CHECK-NOLSE-O0-NEXT: subs x8, x9, x8
+; CHECK-NOLSE-O0-NEXT: cset w8, eq
+; CHECK-NOLSE-O0-NEXT: str x9, [sp, #24] ; 8-byte Folded Spill
+; CHECK-NOLSE-O0-NEXT: tbz w8, #0, LBB64_1
+; CHECK-NOLSE-O0-NEXT: b LBB64_5
+; CHECK-NOLSE-O0-NEXT: LBB64_5: ; %atomicrmw.end
+; CHECK-NOLSE-O0-NEXT: ldr x0, [sp] ; 8-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT: add sp, sp, #32
+; CHECK-NOLSE-O0-NEXT: ret
+;
+; CHECK-OUTLINE-O0-LABEL: atomicrmw_max_i64:
+; CHECK-OUTLINE-O0: ; %bb.0:
+; CHECK-OUTLINE-O0-NEXT: sub sp, sp, #64
+; CHECK-OUTLINE-O0-NEXT: stp x29, x30, [sp, #48] ; 16-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: .cfi_def_cfa_offset 64
+; CHECK-OUTLINE-O0-NEXT: .cfi_offset w30, -8
+; CHECK-OUTLINE-O0-NEXT: .cfi_offset w29, -16
+; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #24] ; 8-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: str x1, [sp, #32] ; 8-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: ldr x0, [x0]
+; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #40] ; 8-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: b LBB64_1
+; CHECK-OUTLINE-O0-NEXT: LBB64_1: ; %atomicrmw.start
+; CHECK-OUTLINE-O0-NEXT: ; =>This Inner Loop Header: Depth=1
+; CHECK-OUTLINE-O0-NEXT: ldr x0, [sp, #40] ; 8-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: ldr x2, [sp, #24] ; 8-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: ldr x8, [sp, #32] ; 8-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #8] ; 8-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: subs x9, x0, x8
+; CHECK-OUTLINE-O0-NEXT: csel x1, x0, x8, gt
+; CHECK-OUTLINE-O0-NEXT: bl ___aarch64_cas8_rel
+; CHECK-OUTLINE-O0-NEXT: ldr x8, [sp, #8] ; 8-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #16] ; 8-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: subs x8, x0, x8
+; CHECK-OUTLINE-O0-NEXT: cset w8, eq
+; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #40] ; 8-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: tbz w8, #0, LBB64_1
+; CHECK-OUTLINE-O0-NEXT: b LBB64_2
+; CHECK-OUTLINE-O0-NEXT: LBB64_2: ; %atomicrmw.end
+; CHECK-OUTLINE-O0-NEXT: ldr x0, [sp, #16] ; 8-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: ldp x29, x30, [sp, #48] ; 16-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: add sp, sp, #64
+; CHECK-OUTLINE-O0-NEXT: ret
+;
+; CHECK-LSE-O1-LABEL: atomicrmw_max_i64:
+; CHECK-LSE-O1: ; %bb.0:
+; CHECK-LSE-O1-NEXT: ldsmaxl x1, x0, [x0]
+; CHECK-LSE-O1-NEXT: ret
+;
+; CHECK-LSE-O0-LABEL: atomicrmw_max_i64:
+; CHECK-LSE-O0: ; %bb.0:
+; CHECK-LSE-O0-NEXT: ldsmaxl x1, x0, [x0]
+; CHECK-LSE-O0-NEXT: ret
+ %res = atomicrmw max ptr %ptr, i64 %rhs release
+ ret i64 %res
+}
+
+define i64 @atomicrmw_umin_i64(ptr %ptr, i64 %rhs) {
+; CHECK-NOLSE-O1-LABEL: atomicrmw_umin_i64:
+; CHECK-NOLSE-O1: ; %bb.0:
+; CHECK-NOLSE-O1-NEXT: LBB65_1: ; %atomicrmw.start
+; CHECK-NOLSE-O1-NEXT: ; =>This Inner Loop Header: Depth=1
+; CHECK-NOLSE-O1-NEXT: ldaxr x8, [x0]
+; CHECK-NOLSE-O1-NEXT: cmp x8, x1
+; CHECK-NOLSE-O1-NEXT: csel x9, x8, x1, ls
+; CHECK-NOLSE-O1-NEXT: stlxr w10, x9, [x0]
+; CHECK-NOLSE-O1-NEXT: cbnz w10, LBB65_1
+; CHECK-NOLSE-O1-NEXT: ; %bb.2: ; %atomicrmw.end
+; CHECK-NOLSE-O1-NEXT: mov x0, x8
+; CHECK-NOLSE-O1-NEXT: ret
+;
+; CHECK-OUTLINE-O1-LABEL: atomicrmw_umin_i64:
+; CHECK-OUTLINE-O1: ; %bb.0:
+; CHECK-OUTLINE-O1-NEXT: LBB65_1: ; %atomicrmw.start
+; CHECK-OUTLINE-O1-NEXT: ; =>This Inner Loop Header: Depth=1
+; CHECK-OUTLINE-O1-NEXT: ldaxr x8, [x0]
+; CHECK-OUTLINE-O1-NEXT: cmp x8, x1
+; CHECK-OUTLINE-O1-NEXT: csel x9, x8, x1, ls
+; CHECK-OUTLINE-O1-NEXT: stlxr w10, x9, [x0]
+; CHECK-OUTLINE-O1-NEXT: cbnz w10, LBB65_1
+; CHECK-OUTLINE-O1-NEXT: ; %bb.2: ; %atomicrmw.end
+; CHECK-OUTLINE-O1-NEXT: mov x0, x8
+; CHECK-OUTLINE-O1-NEXT: ret
+;
+; CHECK-NOLSE-O0-LABEL: atomicrmw_umin_i64:
+; CHECK-NOLSE-O0: ; %bb.0:
+; CHECK-NOLSE-O0-NEXT: sub sp, sp, #32
+; CHECK-NOLSE-O0-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NOLSE-O0-NEXT: str x0, [sp, #8] ; 8-byte Folded Spill
+; CHECK-NOLSE-O0-NEXT: str x1, [sp, #16] ; 8-byte Folded Spill
+; CHECK-NOLSE-O0-NEXT: ldr x8, [x0]
+; CHECK-NOLSE-O0-NEXT: str x8, [sp, #24] ; 8-byte Folded Spill
+; CHECK-NOLSE-O0-NEXT: b LBB65_1
+; CHECK-NOLSE-O0-NEXT: LBB65_1: ; %atomicrmw.start
+; CHECK-NOLSE-O0-NEXT: ; =>This Loop Header: Depth=1
+; CHECK-NOLSE-O0-NEXT: ; Child Loop BB65_2 Depth 2
+; CHECK-NOLSE-O0-NEXT: ldr x8, [sp, #24] ; 8-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT: ldr x11, [sp, #8] ; 8-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT: ldr x9, [sp, #16] ; 8-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT: subs x10, x8, x9
+; CHECK-NOLSE-O0-NEXT: csel x12, x8, x9, ls
+; CHECK-NOLSE-O0-NEXT: LBB65_2: ; %atomicrmw.start
+; CHECK-NOLSE-O0-NEXT: ; Parent Loop BB65_1 Depth=1
+; CHECK-NOLSE-O0-NEXT: ; => This Inner Loop Header: Depth=2
+; CHECK-NOLSE-O0-NEXT: ldaxr x9, [x11]
+; CHECK-NOLSE-O0-NEXT: cmp x9, x8
+; CHECK-NOLSE-O0-NEXT: b.ne LBB65_4
+; CHECK-NOLSE-O0-NEXT: ; %bb.3: ; %atomicrmw.start
+; CHECK-NOLSE-O0-NEXT: ; in Loop: Header=BB65_2 Depth=2
+; CHECK-NOLSE-O0-NEXT: stlxr w10, x12, [x11]
+; CHECK-NOLSE-O0-NEXT: cbnz w10, LBB65_2
+; CHECK-NOLSE-O0-NEXT: LBB65_4: ; %atomicrmw.start
+; CHECK-NOLSE-O0-NEXT: ; in Loop: Header=BB65_1 Depth=1
+; CHECK-NOLSE-O0-NEXT: str x9, [sp] ; 8-byte Folded Spill
+; CHECK-NOLSE-O0-NEXT: subs x8, x9, x8
+; CHECK-NOLSE-O0-NEXT: cset w8, eq
+; CHECK-NOLSE-O0-NEXT: str x9, [sp, #24] ; 8-byte Folded Spill
+; CHECK-NOLSE-O0-NEXT: tbz w8, #0, LBB65_1
+; CHECK-NOLSE-O0-NEXT: b LBB65_5
+; CHECK-NOLSE-O0-NEXT: LBB65_5: ; %atomicrmw.end
+; CHECK-NOLSE-O0-NEXT: ldr x0, [sp] ; 8-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT: add sp, sp, #32
+; CHECK-NOLSE-O0-NEXT: ret
+;
+; CHECK-OUTLINE-O0-LABEL: atomicrmw_umin_i64:
+; CHECK-OUTLINE-O0: ; %bb.0:
+; CHECK-OUTLINE-O0-NEXT: sub sp, sp, #64
+; CHECK-OUTLINE-O0-NEXT: stp x29, x30, [sp, #48] ; 16-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: .cfi_def_cfa_offset 64
+; CHECK-OUTLINE-O0-NEXT: .cfi_offset w30, -8
+; CHECK-OUTLINE-O0-NEXT: .cfi_offset w29, -16
+; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #24] ; 8-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: str x1, [sp, #32] ; 8-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: ldr x0, [x0]
+; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #40] ; 8-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: b LBB65_1
+; CHECK-OUTLINE-O0-NEXT: LBB65_1: ; %atomicrmw.start
+; CHECK-OUTLINE-O0-NEXT: ; =>This Inner Loop Header: Depth=1
+; CHECK-OUTLINE-O0-NEXT: ldr x0, [sp, #40] ; 8-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: ldr x2, [sp, #24] ; 8-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: ldr x8, [sp, #32] ; 8-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #8] ; 8-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: subs x9, x0, x8
+; CHECK-OUTLINE-O0-NEXT: csel x1, x0, x8, ls
+; CHECK-OUTLINE-O0-NEXT: bl ___aarch64_cas8_acq_rel
+; CHECK-OUTLINE-O0-NEXT: ldr x8, [sp, #8] ; 8-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #16] ; 8-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: subs x8, x0, x8
+; CHECK-OUTLINE-O0-NEXT: cset w8, eq
+; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #40] ; 8-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: tbz w8, #0, LBB65_1
+; CHECK-OUTLINE-O0-NEXT: b LBB65_2
+; CHECK-OUTLINE-O0-NEXT: LBB65_2: ; %atomicrmw.end
+; CHECK-OUTLINE-O0-NEXT: ldr x0, [sp, #16] ; 8-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: ldp x29, x30, [sp, #48] ; 16-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: add sp, sp, #64
+; CHECK-OUTLINE-O0-NEXT: ret
+;
+; CHECK-LSE-O1-LABEL: atomicrmw_umin_i64:
+; CHECK-LSE-O1: ; %bb.0:
+; CHECK-LSE-O1-NEXT: lduminal x1, x0, [x0]
+; CHECK-LSE-O1-NEXT: ret
+;
+; CHECK-LSE-O0-LABEL: atomicrmw_umin_i64:
+; CHECK-LSE-O0: ; %bb.0:
+; CHECK-LSE-O0-NEXT: lduminal x1, x0, [x0]
+; CHECK-LSE-O0-NEXT: ret
+ %res = atomicrmw umin ptr %ptr, i64 %rhs seq_cst
+ ret i64 %res
+}
+
+define i64 @atomicrmw_umax_i64(ptr %ptr, i64 %rhs) {
+; CHECK-NOLSE-O1-LABEL: atomicrmw_umax_i64:
+; CHECK-NOLSE-O1: ; %bb.0:
+; CHECK-NOLSE-O1-NEXT: LBB66_1: ; %atomicrmw.start
+; CHECK-NOLSE-O1-NEXT: ; =>This Inner Loop Header: Depth=1
+; CHECK-NOLSE-O1-NEXT: ldxr x8, [x0]
+; CHECK-NOLSE-O1-NEXT: cmp x8, x1
+; CHECK-NOLSE-O1-NEXT: csel x9, x8, x1, hi
+; CHECK-NOLSE-O1-NEXT: stxr w10, x9, [x0]
+; CHECK-NOLSE-O1-NEXT: cbnz w10, LBB66_1
+; CHECK-NOLSE-O1-NEXT: ; %bb.2: ; %atomicrmw.end
+; CHECK-NOLSE-O1-NEXT: mov x0, x8
+; CHECK-NOLSE-O1-NEXT: ret
+;
+; CHECK-OUTLINE-O1-LABEL: atomicrmw_umax_i64:
+; CHECK-OUTLINE-O1: ; %bb.0:
+; CHECK-OUTLINE-O1-NEXT: LBB66_1: ; %atomicrmw.start
+; CHECK-OUTLINE-O1-NEXT: ; =>This Inner Loop Header: Depth=1
+; CHECK-OUTLINE-O1-NEXT: ldxr x8, [x0]
+; CHECK-OUTLINE-O1-NEXT: cmp x8, x1
+; CHECK-OUTLINE-O1-NEXT: csel x9, x8, x1, hi
+; CHECK-OUTLINE-O1-NEXT: stxr w10, x9, [x0]
+; CHECK-OUTLINE-O1-NEXT: cbnz w10, LBB66_1
+; CHECK-OUTLINE-O1-NEXT: ; %bb.2: ; %atomicrmw.end
+; CHECK-OUTLINE-O1-NEXT: mov x0, x8
+; CHECK-OUTLINE-O1-NEXT: ret
+;
+; CHECK-NOLSE-O0-LABEL: atomicrmw_umax_i64:
+; CHECK-NOLSE-O0: ; %bb.0:
+; CHECK-NOLSE-O0-NEXT: sub sp, sp, #32
+; CHECK-NOLSE-O0-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NOLSE-O0-NEXT: str x0, [sp, #8] ; 8-byte Folded Spill
+; CHECK-NOLSE-O0-NEXT: str x1, [sp, #16] ; 8-byte Folded Spill
+; CHECK-NOLSE-O0-NEXT: ldr x8, [x0]
+; CHECK-NOLSE-O0-NEXT: str x8, [sp, #24] ; 8-byte Folded Spill
+; CHECK-NOLSE-O0-NEXT: b LBB66_1
+; CHECK-NOLSE-O0-NEXT: LBB66_1: ; %atomicrmw.start
+; CHECK-NOLSE-O0-NEXT: ; =>This Loop Header: Depth=1
+; CHECK-NOLSE-O0-NEXT: ; Child Loop BB66_2 Depth 2
+; CHECK-NOLSE-O0-NEXT: ldr x8, [sp, #24] ; 8-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT: ldr x11, [sp, #8] ; 8-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT: ldr x9, [sp, #16] ; 8-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT: subs x10, x8, x9
+; CHECK-NOLSE-O0-NEXT: csel x12, x8, x9, hi
+; CHECK-NOLSE-O0-NEXT: LBB66_2: ; %atomicrmw.start
+; CHECK-NOLSE-O0-NEXT: ; Parent Loop BB66_1 Depth=1
+; CHECK-NOLSE-O0-NEXT: ; => This Inner Loop Header: Depth=2
+; CHECK-NOLSE-O0-NEXT: ldaxr x9, [x11]
+; CHECK-NOLSE-O0-NEXT: cmp x9, x8
+; CHECK-NOLSE-O0-NEXT: b.ne LBB66_4
+; CHECK-NOLSE-O0-NEXT: ; %bb.3: ; %atomicrmw.start
+; CHECK-NOLSE-O0-NEXT: ; in Loop: Header=BB66_2 Depth=2
+; CHECK-NOLSE-O0-NEXT: stlxr w10, x12, [x11]
+; CHECK-NOLSE-O0-NEXT: cbnz w10, LBB66_2
+; CHECK-NOLSE-O0-NEXT: LBB66_4: ; %atomicrmw.start
+; CHECK-NOLSE-O0-NEXT: ; in Loop: Header=BB66_1 Depth=1
+; CHECK-NOLSE-O0-NEXT: str x9, [sp] ; 8-byte Folded Spill
+; CHECK-NOLSE-O0-NEXT: subs x8, x9, x8
+; CHECK-NOLSE-O0-NEXT: cset w8, eq
+; CHECK-NOLSE-O0-NEXT: str x9, [sp, #24] ; 8-byte Folded Spill
+; CHECK-NOLSE-O0-NEXT: tbz w8, #0, LBB66_1
+; CHECK-NOLSE-O0-NEXT: b LBB66_5
+; CHECK-NOLSE-O0-NEXT: LBB66_5: ; %atomicrmw.end
+; CHECK-NOLSE-O0-NEXT: ldr x0, [sp] ; 8-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT: add sp, sp, #32
+; CHECK-NOLSE-O0-NEXT: ret
+;
+; CHECK-OUTLINE-O0-LABEL: atomicrmw_umax_i64:
+; CHECK-OUTLINE-O0: ; %bb.0:
+; CHECK-OUTLINE-O0-NEXT: sub sp, sp, #64
+; CHECK-OUTLINE-O0-NEXT: stp x29, x30, [sp, #48] ; 16-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: .cfi_def_cfa_offset 64
+; CHECK-OUTLINE-O0-NEXT: .cfi_offset w30, -8
+; CHECK-OUTLINE-O0-NEXT: .cfi_offset w29, -16
+; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #24] ; 8-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: str x1, [sp, #32] ; 8-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: ldr x0, [x0]
+; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #40] ; 8-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: b LBB66_1
+; CHECK-OUTLINE-O0-NEXT: LBB66_1: ; %atomicrmw.start
+; CHECK-OUTLINE-O0-NEXT: ; =>This Inner Loop Header: Depth=1
+; CHECK-OUTLINE-O0-NEXT: ldr x0, [sp, #40] ; 8-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: ldr x2, [sp, #24] ; 8-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: ldr x8, [sp, #32] ; 8-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #8] ; 8-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: subs x9, x0, x8
+; CHECK-OUTLINE-O0-NEXT: csel x1, x0, x8, hi
+; CHECK-OUTLINE-O0-NEXT: bl ___aarch64_cas8_relax
+; CHECK-OUTLINE-O0-NEXT: ldr x8, [sp, #8] ; 8-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #16] ; 8-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: subs x8, x0, x8
+; CHECK-OUTLINE-O0-NEXT: cset w8, eq
+; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #40] ; 8-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: tbz w8, #0, LBB66_1
+; CHECK-OUTLINE-O0-NEXT: b LBB66_2
+; CHECK-OUTLINE-O0-NEXT: LBB66_2: ; %atomicrmw.end
+; CHECK-OUTLINE-O0-NEXT: ldr x0, [sp, #16] ; 8-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: ldp x29, x30, [sp, #48] ; 16-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: add sp, sp, #64
+; CHECK-OUTLINE-O0-NEXT: ret
+;
+; CHECK-LSE-O1-LABEL: atomicrmw_umax_i64:
+; CHECK-LSE-O1: ; %bb.0:
+; CHECK-LSE-O1-NEXT: ldumax x1, x0, [x0]
+; CHECK-LSE-O1-NEXT: ret
+;
+; CHECK-LSE-O0-LABEL: atomicrmw_umax_i64:
+; CHECK-LSE-O0: ; %bb.0:
+; CHECK-LSE-O0-NEXT: ldumax x1, x0, [x0]
+; CHECK-LSE-O0-NEXT: ret
+ %res = atomicrmw umax ptr %ptr, i64 %rhs monotonic
+ ret i64 %res
+}
+
define { i8, i1 } @cmpxchg_i8(ptr %ptr, i8 %desired, i8 %new) {
; CHECK-NOLSE-O1-LABEL: cmpxchg_i8:
; CHECK-NOLSE-O1: ; %bb.0:
; CHECK-NOLSE-O1-NEXT: mov x8, x0
; CHECK-NOLSE-O1-NEXT: ; kill: def $w2 killed $w2 def $x2
-; CHECK-NOLSE-O1-NEXT: LBB47_1: ; %cmpxchg.start
+; CHECK-NOLSE-O1-NEXT: LBB67_1: ; %cmpxchg.start
; CHECK-NOLSE-O1-NEXT: ; =>This Inner Loop Header: Depth=1
; CHECK-NOLSE-O1-NEXT: ldxrb w0, [x8]
; CHECK-NOLSE-O1-NEXT: and w9, w0, #0xff
; CHECK-NOLSE-O1-NEXT: cmp w9, w1, uxtb
-; CHECK-NOLSE-O1-NEXT: b.ne LBB47_4
+; CHECK-NOLSE-O1-NEXT: b.ne LBB67_4
; CHECK-NOLSE-O1-NEXT: ; %bb.2: ; %cmpxchg.trystore
-; CHECK-NOLSE-O1-NEXT: ; in Loop: Header=BB47_1 Depth=1
+; CHECK-NOLSE-O1-NEXT: ; in Loop: Header=BB67_1 Depth=1
; CHECK-NOLSE-O1-NEXT: stxrb w9, w2, [x8]
-; CHECK-NOLSE-O1-NEXT: cbnz w9, LBB47_1
+; CHECK-NOLSE-O1-NEXT: cbnz w9, LBB67_1
; CHECK-NOLSE-O1-NEXT: ; %bb.3:
; CHECK-NOLSE-O1-NEXT: mov w1, #1 ; =0x1
; CHECK-NOLSE-O1-NEXT: ; kill: def $w0 killed $w0 killed $x0
; CHECK-NOLSE-O1-NEXT: ret
-; CHECK-NOLSE-O1-NEXT: LBB47_4: ; %cmpxchg.nostore
+; CHECK-NOLSE-O1-NEXT: LBB67_4: ; %cmpxchg.nostore
; CHECK-NOLSE-O1-NEXT: mov w1, wzr
; CHECK-NOLSE-O1-NEXT: clrex
; CHECK-NOLSE-O1-NEXT: ; kill: def $w0 killed $w0 killed $x0
@@ -4003,14 +6046,14 @@ define { i8, i1 } @cmpxchg_i8(ptr %ptr, i8 %desired, i8 %new) {
; CHECK-NOLSE-O0-LABEL: cmpxchg_i8:
; CHECK-NOLSE-O0: ; %bb.0:
; CHECK-NOLSE-O0-NEXT: mov x9, x0
-; CHECK-NOLSE-O0-NEXT: LBB47_1: ; =>This Inner Loop Header: Depth=1
+; CHECK-NOLSE-O0-NEXT: LBB67_1: ; =>This Inner Loop Header: Depth=1
; CHECK-NOLSE-O0-NEXT: ldaxrb w0, [x9]
; CHECK-NOLSE-O0-NEXT: cmp w0, w1, uxtb
-; CHECK-NOLSE-O0-NEXT: b.ne LBB47_3
-; CHECK-NOLSE-O0-NEXT: ; %bb.2: ; in Loop: Header=BB47_1 Depth=1
+; CHECK-NOLSE-O0-NEXT: b.ne LBB67_3
+; CHECK-NOLSE-O0-NEXT: ; %bb.2: ; in Loop: Header=BB67_1 Depth=1
; CHECK-NOLSE-O0-NEXT: stlxrb w8, w2, [x9]
-; CHECK-NOLSE-O0-NEXT: cbnz w8, LBB47_1
-; CHECK-NOLSE-O0-NEXT: LBB47_3:
+; CHECK-NOLSE-O0-NEXT: cbnz w8, LBB67_1
+; CHECK-NOLSE-O0-NEXT: LBB67_3:
; CHECK-NOLSE-O0-NEXT: and w8, w0, #0xff
; CHECK-NOLSE-O0-NEXT: subs w8, w8, w1, uxtb
; CHECK-NOLSE-O0-NEXT: cset w1, eq
@@ -4065,21 +6108,21 @@ define { i16, i1 } @cmpxchg_i16(ptr %ptr, i16 %desired, i16 %new) {
; CHECK-NOLSE-O1: ; %bb.0:
; CHECK-NOLSE-O1-NEXT: mov x8, x0
; CHECK-NOLSE-O1-NEXT: ; kill: def $w2 killed $w2 def $x2
-; CHECK-NOLSE-O1-NEXT: LBB48_1: ; %cmpxchg.start
+; CHECK-NOLSE-O1-NEXT: LBB68_1: ; %cmpxchg.start
; CHECK-NOLSE-O1-NEXT: ; =>This Inner Loop Header: Depth=1
; CHECK-NOLSE-O1-NEXT: ldxrh w0, [x8]
; CHECK-NOLSE-O1-NEXT: and w9, w0, #0xffff
; CHECK-NOLSE-O1-NEXT: cmp w9, w1, uxth
-; CHECK-NOLSE-O1-NEXT: b.ne LBB48_4
+; CHECK-NOLSE-O1-NEXT: b.ne LBB68_4
; CHECK-NOLSE-O1-NEXT: ; %bb.2: ; %cmpxchg.trystore
-; CHECK-NOLSE-O1-NEXT: ; in Loop: Header=BB48_1 Depth=1
+; CHECK-NOLSE-O1-NEXT: ; in Loop: Header=BB68_1 Depth=1
; CHECK-NOLSE-O1-NEXT: stxrh w9, w2, [x8]
-; CHECK-NOLSE-O1-NEXT: cbnz w9, LBB48_1
+; CHECK-NOLSE-O1-NEXT: cbnz w9, LBB68_1
; CHECK-NOLSE-O1-NEXT: ; %bb.3:
; CHECK-NOLSE-O1-NEXT: mov w1, #1 ; =0x1
; CHECK-NOLSE-O1-NEXT: ; kill: def $w0 killed $w0 killed $x0
; CHECK-NOLSE-O1-NEXT: ret
-; CHECK-NOLSE-O1-NEXT: LBB48_4: ; %cmpxchg.nostore
+; CHECK-NOLSE-O1-NEXT: LBB68_4: ; %cmpxchg.nostore
; CHECK-NOLSE-O1-NEXT: mov w1, wzr
; CHECK-NOLSE-O1-NEXT: clrex
; CHECK-NOLSE-O1-NEXT: ; kill: def $w0 killed $w0 killed $x0
@@ -4110,14 +6153,14 @@ define { i16, i1 } @cmpxchg_i16(ptr %ptr, i16 %desired, i16 %new) {
; CHECK-NOLSE-O0-LABEL: cmpxchg_i16:
; CHECK-NOLSE-O0: ; %bb.0:
; CHECK-NOLSE-O0-NEXT: mov x9, x0
-; CHECK-NOLSE-O0-NEXT: LBB48_1: ; =>This Inner Loop Header: Depth=1
+; CHECK-NOLSE-O0-NEXT: LBB68_1: ; =>This Inner Loop Header: Depth=1
; CHECK-NOLSE-O0-NEXT: ldaxrh w0, [x9]
; CHECK-NOLSE-O0-NEXT: cmp w0, w1, uxth
-; CHECK-NOLSE-O0-NEXT: b.ne LBB48_3
-; CHECK-NOLSE-O0-NEXT: ; %bb.2: ; in Loop: Header=BB48_1 Depth=1
+; CHECK-NOLSE-O0-NEXT: b.ne LBB68_3
+; CHECK-NOLSE-O0-NEXT: ; %bb.2: ; in Loop: Header=BB68_1 Depth=1
; CHECK-NOLSE-O0-NEXT: stlxrh w8, w2, [x9]
-; CHECK-NOLSE-O0-NEXT: cbnz w8, LBB48_1
-; CHECK-NOLSE-O0-NEXT: LBB48_3:
+; CHECK-NOLSE-O0-NEXT: cbnz w8, LBB68_1
+; CHECK-NOLSE-O0-NEXT: LBB68_3:
; CHECK-NOLSE-O0-NEXT: and w8, w0, #0xffff
; CHECK-NOLSE-O0-NEXT: subs w8, w8, w1, uxth
; CHECK-NOLSE-O0-NEXT: cset w1, eq
@@ -4167,6 +6210,204 @@ define { i16, i1 } @cmpxchg_i16(ptr %ptr, i16 %desired, i16 %new) {
ret { i16, i1 } %res
}
+define { i32, i1 } @cmpxchg_i32(ptr %ptr, i32 %desired, i32 %new) {
+; CHECK-NOLSE-O1-LABEL: cmpxchg_i32:
+; CHECK-NOLSE-O1: ; %bb.0:
+; CHECK-NOLSE-O1-NEXT: mov x8, x0
+; CHECK-NOLSE-O1-NEXT: LBB69_1: ; %cmpxchg.start
+; CHECK-NOLSE-O1-NEXT: ; =>This Inner Loop Header: Depth=1
+; CHECK-NOLSE-O1-NEXT: ldxr w0, [x8]
+; CHECK-NOLSE-O1-NEXT: cmp w0, w1
+; CHECK-NOLSE-O1-NEXT: b.ne LBB69_4
+; CHECK-NOLSE-O1-NEXT: ; %bb.2: ; %cmpxchg.trystore
+; CHECK-NOLSE-O1-NEXT: ; in Loop: Header=BB69_1 Depth=1
+; CHECK-NOLSE-O1-NEXT: stxr w9, w2, [x8]
+; CHECK-NOLSE-O1-NEXT: cbnz w9, LBB69_1
+; CHECK-NOLSE-O1-NEXT: ; %bb.3:
+; CHECK-NOLSE-O1-NEXT: mov w1, #1 ; =0x1
+; CHECK-NOLSE-O1-NEXT: ; kill: def $w0 killed $w0 killed $x0
+; CHECK-NOLSE-O1-NEXT: ret
+; CHECK-NOLSE-O1-NEXT: LBB69_4: ; %cmpxchg.nostore
+; CHECK-NOLSE-O1-NEXT: mov w1, wzr
+; CHECK-NOLSE-O1-NEXT: clrex
+; CHECK-NOLSE-O1-NEXT: ; kill: def $w0 killed $w0 killed $x0
+; CHECK-NOLSE-O1-NEXT: ret
+;
+; CHECK-OUTLINE-O1-LABEL: cmpxchg_i32:
+; CHECK-OUTLINE-O1: ; %bb.0:
+; CHECK-OUTLINE-O1-NEXT: stp x20, x19, [sp, #-32]! ; 16-byte Folded Spill
+; CHECK-OUTLINE-O1-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill
+; CHECK-OUTLINE-O1-NEXT: .cfi_def_cfa_offset 32
+; CHECK-OUTLINE-O1-NEXT: .cfi_offset w30, -8
+; CHECK-OUTLINE-O1-NEXT: .cfi_offset w29, -16
+; CHECK-OUTLINE-O1-NEXT: .cfi_offset w19, -24
+; CHECK-OUTLINE-O1-NEXT: .cfi_offset w20, -32
+; CHECK-OUTLINE-O1-NEXT: mov x3, x0
+; CHECK-OUTLINE-O1-NEXT: mov w19, w1
+; CHECK-OUTLINE-O1-NEXT: mov w1, w2
+; CHECK-OUTLINE-O1-NEXT: mov w0, w19
+; CHECK-OUTLINE-O1-NEXT: mov x2, x3
+; CHECK-OUTLINE-O1-NEXT: bl ___aarch64_cas4_relax
+; CHECK-OUTLINE-O1-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
+; CHECK-OUTLINE-O1-NEXT: cmp w0, w19
+; CHECK-OUTLINE-O1-NEXT: cset w1, eq
+; CHECK-OUTLINE-O1-NEXT: ldp x20, x19, [sp], #32 ; 16-byte Folded Reload
+; CHECK-OUTLINE-O1-NEXT: ret
+;
+; CHECK-NOLSE-O0-LABEL: cmpxchg_i32:
+; CHECK-NOLSE-O0: ; %bb.0:
+; CHECK-NOLSE-O0-NEXT: mov x9, x0
+; CHECK-NOLSE-O0-NEXT: LBB69_1: ; =>This Inner Loop Header: Depth=1
+; CHECK-NOLSE-O0-NEXT: ldaxr w0, [x9]
+; CHECK-NOLSE-O0-NEXT: cmp w0, w1
+; CHECK-NOLSE-O0-NEXT: b.ne LBB69_3
+; CHECK-NOLSE-O0-NEXT: ; %bb.2: ; in Loop: Header=BB69_1 Depth=1
+; CHECK-NOLSE-O0-NEXT: stlxr w8, w2, [x9]
+; CHECK-NOLSE-O0-NEXT: cbnz w8, LBB69_1
+; CHECK-NOLSE-O0-NEXT: LBB69_3:
+; CHECK-NOLSE-O0-NEXT: subs w8, w0, w1
+; CHECK-NOLSE-O0-NEXT: cset w1, eq
+; CHECK-NOLSE-O0-NEXT: ret
+;
+; CHECK-OUTLINE-O0-LABEL: cmpxchg_i32:
+; CHECK-OUTLINE-O0: ; %bb.0:
+; CHECK-OUTLINE-O0-NEXT: sub sp, sp, #32
+; CHECK-OUTLINE-O0-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: .cfi_def_cfa_offset 32
+; CHECK-OUTLINE-O0-NEXT: .cfi_offset w30, -8
+; CHECK-OUTLINE-O0-NEXT: .cfi_offset w29, -16
+; CHECK-OUTLINE-O0-NEXT: str x0, [sp] ; 8-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: mov w0, w1
+; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #12] ; 4-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: mov w1, w2
+; CHECK-OUTLINE-O0-NEXT: ldr x2, [sp] ; 8-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: bl ___aarch64_cas4_relax
+; CHECK-OUTLINE-O0-NEXT: ldr w1, [sp, #12] ; 4-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: subs w8, w0, w1
+; CHECK-OUTLINE-O0-NEXT: cset w1, eq
+; CHECK-OUTLINE-O0-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: add sp, sp, #32
+; CHECK-OUTLINE-O0-NEXT: ret
+;
+; CHECK-LSE-O1-LABEL: cmpxchg_i32:
+; CHECK-LSE-O1: ; %bb.0:
+; CHECK-LSE-O1-NEXT: mov x8, x1
+; CHECK-LSE-O1-NEXT: cas w8, w2, [x0]
+; CHECK-LSE-O1-NEXT: cmp w8, w1
+; CHECK-LSE-O1-NEXT: cset w1, eq
+; CHECK-LSE-O1-NEXT: mov x0, x8
+; CHECK-LSE-O1-NEXT: ret
+;
+; CHECK-LSE-O0-LABEL: cmpxchg_i32:
+; CHECK-LSE-O0: ; %bb.0:
+; CHECK-LSE-O0-NEXT: mov x8, x0
+; CHECK-LSE-O0-NEXT: mov x0, x1
+; CHECK-LSE-O0-NEXT: cas w0, w2, [x8]
+; CHECK-LSE-O0-NEXT: subs w8, w0, w1
+; CHECK-LSE-O0-NEXT: cset w1, eq
+; CHECK-LSE-O0-NEXT: ret
+ %res = cmpxchg ptr %ptr, i32 %desired, i32 %new monotonic monotonic
+ ret { i32, i1 } %res
+}
+
+define { i64, i1 } @cmpxchg_i64(ptr %ptr, i64 %desired, i64 %new) {
+; CHECK-NOLSE-O1-LABEL: cmpxchg_i64:
+; CHECK-NOLSE-O1: ; %bb.0:
+; CHECK-NOLSE-O1-NEXT: mov x8, x0
+; CHECK-NOLSE-O1-NEXT: LBB70_1: ; %cmpxchg.start
+; CHECK-NOLSE-O1-NEXT: ; =>This Inner Loop Header: Depth=1
+; CHECK-NOLSE-O1-NEXT: ldxr x0, [x8]
+; CHECK-NOLSE-O1-NEXT: cmp x0, x1
+; CHECK-NOLSE-O1-NEXT: b.ne LBB70_4
+; CHECK-NOLSE-O1-NEXT: ; %bb.2: ; %cmpxchg.trystore
+; CHECK-NOLSE-O1-NEXT: ; in Loop: Header=BB70_1 Depth=1
+; CHECK-NOLSE-O1-NEXT: stxr w9, x2, [x8]
+; CHECK-NOLSE-O1-NEXT: cbnz w9, LBB70_1
+; CHECK-NOLSE-O1-NEXT: ; %bb.3:
+; CHECK-NOLSE-O1-NEXT: mov w1, #1 ; =0x1
+; CHECK-NOLSE-O1-NEXT: ret
+; CHECK-NOLSE-O1-NEXT: LBB70_4: ; %cmpxchg.nostore
+; CHECK-NOLSE-O1-NEXT: mov w1, wzr
+; CHECK-NOLSE-O1-NEXT: clrex
+; CHECK-NOLSE-O1-NEXT: ret
+;
+; CHECK-OUTLINE-O1-LABEL: cmpxchg_i64:
+; CHECK-OUTLINE-O1: ; %bb.0:
+; CHECK-OUTLINE-O1-NEXT: stp x20, x19, [sp, #-32]! ; 16-byte Folded Spill
+; CHECK-OUTLINE-O1-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill
+; CHECK-OUTLINE-O1-NEXT: .cfi_def_cfa_offset 32
+; CHECK-OUTLINE-O1-NEXT: .cfi_offset w30, -8
+; CHECK-OUTLINE-O1-NEXT: .cfi_offset w29, -16
+; CHECK-OUTLINE-O1-NEXT: .cfi_offset w19, -24
+; CHECK-OUTLINE-O1-NEXT: .cfi_offset w20, -32
+; CHECK-OUTLINE-O1-NEXT: mov x3, x0
+; CHECK-OUTLINE-O1-NEXT: mov x19, x1
+; CHECK-OUTLINE-O1-NEXT: mov x1, x2
+; CHECK-OUTLINE-O1-NEXT: mov x0, x19
+; CHECK-OUTLINE-O1-NEXT: mov x2, x3
+; CHECK-OUTLINE-O1-NEXT: bl ___aarch64_cas8_relax
+; CHECK-OUTLINE-O1-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
+; CHECK-OUTLINE-O1-NEXT: cmp x0, x19
+; CHECK-OUTLINE-O1-NEXT: cset w1, eq
+; CHECK-OUTLINE-O1-NEXT: ldp x20, x19, [sp], #32 ; 16-byte Folded Reload
+; CHECK-OUTLINE-O1-NEXT: ret
+;
+; CHECK-NOLSE-O0-LABEL: cmpxchg_i64:
+; CHECK-NOLSE-O0: ; %bb.0:
+; CHECK-NOLSE-O0-NEXT: mov x9, x0
+; CHECK-NOLSE-O0-NEXT: LBB70_1: ; =>This Inner Loop Header: Depth=1
+; CHECK-NOLSE-O0-NEXT: ldaxr x0, [x9]
+; CHECK-NOLSE-O0-NEXT: cmp x0, x1
+; CHECK-NOLSE-O0-NEXT: b.ne LBB70_3
+; CHECK-NOLSE-O0-NEXT: ; %bb.2: ; in Loop: Header=BB70_1 Depth=1
+; CHECK-NOLSE-O0-NEXT: stlxr w8, x2, [x9]
+; CHECK-NOLSE-O0-NEXT: cbnz w8, LBB70_1
+; CHECK-NOLSE-O0-NEXT: LBB70_3:
+; CHECK-NOLSE-O0-NEXT: subs x8, x0, x1
+; CHECK-NOLSE-O0-NEXT: cset w1, eq
+; CHECK-NOLSE-O0-NEXT: ret
+;
+; CHECK-OUTLINE-O0-LABEL: cmpxchg_i64:
+; CHECK-OUTLINE-O0: ; %bb.0:
+; CHECK-OUTLINE-O0-NEXT: sub sp, sp, #32
+; CHECK-OUTLINE-O0-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: .cfi_def_cfa_offset 32
+; CHECK-OUTLINE-O0-NEXT: .cfi_offset w30, -8
+; CHECK-OUTLINE-O0-NEXT: .cfi_offset w29, -16
+; CHECK-OUTLINE-O0-NEXT: str x0, [sp] ; 8-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: mov x0, x1
+; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #8] ; 8-byte Folded Spill
+; CHECK-OUTLINE-O0-NEXT: mov x1, x2
+; CHECK-OUTLINE-O0-NEXT: ldr x2, [sp] ; 8-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: bl ___aarch64_cas8_relax
+; CHECK-OUTLINE-O0-NEXT: ldr x1, [sp, #8] ; 8-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: subs x8, x0, x1
+; CHECK-OUTLINE-O0-NEXT: cset w1, eq
+; CHECK-OUTLINE-O0-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
+; CHECK-OUTLINE-O0-NEXT: add sp, sp, #32
+; CHECK-OUTLINE-O0-NEXT: ret
+;
+; CHECK-LSE-O1-LABEL: cmpxchg_i64:
+; CHECK-LSE-O1: ; %bb.0:
+; CHECK-LSE-O1-NEXT: mov x8, x1
+; CHECK-LSE-O1-NEXT: cas x8, x2, [x0]
+; CHECK-LSE-O1-NEXT: cmp x8, x1
+; CHECK-LSE-O1-NEXT: cset w1, eq
+; CHECK-LSE-O1-NEXT: mov x0, x8
+; CHECK-LSE-O1-NEXT: ret
+;
+; CHECK-LSE-O0-LABEL: cmpxchg_i64:
+; CHECK-LSE-O0: ; %bb.0:
+; CHECK-LSE-O0-NEXT: mov x8, x0
+; CHECK-LSE-O0-NEXT: mov x0, x1
+; CHECK-LSE-O0-NEXT: cas x0, x2, [x8]
+; CHECK-LSE-O0-NEXT: subs x8, x0, x1
+; CHECK-LSE-O0-NEXT: cset w1, eq
+; CHECK-LSE-O0-NEXT: ret
+ %res = cmpxchg ptr %ptr, i64 %desired, i64 %new monotonic monotonic
+ ret { i64, i1 } %res
+}
+
define internal double @bitcast_to_double(ptr %ptr) {
; CHECK-NOLSE-LABEL: bitcast_to_double:
; CHECK-NOLSE: ; %bb.0:
>From 904204a25c2e29e6379428fa20db96abd590d6c6 Mon Sep 17 00:00:00 2001
From: Thomas Preud'homme <thomas.preudhomme at arm.com>
Date: Fri, 15 Dec 2023 13:56:28 +0000
Subject: [PATCH 6/6] Do not duplicate explanation for min/max outlining
---
llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp | 8 ++------
1 file changed, 2 insertions(+), 6 deletions(-)
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 8a0ff402336e14..1c1db11eda5150 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -778,12 +778,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
UseOutlineAtomics))
.clampScalar(0, s32, s64);
- // [U]Min/[U]Max RWM atomics are used in __sync_fetch_ libcalls so far.
- // Don't outline them unless
- // (1) high level <atomic> support approved:
- // http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2020/p0493r1.pdf
- // (2) low level libgcc and compiler-rt support implemented by:
- // min/max outline atomics helpers
+ // Do not outline these atomic operations, as per the comment in
+ // AArch64ISelLowering.cpp's shouldExpandAtomicRMWInIR().
getActionDefinitionsBuilder(
{G_ATOMICRMW_MIN, G_ATOMICRMW_MAX, G_ATOMICRMW_UMIN, G_ATOMICRMW_UMAX})
.clampScalar(0, s32, s64)
More information about the llvm-commits
mailing list