[llvm] 4d7df43 - [AArch64] Out-of-line atomics (-moutline-atomics) implementation.

Pavel Iliin via llvm-commits llvm-commits at lists.llvm.org
Fri Nov 20 05:30:33 PST 2020


Author: Pavel Iliin
Date: 2020-11-20T13:30:12Z
New Revision: 4d7df43ffdb460dddb2877a886f75f45c3fee188

URL: https://github.com/llvm/llvm-project/commit/4d7df43ffdb460dddb2877a886f75f45c3fee188
DIFF: https://github.com/llvm/llvm-project/commit/4d7df43ffdb460dddb2877a886f75f45c3fee188.diff

LOG: [AArch64] Out-of-line atomics (-moutline-atomics) implementation.

This patch implements out of line atomics for LSE deployment
mechanism. Details how it works can be found in llvm/docs/Atomics.rst
Options -moutline-atomics and -mno-outline-atomics to enable and disable it
were added to clang driver. This is clang and llvm part of out-of-line atomics
interface, library part is already supported by libgcc. Compiler-rt
support is provided in separate patch.

Differential Revision: https://reviews.llvm.org/D91157

Added: 
    

Modified: 
    clang/include/clang/Basic/DiagnosticDriverKinds.td
    clang/include/clang/Driver/Options.td
    clang/lib/Driver/ToolChains/Clang.cpp
    llvm/docs/Atomics.rst
    llvm/include/llvm/CodeGen/RuntimeLibcalls.h
    llvm/include/llvm/IR/RuntimeLibcalls.def
    llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
    llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
    llvm/lib/CodeGen/TargetLoweringBase.cpp
    llvm/lib/Target/AArch64/AArch64.td
    llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
    llvm/lib/Target/AArch64/AArch64Subtarget.h
    llvm/test/CodeGen/AArch64/arm64-atomic-128.ll
    llvm/test/CodeGen/AArch64/arm64-atomic.ll
    llvm/test/CodeGen/AArch64/arm64_32-atomics.ll
    llvm/test/CodeGen/AArch64/atomic-ops-lse.ll
    llvm/test/CodeGen/AArch64/atomic-ops-not-barriers.ll
    llvm/test/CodeGen/AArch64/atomic-ops.ll
    llvm/test/CodeGen/AArch64/cmpxchg-O0.ll
    llvm/test/CodeGen/AArch64/cmpxchg-idioms.ll
    llvm/test/CodeGen/AArch64/cmpxchg-lse-even-regs.ll
    llvm/test/Transforms/AtomicExpand/AArch64/expand-atomicrmw-xchg-fp.ll

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td
index 43ee4968dafd..781fcfc6deee 100644
--- a/clang/include/clang/Basic/DiagnosticDriverKinds.td
+++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td
@@ -491,6 +491,10 @@ def warn_drv_moutline_unsupported_opt : Warning<
   "The '%0' architecture does not support -moutline; flag ignored">,
   InGroup<OptionIgnored>;
 
+def warn_drv_moutline_atomics_unsupported_opt : Warning<
+  "The '%0' architecture does not support -moutline-atomics; flag ignored">,
+  InGroup<OptionIgnored>;
+
 def warn_drv_darwin_sdk_invalid_settings : Warning<
   "SDK settings were ignored as 'SDKSettings.json' could not be parsed">,
   InGroup<DiagGroup<"darwin-sdk-settings">>;

diff  --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index d4aa4aa2f953..b1aaa2f8af9c 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -2664,6 +2664,10 @@ def msmall_data_threshold_EQ : Joined <["-"], "msmall-data-threshold=">,
   Group<m_Group>, Alias<G>;
 def msoft_float : Flag<["-"], "msoft-float">, Group<m_Group>, Flags<[CC1Option]>,
   HelpText<"Use software floating point">;
+def moutline_atomics : Flag<["-"], "moutline-atomics">, Group<f_clang_Group>, Flags<[CC1Option]>,
+  HelpText<"Generate local calls to out-of-line atomic operations">;
+def mno_outline_atomics : Flag<["-"], "mno-outline-atomics">, Group<f_clang_Group>, Flags<[CC1Option]>,
+  HelpText<"Don't generate local calls to out-of-line atomic operations">;
 def mno_implicit_float : Flag<["-"], "mno-implicit-float">, Group<m_Group>,
   HelpText<"Don't generate implicit floating point instructions">;
 def mimplicit_float : Flag<["-"], "mimplicit-float">, Group<m_Group>;

diff  --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index ad242d24e5c9..174701b7261e 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -6367,6 +6367,23 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
     }
   }
 
+  if (Arg *A = Args.getLastArg(options::OPT_moutline_atomics,
+                               options::OPT_mno_outline_atomics)) {
+    if (A->getOption().matches(options::OPT_moutline_atomics)) {
+      // Option -moutline-atomics supported for AArch64 target only.
+      if (!Triple.isAArch64()) {
+        D.Diag(diag::warn_drv_moutline_atomics_unsupported_opt)
+            << Triple.getArchName();
+      } else {
+        CmdArgs.push_back("-target-feature");
+        CmdArgs.push_back("+outline-atomics");
+      }
+    } else {
+      CmdArgs.push_back("-target-feature");
+      CmdArgs.push_back("-outline-atomics");
+    }
+  }
+
   if (Args.hasFlag(options::OPT_faddrsig, options::OPT_fno_addrsig,
                    (TC.getTriple().isOSBinFormatELF() ||
                     TC.getTriple().isOSBinFormatCOFF()) &&

diff  --git a/llvm/docs/Atomics.rst b/llvm/docs/Atomics.rst
index 7d57740ded7c..d091bf5e9cf7 100644
--- a/llvm/docs/Atomics.rst
+++ b/llvm/docs/Atomics.rst
@@ -621,3 +621,23 @@ fence on either side of a normal load or store.)
 There's also, somewhat separately, the possibility to lower ``ATOMIC_FENCE`` to
 ``__sync_synchronize()``. This may happen or not happen independent of all the
 above, controlled purely by ``setOperationAction(ISD::ATOMIC_FENCE, ...)``.
+
+On AArch64, a variant of the __sync_* routines is used which contain the memory
+order as part of the function name. These routines may determine at runtime
+whether the single-instruction atomic operations which were introduced as part
+of AArch64 Large System Extensions "LSE" instruction set are available, or if
+it needs to fall back to an LL/SC loop. The following helper functions are
+implemented in both ``compiler-rt`` and ``libgcc`` libraries
+(``N`` is one of 1, 2, 4, 8, and ``M`` is one of 1, 2, 4, 8 and 16, and
+``ORDER`` is one of 'relax', 'acq', 'rel', 'acq_rel')::
+
+  iM __aarch64_casM_ORDER(iM expected, iM desired, iM *ptr)
+  iN __aarch64_swpN_ORDER(iN val, iN *ptr)
+  iN __aarch64_ldaddN_ORDER(iN val, iN *ptr)
+  iN __aarch64_ldclrN_ORDER(iN val, iN *ptr)
+  iN __aarch64_ldeorN_ORDER(iN val, iN *ptr)
+  iN __aarch64_ldsetN_ORDER(iN val, iN *ptr)
+
+Please note, if LSE instruction set is specified for AArch64 target then
+out-of-line atomics calls are not generated and single-instruction atomic
+operations are used in place.

diff  --git a/llvm/include/llvm/CodeGen/RuntimeLibcalls.h b/llvm/include/llvm/CodeGen/RuntimeLibcalls.h
index f71f39e5bf03..86e24cab76f6 100644
--- a/llvm/include/llvm/CodeGen/RuntimeLibcalls.h
+++ b/llvm/include/llvm/CodeGen/RuntimeLibcalls.h
@@ -15,6 +15,7 @@
 #define LLVM_CODEGEN_RUNTIMELIBCALLS_H
 
 #include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/Support/AtomicOrdering.h"
 
 namespace llvm {
 namespace RTLIB {
@@ -60,6 +61,10 @@ namespace RTLIB {
   /// UNKNOWN_LIBCALL if there is none.
   Libcall getSYNC(unsigned Opc, MVT VT);
 
+  /// Return the outline atomics value for the given opcode, atomic ordering
+  /// and type, or UNKNOWN_LIBCALL if there is none.
+  Libcall getOUTLINE_ATOMIC(unsigned Opc, AtomicOrdering Order, MVT VT);
+
   /// getMEMCPY_ELEMENT_UNORDERED_ATOMIC - Return
   /// MEMCPY_ELEMENT_UNORDERED_ATOMIC_* value for the given element size or
   /// UNKNOW_LIBCALL if there is none.

diff  --git a/llvm/include/llvm/IR/RuntimeLibcalls.def b/llvm/include/llvm/IR/RuntimeLibcalls.def
index 301bb0494476..3329fa85ef3d 100644
--- a/llvm/include/llvm/IR/RuntimeLibcalls.def
+++ b/llvm/include/llvm/IR/RuntimeLibcalls.def
@@ -545,6 +545,23 @@ HANDLE_LIBCALL(ATOMIC_FETCH_NAND_4, "__atomic_fetch_nand_4")
 HANDLE_LIBCALL(ATOMIC_FETCH_NAND_8, "__atomic_fetch_nand_8")
 HANDLE_LIBCALL(ATOMIC_FETCH_NAND_16, "__atomic_fetch_nand_16")
 
+// Out-of-line atomics libcalls
+#define HLCALLS(A, N)                                                          \
+  HANDLE_LIBCALL(A##N##_RELAX, nullptr)                                        \
+  HANDLE_LIBCALL(A##N##_ACQ, nullptr)                                          \
+  HANDLE_LIBCALL(A##N##_REL, nullptr)                                          \
+  HANDLE_LIBCALL(A##N##_ACQ_REL, nullptr)
+#define HLCALL5(A)                                                             \
+  HLCALLS(A, 1) HLCALLS(A, 2) HLCALLS(A, 4) HLCALLS(A, 8) HLCALLS(A, 16)
+HLCALL5(OUTLINE_ATOMIC_CAS)
+HLCALL5(OUTLINE_ATOMIC_SWP)
+HLCALL5(OUTLINE_ATOMIC_LDADD)
+HLCALL5(OUTLINE_ATOMIC_LDSET)
+HLCALL5(OUTLINE_ATOMIC_LDCLR)
+HLCALL5(OUTLINE_ATOMIC_LDEOR)
+#undef HLCALLS
+#undef HLCALL5
+
 // Stack Protector Fail
 HANDLE_LIBCALL(STACKPROTECTOR_CHECK_FAIL, "__stack_chk_fail")
 

diff  --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 74bfabf2a99e..31a992810ebc 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -4052,12 +4052,23 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
   case ISD::ATOMIC_LOAD_UMAX:
   case ISD::ATOMIC_CMP_SWAP: {
     MVT VT = cast<AtomicSDNode>(Node)->getMemoryVT().getSimpleVT();
-    RTLIB::Libcall LC = RTLIB::getSYNC(Opc, VT);
-    assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected atomic op or value type!");
-
+    AtomicOrdering Order = cast<AtomicSDNode>(Node)->getOrdering();
+    RTLIB::Libcall LC = RTLIB::getOUTLINE_ATOMIC(Opc, Order, VT);
     EVT RetVT = Node->getValueType(0);
-    SmallVector<SDValue, 4> Ops(Node->op_begin() + 1, Node->op_end());
     TargetLowering::MakeLibCallOptions CallOptions;
+    SmallVector<SDValue, 4> Ops;
+    if (TLI.getLibcallName(LC)) {
+      // If outline atomic available, prepare its arguments and expand.
+      Ops.append(Node->op_begin() + 2, Node->op_end());
+      Ops.push_back(Node->getOperand(1));
+
+    } else {
+      LC = RTLIB::getSYNC(Opc, VT);
+      assert(LC != RTLIB::UNKNOWN_LIBCALL &&
+             "Unexpected atomic op or value type!");
+      // Arguments for expansion to sync libcall
+      Ops.append(Node->op_begin() + 1, Node->op_end());
+    }
     std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, RetVT,
                                                       Ops, CallOptions,
                                                       SDLoc(Node),

diff  --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 2b6471e0e820..414e7692df2a 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -2169,12 +2169,22 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
 std::pair <SDValue, SDValue> DAGTypeLegalizer::ExpandAtomic(SDNode *Node) {
   unsigned Opc = Node->getOpcode();
   MVT VT = cast<AtomicSDNode>(Node)->getMemoryVT().getSimpleVT();
-  RTLIB::Libcall LC = RTLIB::getSYNC(Opc, VT);
-  assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected atomic op or value type!");
-
+  AtomicOrdering order = cast<AtomicSDNode>(Node)->getOrdering();
+  // Lower to outline atomic libcall if outline atomics enabled,
+  // or to sync libcall otherwise
+  RTLIB::Libcall LC = RTLIB::getOUTLINE_ATOMIC(Opc, order, VT);
   EVT RetVT = Node->getValueType(0);
-  SmallVector<SDValue, 4> Ops(Node->op_begin() + 1, Node->op_end());
   TargetLowering::MakeLibCallOptions CallOptions;
+  SmallVector<SDValue, 4> Ops;
+  if (TLI.getLibcallName(LC)) {
+    Ops.append(Node->op_begin() + 2, Node->op_end());
+    Ops.push_back(Node->getOperand(1));
+  } else {
+    LC = RTLIB::getSYNC(Opc, VT);
+    assert(LC != RTLIB::UNKNOWN_LIBCALL &&
+           "Unexpected atomic op or value type!");
+    Ops.append(Node->op_begin() + 1, Node->op_end());
+  }
   return TLI.makeLibCall(DAG, LC, RetVT, Ops, CallOptions, SDLoc(Node),
                          Node->getOperand(0));
 }

diff  --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp
index 8b44885237c2..e4d9b46824ac 100644
--- a/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -448,6 +448,83 @@ RTLIB::Libcall RTLIB::getUINTTOFP(EVT OpVT, EVT RetVT) {
   return UNKNOWN_LIBCALL;
 }
 
+RTLIB::Libcall RTLIB::getOUTLINE_ATOMIC(unsigned Opc, AtomicOrdering Order,
+                                        MVT VT) {
+  unsigned ModeN, ModelN;
+  switch (VT.SimpleTy) {
+  case MVT::i8:
+    ModeN = 0;
+    break;
+  case MVT::i16:
+    ModeN = 1;
+    break;
+  case MVT::i32:
+    ModeN = 2;
+    break;
+  case MVT::i64:
+    ModeN = 3;
+    break;
+  case MVT::i128:
+    ModeN = 4;
+    break;
+  default:
+    return UNKNOWN_LIBCALL;
+  }
+
+  switch (Order) {
+  case AtomicOrdering::Monotonic:
+    ModelN = 0;
+    break;
+  case AtomicOrdering::Acquire:
+    ModelN = 1;
+    break;
+  case AtomicOrdering::Release:
+    ModelN = 2;
+    break;
+  case AtomicOrdering::AcquireRelease:
+  case AtomicOrdering::SequentiallyConsistent:
+    ModelN = 3;
+    break;
+  default:
+    return UNKNOWN_LIBCALL;
+  }
+
+#define LCALLS(A, B)                                                           \
+  { A##B##_RELAX, A##B##_ACQ, A##B##_REL, A##B##_ACQ_REL }
+#define LCALL5(A)                                                              \
+  LCALLS(A, 1), LCALLS(A, 2), LCALLS(A, 4), LCALLS(A, 8), LCALLS(A, 16)
+  switch (Opc) {
+  case ISD::ATOMIC_CMP_SWAP: {
+    const Libcall LC[5][4] = {LCALL5(OUTLINE_ATOMIC_CAS)};
+    return LC[ModeN][ModelN];
+  }
+  case ISD::ATOMIC_SWAP: {
+    const Libcall LC[5][4] = {LCALL5(OUTLINE_ATOMIC_SWP)};
+    return LC[ModeN][ModelN];
+  }
+  case ISD::ATOMIC_LOAD_ADD: {
+    const Libcall LC[5][4] = {LCALL5(OUTLINE_ATOMIC_LDADD)};
+    return LC[ModeN][ModelN];
+  }
+  case ISD::ATOMIC_LOAD_OR: {
+    const Libcall LC[5][4] = {LCALL5(OUTLINE_ATOMIC_LDSET)};
+    return LC[ModeN][ModelN];
+  }
+  case ISD::ATOMIC_LOAD_CLR: {
+    const Libcall LC[5][4] = {LCALL5(OUTLINE_ATOMIC_LDCLR)};
+    return LC[ModeN][ModelN];
+  }
+  case ISD::ATOMIC_LOAD_XOR: {
+    const Libcall LC[5][4] = {LCALL5(OUTLINE_ATOMIC_LDEOR)};
+    return LC[ModeN][ModelN];
+  }
+  default:
+    return UNKNOWN_LIBCALL;
+  }
+#undef LCALLS
+#undef LCALL5
+}
+
 RTLIB::Libcall RTLIB::getSYNC(unsigned Opc, MVT VT) {
 #define OP_TO_LIBCALL(Name, Enum)                                              \
   case Name:                                                                   \

diff  --git a/llvm/lib/Target/AArch64/AArch64.td b/llvm/lib/Target/AArch64/AArch64.td
index 8bf85de4dc51..d73ded1c9cb0 100644
--- a/llvm/lib/Target/AArch64/AArch64.td
+++ b/llvm/lib/Target/AArch64/AArch64.td
@@ -61,6 +61,9 @@ def FeatureRAS : SubtargetFeature<"ras", "HasRAS", "true",
 def FeatureLSE : SubtargetFeature<"lse", "HasLSE", "true",
   "Enable ARMv8.1 Large System Extension (LSE) atomic instructions">;
 
+def FeatureOutlineAtomics : SubtargetFeature<"outline-atomics", "OutlineAtomics", "true",
+  "Enable out of line atomics to support LSE instructions">;
+
 def FeatureRDM : SubtargetFeature<"rdm", "HasRDM", "true",
   "Enable ARMv8.1 Rounding Double Multiply Add/Subtract instructions">;
 

diff  --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 463580c1e788..b92eb1d0e4f6 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -662,6 +662,57 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
   setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Custom);
   setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i64, Custom);
 
+  // Generate outline atomics library calls only if LSE was not specified for
+  // subtarget
+  if (Subtarget->outlineAtomics() && !Subtarget->hasLSE()) {
+    setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i8, LibCall);
+    setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i16, LibCall);
+    setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, LibCall);
+    setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, LibCall);
+    setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i128, LibCall);
+    setOperationAction(ISD::ATOMIC_SWAP, MVT::i8, LibCall);
+    setOperationAction(ISD::ATOMIC_SWAP, MVT::i16, LibCall);
+    setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, LibCall);
+    setOperationAction(ISD::ATOMIC_SWAP, MVT::i64, LibCall);
+    setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i8, LibCall);
+    setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i16, LibCall);
+    setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, LibCall);
+    setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i64, LibCall);
+    setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i8, LibCall);
+    setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i16, LibCall);
+    setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, LibCall);
+    setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i64, LibCall);
+    setOperationAction(ISD::ATOMIC_LOAD_CLR, MVT::i8, LibCall);
+    setOperationAction(ISD::ATOMIC_LOAD_CLR, MVT::i16, LibCall);
+    setOperationAction(ISD::ATOMIC_LOAD_CLR, MVT::i32, LibCall);
+    setOperationAction(ISD::ATOMIC_LOAD_CLR, MVT::i64, LibCall);
+    setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i8, LibCall);
+    setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i16, LibCall);
+    setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, LibCall);
+    setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i64, LibCall);
+#define LCALLNAMES(A, B, N)                                                    \
+  setLibcallName(A##N##_RELAX, #B #N "_relax");                                \
+  setLibcallName(A##N##_ACQ, #B #N "_acq");                                    \
+  setLibcallName(A##N##_REL, #B #N "_rel");                                    \
+  setLibcallName(A##N##_ACQ_REL, #B #N "_acq_rel");
+#define LCALLNAME4(A, B)                                                       \
+  LCALLNAMES(A, B, 1)                                                          \
+  LCALLNAMES(A, B, 2) LCALLNAMES(A, B, 4) LCALLNAMES(A, B, 8)
+#define LCALLNAME5(A, B)                                                       \
+  LCALLNAMES(A, B, 1)                                                          \
+  LCALLNAMES(A, B, 2)                                                          \
+  LCALLNAMES(A, B, 4) LCALLNAMES(A, B, 8) LCALLNAMES(A, B, 16)
+    LCALLNAME5(RTLIB::OUTLINE_ATOMIC_CAS, __aarch64_cas)
+    LCALLNAME4(RTLIB::OUTLINE_ATOMIC_SWP, __aarch64_swp)
+    LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDADD, __aarch64_ldadd)
+    LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDSET, __aarch64_ldset)
+    LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDCLR, __aarch64_ldclr)
+    LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDEOR, __aarch64_ldeor)
+#undef LCALLNAMES
+#undef LCALLNAME4
+#undef LCALLNAME5
+  }
+
   // 128-bit loads and stores can be done without expanding
   setOperationAction(ISD::LOAD, MVT::i128, Custom);
   setOperationAction(ISD::STORE, MVT::i128, Custom);
@@ -10078,7 +10129,7 @@ SDValue AArch64TargetLowering::LowerVECREDUCE(SDValue Op,
 SDValue AArch64TargetLowering::LowerATOMIC_LOAD_SUB(SDValue Op,
                                                     SelectionDAG &DAG) const {
   auto &Subtarget = static_cast<const AArch64Subtarget &>(DAG.getSubtarget());
-  if (!Subtarget.hasLSE())
+  if (!Subtarget.hasLSE() && !Subtarget.outlineAtomics())
     return SDValue();
 
   // LSE has an atomic load-add instruction, but not a load-sub.
@@ -10095,7 +10146,7 @@ SDValue AArch64TargetLowering::LowerATOMIC_LOAD_SUB(SDValue Op,
 SDValue AArch64TargetLowering::LowerATOMIC_LOAD_AND(SDValue Op,
                                                     SelectionDAG &DAG) const {
   auto &Subtarget = static_cast<const AArch64Subtarget &>(DAG.getSubtarget());
-  if (!Subtarget.hasLSE())
+  if (!Subtarget.hasLSE() && !Subtarget.outlineAtomics())
     return SDValue();
 
   // LSE has an atomic load-clear instruction, but not a load-and.
@@ -15549,7 +15600,7 @@ static void ReplaceCMP_SWAP_128Results(SDNode *N,
   assert(N->getValueType(0) == MVT::i128 &&
          "AtomicCmpSwap on types less than 128 should be legal");
 
-  if (Subtarget->hasLSE()) {
+  if (Subtarget->hasLSE() || Subtarget->outlineAtomics()) {
     // LSE has a 128-bit compare and swap (CASP), but i128 is not a legal type,
     // so lower it here, wrapped in REG_SEQUENCE and EXTRACT_SUBREG.
     SDValue Ops[] = {
@@ -15778,14 +15829,30 @@ AArch64TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
   // Nand not supported in LSE.
   if (AI->getOperation() == AtomicRMWInst::Nand) return AtomicExpansionKind::LLSC;
   // Leave 128 bits to LLSC.
-  return (Subtarget->hasLSE() && Size < 128) ? AtomicExpansionKind::None : AtomicExpansionKind::LLSC;
+  if (Subtarget->hasLSE() && Size < 128)
+    return AtomicExpansionKind::None;
+  if (Subtarget->outlineAtomics() && Size < 128) {
+    // [U]Min/[U]Max RWM atomics are used in __sync_fetch_ libcalls so far.
+    // Don't outline them unless
+    // (1) high level <atomic> support approved:
+    //   http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2020/p0493r1.pdf
+    // (2) low level libgcc and compiler-rt support implemented by:
+    //   min/max outline atomics helpers
+    if (AI->getOperation() != AtomicRMWInst::Min &&
+        AI->getOperation() != AtomicRMWInst::Max &&
+        AI->getOperation() != AtomicRMWInst::UMin &&
+        AI->getOperation() != AtomicRMWInst::UMax) {
+      return AtomicExpansionKind::None;
+    }
+  }
+  return AtomicExpansionKind::LLSC;
 }
 
 TargetLowering::AtomicExpansionKind
 AArch64TargetLowering::shouldExpandAtomicCmpXchgInIR(
     AtomicCmpXchgInst *AI) const {
   // If subtarget has LSE, leave cmpxchg intact for codegen.
-  if (Subtarget->hasLSE())
+  if (Subtarget->hasLSE() || Subtarget->outlineAtomics())
     return AtomicExpansionKind::None;
   // At -O0, fast-regalloc cannot cope with the live vregs necessary to
   // implement cmpxchg without spilling. If the address being exchanged is also

diff  --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h
index 4eb484364d58..631d18a8198d 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.h
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h
@@ -195,6 +195,7 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
   // Enable 64-bit vectorization in SLP.
   unsigned MinVectorRegisterBitWidth = 64;
 
+  bool OutlineAtomics = false;
   bool UseAA = false;
   bool PredictableSelectIsExpensive = false;
   bool BalanceFPOps = false;
@@ -471,6 +472,8 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
 
   bool useAA() const override { return UseAA; }
 
+  bool outlineAtomics() const { return OutlineAtomics; }
+
   bool hasVH() const { return HasVH; }
   bool hasPAN() const { return HasPAN; }
   bool hasLOR() const { return HasLOR; }

diff  --git a/llvm/test/CodeGen/AArch64/arm64-atomic-128.ll b/llvm/test/CodeGen/AArch64/arm64-atomic-128.ll
index 21e3c768ee69..2409a3463df4 100644
--- a/llvm/test/CodeGen/AArch64/arm64-atomic-128.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-atomic-128.ll
@@ -1,8 +1,10 @@
 ; RUN: llc < %s -mtriple=arm64-linux-gnu -verify-machineinstrs -mcpu=cyclone | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-linux-gnu -verify-machineinstrs -mcpu=cyclone -mattr=+outline-atomics | FileCheck %s -check-prefix=OUTLINE-ATOMICS
 
 @var = global i128 0
 
 define i128 @val_compare_and_swap(i128* %p, i128 %oldval, i128 %newval) {
+; OUTLINE-ATOMICS: bl __aarch64_cas16_acq
 ; CHECK-LABEL: val_compare_and_swap:
 ; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
 ; CHECK: ldaxp   [[RESULTLO:x[0-9]+]], [[RESULTHI:x[0-9]+]], [x[[ADDR:[0-9]+]]]

diff  --git a/llvm/test/CodeGen/AArch64/arm64-atomic.ll b/llvm/test/CodeGen/AArch64/arm64-atomic.ll
index db343e95a3de..cfdbcf9bfac9 100644
--- a/llvm/test/CodeGen/AArch64/arm64-atomic.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-atomic.ll
@@ -1,6 +1,8 @@
 ; RUN: llc < %s -mtriple=arm64-eabi -asm-verbose=false -verify-machineinstrs -mcpu=cyclone | FileCheck -enable-var-scope %s
+; RUN: llc < %s -mtriple=arm64-eabi -asm-verbose=false -verify-machineinstrs -mcpu=cyclone -mattr=+outline-atomics | FileCheck -enable-var-scope %s -check-prefix=OUTLINE-ATOMICS
 
 define i32 @val_compare_and_swap(i32* %p, i32 %cmp, i32 %new) #0 {
+; OUTLINE-ATOMICS: bl __aarch64_cas4_acq
 ; CHECK-LABEL: val_compare_and_swap:
 ; CHECK-NEXT: mov    x[[ADDR:[0-9]+]], x0
 ; CHECK-NEXT: [[TRYBB:.?LBB[0-9_]+]]:
@@ -19,6 +21,7 @@ define i32 @val_compare_and_swap(i32* %p, i32 %cmp, i32 %new) #0 {
 }
 
 define i32 @val_compare_and_swap_from_load(i32* %p, i32 %cmp, i32* %pnew) #0 {
+; OUTLINE-ATOMICS: bl __aarch64_cas4_acq
 ; CHECK-LABEL: val_compare_and_swap_from_load:
 ; CHECK-NEXT: ldr    [[NEW:w[0-9]+]], [x2]
 ; CHECK-NEXT: [[TRYBB:.?LBB[0-9_]+]]:
@@ -40,6 +43,7 @@ define i32 @val_compare_and_swap_from_load(i32* %p, i32 %cmp, i32* %pnew) #0 {
 }
 
 define i32 @val_compare_and_swap_rel(i32* %p, i32 %cmp, i32 %new) #0 {
+; OUTLINE-ATOMICS: bl __aarch64_cas4_acq_rel
 ; CHECK-LABEL: val_compare_and_swap_rel:
 ; CHECK-NEXT: mov    x[[ADDR:[0-9]+]], x0
 ; CHECK-NEXT: [[TRYBB:.?LBB[0-9_]+]]:
@@ -58,6 +62,7 @@ define i32 @val_compare_and_swap_rel(i32* %p, i32 %cmp, i32 %new) #0 {
 }
 
 define i64 @val_compare_and_swap_64(i64* %p, i64 %cmp, i64 %new) #0 {
+; OUTLINE-ATOMICS: bl __aarch64_cas8_relax
 ; CHECK-LABEL: val_compare_and_swap_64:
 ; CHECK-NEXT: mov    x[[ADDR:[0-9]+]], x0
 ; CHECK-NEXT: [[TRYBB:.?LBB[0-9_]+]]:
@@ -104,6 +109,7 @@ define i64 @fetch_and_nand_64(i64* %p) #0 {
 }
 
 define i32 @fetch_and_or(i32* %p) #0 {
+; OUTLINE-ATOMICS: bl __aarch64_ldset4_acq_rel
 ; CHECK-LABEL: fetch_and_or:
 ; CHECK: mov   [[OLDVAL_REG:w[0-9]+]], #5
 ; CHECK: [[TRYBB:.?LBB[0-9_]+]]:
@@ -118,6 +124,7 @@ define i32 @fetch_and_or(i32* %p) #0 {
 }
 
 define i64 @fetch_and_or_64(i64* %p) #0 {
+; OUTLINE-ATOMICS: bl __aarch64_ldset8_relax
 ; CHECK: fetch_and_or_64:
 ; CHECK: mov    x[[ADDR:[0-9]+]], x0
 ; CHECK: [[TRYBB:.?LBB[0-9_]+]]:
@@ -332,6 +339,7 @@ define void @atomic_store_relaxed_32(i32* %p, i32 %off32, i32 %val) #0 {
 }
 
 define void @atomic_store_relaxed_64(i64* %p, i32 %off32, i64 %val) #0 {
+; OUTLINE-ATOMICS: bl __aarch64_ldadd4_acq_rel
 ; CHECK-LABEL: atomic_store_relaxed_64:
   %ptr_unsigned = getelementptr i64, i64* %p, i32 4095
   store atomic i64 %val, i64* %ptr_unsigned monotonic, align 8

diff  --git a/llvm/test/CodeGen/AArch64/arm64_32-atomics.ll b/llvm/test/CodeGen/AArch64/arm64_32-atomics.ll
index 34682e82f3da..bd7452fde2a4 100644
--- a/llvm/test/CodeGen/AArch64/arm64_32-atomics.ll
+++ b/llvm/test/CodeGen/AArch64/arm64_32-atomics.ll
@@ -1,4 +1,5 @@
 ; RUN: llc -mtriple=arm64_32-apple-ios7.0 -o - %s | FileCheck %s
+; RUN: llc -mtriple=arm64_32-apple-ios7.0 -mattr=+outline-atomics -o - %s | FileCheck %s -check-prefix=OUTLINE-ATOMICS
 
 define i8 @test_load_8(i8* %addr) {
 ; CHECK-LABAL: test_load_8:
@@ -239,6 +240,7 @@ define i32 @test_stlxr_64(i64* %addr, i64 %val) {
 }
 
 define {i8*, i1} @test_cmpxchg_ptr(i8** %addr, i8* %cmp, i8* %new) {
+; OUTLINE-ATOMICS: bl ___aarch64_cas4_acq_rel
 ; CHECK-LABEL: test_cmpxchg_ptr:
 ; CHECK: [[LOOP:LBB[0-9]+_[0-9]+]]:
 ; CHECK:     ldaxr [[OLD:w[0-9]+]], [x0]

diff  --git a/llvm/test/CodeGen/AArch64/atomic-ops-lse.ll b/llvm/test/CodeGen/AArch64/atomic-ops-lse.ll
index a75614d20f7b..c7d3dc048ddd 100644
--- a/llvm/test/CodeGen/AArch64/atomic-ops-lse.ll
+++ b/llvm/test/CodeGen/AArch64/atomic-ops-lse.ll
@@ -1,4 +1,6 @@
 ; RUN: llc -mtriple=aarch64-none-linux-gnu -disable-post-ra -verify-machineinstrs -mattr=+lse < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-none-linux-gnu -disable-post-ra -verify-machineinstrs -mattr=+lse -mattr=+outline-atomics < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-none-linux-gnu -disable-post-ra -verify-machineinstrs -mattr=+outline-atomics < %s | FileCheck %s --check-prefix=OUTLINE-ATOMICS
 ; RUN: llc -mtriple=aarch64_be-none-linux-gnu -disable-post-ra -verify-machineinstrs -mattr=+lse < %s | FileCheck %s
 ; RUN: llc -mtriple=aarch64-none-linux-gnu -disable-post-ra -verify-machineinstrs -mattr=+lse < %s | FileCheck %s --check-prefix=CHECK-REG
 ; RUN: llc -mtriple=aarch64-none-linux-gnu -disable-post-ra -verify-machineinstrs -mcpu=saphira < %s | FileCheck %s
@@ -16,6 +18,14 @@
 
 define i8 @test_atomic_load_add_i8(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_add_i8:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i8:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var8
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var8
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd1_acq_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw add i8* @var8, i8 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
@@ -29,6 +39,14 @@ define i8 @test_atomic_load_add_i8(i8 %offset) nounwind {
 
 define i16 @test_atomic_load_add_i16(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_add_i16:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i16:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var16
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var16
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd2_acq_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw add i16* @var16, i16 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
@@ -42,6 +60,14 @@ define i16 @test_atomic_load_add_i16(i16 %offset) nounwind {
 
 define i32 @test_atomic_load_add_i32(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_add_i32:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i32:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd4_acq_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw add i32* @var32, i32 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
@@ -55,6 +81,14 @@ define i32 @test_atomic_load_add_i32(i32 %offset) nounwind {
 
 define i64 @test_atomic_load_add_i64(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_add_i64:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i64:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd8_acq_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw add i64* @var64, i64 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
@@ -68,6 +102,14 @@ define i64 @test_atomic_load_add_i64(i64 %offset) nounwind {
 
 define void @test_atomic_load_add_i32_noret(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_add_i32_noret:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i32_noret:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd4_acq_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw add i32* @var32, i32 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
@@ -80,6 +122,14 @@ define void @test_atomic_load_add_i32_noret(i32 %offset) nounwind {
 
 define void @test_atomic_load_add_i64_noret(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_add_i64_noret:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i64_noret:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd8_acq_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw add i64* @var64, i64 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
@@ -92,6 +142,14 @@ define void @test_atomic_load_add_i64_noret(i64 %offset) nounwind {
 
 define i8 @test_atomic_load_or_i8(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_or_i8:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i8:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var8
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var8
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldset1_acq_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw or i8* @var8, i8 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
@@ -105,6 +163,14 @@ define i8 @test_atomic_load_or_i8(i8 %offset) nounwind {
 
 define i16 @test_atomic_load_or_i16(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_or_i16:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i16:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var16
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var16
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldset2_acq_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw or i16* @var16, i16 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
@@ -118,6 +184,14 @@ define i16 @test_atomic_load_or_i16(i16 %offset) nounwind {
 
 define i32 @test_atomic_load_or_i32(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_or_i32:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i32:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldset4_acq_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw or i32* @var32, i32 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
@@ -131,6 +205,14 @@ define i32 @test_atomic_load_or_i32(i32 %offset) nounwind {
 
 define i64 @test_atomic_load_or_i64(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_or_i64:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i64:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldset8_acq_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw or i64* @var64, i64 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
@@ -144,6 +226,14 @@ define i64 @test_atomic_load_or_i64(i64 %offset) nounwind {
 
 define void @test_atomic_load_or_i32_noret(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_or_i32_noret:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i32_noret:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldset4_acq_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw or i32* @var32, i32 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
@@ -156,6 +246,14 @@ define void @test_atomic_load_or_i32_noret(i32 %offset) nounwind {
 
 define void @test_atomic_load_or_i64_noret(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_or_i64_noret:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i64_noret:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldset8_acq_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw or i64* @var64, i64 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
@@ -168,6 +266,14 @@ define void @test_atomic_load_or_i64_noret(i64 %offset) nounwind {
 
 define i8 @test_atomic_load_xor_i8(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xor_i8:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i8:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var8
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var8
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldeor1_acq_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw xor i8* @var8, i8 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
@@ -181,6 +287,14 @@ define i8 @test_atomic_load_xor_i8(i8 %offset) nounwind {
 
 define i16 @test_atomic_load_xor_i16(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xor_i16:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i16:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var16
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var16
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldeor2_acq_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw xor i16* @var16, i16 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
@@ -194,6 +308,14 @@ define i16 @test_atomic_load_xor_i16(i16 %offset) nounwind {
 
 define i32 @test_atomic_load_xor_i32(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xor_i32:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i32:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldeor4_acq_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw xor i32* @var32, i32 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
@@ -207,6 +329,14 @@ define i32 @test_atomic_load_xor_i32(i32 %offset) nounwind {
 
 define i64 @test_atomic_load_xor_i64(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xor_i64:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i64:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldeor8_acq_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw xor i64* @var64, i64 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
@@ -220,6 +350,14 @@ define i64 @test_atomic_load_xor_i64(i64 %offset) nounwind {
 
 define void @test_atomic_load_xor_i32_noret(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xor_i32_noret:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i32_noret:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldeor4_acq_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw xor i32* @var32, i32 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
@@ -232,6 +370,14 @@ define void @test_atomic_load_xor_i32_noret(i32 %offset) nounwind {
 
 define void @test_atomic_load_xor_i64_noret(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xor_i64_noret:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i64_noret:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldeor8_acq_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw xor i64* @var64, i64 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
@@ -244,6 +390,21 @@ define void @test_atomic_load_xor_i64_noret(i64 %offset) nounwind {
 
 define i8 @test_atomic_load_min_i8(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_min_i8:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i8:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x9, var8
+; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var8
+; OUTLINE-ATOMICS-NEXT:  .LBB18_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldaxrb w10, [x9]
+; OUTLINE-ATOMICS-NEXT:    sxtb w8, w10
+; OUTLINE-ATOMICS-NEXT:    cmp w8, w0, sxtb
+; OUTLINE-ATOMICS-NEXT:    csel w10, w10, w0, le
+; OUTLINE-ATOMICS-NEXT:    stlxrb w11, w10, [x9]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB18_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw min i8* @var8, i8 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
@@ -257,6 +418,21 @@ define i8 @test_atomic_load_min_i8(i8 %offset) nounwind {
 
 define i16 @test_atomic_load_min_i16(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_min_i16:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i16:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x9, var16
+; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var16
+; OUTLINE-ATOMICS-NEXT:  .LBB19_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldaxrh w10, [x9]
+; OUTLINE-ATOMICS-NEXT:    sxth w8, w10
+; OUTLINE-ATOMICS-NEXT:    cmp w8, w0, sxth
+; OUTLINE-ATOMICS-NEXT:    csel w10, w10, w0, le
+; OUTLINE-ATOMICS-NEXT:    stlxrh w11, w10, [x9]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB19_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw min i16* @var16, i16 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
@@ -270,6 +446,20 @@ define i16 @test_atomic_load_min_i16(i16 %offset) nounwind {
 
 define i32 @test_atomic_load_min_i32(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_min_i32:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i32:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x9, var32
+; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var32
+; OUTLINE-ATOMICS-NEXT:  .LBB20_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldaxr w8, [x9]
+; OUTLINE-ATOMICS-NEXT:    cmp w8, w0
+; OUTLINE-ATOMICS-NEXT:    csel w10, w8, w0, le
+; OUTLINE-ATOMICS-NEXT:    stlxr w11, w10, [x9]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB20_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw min i32* @var32, i32 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
@@ -283,6 +473,20 @@ define i32 @test_atomic_load_min_i32(i32 %offset) nounwind {
 
 define i64 @test_atomic_load_min_i64(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_min_i64:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i64:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x9, var64
+; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var64
+; OUTLINE-ATOMICS-NEXT:  .LBB21_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldaxr x8, [x9]
+; OUTLINE-ATOMICS-NEXT:    cmp x8, x0
+; OUTLINE-ATOMICS-NEXT:    csel x10, x8, x0, le
+; OUTLINE-ATOMICS-NEXT:    stlxr w11, x10, [x9]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB21_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    mov x0, x8
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw min i64* @var64, i64 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
@@ -296,6 +500,19 @@ define i64 @test_atomic_load_min_i64(i64 %offset) nounwind {
 
 define void @test_atomic_load_min_i32_noret(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_min_i32_noret:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i32_noret:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x8, var32
+; OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var32
+; OUTLINE-ATOMICS-NEXT:  .LBB22_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldaxr w9, [x8]
+; OUTLINE-ATOMICS-NEXT:    cmp w9, w0
+; OUTLINE-ATOMICS-NEXT:    csel w9, w9, w0, le
+; OUTLINE-ATOMICS-NEXT:    stlxr w10, w9, [x8]
+; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB22_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw min i32* @var32, i32 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
@@ -308,6 +525,19 @@ define void @test_atomic_load_min_i32_noret(i32 %offset) nounwind {
 
 define void @test_atomic_load_min_i64_noret(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_min_i64_noret:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i64_noret:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x8, var64
+; OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var64
+; OUTLINE-ATOMICS-NEXT:  .LBB23_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldaxr x9, [x8]
+; OUTLINE-ATOMICS-NEXT:    cmp x9, x0
+; OUTLINE-ATOMICS-NEXT:    csel x9, x9, x0, le
+; OUTLINE-ATOMICS-NEXT:    stlxr w10, x9, [x8]
+; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB23_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw min i64* @var64, i64 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
@@ -320,6 +550,20 @@ define void @test_atomic_load_min_i64_noret(i64 %offset) nounwind {
 
 define i8 @test_atomic_load_umin_i8(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umin_i8:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i8:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x9, var8
+; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var8
+; OUTLINE-ATOMICS-NEXT:  .LBB24_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldaxrb w8, [x9]
+; OUTLINE-ATOMICS-NEXT:    cmp w8, w0, uxtb
+; OUTLINE-ATOMICS-NEXT:    csel w10, w8, w0, ls
+; OUTLINE-ATOMICS-NEXT:    stlxrb w11, w10, [x9]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB24_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umin i8* @var8, i8 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
@@ -333,6 +577,20 @@ define i8 @test_atomic_load_umin_i8(i8 %offset) nounwind {
 
 define i16 @test_atomic_load_umin_i16(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umin_i16:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i16:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x9, var16
+; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var16
+; OUTLINE-ATOMICS-NEXT:  .LBB25_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldaxrh w8, [x9]
+; OUTLINE-ATOMICS-NEXT:    cmp w8, w0, uxth
+; OUTLINE-ATOMICS-NEXT:    csel w10, w8, w0, ls
+; OUTLINE-ATOMICS-NEXT:    stlxrh w11, w10, [x9]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB25_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umin i16* @var16, i16 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
@@ -346,6 +604,20 @@ define i16 @test_atomic_load_umin_i16(i16 %offset) nounwind {
 
 define i32 @test_atomic_load_umin_i32(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umin_i32:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i32:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x9, var32
+; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var32
+; OUTLINE-ATOMICS-NEXT:  .LBB26_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldaxr w8, [x9]
+; OUTLINE-ATOMICS-NEXT:    cmp w8, w0
+; OUTLINE-ATOMICS-NEXT:    csel w10, w8, w0, ls
+; OUTLINE-ATOMICS-NEXT:    stlxr w11, w10, [x9]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB26_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umin i32* @var32, i32 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
@@ -359,6 +631,20 @@ define i32 @test_atomic_load_umin_i32(i32 %offset) nounwind {
 
 define i64 @test_atomic_load_umin_i64(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umin_i64:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i64:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x9, var64
+; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var64
+; OUTLINE-ATOMICS-NEXT:  .LBB27_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldaxr x8, [x9]
+; OUTLINE-ATOMICS-NEXT:    cmp x8, x0
+; OUTLINE-ATOMICS-NEXT:    csel x10, x8, x0, ls
+; OUTLINE-ATOMICS-NEXT:    stlxr w11, x10, [x9]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB27_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    mov x0, x8
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umin i64* @var64, i64 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
@@ -372,6 +658,19 @@ define i64 @test_atomic_load_umin_i64(i64 %offset) nounwind {
 
 define void @test_atomic_load_umin_i32_noret(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umin_i32_noret:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i32_noret:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x8, var32
+; OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var32
+; OUTLINE-ATOMICS-NEXT:  .LBB28_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldaxr w9, [x8]
+; OUTLINE-ATOMICS-NEXT:    cmp w9, w0
+; OUTLINE-ATOMICS-NEXT:    csel w9, w9, w0, ls
+; OUTLINE-ATOMICS-NEXT:    stlxr w10, w9, [x8]
+; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB28_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw umin i32* @var32, i32 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
@@ -384,6 +683,19 @@ define void @test_atomic_load_umin_i32_noret(i32 %offset) nounwind {
 
 define void @test_atomic_load_umin_i64_noret(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umin_i64_noret:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i64_noret:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x8, var64
+; OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var64
+; OUTLINE-ATOMICS-NEXT:  .LBB29_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldaxr x9, [x8]
+; OUTLINE-ATOMICS-NEXT:    cmp x9, x0
+; OUTLINE-ATOMICS-NEXT:    csel x9, x9, x0, ls
+; OUTLINE-ATOMICS-NEXT:    stlxr w10, x9, [x8]
+; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB29_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw umin i64* @var64, i64 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
@@ -396,6 +708,21 @@ define void @test_atomic_load_umin_i64_noret(i64 %offset) nounwind {
 
 define i8 @test_atomic_load_max_i8(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_max_i8:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i8:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x9, var8
+; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var8
+; OUTLINE-ATOMICS-NEXT:  .LBB30_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldaxrb w10, [x9]
+; OUTLINE-ATOMICS-NEXT:    sxtb w8, w10
+; OUTLINE-ATOMICS-NEXT:    cmp w8, w0, sxtb
+; OUTLINE-ATOMICS-NEXT:    csel w10, w10, w0, gt
+; OUTLINE-ATOMICS-NEXT:    stlxrb w11, w10, [x9]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB30_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw max i8* @var8, i8 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
@@ -409,6 +736,21 @@ define i8 @test_atomic_load_max_i8(i8 %offset) nounwind {
 
 define i16 @test_atomic_load_max_i16(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_max_i16:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i16:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x9, var16
+; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var16
+; OUTLINE-ATOMICS-NEXT:  .LBB31_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldaxrh w10, [x9]
+; OUTLINE-ATOMICS-NEXT:    sxth w8, w10
+; OUTLINE-ATOMICS-NEXT:    cmp w8, w0, sxth
+; OUTLINE-ATOMICS-NEXT:    csel w10, w10, w0, gt
+; OUTLINE-ATOMICS-NEXT:    stlxrh w11, w10, [x9]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB31_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw max i16* @var16, i16 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
@@ -422,6 +764,20 @@ define i16 @test_atomic_load_max_i16(i16 %offset) nounwind {
 
 define i32 @test_atomic_load_max_i32(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_max_i32:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i32:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x9, var32
+; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var32
+; OUTLINE-ATOMICS-NEXT:  .LBB32_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldaxr w8, [x9]
+; OUTLINE-ATOMICS-NEXT:    cmp w8, w0
+; OUTLINE-ATOMICS-NEXT:    csel w10, w8, w0, gt
+; OUTLINE-ATOMICS-NEXT:    stlxr w11, w10, [x9]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB32_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw max i32* @var32, i32 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
@@ -435,6 +791,20 @@ define i32 @test_atomic_load_max_i32(i32 %offset) nounwind {
 
 define i64 @test_atomic_load_max_i64(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_max_i64:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i64:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x9, var64
+; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var64
+; OUTLINE-ATOMICS-NEXT:  .LBB33_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldaxr x8, [x9]
+; OUTLINE-ATOMICS-NEXT:    cmp x8, x0
+; OUTLINE-ATOMICS-NEXT:    csel x10, x8, x0, gt
+; OUTLINE-ATOMICS-NEXT:    stlxr w11, x10, [x9]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB33_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    mov x0, x8
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw max i64* @var64, i64 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
@@ -448,6 +818,19 @@ define i64 @test_atomic_load_max_i64(i64 %offset) nounwind {
 
 define void @test_atomic_load_max_i32_noret(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_max_i32_noret:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i32_noret:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x8, var32
+; OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var32
+; OUTLINE-ATOMICS-NEXT:  .LBB34_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldaxr w9, [x8]
+; OUTLINE-ATOMICS-NEXT:    cmp w9, w0
+; OUTLINE-ATOMICS-NEXT:    csel w9, w9, w0, gt
+; OUTLINE-ATOMICS-NEXT:    stlxr w10, w9, [x8]
+; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB34_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw max i32* @var32, i32 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
@@ -460,6 +843,19 @@ define void @test_atomic_load_max_i32_noret(i32 %offset) nounwind {
 
 define void @test_atomic_load_max_i64_noret(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_max_i64_noret:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i64_noret:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x8, var64
+; OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var64
+; OUTLINE-ATOMICS-NEXT:  .LBB35_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldaxr x9, [x8]
+; OUTLINE-ATOMICS-NEXT:    cmp x9, x0
+; OUTLINE-ATOMICS-NEXT:    csel x9, x9, x0, gt
+; OUTLINE-ATOMICS-NEXT:    stlxr w10, x9, [x8]
+; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB35_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw max i64* @var64, i64 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
@@ -472,6 +868,20 @@ define void @test_atomic_load_max_i64_noret(i64 %offset) nounwind {
 
 define i8 @test_atomic_load_umax_i8(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umax_i8:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i8:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x9, var8
+; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var8
+; OUTLINE-ATOMICS-NEXT:  .LBB36_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldaxrb w8, [x9]
+; OUTLINE-ATOMICS-NEXT:    cmp w8, w0, uxtb
+; OUTLINE-ATOMICS-NEXT:    csel w10, w8, w0, hi
+; OUTLINE-ATOMICS-NEXT:    stlxrb w11, w10, [x9]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB36_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umax i8* @var8, i8 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
@@ -485,6 +895,20 @@ define i8 @test_atomic_load_umax_i8(i8 %offset) nounwind {
 
 define i16 @test_atomic_load_umax_i16(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umax_i16:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i16:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x9, var16
+; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var16
+; OUTLINE-ATOMICS-NEXT:  .LBB37_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldaxrh w8, [x9]
+; OUTLINE-ATOMICS-NEXT:    cmp w8, w0, uxth
+; OUTLINE-ATOMICS-NEXT:    csel w10, w8, w0, hi
+; OUTLINE-ATOMICS-NEXT:    stlxrh w11, w10, [x9]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB37_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umax i16* @var16, i16 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
@@ -498,6 +922,20 @@ define i16 @test_atomic_load_umax_i16(i16 %offset) nounwind {
 
 define i32 @test_atomic_load_umax_i32(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umax_i32:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i32:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x9, var32
+; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var32
+; OUTLINE-ATOMICS-NEXT:  .LBB38_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldaxr w8, [x9]
+; OUTLINE-ATOMICS-NEXT:    cmp w8, w0
+; OUTLINE-ATOMICS-NEXT:    csel w10, w8, w0, hi
+; OUTLINE-ATOMICS-NEXT:    stlxr w11, w10, [x9]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB38_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umax i32* @var32, i32 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
@@ -511,6 +949,20 @@ define i32 @test_atomic_load_umax_i32(i32 %offset) nounwind {
 
 define i64 @test_atomic_load_umax_i64(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umax_i64:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i64:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x9, var64
+; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var64
+; OUTLINE-ATOMICS-NEXT:  .LBB39_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldaxr x8, [x9]
+; OUTLINE-ATOMICS-NEXT:    cmp x8, x0
+; OUTLINE-ATOMICS-NEXT:    csel x10, x8, x0, hi
+; OUTLINE-ATOMICS-NEXT:    stlxr w11, x10, [x9]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB39_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    mov x0, x8
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umax i64* @var64, i64 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
@@ -524,6 +976,19 @@ define i64 @test_atomic_load_umax_i64(i64 %offset) nounwind {
 
 define void @test_atomic_load_umax_i32_noret(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umax_i32_noret:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i32_noret:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x8, var32
+; OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var32
+; OUTLINE-ATOMICS-NEXT:  .LBB40_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldaxr w9, [x8]
+; OUTLINE-ATOMICS-NEXT:    cmp w9, w0
+; OUTLINE-ATOMICS-NEXT:    csel w9, w9, w0, hi
+; OUTLINE-ATOMICS-NEXT:    stlxr w10, w9, [x8]
+; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB40_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw umax i32* @var32, i32 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
@@ -536,6 +1001,19 @@ define void @test_atomic_load_umax_i32_noret(i32 %offset) nounwind {
 
 define void @test_atomic_load_umax_i64_noret(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umax_i64_noret:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i64_noret:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x8, var64
+; OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var64
+; OUTLINE-ATOMICS-NEXT:  .LBB41_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldaxr x9, [x8]
+; OUTLINE-ATOMICS-NEXT:    cmp x9, x0
+; OUTLINE-ATOMICS-NEXT:    csel x9, x9, x0, hi
+; OUTLINE-ATOMICS-NEXT:    stlxr w10, x9, [x8]
+; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB41_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw umax i64* @var64, i64 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
@@ -548,6 +1026,14 @@ define void @test_atomic_load_umax_i64_noret(i64 %offset) nounwind {
 
 define i8 @test_atomic_load_xchg_i8(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xchg_i8:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i8:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var8
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var8
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_swp1_acq_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw xchg i8* @var8, i8 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
@@ -561,6 +1047,14 @@ define i8 @test_atomic_load_xchg_i8(i8 %offset) nounwind {
 
 define i16 @test_atomic_load_xchg_i16(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xchg_i16:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i16:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var16
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var16
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_swp2_acq_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw xchg i16* @var16, i16 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
@@ -574,6 +1068,14 @@ define i16 @test_atomic_load_xchg_i16(i16 %offset) nounwind {
 
 define i32 @test_atomic_load_xchg_i32(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xchg_i32:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i32:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_swp4_acq_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw xchg i32* @var32, i32 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
@@ -587,6 +1089,14 @@ define i32 @test_atomic_load_xchg_i32(i32 %offset) nounwind {
 
 define i64 @test_atomic_load_xchg_i64(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xchg_i64:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i64:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_swp8_acq_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw xchg i64* @var64, i64 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
@@ -600,6 +1110,14 @@ define i64 @test_atomic_load_xchg_i64(i64 %offset) nounwind {
 
 define void @test_atomic_load_xchg_i32_noret(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xchg_i32_noret:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i32_noret:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_swp4_acq_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw xchg i32* @var32, i32 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
@@ -613,6 +1131,14 @@ define void @test_atomic_load_xchg_i32_noret(i32 %offset) nounwind {
 
 define void @test_atomic_load_xchg_i64_noret(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xchg_i64_noret:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i64_noret:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_swp8_acq_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw xchg i64* @var64, i64 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
@@ -626,6 +1152,14 @@ define void @test_atomic_load_xchg_i64_noret(i64 %offset) nounwind {
 
 define i8 @test_atomic_cmpxchg_i8(i8 %wanted, i8 %new) nounwind {
 ; CHECK-LABEL: test_atomic_cmpxchg_i8:
+; OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i8:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x2, var8
+; OUTLINE-ATOMICS-NEXT:    add x2, x2, :lo12:var8
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_cas1_acq
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    %pair = cmpxchg i8* @var8, i8 %wanted, i8 %new acquire acquire
    %old = extractvalue { i8, i1 } %pair, 0
 
@@ -640,6 +1174,17 @@ define i8 @test_atomic_cmpxchg_i8(i8 %wanted, i8 %new) nounwind {
 
 define i1 @test_atomic_cmpxchg_i8_1(i8 %wanted, i8 %new) nounwind {
 ; CHECK-LABEL: test_atomic_cmpxchg_i8_1:
+; OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i8_1:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    mov w19, w0
+; OUTLINE-ATOMICS-NEXT:    adrp x2, var8
+; OUTLINE-ATOMICS-NEXT:    add x2, x2, :lo12:var8
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_cas1_acq
+; OUTLINE-ATOMICS-NEXT:    cmp w0, w19, uxtb
+; OUTLINE-ATOMICS-NEXT:    cset w0, eq
+; OUTLINE-ATOMICS-NEXT:    ldp x30, x19, [sp], #16 // 16-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    %pair = cmpxchg i8* @var8, i8 %wanted, i8 %new acquire acquire
    %success = extractvalue { i8, i1 } %pair, 1
 
@@ -656,6 +1201,14 @@ define i1 @test_atomic_cmpxchg_i8_1(i8 %wanted, i8 %new) nounwind {
 
 define i16 @test_atomic_cmpxchg_i16(i16 %wanted, i16 %new) nounwind {
 ; CHECK-LABEL: test_atomic_cmpxchg_i16:
+; OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i16:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x2, var16
+; OUTLINE-ATOMICS-NEXT:    add x2, x2, :lo12:var16
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_cas2_acq
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    %pair = cmpxchg i16* @var16, i16 %wanted, i16 %new acquire acquire
    %old = extractvalue { i16, i1 } %pair, 0
 
@@ -670,6 +1223,17 @@ define i16 @test_atomic_cmpxchg_i16(i16 %wanted, i16 %new) nounwind {
 
 define i1 @test_atomic_cmpxchg_i16_1(i16 %wanted, i16 %new) nounwind {
 ; CHECK-LABEL: test_atomic_cmpxchg_i16_1:
+; OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i16_1:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    mov w19, w0
+; OUTLINE-ATOMICS-NEXT:    adrp x2, var16
+; OUTLINE-ATOMICS-NEXT:    add x2, x2, :lo12:var16
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_cas2_acq
+; OUTLINE-ATOMICS-NEXT:    cmp w0, w19, uxth
+; OUTLINE-ATOMICS-NEXT:    cset w0, eq
+; OUTLINE-ATOMICS-NEXT:    ldp x30, x19, [sp], #16 // 16-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    %pair = cmpxchg i16* @var16, i16 %wanted, i16 %new acquire acquire
    %success = extractvalue { i16, i1 } %pair, 1
 
@@ -687,6 +1251,14 @@ define i1 @test_atomic_cmpxchg_i16_1(i16 %wanted, i16 %new) nounwind {
 
 define i32 @test_atomic_cmpxchg_i32(i32 %wanted, i32 %new) nounwind {
 ; CHECK-LABEL: test_atomic_cmpxchg_i32:
+; OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i32:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x2, var32
+; OUTLINE-ATOMICS-NEXT:    add x2, x2, :lo12:var32
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_cas4_acq
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    %pair = cmpxchg i32* @var32, i32 %wanted, i32 %new acquire acquire
    %old = extractvalue { i32, i1 } %pair, 0
 
@@ -702,6 +1274,14 @@ define i32 @test_atomic_cmpxchg_i32(i32 %wanted, i32 %new) nounwind {
 
 define i64 @test_atomic_cmpxchg_i64(i64 %wanted, i64 %new) nounwind {
 ; CHECK-LABEL: test_atomic_cmpxchg_i64:
+; OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i64:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x2, var64
+; OUTLINE-ATOMICS-NEXT:    add x2, x2, :lo12:var64
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_cas8_acq
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    %pair = cmpxchg i64* @var64, i64 %wanted, i64 %new acquire acquire
    %old = extractvalue { i64, i1 } %pair, 0
 
@@ -717,6 +1297,14 @@ define i64 @test_atomic_cmpxchg_i64(i64 %wanted, i64 %new) nounwind {
 
 define i128 @test_atomic_cmpxchg_i128(i128 %wanted, i128 %new) nounwind {
 ; CHECK-LABEL: test_atomic_cmpxchg_i128:
+; OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i128:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x4, var128
+; OUTLINE-ATOMICS-NEXT:    add x4, x4, :lo12:var128
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_cas16_acq
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    %pair = cmpxchg i128* @var128, i128 %wanted, i128 %new acquire acquire
    %old = extractvalue { i128, i1 } %pair, 0
 
@@ -732,6 +1320,15 @@ define i128 @test_atomic_cmpxchg_i128(i128 %wanted, i128 %new) nounwind {
 
 define i8 @test_atomic_load_sub_i8(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_sub_i8:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i8:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    neg w0, w0
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var8
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var8
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd1_acq_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
   %old = atomicrmw sub i8* @var8, i8 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]]
@@ -746,6 +1343,15 @@ define i8 @test_atomic_load_sub_i8(i8 %offset) nounwind {
 
 define i16 @test_atomic_load_sub_i16(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_sub_i16:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i16:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    neg w0, w0
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var16
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var16
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd2_acq_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
   %old = atomicrmw sub i16* @var16, i16 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]]
@@ -760,6 +1366,15 @@ define i16 @test_atomic_load_sub_i16(i16 %offset) nounwind {
 
 define i32 @test_atomic_load_sub_i32(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_sub_i32:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i32:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    neg w0, w0
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd4_acq_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
   %old = atomicrmw sub i32* @var32, i32 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]]
@@ -774,6 +1389,15 @@ define i32 @test_atomic_load_sub_i32(i32 %offset) nounwind {
 
 define i64 @test_atomic_load_sub_i64(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_sub_i64:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i64:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    neg x0, x0
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd8_acq_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
   %old = atomicrmw sub i64* @var64, i64 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: neg x[[NEG:[0-9]+]], x[[OLD:[0-9]+]]
@@ -788,6 +1412,15 @@ define i64 @test_atomic_load_sub_i64(i64 %offset) nounwind {
 
 define void @test_atomic_load_sub_i32_noret(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_sub_i32_noret:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i32_noret:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    neg w0, w0
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd4_acq_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
   atomicrmw sub i32* @var32, i32 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]]
@@ -802,6 +1435,15 @@ define void @test_atomic_load_sub_i32_noret(i32 %offset) nounwind {
 
 define void @test_atomic_load_sub_i64_noret(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_sub_i64_noret:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i64_noret:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    neg x0, x0
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd8_acq_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
   atomicrmw sub i64* @var64, i64 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: neg x[[NEG:[0-9]+]], x[[OLD:[0-9]+]]
@@ -816,6 +1458,15 @@ define void @test_atomic_load_sub_i64_noret(i64 %offset) nounwind {
 
 define i8 @test_atomic_load_sub_i8_neg_imm() nounwind {
 ; CHECK-LABEL: test_atomic_load_sub_i8_neg_imm:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i8_neg_imm:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var8
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var8
+; OUTLINE-ATOMICS-NEXT:    mov w0, #1
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd1_acq_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
   %old = atomicrmw sub i8* @var8, i8 -1 seq_cst
 
 ; CHECK-NOT: dmb
@@ -830,6 +1481,15 @@ define i8 @test_atomic_load_sub_i8_neg_imm() nounwind {
 
 define i16 @test_atomic_load_sub_i16_neg_imm() nounwind {
 ; CHECK-LABEL: test_atomic_load_sub_i16_neg_imm:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i16_neg_imm:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var16
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var16
+; OUTLINE-ATOMICS-NEXT:    mov w0, #1
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd2_acq_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
   %old = atomicrmw sub i16* @var16, i16 -1 seq_cst
 
 ; CHECK-NOT: dmb
@@ -844,6 +1504,15 @@ define i16 @test_atomic_load_sub_i16_neg_imm() nounwind {
 
 define i32 @test_atomic_load_sub_i32_neg_imm() nounwind {
 ; CHECK-LABEL: test_atomic_load_sub_i32_neg_imm:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i32_neg_imm:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; OUTLINE-ATOMICS-NEXT:    mov w0, #1
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd4_acq_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
   %old = atomicrmw sub i32* @var32, i32 -1 seq_cst
 
 ; CHECK-NOT: dmb
@@ -858,6 +1527,15 @@ define i32 @test_atomic_load_sub_i32_neg_imm() nounwind {
 
 define i64 @test_atomic_load_sub_i64_neg_imm() nounwind {
 ; CHECK-LABEL: test_atomic_load_sub_i64_neg_imm:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i64_neg_imm:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; OUTLINE-ATOMICS-NEXT:    mov w0, #1
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd8_acq_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
   %old = atomicrmw sub i64* @var64, i64 -1 seq_cst
 
 ; CHECK-NOT: dmb
@@ -872,6 +1550,14 @@ define i64 @test_atomic_load_sub_i64_neg_imm() nounwind {
 
 define i8 @test_atomic_load_sub_i8_neg_arg(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_sub_i8_neg_arg:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i8_neg_arg:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var8
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var8
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd1_acq_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
   %neg = sub i8 0, %offset
   %old = atomicrmw sub i8* @var8, i8 %neg seq_cst
 
@@ -886,6 +1572,14 @@ define i8 @test_atomic_load_sub_i8_neg_arg(i8 %offset) nounwind {
 
 define i16 @test_atomic_load_sub_i16_neg_arg(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_sub_i16_neg_arg:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i16_neg_arg:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var16
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var16
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd2_acq_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
   %neg = sub i16 0, %offset
   %old = atomicrmw sub i16* @var16, i16 %neg seq_cst
 
@@ -900,6 +1594,14 @@ define i16 @test_atomic_load_sub_i16_neg_arg(i16 %offset) nounwind {
 
 define i32 @test_atomic_load_sub_i32_neg_arg(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_sub_i32_neg_arg:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i32_neg_arg:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd4_acq_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
   %neg = sub i32 0, %offset
   %old = atomicrmw sub i32* @var32, i32 %neg seq_cst
 
@@ -914,6 +1616,14 @@ define i32 @test_atomic_load_sub_i32_neg_arg(i32 %offset) nounwind {
 
 define i64 @test_atomic_load_sub_i64_neg_arg(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_sub_i64_neg_arg:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i64_neg_arg:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd8_acq_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
   %neg = sub i64 0, %offset
   %old = atomicrmw sub i64* @var64, i64 %neg seq_cst
 
@@ -928,6 +1638,15 @@ define i64 @test_atomic_load_sub_i64_neg_arg(i64 %offset) nounwind {
 
 define i8 @test_atomic_load_and_i8(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_and_i8:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i8:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    mvn w0, w0
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var8
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var8
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldclr1_acq_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
   %old = atomicrmw and i8* @var8, i8 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]]
@@ -941,6 +1660,15 @@ define i8 @test_atomic_load_and_i8(i8 %offset) nounwind {
 
 define i16 @test_atomic_load_and_i16(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_and_i16:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i16:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    mvn w0, w0
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var16
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var16
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldclr2_acq_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
   %old = atomicrmw and i16* @var16, i16 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]]
@@ -954,6 +1682,15 @@ define i16 @test_atomic_load_and_i16(i16 %offset) nounwind {
 
 define i32 @test_atomic_load_and_i32(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_and_i32:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i32:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    mvn w0, w0
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldclr4_acq_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
   %old = atomicrmw and i32* @var32, i32 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]]
@@ -967,6 +1704,15 @@ define i32 @test_atomic_load_and_i32(i32 %offset) nounwind {
 
 define i64 @test_atomic_load_and_i64(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_and_i64:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i64:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    mvn x0, x0
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldclr8_acq_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
   %old = atomicrmw and i64* @var64, i64 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: mvn x[[NOT:[0-9]+]], x[[OLD:[0-9]+]]
@@ -980,6 +1726,15 @@ define i64 @test_atomic_load_and_i64(i64 %offset) nounwind {
 
 define i8 @test_atomic_load_and_i8_inv_imm() nounwind {
 ; CHECK-LABEL: test_atomic_load_and_i8_inv_imm:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i8_inv_imm:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var8
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var8
+; OUTLINE-ATOMICS-NEXT:    mov w0, #1
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldclr1_acq_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
   %old = atomicrmw and i8* @var8, i8 -2 seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
@@ -992,6 +1747,15 @@ define i8 @test_atomic_load_and_i8_inv_imm() nounwind {
 
 define i16 @test_atomic_load_and_i16_inv_imm() nounwind {
 ; CHECK-LABEL: test_atomic_load_and_i16_inv_imm:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i16_inv_imm:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var16
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var16
+; OUTLINE-ATOMICS-NEXT:    mov w0, #1
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldclr2_acq_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
   %old = atomicrmw and i16* @var16, i16 -2 seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
@@ -1004,6 +1768,15 @@ define i16 @test_atomic_load_and_i16_inv_imm() nounwind {
 
 define i32 @test_atomic_load_and_i32_inv_imm() nounwind {
 ; CHECK-LABEL: test_atomic_load_and_i32_inv_imm:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i32_inv_imm:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; OUTLINE-ATOMICS-NEXT:    mov w0, #1
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldclr4_acq_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
   %old = atomicrmw and i32* @var32, i32 -2 seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
@@ -1016,6 +1789,15 @@ define i32 @test_atomic_load_and_i32_inv_imm() nounwind {
 
 define i64 @test_atomic_load_and_i64_inv_imm() nounwind {
 ; CHECK-LABEL: test_atomic_load_and_i64_inv_imm:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i64_inv_imm:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; OUTLINE-ATOMICS-NEXT:    mov w0, #1
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldclr8_acq_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
   %old = atomicrmw and i64* @var64, i64 -2 seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
@@ -1028,6 +1810,14 @@ define i64 @test_atomic_load_and_i64_inv_imm() nounwind {
 
 define i8 @test_atomic_load_and_i8_inv_arg(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_and_i8_inv_arg:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i8_inv_arg:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var8
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var8
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldclr1_acq_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
   %inv = xor i8 %offset, -1
   %old = atomicrmw and i8* @var8, i8 %inv seq_cst
 ; CHECK-NOT: dmb
@@ -1040,6 +1830,14 @@ define i8 @test_atomic_load_and_i8_inv_arg(i8 %offset) nounwind {
 
 define i16 @test_atomic_load_and_i16_inv_arg(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_and_i16_inv_arg:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i16_inv_arg:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var16
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var16
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldclr2_acq_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
   %inv = xor i16 %offset, -1
   %old = atomicrmw and i16* @var16, i16 %inv seq_cst
 ; CHECK-NOT: dmb
@@ -1052,6 +1850,14 @@ define i16 @test_atomic_load_and_i16_inv_arg(i16 %offset) nounwind {
 
 define i32 @test_atomic_load_and_i32_inv_arg(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_and_i32_inv_arg:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i32_inv_arg:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldclr4_acq_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
   %inv = xor i32 %offset, -1
   %old = atomicrmw and i32* @var32, i32 %inv seq_cst
 ; CHECK-NOT: dmb
@@ -1064,6 +1870,14 @@ define i32 @test_atomic_load_and_i32_inv_arg(i32 %offset) nounwind {
 
 define i64 @test_atomic_load_and_i64_inv_arg(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_and_i64_inv_arg:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i64_inv_arg:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldclr8_acq_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
   %inv = xor i64 %offset, -1
   %old = atomicrmw and i64* @var64, i64 %inv seq_cst
 ; CHECK-NOT: dmb
@@ -1076,6 +1890,15 @@ define i64 @test_atomic_load_and_i64_inv_arg(i64 %offset) nounwind {
 
 define void @test_atomic_load_and_i32_noret(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_and_i32_noret:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i32_noret:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    mvn w0, w0
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldclr4_acq_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
   atomicrmw and i32* @var32, i32 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]]
@@ -1089,6 +1912,15 @@ define void @test_atomic_load_and_i32_noret(i32 %offset) nounwind {
 
 define void @test_atomic_load_and_i64_noret(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_and_i64_noret:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i64_noret:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    mvn x0, x0
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldclr8_acq_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
   atomicrmw and i64* @var64, i64 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: mvn x[[NOT:[0-9]+]], x[[OLD:[0-9]+]]
@@ -1102,6 +1934,14 @@ define void @test_atomic_load_and_i64_noret(i64 %offset) nounwind {
 
 define i8 @test_atomic_load_add_i8_acq_rel(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_add_i8_acq_rel:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i8_acq_rel:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var8
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var8
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd1_acq_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw add i8* @var8, i8 %offset acq_rel
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
@@ -1115,6 +1955,14 @@ define i8 @test_atomic_load_add_i8_acq_rel(i8 %offset) nounwind {
 
 define i16 @test_atomic_load_add_i16_acq_rel(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_add_i16_acq_rel:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i16_acq_rel:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var16
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var16
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd2_acq_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw add i16* @var16, i16 %offset acq_rel
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
@@ -1128,6 +1976,14 @@ define i16 @test_atomic_load_add_i16_acq_rel(i16 %offset) nounwind {
 
 define i32 @test_atomic_load_add_i32_acq_rel(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_add_i32_acq_rel:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i32_acq_rel:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd4_acq_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw add i32* @var32, i32 %offset acq_rel
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
@@ -1141,6 +1997,14 @@ define i32 @test_atomic_load_add_i32_acq_rel(i32 %offset) nounwind {
 
 define i64 @test_atomic_load_add_i64_acq_rel(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_add_i64_acq_rel:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i64_acq_rel:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd8_acq_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw add i64* @var64, i64 %offset acq_rel
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
@@ -1154,6 +2018,14 @@ define i64 @test_atomic_load_add_i64_acq_rel(i64 %offset) nounwind {
 
 define void @test_atomic_load_add_i32_noret_acq_rel(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_add_i32_noret_acq_rel:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i32_noret_acq_rel:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd4_acq_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw add i32* @var32, i32 %offset acq_rel
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
@@ -1166,6 +2038,14 @@ define void @test_atomic_load_add_i32_noret_acq_rel(i32 %offset) nounwind {
 
 define void @test_atomic_load_add_i64_noret_acq_rel(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_add_i64_noret_acq_rel:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i64_noret_acq_rel:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd8_acq_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw add i64* @var64, i64 %offset acq_rel
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
@@ -1178,6 +2058,14 @@ define void @test_atomic_load_add_i64_noret_acq_rel(i64 %offset) nounwind {
 
 define i8 @test_atomic_load_add_i8_acquire(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_add_i8_acquire:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i8_acquire:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var8
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var8
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd1_acq
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw add i8* @var8, i8 %offset acquire
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
@@ -1191,6 +2079,14 @@ define i8 @test_atomic_load_add_i8_acquire(i8 %offset) nounwind {
 
 define i16 @test_atomic_load_add_i16_acquire(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_add_i16_acquire:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i16_acquire:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var16
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var16
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd2_acq
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw add i16* @var16, i16 %offset acquire
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
@@ -1204,6 +2100,14 @@ define i16 @test_atomic_load_add_i16_acquire(i16 %offset) nounwind {
 
 define i32 @test_atomic_load_add_i32_acquire(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_add_i32_acquire:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i32_acquire:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd4_acq
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw add i32* @var32, i32 %offset acquire
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
@@ -1217,6 +2121,14 @@ define i32 @test_atomic_load_add_i32_acquire(i32 %offset) nounwind {
 
 define i64 @test_atomic_load_add_i64_acquire(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_add_i64_acquire:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i64_acquire:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd8_acq
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw add i64* @var64, i64 %offset acquire
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
@@ -1230,6 +2142,14 @@ define i64 @test_atomic_load_add_i64_acquire(i64 %offset) nounwind {
 
 define void @test_atomic_load_add_i32_noret_acquire(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_add_i32_noret_acquire:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i32_noret_acquire:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd4_acq
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw add i32* @var32, i32 %offset acquire
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
@@ -1242,6 +2162,14 @@ define void @test_atomic_load_add_i32_noret_acquire(i32 %offset) nounwind {
 
 define void @test_atomic_load_add_i64_noret_acquire(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_add_i64_noret_acquire:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i64_noret_acquire:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd8_acq
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw add i64* @var64, i64 %offset acquire
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
@@ -1254,6 +2182,14 @@ define void @test_atomic_load_add_i64_noret_acquire(i64 %offset) nounwind {
 
 define i8 @test_atomic_load_add_i8_monotonic(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_add_i8_monotonic:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i8_monotonic:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var8
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var8
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd1_relax
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw add i8* @var8, i8 %offset monotonic
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
@@ -1267,6 +2203,14 @@ define i8 @test_atomic_load_add_i8_monotonic(i8 %offset) nounwind {
 
 define i16 @test_atomic_load_add_i16_monotonic(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_add_i16_monotonic:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i16_monotonic:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var16
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var16
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd2_relax
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw add i16* @var16, i16 %offset monotonic
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
@@ -1280,6 +2224,14 @@ define i16 @test_atomic_load_add_i16_monotonic(i16 %offset) nounwind {
 
 define i32 @test_atomic_load_add_i32_monotonic(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_add_i32_monotonic:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i32_monotonic:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd4_relax
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw add i32* @var32, i32 %offset monotonic
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
@@ -1293,6 +2245,14 @@ define i32 @test_atomic_load_add_i32_monotonic(i32 %offset) nounwind {
 
 define i64 @test_atomic_load_add_i64_monotonic(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_add_i64_monotonic:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i64_monotonic:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd8_relax
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw add i64* @var64, i64 %offset monotonic
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
@@ -1306,6 +2266,14 @@ define i64 @test_atomic_load_add_i64_monotonic(i64 %offset) nounwind {
 
 define void @test_atomic_load_add_i32_noret_monotonic(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_add_i32_noret_monotonic:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i32_noret_monotonic:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd4_relax
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw add i32* @var32, i32 %offset monotonic
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
@@ -1318,6 +2286,14 @@ define void @test_atomic_load_add_i32_noret_monotonic(i32 %offset) nounwind {
 
 define void @test_atomic_load_add_i64_noret_monotonic(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_add_i64_noret_monotonic:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i64_noret_monotonic:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd8_relax
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw add i64* @var64, i64 %offset monotonic
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
@@ -1330,6 +2306,14 @@ define void @test_atomic_load_add_i64_noret_monotonic(i64 %offset) nounwind {
 
 define i8 @test_atomic_load_add_i8_release(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_add_i8_release:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i8_release:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var8
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var8
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd1_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw add i8* @var8, i8 %offset release
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
@@ -1343,6 +2327,14 @@ define i8 @test_atomic_load_add_i8_release(i8 %offset) nounwind {
 
 define i16 @test_atomic_load_add_i16_release(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_add_i16_release:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i16_release:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var16
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var16
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd2_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw add i16* @var16, i16 %offset release
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
@@ -1356,6 +2348,14 @@ define i16 @test_atomic_load_add_i16_release(i16 %offset) nounwind {
 
 define i32 @test_atomic_load_add_i32_release(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_add_i32_release:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i32_release:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd4_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw add i32* @var32, i32 %offset release
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
@@ -1369,6 +2369,14 @@ define i32 @test_atomic_load_add_i32_release(i32 %offset) nounwind {
 
 define i64 @test_atomic_load_add_i64_release(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_add_i64_release:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i64_release:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd8_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw add i64* @var64, i64 %offset release
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
@@ -1382,6 +2390,14 @@ define i64 @test_atomic_load_add_i64_release(i64 %offset) nounwind {
 
 define void @test_atomic_load_add_i32_noret_release(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_add_i32_noret_release:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i32_noret_release:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd4_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw add i32* @var32, i32 %offset release
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
@@ -1394,6 +2410,14 @@ define void @test_atomic_load_add_i32_noret_release(i32 %offset) nounwind {
 
 define void @test_atomic_load_add_i64_noret_release(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_add_i64_noret_release:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i64_noret_release:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd8_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw add i64* @var64, i64 %offset release
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
@@ -1406,6 +2430,14 @@ define void @test_atomic_load_add_i64_noret_release(i64 %offset) nounwind {
 
 define i8 @test_atomic_load_add_i8_seq_cst(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_add_i8_seq_cst:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i8_seq_cst:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var8
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var8
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd1_acq_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw add i8* @var8, i8 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
@@ -1419,6 +2451,14 @@ define i8 @test_atomic_load_add_i8_seq_cst(i8 %offset) nounwind {
 
 define i16 @test_atomic_load_add_i16_seq_cst(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_add_i16_seq_cst:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i16_seq_cst:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var16
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var16
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd2_acq_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw add i16* @var16, i16 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
@@ -1432,6 +2472,14 @@ define i16 @test_atomic_load_add_i16_seq_cst(i16 %offset) nounwind {
 
 define i32 @test_atomic_load_add_i32_seq_cst(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_add_i32_seq_cst:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i32_seq_cst:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd4_acq_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw add i32* @var32, i32 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
@@ -1445,6 +2493,14 @@ define i32 @test_atomic_load_add_i32_seq_cst(i32 %offset) nounwind {
 
 define i64 @test_atomic_load_add_i64_seq_cst(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_add_i64_seq_cst:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i64_seq_cst:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd8_acq_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw add i64* @var64, i64 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
@@ -1458,6 +2514,14 @@ define i64 @test_atomic_load_add_i64_seq_cst(i64 %offset) nounwind {
 
 define void @test_atomic_load_add_i32_noret_seq_cst(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_add_i32_noret_seq_cst:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i32_noret_seq_cst:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd4_acq_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw add i32* @var32, i32 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
@@ -1470,6 +2534,14 @@ define void @test_atomic_load_add_i32_noret_seq_cst(i32 %offset) nounwind {
 
 define void @test_atomic_load_add_i64_noret_seq_cst(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_add_i64_noret_seq_cst:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i64_noret_seq_cst:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd8_acq_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw add i64* @var64, i64 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
@@ -1482,6 +2554,15 @@ define void @test_atomic_load_add_i64_noret_seq_cst(i64 %offset) nounwind {
 
 define i8 @test_atomic_load_and_i8_acq_rel(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_and_i8_acq_rel:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i8_acq_rel:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    mvn w0, w0
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var8
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var8
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldclr1_acq_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
   %old = atomicrmw and i8* @var8, i8 %offset acq_rel
 ; CHECK-NOT: dmb
 ; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]]
@@ -1495,6 +2576,15 @@ define i8 @test_atomic_load_and_i8_acq_rel(i8 %offset) nounwind {
 
 define i16 @test_atomic_load_and_i16_acq_rel(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_and_i16_acq_rel:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i16_acq_rel:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    mvn w0, w0
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var16
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var16
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldclr2_acq_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
   %old = atomicrmw and i16* @var16, i16 %offset acq_rel
 ; CHECK-NOT: dmb
 ; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]]
@@ -1508,6 +2598,15 @@ define i16 @test_atomic_load_and_i16_acq_rel(i16 %offset) nounwind {
 
 define i32 @test_atomic_load_and_i32_acq_rel(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_and_i32_acq_rel:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i32_acq_rel:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    mvn w0, w0
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldclr4_acq_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
   %old = atomicrmw and i32* @var32, i32 %offset acq_rel
 ; CHECK-NOT: dmb
 ; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]]
@@ -1521,6 +2620,15 @@ define i32 @test_atomic_load_and_i32_acq_rel(i32 %offset) nounwind {
 
 define i64 @test_atomic_load_and_i64_acq_rel(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_and_i64_acq_rel:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i64_acq_rel:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    mvn x0, x0
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldclr8_acq_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
   %old = atomicrmw and i64* @var64, i64 %offset acq_rel
 ; CHECK-NOT: dmb
 ; CHECK: mvn x[[NOT:[0-9]+]], x[[OLD:[0-9]+]]
@@ -1534,6 +2642,15 @@ define i64 @test_atomic_load_and_i64_acq_rel(i64 %offset) nounwind {
 
 define void @test_atomic_load_and_i32_noret_acq_rel(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_and_i32_noret_acq_rel:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i32_noret_acq_rel:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    mvn w0, w0
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldclr4_acq_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
   atomicrmw and i32* @var32, i32 %offset acq_rel
 ; CHECK-NOT: dmb
 ; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]]
@@ -1547,6 +2664,15 @@ define void @test_atomic_load_and_i32_noret_acq_rel(i32 %offset) nounwind {
 
 define void @test_atomic_load_and_i64_noret_acq_rel(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_and_i64_noret_acq_rel:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i64_noret_acq_rel:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    mvn x0, x0
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldclr8_acq_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
   atomicrmw and i64* @var64, i64 %offset acq_rel
 ; CHECK-NOT: dmb
 ; CHECK: mvn x[[NOT:[0-9]+]], x[[OLD:[0-9]+]]
@@ -1560,6 +2686,15 @@ define void @test_atomic_load_and_i64_noret_acq_rel(i64 %offset) nounwind {
 
 define i8 @test_atomic_load_and_i8_acquire(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_and_i8_acquire:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i8_acquire:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    mvn w0, w0
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var8
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var8
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldclr1_acq
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
   %old = atomicrmw and i8* @var8, i8 %offset acquire
 ; CHECK-NOT: dmb
 ; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]]
@@ -1573,6 +2708,15 @@ define i8 @test_atomic_load_and_i8_acquire(i8 %offset) nounwind {
 
 define i16 @test_atomic_load_and_i16_acquire(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_and_i16_acquire:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i16_acquire:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    mvn w0, w0
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var16
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var16
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldclr2_acq
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
   %old = atomicrmw and i16* @var16, i16 %offset acquire
 ; CHECK-NOT: dmb
 ; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]]
@@ -1586,6 +2730,15 @@ define i16 @test_atomic_load_and_i16_acquire(i16 %offset) nounwind {
 
 define i32 @test_atomic_load_and_i32_acquire(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_and_i32_acquire:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i32_acquire:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    mvn w0, w0
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldclr4_acq
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
   %old = atomicrmw and i32* @var32, i32 %offset acquire
 ; CHECK-NOT: dmb
 ; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]]
@@ -1599,6 +2752,15 @@ define i32 @test_atomic_load_and_i32_acquire(i32 %offset) nounwind {
 
 define i64 @test_atomic_load_and_i64_acquire(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_and_i64_acquire:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i64_acquire:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    mvn x0, x0
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldclr8_acq
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
   %old = atomicrmw and i64* @var64, i64 %offset acquire
 ; CHECK-NOT: dmb
 ; CHECK: mvn x[[NOT:[0-9]+]], x[[OLD:[0-9]+]]
@@ -1612,6 +2774,15 @@ define i64 @test_atomic_load_and_i64_acquire(i64 %offset) nounwind {
 
 define void @test_atomic_load_and_i32_noret_acquire(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_and_i32_noret_acquire:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i32_noret_acquire:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    mvn w0, w0
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldclr4_acq
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
   atomicrmw and i32* @var32, i32 %offset acquire
 ; CHECK-NOT: dmb
 ; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]]
@@ -1625,6 +2796,15 @@ define void @test_atomic_load_and_i32_noret_acquire(i32 %offset) nounwind {
 
 define void @test_atomic_load_and_i64_noret_acquire(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_and_i64_noret_acquire:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i64_noret_acquire:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    mvn x0, x0
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldclr8_acq
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
   atomicrmw and i64* @var64, i64 %offset acquire
 ; CHECK-NOT: dmb
 ; CHECK: mvn x[[NOT:[0-9]+]], x[[OLD:[0-9]+]]
@@ -1638,6 +2818,15 @@ define void @test_atomic_load_and_i64_noret_acquire(i64 %offset) nounwind {
 
 define i8 @test_atomic_load_and_i8_monotonic(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_and_i8_monotonic:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i8_monotonic:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    mvn w0, w0
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var8
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var8
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldclr1_relax
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
   %old = atomicrmw and i8* @var8, i8 %offset monotonic
 ; CHECK-NOT: dmb
 ; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]]
@@ -1651,6 +2840,15 @@ define i8 @test_atomic_load_and_i8_monotonic(i8 %offset) nounwind {
 
 define i16 @test_atomic_load_and_i16_monotonic(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_and_i16_monotonic:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i16_monotonic:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    mvn w0, w0
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var16
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var16
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldclr2_relax
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
   %old = atomicrmw and i16* @var16, i16 %offset monotonic
 ; CHECK-NOT: dmb
 ; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]]
@@ -1664,6 +2862,15 @@ define i16 @test_atomic_load_and_i16_monotonic(i16 %offset) nounwind {
 
 define i32 @test_atomic_load_and_i32_monotonic(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_and_i32_monotonic:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i32_monotonic:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    mvn w0, w0
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldclr4_relax
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
   %old = atomicrmw and i32* @var32, i32 %offset monotonic
 ; CHECK-NOT: dmb
 ; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]]
@@ -1677,6 +2884,15 @@ define i32 @test_atomic_load_and_i32_monotonic(i32 %offset) nounwind {
 
 define i64 @test_atomic_load_and_i64_monotonic(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_and_i64_monotonic:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i64_monotonic:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    mvn x0, x0
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldclr8_relax
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
   %old = atomicrmw and i64* @var64, i64 %offset monotonic
 ; CHECK-NOT: dmb
 ; CHECK: mvn x[[NOT:[0-9]+]], x[[OLD:[0-9]+]]
@@ -1690,6 +2906,15 @@ define i64 @test_atomic_load_and_i64_monotonic(i64 %offset) nounwind {
 
 define void @test_atomic_load_and_i32_noret_monotonic(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_and_i32_noret_monotonic:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i32_noret_monotonic:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    mvn w0, w0
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldclr4_relax
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
   atomicrmw and i32* @var32, i32 %offset monotonic
 ; CHECK-NOT: dmb
 ; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]]
@@ -1703,6 +2928,15 @@ define void @test_atomic_load_and_i32_noret_monotonic(i32 %offset) nounwind {
 
 define void @test_atomic_load_and_i64_noret_monotonic(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_and_i64_noret_monotonic:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i64_noret_monotonic:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    mvn x0, x0
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldclr8_relax
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
   atomicrmw and i64* @var64, i64 %offset monotonic
 ; CHECK-NOT: dmb
 ; CHECK: mvn x[[NOT:[0-9]+]], x[[OLD:[0-9]+]]
@@ -1716,6 +2950,15 @@ define void @test_atomic_load_and_i64_noret_monotonic(i64 %offset) nounwind {
 
 define i8 @test_atomic_load_and_i8_release(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_and_i8_release:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i8_release:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    mvn w0, w0
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var8
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var8
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldclr1_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
   %old = atomicrmw and i8* @var8, i8 %offset release
 ; CHECK-NOT: dmb
 ; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]]
@@ -1729,6 +2972,15 @@ define i8 @test_atomic_load_and_i8_release(i8 %offset) nounwind {
 
 define i16 @test_atomic_load_and_i16_release(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_and_i16_release:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i16_release:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    mvn w0, w0
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var16
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var16
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldclr2_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
   %old = atomicrmw and i16* @var16, i16 %offset release
 ; CHECK-NOT: dmb
 ; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]]
@@ -1742,6 +2994,15 @@ define i16 @test_atomic_load_and_i16_release(i16 %offset) nounwind {
 
 define i32 @test_atomic_load_and_i32_release(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_and_i32_release:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i32_release:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    mvn w0, w0
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldclr4_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
   %old = atomicrmw and i32* @var32, i32 %offset release
 ; CHECK-NOT: dmb
 ; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]]
@@ -1755,6 +3016,15 @@ define i32 @test_atomic_load_and_i32_release(i32 %offset) nounwind {
 
 define i64 @test_atomic_load_and_i64_release(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_and_i64_release:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i64_release:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    mvn x0, x0
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldclr8_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
   %old = atomicrmw and i64* @var64, i64 %offset release
 ; CHECK-NOT: dmb
 ; CHECK: mvn x[[NOT:[0-9]+]], x[[OLD:[0-9]+]]
@@ -1768,6 +3038,15 @@ define i64 @test_atomic_load_and_i64_release(i64 %offset) nounwind {
 
 define void @test_atomic_load_and_i32_noret_release(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_and_i32_noret_release:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i32_noret_release:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    mvn w0, w0
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldclr4_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
   atomicrmw and i32* @var32, i32 %offset release
 ; CHECK-NOT: dmb
 ; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]]
@@ -1781,6 +3060,15 @@ define void @test_atomic_load_and_i32_noret_release(i32 %offset) nounwind {
 
 define void @test_atomic_load_and_i64_noret_release(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_and_i64_noret_release:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i64_noret_release:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    mvn x0, x0
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldclr8_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
   atomicrmw and i64* @var64, i64 %offset release
 ; CHECK-NOT: dmb
 ; CHECK: mvn x[[NOT:[0-9]+]], x[[OLD:[0-9]+]]
@@ -1794,6 +3082,15 @@ define void @test_atomic_load_and_i64_noret_release(i64 %offset) nounwind {
 
 define i8 @test_atomic_load_and_i8_seq_cst(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_and_i8_seq_cst:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i8_seq_cst:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    mvn w0, w0
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var8
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var8
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldclr1_acq_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
   %old = atomicrmw and i8* @var8, i8 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]]
@@ -1807,6 +3104,15 @@ define i8 @test_atomic_load_and_i8_seq_cst(i8 %offset) nounwind {
 
 define i16 @test_atomic_load_and_i16_seq_cst(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_and_i16_seq_cst:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i16_seq_cst:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    mvn w0, w0
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var16
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var16
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldclr2_acq_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
   %old = atomicrmw and i16* @var16, i16 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]]
@@ -1820,6 +3126,15 @@ define i16 @test_atomic_load_and_i16_seq_cst(i16 %offset) nounwind {
 
 define i32 @test_atomic_load_and_i32_seq_cst(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_and_i32_seq_cst:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i32_seq_cst:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    mvn w0, w0
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldclr4_acq_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
   %old = atomicrmw and i32* @var32, i32 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]]
@@ -1833,6 +3148,15 @@ define i32 @test_atomic_load_and_i32_seq_cst(i32 %offset) nounwind {
 
 define i64 @test_atomic_load_and_i64_seq_cst(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_and_i64_seq_cst:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i64_seq_cst:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    mvn x0, x0
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldclr8_acq_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
   %old = atomicrmw and i64* @var64, i64 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: mvn x[[NOT:[0-9]+]], x[[OLD:[0-9]+]]
@@ -1846,6 +3170,15 @@ define i64 @test_atomic_load_and_i64_seq_cst(i64 %offset) nounwind {
 
 define void @test_atomic_load_and_i32_noret_seq_cst(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_and_i32_noret_seq_cst:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i32_noret_seq_cst:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    mvn w0, w0
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldclr4_acq_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
   atomicrmw and i32* @var32, i32 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]]
@@ -1859,6 +3192,15 @@ define void @test_atomic_load_and_i32_noret_seq_cst(i32 %offset) nounwind {
 
 define void @test_atomic_load_and_i64_noret_seq_cst(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_and_i64_noret_seq_cst:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i64_noret_seq_cst:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    mvn x0, x0
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldclr8_acq_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
   atomicrmw and i64* @var64, i64 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: mvn x[[NOT:[0-9]+]], x[[OLD:[0-9]+]]
@@ -1872,6 +3214,14 @@ define void @test_atomic_load_and_i64_noret_seq_cst(i64 %offset) nounwind {
 
 define i8 @test_atomic_cmpxchg_i8_acquire(i8 %wanted, i8 %new) nounwind {
 ; CHECK-LABEL: test_atomic_cmpxchg_i8_acquire:
+; OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i8_acquire:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x2, var8
+; OUTLINE-ATOMICS-NEXT:    add x2, x2, :lo12:var8
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_cas1_acq
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    %pair = cmpxchg i8* @var8, i8 %wanted, i8 %new acquire acquire
    %old = extractvalue { i8, i1 } %pair, 0
 
@@ -1887,6 +3237,14 @@ define i8 @test_atomic_cmpxchg_i8_acquire(i8 %wanted, i8 %new) nounwind {
 
 define i16 @test_atomic_cmpxchg_i16_acquire(i16 %wanted, i16 %new) nounwind {
 ; CHECK-LABEL: test_atomic_cmpxchg_i16_acquire:
+; OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i16_acquire:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x2, var16
+; OUTLINE-ATOMICS-NEXT:    add x2, x2, :lo12:var16
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_cas2_acq
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    %pair = cmpxchg i16* @var16, i16 %wanted, i16 %new acquire acquire
    %old = extractvalue { i16, i1 } %pair, 0
 
@@ -1902,6 +3260,14 @@ define i16 @test_atomic_cmpxchg_i16_acquire(i16 %wanted, i16 %new) nounwind {
 
 define i32 @test_atomic_cmpxchg_i32_acquire(i32 %wanted, i32 %new) nounwind {
 ; CHECK-LABEL: test_atomic_cmpxchg_i32_acquire:
+; OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i32_acquire:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x2, var32
+; OUTLINE-ATOMICS-NEXT:    add x2, x2, :lo12:var32
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_cas4_acq
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    %pair = cmpxchg i32* @var32, i32 %wanted, i32 %new acquire acquire
    %old = extractvalue { i32, i1 } %pair, 0
 
@@ -1917,6 +3283,14 @@ define i32 @test_atomic_cmpxchg_i32_acquire(i32 %wanted, i32 %new) nounwind {
 
 define i64 @test_atomic_cmpxchg_i64_acquire(i64 %wanted, i64 %new) nounwind {
 ; CHECK-LABEL: test_atomic_cmpxchg_i64_acquire:
+; OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i64_acquire:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x2, var64
+; OUTLINE-ATOMICS-NEXT:    add x2, x2, :lo12:var64
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_cas8_acq
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    %pair = cmpxchg i64* @var64, i64 %wanted, i64 %new acquire acquire
    %old = extractvalue { i64, i1 } %pair, 0
 
@@ -1932,6 +3306,14 @@ define i64 @test_atomic_cmpxchg_i64_acquire(i64 %wanted, i64 %new) nounwind {
 
 define i128 @test_atomic_cmpxchg_i128_acquire(i128 %wanted, i128 %new) nounwind {
 ; CHECK-LABEL: test_atomic_cmpxchg_i128_acquire:
+; OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i128_acquire:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x4, var128
+; OUTLINE-ATOMICS-NEXT:    add x4, x4, :lo12:var128
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_cas16_acq
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    %pair = cmpxchg i128* @var128, i128 %wanted, i128 %new acquire acquire
    %old = extractvalue { i128, i1 } %pair, 0
 
@@ -1947,6 +3329,14 @@ define i128 @test_atomic_cmpxchg_i128_acquire(i128 %wanted, i128 %new) nounwind
 
 define i8 @test_atomic_cmpxchg_i8_monotonic(i8 %wanted, i8 %new) nounwind {
 ; CHECK-LABEL: test_atomic_cmpxchg_i8_monotonic:
+; OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i8_monotonic:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x2, var8
+; OUTLINE-ATOMICS-NEXT:    add x2, x2, :lo12:var8
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_cas1_relax
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    %pair = cmpxchg i8* @var8, i8 %wanted, i8 %new monotonic monotonic
    %old = extractvalue { i8, i1 } %pair, 0
 
@@ -1962,6 +3352,14 @@ define i8 @test_atomic_cmpxchg_i8_monotonic(i8 %wanted, i8 %new) nounwind {
 
 define i16 @test_atomic_cmpxchg_i16_monotonic(i16 %wanted, i16 %new) nounwind {
 ; CHECK-LABEL: test_atomic_cmpxchg_i16_monotonic:
+; OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i16_monotonic:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x2, var16
+; OUTLINE-ATOMICS-NEXT:    add x2, x2, :lo12:var16
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_cas2_relax
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    %pair = cmpxchg i16* @var16, i16 %wanted, i16 %new monotonic monotonic
    %old = extractvalue { i16, i1 } %pair, 0
 
@@ -1977,6 +3375,14 @@ define i16 @test_atomic_cmpxchg_i16_monotonic(i16 %wanted, i16 %new) nounwind {
 
 define i32 @test_atomic_cmpxchg_i32_monotonic(i32 %wanted, i32 %new) nounwind {
 ; CHECK-LABEL: test_atomic_cmpxchg_i32_monotonic:
+; OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i32_monotonic:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x2, var32
+; OUTLINE-ATOMICS-NEXT:    add x2, x2, :lo12:var32
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_cas4_relax
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    %pair = cmpxchg i32* @var32, i32 %wanted, i32 %new monotonic monotonic
    %old = extractvalue { i32, i1 } %pair, 0
 
@@ -1992,6 +3398,14 @@ define i32 @test_atomic_cmpxchg_i32_monotonic(i32 %wanted, i32 %new) nounwind {
 
 define i64 @test_atomic_cmpxchg_i64_monotonic(i64 %wanted, i64 %new) nounwind {
 ; CHECK-LABEL: test_atomic_cmpxchg_i64_monotonic:
+; OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i64_monotonic:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x2, var64
+; OUTLINE-ATOMICS-NEXT:    add x2, x2, :lo12:var64
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_cas8_relax
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    %pair = cmpxchg i64* @var64, i64 %wanted, i64 %new monotonic monotonic
    %old = extractvalue { i64, i1 } %pair, 0
 
@@ -2007,6 +3421,14 @@ define i64 @test_atomic_cmpxchg_i64_monotonic(i64 %wanted, i64 %new) nounwind {
 
 define i128 @test_atomic_cmpxchg_i128_monotonic(i128 %wanted, i128 %new) nounwind {
 ; CHECK-LABEL: test_atomic_cmpxchg_i128_monotonic:
+; OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i128_monotonic:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x4, var128
+; OUTLINE-ATOMICS-NEXT:    add x4, x4, :lo12:var128
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_cas16_relax
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    %pair = cmpxchg i128* @var128, i128 %wanted, i128 %new monotonic monotonic
    %old = extractvalue { i128, i1 } %pair, 0
 
@@ -2022,6 +3444,14 @@ define i128 @test_atomic_cmpxchg_i128_monotonic(i128 %wanted, i128 %new) nounwin
 
 define i8 @test_atomic_cmpxchg_i8_seq_cst(i8 %wanted, i8 %new) nounwind {
 ; CHECK-LABEL: test_atomic_cmpxchg_i8_seq_cst:
+; OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i8_seq_cst:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x2, var8
+; OUTLINE-ATOMICS-NEXT:    add x2, x2, :lo12:var8
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_cas1_acq_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    %pair = cmpxchg i8* @var8, i8 %wanted, i8 %new seq_cst seq_cst
    %old = extractvalue { i8, i1 } %pair, 0
 
@@ -2037,6 +3467,14 @@ define i8 @test_atomic_cmpxchg_i8_seq_cst(i8 %wanted, i8 %new) nounwind {
 
 define i16 @test_atomic_cmpxchg_i16_seq_cst(i16 %wanted, i16 %new) nounwind {
 ; CHECK-LABEL: test_atomic_cmpxchg_i16_seq_cst:
+; OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i16_seq_cst:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x2, var16
+; OUTLINE-ATOMICS-NEXT:    add x2, x2, :lo12:var16
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_cas2_acq_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    %pair = cmpxchg i16* @var16, i16 %wanted, i16 %new seq_cst seq_cst
    %old = extractvalue { i16, i1 } %pair, 0
 
@@ -2052,6 +3490,14 @@ define i16 @test_atomic_cmpxchg_i16_seq_cst(i16 %wanted, i16 %new) nounwind {
 
 define i32 @test_atomic_cmpxchg_i32_seq_cst(i32 %wanted, i32 %new) nounwind {
 ; CHECK-LABEL: test_atomic_cmpxchg_i32_seq_cst:
+; OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i32_seq_cst:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x2, var32
+; OUTLINE-ATOMICS-NEXT:    add x2, x2, :lo12:var32
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_cas4_acq_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    %pair = cmpxchg i32* @var32, i32 %wanted, i32 %new seq_cst seq_cst
    %old = extractvalue { i32, i1 } %pair, 0
 
@@ -2067,6 +3513,14 @@ define i32 @test_atomic_cmpxchg_i32_seq_cst(i32 %wanted, i32 %new) nounwind {
 
 define i64 @test_atomic_cmpxchg_i64_seq_cst(i64 %wanted, i64 %new) nounwind {
 ; CHECK-LABEL: test_atomic_cmpxchg_i64_seq_cst:
+; OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i64_seq_cst:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x2, var64
+; OUTLINE-ATOMICS-NEXT:    add x2, x2, :lo12:var64
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_cas8_acq_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    %pair = cmpxchg i64* @var64, i64 %wanted, i64 %new seq_cst seq_cst
    %old = extractvalue { i64, i1 } %pair, 0
 
@@ -2082,6 +3536,14 @@ define i64 @test_atomic_cmpxchg_i64_seq_cst(i64 %wanted, i64 %new) nounwind {
 
 define i128 @test_atomic_cmpxchg_i128_seq_cst(i128 %wanted, i128 %new) nounwind {
 ; CHECK-LABEL: test_atomic_cmpxchg_i128_seq_cst:
+; OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i128_seq_cst:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x4, var128
+; OUTLINE-ATOMICS-NEXT:    add x4, x4, :lo12:var128
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_cas16_acq_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    %pair = cmpxchg i128* @var128, i128 %wanted, i128 %new seq_cst seq_cst
    %old = extractvalue { i128, i1 } %pair, 0
 
@@ -2097,6 +3559,21 @@ define i128 @test_atomic_cmpxchg_i128_seq_cst(i128 %wanted, i128 %new) nounwind
 
 define i8 @test_atomic_load_max_i8_acq_rel(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_max_i8_acq_rel:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i8_acq_rel:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x9, var8
+; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var8
+; OUTLINE-ATOMICS-NEXT:  .LBB158_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldaxrb w10, [x9]
+; OUTLINE-ATOMICS-NEXT:    sxtb w8, w10
+; OUTLINE-ATOMICS-NEXT:    cmp w8, w0, sxtb
+; OUTLINE-ATOMICS-NEXT:    csel w10, w10, w0, gt
+; OUTLINE-ATOMICS-NEXT:    stlxrb w11, w10, [x9]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB158_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw max i8* @var8, i8 %offset acq_rel
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
@@ -2110,6 +3587,21 @@ define i8 @test_atomic_load_max_i8_acq_rel(i8 %offset) nounwind {
 
 define i16 @test_atomic_load_max_i16_acq_rel(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_max_i16_acq_rel:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i16_acq_rel:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x9, var16
+; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var16
+; OUTLINE-ATOMICS-NEXT:  .LBB159_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldaxrh w10, [x9]
+; OUTLINE-ATOMICS-NEXT:    sxth w8, w10
+; OUTLINE-ATOMICS-NEXT:    cmp w8, w0, sxth
+; OUTLINE-ATOMICS-NEXT:    csel w10, w10, w0, gt
+; OUTLINE-ATOMICS-NEXT:    stlxrh w11, w10, [x9]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB159_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw max i16* @var16, i16 %offset acq_rel
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
@@ -2123,6 +3615,20 @@ define i16 @test_atomic_load_max_i16_acq_rel(i16 %offset) nounwind {
 
 define i32 @test_atomic_load_max_i32_acq_rel(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_max_i32_acq_rel:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i32_acq_rel:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x9, var32
+; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var32
+; OUTLINE-ATOMICS-NEXT:  .LBB160_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldaxr w8, [x9]
+; OUTLINE-ATOMICS-NEXT:    cmp w8, w0
+; OUTLINE-ATOMICS-NEXT:    csel w10, w8, w0, gt
+; OUTLINE-ATOMICS-NEXT:    stlxr w11, w10, [x9]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB160_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw max i32* @var32, i32 %offset acq_rel
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
@@ -2136,6 +3642,20 @@ define i32 @test_atomic_load_max_i32_acq_rel(i32 %offset) nounwind {
 
 define i64 @test_atomic_load_max_i64_acq_rel(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_max_i64_acq_rel:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i64_acq_rel:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x9, var64
+; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var64
+; OUTLINE-ATOMICS-NEXT:  .LBB161_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldaxr x8, [x9]
+; OUTLINE-ATOMICS-NEXT:    cmp x8, x0
+; OUTLINE-ATOMICS-NEXT:    csel x10, x8, x0, gt
+; OUTLINE-ATOMICS-NEXT:    stlxr w11, x10, [x9]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB161_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    mov x0, x8
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw max i64* @var64, i64 %offset acq_rel
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
@@ -2149,6 +3669,19 @@ define i64 @test_atomic_load_max_i64_acq_rel(i64 %offset) nounwind {
 
 define void @test_atomic_load_max_i32_noret_acq_rel(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_max_i32_noret_acq_rel:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i32_noret_acq_rel:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x8, var32
+; OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var32
+; OUTLINE-ATOMICS-NEXT:  .LBB162_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldaxr w9, [x8]
+; OUTLINE-ATOMICS-NEXT:    cmp w9, w0
+; OUTLINE-ATOMICS-NEXT:    csel w9, w9, w0, gt
+; OUTLINE-ATOMICS-NEXT:    stlxr w10, w9, [x8]
+; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB162_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw max i32* @var32, i32 %offset acq_rel
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
@@ -2161,6 +3694,19 @@ define void @test_atomic_load_max_i32_noret_acq_rel(i32 %offset) nounwind {
 
 define void @test_atomic_load_max_i64_noret_acq_rel(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_max_i64_noret_acq_rel:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i64_noret_acq_rel:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x8, var64
+; OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var64
+; OUTLINE-ATOMICS-NEXT:  .LBB163_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldaxr x9, [x8]
+; OUTLINE-ATOMICS-NEXT:    cmp x9, x0
+; OUTLINE-ATOMICS-NEXT:    csel x9, x9, x0, gt
+; OUTLINE-ATOMICS-NEXT:    stlxr w10, x9, [x8]
+; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB163_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw max i64* @var64, i64 %offset acq_rel
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
@@ -2173,6 +3719,21 @@ define void @test_atomic_load_max_i64_noret_acq_rel(i64 %offset) nounwind {
 
 define i8 @test_atomic_load_max_i8_acquire(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_max_i8_acquire:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i8_acquire:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x9, var8
+; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var8
+; OUTLINE-ATOMICS-NEXT:  .LBB164_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldaxrb w10, [x9]
+; OUTLINE-ATOMICS-NEXT:    sxtb w8, w10
+; OUTLINE-ATOMICS-NEXT:    cmp w8, w0, sxtb
+; OUTLINE-ATOMICS-NEXT:    csel w10, w10, w0, gt
+; OUTLINE-ATOMICS-NEXT:    stxrb w11, w10, [x9]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB164_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw max i8* @var8, i8 %offset acquire
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
@@ -2186,6 +3747,21 @@ define i8 @test_atomic_load_max_i8_acquire(i8 %offset) nounwind {
 
 define i16 @test_atomic_load_max_i16_acquire(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_max_i16_acquire:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i16_acquire:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x9, var16
+; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var16
+; OUTLINE-ATOMICS-NEXT:  .LBB165_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldaxrh w10, [x9]
+; OUTLINE-ATOMICS-NEXT:    sxth w8, w10
+; OUTLINE-ATOMICS-NEXT:    cmp w8, w0, sxth
+; OUTLINE-ATOMICS-NEXT:    csel w10, w10, w0, gt
+; OUTLINE-ATOMICS-NEXT:    stxrh w11, w10, [x9]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB165_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw max i16* @var16, i16 %offset acquire
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
@@ -2199,6 +3775,20 @@ define i16 @test_atomic_load_max_i16_acquire(i16 %offset) nounwind {
 
 define i32 @test_atomic_load_max_i32_acquire(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_max_i32_acquire:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i32_acquire:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x9, var32
+; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var32
+; OUTLINE-ATOMICS-NEXT:  .LBB166_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldaxr w8, [x9]
+; OUTLINE-ATOMICS-NEXT:    cmp w8, w0
+; OUTLINE-ATOMICS-NEXT:    csel w10, w8, w0, gt
+; OUTLINE-ATOMICS-NEXT:    stxr w11, w10, [x9]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB166_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw max i32* @var32, i32 %offset acquire
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
@@ -2212,6 +3802,20 @@ define i32 @test_atomic_load_max_i32_acquire(i32 %offset) nounwind {
 
 define i64 @test_atomic_load_max_i64_acquire(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_max_i64_acquire:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i64_acquire:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x9, var64
+; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var64
+; OUTLINE-ATOMICS-NEXT:  .LBB167_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldaxr x8, [x9]
+; OUTLINE-ATOMICS-NEXT:    cmp x8, x0
+; OUTLINE-ATOMICS-NEXT:    csel x10, x8, x0, gt
+; OUTLINE-ATOMICS-NEXT:    stxr w11, x10, [x9]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB167_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    mov x0, x8
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw max i64* @var64, i64 %offset acquire
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
@@ -2225,6 +3829,19 @@ define i64 @test_atomic_load_max_i64_acquire(i64 %offset) nounwind {
 
 define void @test_atomic_load_max_i32_noret_acquire(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_max_i32_noret_acquire:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i32_noret_acquire:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x8, var32
+; OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var32
+; OUTLINE-ATOMICS-NEXT:  .LBB168_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldaxr w9, [x8]
+; OUTLINE-ATOMICS-NEXT:    cmp w9, w0
+; OUTLINE-ATOMICS-NEXT:    csel w9, w9, w0, gt
+; OUTLINE-ATOMICS-NEXT:    stxr w10, w9, [x8]
+; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB168_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw max i32* @var32, i32 %offset acquire
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
@@ -2237,6 +3854,19 @@ define void @test_atomic_load_max_i32_noret_acquire(i32 %offset) nounwind {
 
 define void @test_atomic_load_max_i64_noret_acquire(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_max_i64_noret_acquire:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i64_noret_acquire:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x8, var64
+; OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var64
+; OUTLINE-ATOMICS-NEXT:  .LBB169_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldaxr x9, [x8]
+; OUTLINE-ATOMICS-NEXT:    cmp x9, x0
+; OUTLINE-ATOMICS-NEXT:    csel x9, x9, x0, gt
+; OUTLINE-ATOMICS-NEXT:    stxr w10, x9, [x8]
+; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB169_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw max i64* @var64, i64 %offset acquire
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
@@ -2249,6 +3879,21 @@ define void @test_atomic_load_max_i64_noret_acquire(i64 %offset) nounwind {
 
 define i8 @test_atomic_load_max_i8_monotonic(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_max_i8_monotonic:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i8_monotonic:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x9, var8
+; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var8
+; OUTLINE-ATOMICS-NEXT:  .LBB170_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldxrb w10, [x9]
+; OUTLINE-ATOMICS-NEXT:    sxtb w8, w10
+; OUTLINE-ATOMICS-NEXT:    cmp w8, w0, sxtb
+; OUTLINE-ATOMICS-NEXT:    csel w10, w10, w0, gt
+; OUTLINE-ATOMICS-NEXT:    stxrb w11, w10, [x9]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB170_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw max i8* @var8, i8 %offset monotonic
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
@@ -2262,6 +3907,21 @@ define i8 @test_atomic_load_max_i8_monotonic(i8 %offset) nounwind {
 
 define i16 @test_atomic_load_max_i16_monotonic(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_max_i16_monotonic:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i16_monotonic:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x9, var16
+; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var16
+; OUTLINE-ATOMICS-NEXT:  .LBB171_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldxrh w10, [x9]
+; OUTLINE-ATOMICS-NEXT:    sxth w8, w10
+; OUTLINE-ATOMICS-NEXT:    cmp w8, w0, sxth
+; OUTLINE-ATOMICS-NEXT:    csel w10, w10, w0, gt
+; OUTLINE-ATOMICS-NEXT:    stxrh w11, w10, [x9]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB171_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw max i16* @var16, i16 %offset monotonic
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
@@ -2275,6 +3935,20 @@ define i16 @test_atomic_load_max_i16_monotonic(i16 %offset) nounwind {
 
 define i32 @test_atomic_load_max_i32_monotonic(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_max_i32_monotonic:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i32_monotonic:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x9, var32
+; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var32
+; OUTLINE-ATOMICS-NEXT:  .LBB172_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldxr w8, [x9]
+; OUTLINE-ATOMICS-NEXT:    cmp w8, w0
+; OUTLINE-ATOMICS-NEXT:    csel w10, w8, w0, gt
+; OUTLINE-ATOMICS-NEXT:    stxr w11, w10, [x9]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB172_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw max i32* @var32, i32 %offset monotonic
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
@@ -2288,6 +3962,20 @@ define i32 @test_atomic_load_max_i32_monotonic(i32 %offset) nounwind {
 
 define i64 @test_atomic_load_max_i64_monotonic(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_max_i64_monotonic:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i64_monotonic:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x9, var64
+; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var64
+; OUTLINE-ATOMICS-NEXT:  .LBB173_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldxr x8, [x9]
+; OUTLINE-ATOMICS-NEXT:    cmp x8, x0
+; OUTLINE-ATOMICS-NEXT:    csel x10, x8, x0, gt
+; OUTLINE-ATOMICS-NEXT:    stxr w11, x10, [x9]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB173_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    mov x0, x8
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw max i64* @var64, i64 %offset monotonic
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
@@ -2301,6 +3989,19 @@ define i64 @test_atomic_load_max_i64_monotonic(i64 %offset) nounwind {
 
 define void @test_atomic_load_max_i32_noret_monotonic(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_max_i32_noret_monotonic:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i32_noret_monotonic:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x8, var32
+; OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var32
+; OUTLINE-ATOMICS-NEXT:  .LBB174_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldxr w9, [x8]
+; OUTLINE-ATOMICS-NEXT:    cmp w9, w0
+; OUTLINE-ATOMICS-NEXT:    csel w9, w9, w0, gt
+; OUTLINE-ATOMICS-NEXT:    stxr w10, w9, [x8]
+; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB174_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw max i32* @var32, i32 %offset monotonic
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
@@ -2313,6 +4014,19 @@ define void @test_atomic_load_max_i32_noret_monotonic(i32 %offset) nounwind {
 
 define void @test_atomic_load_max_i64_noret_monotonic(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_max_i64_noret_monotonic:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i64_noret_monotonic:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x8, var64
+; OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var64
+; OUTLINE-ATOMICS-NEXT:  .LBB175_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldxr x9, [x8]
+; OUTLINE-ATOMICS-NEXT:    cmp x9, x0
+; OUTLINE-ATOMICS-NEXT:    csel x9, x9, x0, gt
+; OUTLINE-ATOMICS-NEXT:    stxr w10, x9, [x8]
+; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB175_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw max i64* @var64, i64 %offset monotonic
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
@@ -2325,6 +4039,21 @@ define void @test_atomic_load_max_i64_noret_monotonic(i64 %offset) nounwind {
 
 define i8 @test_atomic_load_max_i8_release(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_max_i8_release:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i8_release:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x9, var8
+; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var8
+; OUTLINE-ATOMICS-NEXT:  .LBB176_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldxrb w10, [x9]
+; OUTLINE-ATOMICS-NEXT:    sxtb w8, w10
+; OUTLINE-ATOMICS-NEXT:    cmp w8, w0, sxtb
+; OUTLINE-ATOMICS-NEXT:    csel w10, w10, w0, gt
+; OUTLINE-ATOMICS-NEXT:    stlxrb w11, w10, [x9]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB176_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw max i8* @var8, i8 %offset release
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
@@ -2338,6 +4067,21 @@ define i8 @test_atomic_load_max_i8_release(i8 %offset) nounwind {
 
 define i16 @test_atomic_load_max_i16_release(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_max_i16_release:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i16_release:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x9, var16
+; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var16
+; OUTLINE-ATOMICS-NEXT:  .LBB177_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldxrh w10, [x9]
+; OUTLINE-ATOMICS-NEXT:    sxth w8, w10
+; OUTLINE-ATOMICS-NEXT:    cmp w8, w0, sxth
+; OUTLINE-ATOMICS-NEXT:    csel w10, w10, w0, gt
+; OUTLINE-ATOMICS-NEXT:    stlxrh w11, w10, [x9]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB177_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw max i16* @var16, i16 %offset release
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
@@ -2351,6 +4095,20 @@ define i16 @test_atomic_load_max_i16_release(i16 %offset) nounwind {
 
 define i32 @test_atomic_load_max_i32_release(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_max_i32_release:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i32_release:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x9, var32
+; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var32
+; OUTLINE-ATOMICS-NEXT:  .LBB178_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldxr w8, [x9]
+; OUTLINE-ATOMICS-NEXT:    cmp w8, w0
+; OUTLINE-ATOMICS-NEXT:    csel w10, w8, w0, gt
+; OUTLINE-ATOMICS-NEXT:    stlxr w11, w10, [x9]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB178_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw max i32* @var32, i32 %offset release
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
@@ -2364,6 +4122,20 @@ define i32 @test_atomic_load_max_i32_release(i32 %offset) nounwind {
 
 define i64 @test_atomic_load_max_i64_release(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_max_i64_release:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i64_release:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x9, var64
+; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var64
+; OUTLINE-ATOMICS-NEXT:  .LBB179_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldxr x8, [x9]
+; OUTLINE-ATOMICS-NEXT:    cmp x8, x0
+; OUTLINE-ATOMICS-NEXT:    csel x10, x8, x0, gt
+; OUTLINE-ATOMICS-NEXT:    stlxr w11, x10, [x9]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB179_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    mov x0, x8
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw max i64* @var64, i64 %offset release
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
@@ -2377,6 +4149,19 @@ define i64 @test_atomic_load_max_i64_release(i64 %offset) nounwind {
 
 define void @test_atomic_load_max_i32_noret_release(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_max_i32_noret_release:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i32_noret_release:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x8, var32
+; OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var32
+; OUTLINE-ATOMICS-NEXT:  .LBB180_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldxr w9, [x8]
+; OUTLINE-ATOMICS-NEXT:    cmp w9, w0
+; OUTLINE-ATOMICS-NEXT:    csel w9, w9, w0, gt
+; OUTLINE-ATOMICS-NEXT:    stlxr w10, w9, [x8]
+; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB180_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw max i32* @var32, i32 %offset release
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
@@ -2389,6 +4174,19 @@ define void @test_atomic_load_max_i32_noret_release(i32 %offset) nounwind {
 
 define void @test_atomic_load_max_i64_noret_release(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_max_i64_noret_release:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i64_noret_release:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x8, var64
+; OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var64
+; OUTLINE-ATOMICS-NEXT:  .LBB181_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldxr x9, [x8]
+; OUTLINE-ATOMICS-NEXT:    cmp x9, x0
+; OUTLINE-ATOMICS-NEXT:    csel x9, x9, x0, gt
+; OUTLINE-ATOMICS-NEXT:    stlxr w10, x9, [x8]
+; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB181_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw max i64* @var64, i64 %offset release
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
@@ -2401,6 +4199,21 @@ define void @test_atomic_load_max_i64_noret_release(i64 %offset) nounwind {
 
 define i8 @test_atomic_load_max_i8_seq_cst(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_max_i8_seq_cst:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i8_seq_cst:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x9, var8
+; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var8
+; OUTLINE-ATOMICS-NEXT:  .LBB182_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldaxrb w10, [x9]
+; OUTLINE-ATOMICS-NEXT:    sxtb w8, w10
+; OUTLINE-ATOMICS-NEXT:    cmp w8, w0, sxtb
+; OUTLINE-ATOMICS-NEXT:    csel w10, w10, w0, gt
+; OUTLINE-ATOMICS-NEXT:    stlxrb w11, w10, [x9]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB182_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw max i8* @var8, i8 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
@@ -2414,6 +4227,21 @@ define i8 @test_atomic_load_max_i8_seq_cst(i8 %offset) nounwind {
 
 define i16 @test_atomic_load_max_i16_seq_cst(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_max_i16_seq_cst:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i16_seq_cst:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x9, var16
+; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var16
+; OUTLINE-ATOMICS-NEXT:  .LBB183_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldaxrh w10, [x9]
+; OUTLINE-ATOMICS-NEXT:    sxth w8, w10
+; OUTLINE-ATOMICS-NEXT:    cmp w8, w0, sxth
+; OUTLINE-ATOMICS-NEXT:    csel w10, w10, w0, gt
+; OUTLINE-ATOMICS-NEXT:    stlxrh w11, w10, [x9]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB183_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw max i16* @var16, i16 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
@@ -2427,6 +4255,20 @@ define i16 @test_atomic_load_max_i16_seq_cst(i16 %offset) nounwind {
 
 define i32 @test_atomic_load_max_i32_seq_cst(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_max_i32_seq_cst:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i32_seq_cst:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x9, var32
+; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var32
+; OUTLINE-ATOMICS-NEXT:  .LBB184_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldaxr w8, [x9]
+; OUTLINE-ATOMICS-NEXT:    cmp w8, w0
+; OUTLINE-ATOMICS-NEXT:    csel w10, w8, w0, gt
+; OUTLINE-ATOMICS-NEXT:    stlxr w11, w10, [x9]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB184_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw max i32* @var32, i32 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
@@ -2440,6 +4282,20 @@ define i32 @test_atomic_load_max_i32_seq_cst(i32 %offset) nounwind {
 
 define i64 @test_atomic_load_max_i64_seq_cst(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_max_i64_seq_cst:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i64_seq_cst:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x9, var64
+; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var64
+; OUTLINE-ATOMICS-NEXT:  .LBB185_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldaxr x8, [x9]
+; OUTLINE-ATOMICS-NEXT:    cmp x8, x0
+; OUTLINE-ATOMICS-NEXT:    csel x10, x8, x0, gt
+; OUTLINE-ATOMICS-NEXT:    stlxr w11, x10, [x9]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB185_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    mov x0, x8
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw max i64* @var64, i64 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
@@ -2453,6 +4309,19 @@ define i64 @test_atomic_load_max_i64_seq_cst(i64 %offset) nounwind {
 
 define void @test_atomic_load_max_i32_noret_seq_cst(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_max_i32_noret_seq_cst:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i32_noret_seq_cst:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x8, var32
+; OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var32
+; OUTLINE-ATOMICS-NEXT:  .LBB186_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldaxr w9, [x8]
+; OUTLINE-ATOMICS-NEXT:    cmp w9, w0
+; OUTLINE-ATOMICS-NEXT:    csel w9, w9, w0, gt
+; OUTLINE-ATOMICS-NEXT:    stlxr w10, w9, [x8]
+; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB186_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw max i32* @var32, i32 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
@@ -2465,6 +4334,19 @@ define void @test_atomic_load_max_i32_noret_seq_cst(i32 %offset) nounwind {
 
 define void @test_atomic_load_max_i64_noret_seq_cst(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_max_i64_noret_seq_cst:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i64_noret_seq_cst:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x8, var64
+; OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var64
+; OUTLINE-ATOMICS-NEXT:  .LBB187_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldaxr x9, [x8]
+; OUTLINE-ATOMICS-NEXT:    cmp x9, x0
+; OUTLINE-ATOMICS-NEXT:    csel x9, x9, x0, gt
+; OUTLINE-ATOMICS-NEXT:    stlxr w10, x9, [x8]
+; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB187_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw max i64* @var64, i64 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
@@ -2477,6 +4359,21 @@ define void @test_atomic_load_max_i64_noret_seq_cst(i64 %offset) nounwind {
 
 define i8 @test_atomic_load_min_i8_acq_rel(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_min_i8_acq_rel:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i8_acq_rel:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x9, var8
+; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var8
+; OUTLINE-ATOMICS-NEXT:  .LBB188_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldaxrb w10, [x9]
+; OUTLINE-ATOMICS-NEXT:    sxtb w8, w10
+; OUTLINE-ATOMICS-NEXT:    cmp w8, w0, sxtb
+; OUTLINE-ATOMICS-NEXT:    csel w10, w10, w0, le
+; OUTLINE-ATOMICS-NEXT:    stlxrb w11, w10, [x9]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB188_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw min i8* @var8, i8 %offset acq_rel
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
@@ -2490,6 +4387,21 @@ define i8 @test_atomic_load_min_i8_acq_rel(i8 %offset) nounwind {
 
 define i16 @test_atomic_load_min_i16_acq_rel(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_min_i16_acq_rel:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i16_acq_rel:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x9, var16
+; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var16
+; OUTLINE-ATOMICS-NEXT:  .LBB189_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldaxrh w10, [x9]
+; OUTLINE-ATOMICS-NEXT:    sxth w8, w10
+; OUTLINE-ATOMICS-NEXT:    cmp w8, w0, sxth
+; OUTLINE-ATOMICS-NEXT:    csel w10, w10, w0, le
+; OUTLINE-ATOMICS-NEXT:    stlxrh w11, w10, [x9]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB189_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw min i16* @var16, i16 %offset acq_rel
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
@@ -2503,6 +4415,20 @@ define i16 @test_atomic_load_min_i16_acq_rel(i16 %offset) nounwind {
 
 define i32 @test_atomic_load_min_i32_acq_rel(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_min_i32_acq_rel:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i32_acq_rel:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x9, var32
+; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var32
+; OUTLINE-ATOMICS-NEXT:  .LBB190_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldaxr w8, [x9]
+; OUTLINE-ATOMICS-NEXT:    cmp w8, w0
+; OUTLINE-ATOMICS-NEXT:    csel w10, w8, w0, le
+; OUTLINE-ATOMICS-NEXT:    stlxr w11, w10, [x9]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB190_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw min i32* @var32, i32 %offset acq_rel
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
@@ -2516,6 +4442,20 @@ define i32 @test_atomic_load_min_i32_acq_rel(i32 %offset) nounwind {
 
 define i64 @test_atomic_load_min_i64_acq_rel(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_min_i64_acq_rel:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i64_acq_rel:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x9, var64
+; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var64
+; OUTLINE-ATOMICS-NEXT:  .LBB191_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldaxr x8, [x9]
+; OUTLINE-ATOMICS-NEXT:    cmp x8, x0
+; OUTLINE-ATOMICS-NEXT:    csel x10, x8, x0, le
+; OUTLINE-ATOMICS-NEXT:    stlxr w11, x10, [x9]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB191_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    mov x0, x8
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw min i64* @var64, i64 %offset acq_rel
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
@@ -2529,6 +4469,19 @@ define i64 @test_atomic_load_min_i64_acq_rel(i64 %offset) nounwind {
 
 define void @test_atomic_load_min_i32_noret_acq_rel(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_min_i32_noret_acq_rel:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i32_noret_acq_rel:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x8, var32
+; OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var32
+; OUTLINE-ATOMICS-NEXT:  .LBB192_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldaxr w9, [x8]
+; OUTLINE-ATOMICS-NEXT:    cmp w9, w0
+; OUTLINE-ATOMICS-NEXT:    csel w9, w9, w0, le
+; OUTLINE-ATOMICS-NEXT:    stlxr w10, w9, [x8]
+; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB192_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw min i32* @var32, i32 %offset acq_rel
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
@@ -2541,6 +4494,19 @@ define void @test_atomic_load_min_i32_noret_acq_rel(i32 %offset) nounwind {
 
 define void @test_atomic_load_min_i64_noret_acq_rel(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_min_i64_noret_acq_rel:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i64_noret_acq_rel:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x8, var64
+; OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var64
+; OUTLINE-ATOMICS-NEXT:  .LBB193_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldaxr x9, [x8]
+; OUTLINE-ATOMICS-NEXT:    cmp x9, x0
+; OUTLINE-ATOMICS-NEXT:    csel x9, x9, x0, le
+; OUTLINE-ATOMICS-NEXT:    stlxr w10, x9, [x8]
+; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB193_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw min i64* @var64, i64 %offset acq_rel
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
@@ -2553,6 +4519,21 @@ define void @test_atomic_load_min_i64_noret_acq_rel(i64 %offset) nounwind {
 
 define i8 @test_atomic_load_min_i8_acquire(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_min_i8_acquire:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i8_acquire:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x9, var8
+; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var8
+; OUTLINE-ATOMICS-NEXT:  .LBB194_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldaxrb w10, [x9]
+; OUTLINE-ATOMICS-NEXT:    sxtb w8, w10
+; OUTLINE-ATOMICS-NEXT:    cmp w8, w0, sxtb
+; OUTLINE-ATOMICS-NEXT:    csel w10, w10, w0, le
+; OUTLINE-ATOMICS-NEXT:    stxrb w11, w10, [x9]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB194_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw min i8* @var8, i8 %offset acquire
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
@@ -2566,6 +4547,21 @@ define i8 @test_atomic_load_min_i8_acquire(i8 %offset) nounwind {
 
 define i16 @test_atomic_load_min_i16_acquire(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_min_i16_acquire:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i16_acquire:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x9, var16
+; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var16
+; OUTLINE-ATOMICS-NEXT:  .LBB195_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldaxrh w10, [x9]
+; OUTLINE-ATOMICS-NEXT:    sxth w8, w10
+; OUTLINE-ATOMICS-NEXT:    cmp w8, w0, sxth
+; OUTLINE-ATOMICS-NEXT:    csel w10, w10, w0, le
+; OUTLINE-ATOMICS-NEXT:    stxrh w11, w10, [x9]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB195_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw min i16* @var16, i16 %offset acquire
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
@@ -2579,6 +4575,20 @@ define i16 @test_atomic_load_min_i16_acquire(i16 %offset) nounwind {
 
 define i32 @test_atomic_load_min_i32_acquire(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_min_i32_acquire:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i32_acquire:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x9, var32
+; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var32
+; OUTLINE-ATOMICS-NEXT:  .LBB196_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldaxr w8, [x9]
+; OUTLINE-ATOMICS-NEXT:    cmp w8, w0
+; OUTLINE-ATOMICS-NEXT:    csel w10, w8, w0, le
+; OUTLINE-ATOMICS-NEXT:    stxr w11, w10, [x9]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB196_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw min i32* @var32, i32 %offset acquire
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
@@ -2592,6 +4602,20 @@ define i32 @test_atomic_load_min_i32_acquire(i32 %offset) nounwind {
 
 define i64 @test_atomic_load_min_i64_acquire(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_min_i64_acquire:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i64_acquire:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x9, var64
+; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var64
+; OUTLINE-ATOMICS-NEXT:  .LBB197_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldaxr x8, [x9]
+; OUTLINE-ATOMICS-NEXT:    cmp x8, x0
+; OUTLINE-ATOMICS-NEXT:    csel x10, x8, x0, le
+; OUTLINE-ATOMICS-NEXT:    stxr w11, x10, [x9]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB197_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    mov x0, x8
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw min i64* @var64, i64 %offset acquire
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
@@ -2605,6 +4629,19 @@ define i64 @test_atomic_load_min_i64_acquire(i64 %offset) nounwind {
 
 define void @test_atomic_load_min_i32_noret_acquire(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_min_i32_noret_acquire:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i32_noret_acquire:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x8, var32
+; OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var32
+; OUTLINE-ATOMICS-NEXT:  .LBB198_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldaxr w9, [x8]
+; OUTLINE-ATOMICS-NEXT:    cmp w9, w0
+; OUTLINE-ATOMICS-NEXT:    csel w9, w9, w0, le
+; OUTLINE-ATOMICS-NEXT:    stxr w10, w9, [x8]
+; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB198_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw min i32* @var32, i32 %offset acquire
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
@@ -2617,6 +4654,19 @@ define void @test_atomic_load_min_i32_noret_acquire(i32 %offset) nounwind {
 
 define void @test_atomic_load_min_i64_noret_acquire(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_min_i64_noret_acquire:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i64_noret_acquire:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x8, var64
+; OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var64
+; OUTLINE-ATOMICS-NEXT:  .LBB199_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldaxr x9, [x8]
+; OUTLINE-ATOMICS-NEXT:    cmp x9, x0
+; OUTLINE-ATOMICS-NEXT:    csel x9, x9, x0, le
+; OUTLINE-ATOMICS-NEXT:    stxr w10, x9, [x8]
+; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB199_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw min i64* @var64, i64 %offset acquire
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
@@ -2629,6 +4679,21 @@ define void @test_atomic_load_min_i64_noret_acquire(i64 %offset) nounwind {
 
 define i8 @test_atomic_load_min_i8_monotonic(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_min_i8_monotonic:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i8_monotonic:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x9, var8
+; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var8
+; OUTLINE-ATOMICS-NEXT:  .LBB200_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldxrb w10, [x9]
+; OUTLINE-ATOMICS-NEXT:    sxtb w8, w10
+; OUTLINE-ATOMICS-NEXT:    cmp w8, w0, sxtb
+; OUTLINE-ATOMICS-NEXT:    csel w10, w10, w0, le
+; OUTLINE-ATOMICS-NEXT:    stxrb w11, w10, [x9]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB200_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw min i8* @var8, i8 %offset monotonic
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
@@ -2642,6 +4707,21 @@ define i8 @test_atomic_load_min_i8_monotonic(i8 %offset) nounwind {
 
 define i16 @test_atomic_load_min_i16_monotonic(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_min_i16_monotonic:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i16_monotonic:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x9, var16
+; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var16
+; OUTLINE-ATOMICS-NEXT:  .LBB201_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldxrh w10, [x9]
+; OUTLINE-ATOMICS-NEXT:    sxth w8, w10
+; OUTLINE-ATOMICS-NEXT:    cmp w8, w0, sxth
+; OUTLINE-ATOMICS-NEXT:    csel w10, w10, w0, le
+; OUTLINE-ATOMICS-NEXT:    stxrh w11, w10, [x9]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB201_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw min i16* @var16, i16 %offset monotonic
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
@@ -2655,6 +4735,20 @@ define i16 @test_atomic_load_min_i16_monotonic(i16 %offset) nounwind {
 
 define i32 @test_atomic_load_min_i32_monotonic(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_min_i32_monotonic:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i32_monotonic:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x9, var32
+; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var32
+; OUTLINE-ATOMICS-NEXT:  .LBB202_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldxr w8, [x9]
+; OUTLINE-ATOMICS-NEXT:    cmp w8, w0
+; OUTLINE-ATOMICS-NEXT:    csel w10, w8, w0, le
+; OUTLINE-ATOMICS-NEXT:    stxr w11, w10, [x9]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB202_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw min i32* @var32, i32 %offset monotonic
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
@@ -2668,6 +4762,20 @@ define i32 @test_atomic_load_min_i32_monotonic(i32 %offset) nounwind {
 
 define i64 @test_atomic_load_min_i64_monotonic(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_min_i64_monotonic:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i64_monotonic:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x9, var64
+; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var64
+; OUTLINE-ATOMICS-NEXT:  .LBB203_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldxr x8, [x9]
+; OUTLINE-ATOMICS-NEXT:    cmp x8, x0
+; OUTLINE-ATOMICS-NEXT:    csel x10, x8, x0, le
+; OUTLINE-ATOMICS-NEXT:    stxr w11, x10, [x9]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB203_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    mov x0, x8
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw min i64* @var64, i64 %offset monotonic
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
@@ -2681,6 +4789,19 @@ define i64 @test_atomic_load_min_i64_monotonic(i64 %offset) nounwind {
 
 define void @test_atomic_load_min_i32_noret_monotonic(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_min_i32_noret_monotonic:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i32_noret_monotonic:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x8, var32
+; OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var32
+; OUTLINE-ATOMICS-NEXT:  .LBB204_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldxr w9, [x8]
+; OUTLINE-ATOMICS-NEXT:    cmp w9, w0
+; OUTLINE-ATOMICS-NEXT:    csel w9, w9, w0, le
+; OUTLINE-ATOMICS-NEXT:    stxr w10, w9, [x8]
+; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB204_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw min i32* @var32, i32 %offset monotonic
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
@@ -2693,6 +4814,19 @@ define void @test_atomic_load_min_i32_noret_monotonic(i32 %offset) nounwind {
 
 define void @test_atomic_load_min_i64_noret_monotonic(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_min_i64_noret_monotonic:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i64_noret_monotonic:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x8, var64
+; OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var64
+; OUTLINE-ATOMICS-NEXT:  .LBB205_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldxr x9, [x8]
+; OUTLINE-ATOMICS-NEXT:    cmp x9, x0
+; OUTLINE-ATOMICS-NEXT:    csel x9, x9, x0, le
+; OUTLINE-ATOMICS-NEXT:    stxr w10, x9, [x8]
+; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB205_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw min i64* @var64, i64 %offset monotonic
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
@@ -2705,6 +4839,21 @@ define void @test_atomic_load_min_i64_noret_monotonic(i64 %offset) nounwind {
 
 define i8 @test_atomic_load_min_i8_release(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_min_i8_release:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i8_release:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x9, var8
+; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var8
+; OUTLINE-ATOMICS-NEXT:  .LBB206_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldxrb w10, [x9]
+; OUTLINE-ATOMICS-NEXT:    sxtb w8, w10
+; OUTLINE-ATOMICS-NEXT:    cmp w8, w0, sxtb
+; OUTLINE-ATOMICS-NEXT:    csel w10, w10, w0, le
+; OUTLINE-ATOMICS-NEXT:    stlxrb w11, w10, [x9]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB206_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw min i8* @var8, i8 %offset release
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
@@ -2718,6 +4867,21 @@ define i8 @test_atomic_load_min_i8_release(i8 %offset) nounwind {
 
 define i16 @test_atomic_load_min_i16_release(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_min_i16_release:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i16_release:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x9, var16
+; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var16
+; OUTLINE-ATOMICS-NEXT:  .LBB207_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldxrh w10, [x9]
+; OUTLINE-ATOMICS-NEXT:    sxth w8, w10
+; OUTLINE-ATOMICS-NEXT:    cmp w8, w0, sxth
+; OUTLINE-ATOMICS-NEXT:    csel w10, w10, w0, le
+; OUTLINE-ATOMICS-NEXT:    stlxrh w11, w10, [x9]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB207_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw min i16* @var16, i16 %offset release
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
@@ -2731,6 +4895,20 @@ define i16 @test_atomic_load_min_i16_release(i16 %offset) nounwind {
 
 define i32 @test_atomic_load_min_i32_release(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_min_i32_release:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i32_release:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x9, var32
+; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var32
+; OUTLINE-ATOMICS-NEXT:  .LBB208_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldxr w8, [x9]
+; OUTLINE-ATOMICS-NEXT:    cmp w8, w0
+; OUTLINE-ATOMICS-NEXT:    csel w10, w8, w0, le
+; OUTLINE-ATOMICS-NEXT:    stlxr w11, w10, [x9]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB208_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw min i32* @var32, i32 %offset release
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
@@ -2744,6 +4922,20 @@ define i32 @test_atomic_load_min_i32_release(i32 %offset) nounwind {
 
 define i64 @test_atomic_load_min_i64_release(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_min_i64_release:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i64_release:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x9, var64
+; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var64
+; OUTLINE-ATOMICS-NEXT:  .LBB209_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldxr x8, [x9]
+; OUTLINE-ATOMICS-NEXT:    cmp x8, x0
+; OUTLINE-ATOMICS-NEXT:    csel x10, x8, x0, le
+; OUTLINE-ATOMICS-NEXT:    stlxr w11, x10, [x9]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB209_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    mov x0, x8
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw min i64* @var64, i64 %offset release
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
@@ -2757,6 +4949,19 @@ define i64 @test_atomic_load_min_i64_release(i64 %offset) nounwind {
 
 define void @test_atomic_load_min_i32_noret_release(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_min_i32_noret_release:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i32_noret_release:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x8, var32
+; OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var32
+; OUTLINE-ATOMICS-NEXT:  .LBB210_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldxr w9, [x8]
+; OUTLINE-ATOMICS-NEXT:    cmp w9, w0
+; OUTLINE-ATOMICS-NEXT:    csel w9, w9, w0, le
+; OUTLINE-ATOMICS-NEXT:    stlxr w10, w9, [x8]
+; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB210_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw min i32* @var32, i32 %offset release
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
@@ -2769,6 +4974,19 @@ define void @test_atomic_load_min_i32_noret_release(i32 %offset) nounwind {
 
 define void @test_atomic_load_min_i64_noret_release(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_min_i64_noret_release:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i64_noret_release:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x8, var64
+; OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var64
+; OUTLINE-ATOMICS-NEXT:  .LBB211_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldxr x9, [x8]
+; OUTLINE-ATOMICS-NEXT:    cmp x9, x0
+; OUTLINE-ATOMICS-NEXT:    csel x9, x9, x0, le
+; OUTLINE-ATOMICS-NEXT:    stlxr w10, x9, [x8]
+; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB211_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw min i64* @var64, i64 %offset release
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
@@ -2781,6 +4999,21 @@ define void @test_atomic_load_min_i64_noret_release(i64 %offset) nounwind {
 
 define i8 @test_atomic_load_min_i8_seq_cst(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_min_i8_seq_cst:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i8_seq_cst:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x9, var8
+; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var8
+; OUTLINE-ATOMICS-NEXT:  .LBB212_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldaxrb w10, [x9]
+; OUTLINE-ATOMICS-NEXT:    sxtb w8, w10
+; OUTLINE-ATOMICS-NEXT:    cmp w8, w0, sxtb
+; OUTLINE-ATOMICS-NEXT:    csel w10, w10, w0, le
+; OUTLINE-ATOMICS-NEXT:    stlxrb w11, w10, [x9]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB212_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw min i8* @var8, i8 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
@@ -2794,6 +5027,21 @@ define i8 @test_atomic_load_min_i8_seq_cst(i8 %offset) nounwind {
 
 define i16 @test_atomic_load_min_i16_seq_cst(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_min_i16_seq_cst:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i16_seq_cst:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x9, var16
+; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var16
+; OUTLINE-ATOMICS-NEXT:  .LBB213_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldaxrh w10, [x9]
+; OUTLINE-ATOMICS-NEXT:    sxth w8, w10
+; OUTLINE-ATOMICS-NEXT:    cmp w8, w0, sxth
+; OUTLINE-ATOMICS-NEXT:    csel w10, w10, w0, le
+; OUTLINE-ATOMICS-NEXT:    stlxrh w11, w10, [x9]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB213_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw min i16* @var16, i16 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
@@ -2807,6 +5055,20 @@ define i16 @test_atomic_load_min_i16_seq_cst(i16 %offset) nounwind {
 
 define i32 @test_atomic_load_min_i32_seq_cst(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_min_i32_seq_cst:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i32_seq_cst:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x9, var32
+; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var32
+; OUTLINE-ATOMICS-NEXT:  .LBB214_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldaxr w8, [x9]
+; OUTLINE-ATOMICS-NEXT:    cmp w8, w0
+; OUTLINE-ATOMICS-NEXT:    csel w10, w8, w0, le
+; OUTLINE-ATOMICS-NEXT:    stlxr w11, w10, [x9]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB214_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw min i32* @var32, i32 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
@@ -2820,6 +5082,20 @@ define i32 @test_atomic_load_min_i32_seq_cst(i32 %offset) nounwind {
 
 define i64 @test_atomic_load_min_i64_seq_cst(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_min_i64_seq_cst:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i64_seq_cst:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x9, var64
+; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var64
+; OUTLINE-ATOMICS-NEXT:  .LBB215_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldaxr x8, [x9]
+; OUTLINE-ATOMICS-NEXT:    cmp x8, x0
+; OUTLINE-ATOMICS-NEXT:    csel x10, x8, x0, le
+; OUTLINE-ATOMICS-NEXT:    stlxr w11, x10, [x9]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB215_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    mov x0, x8
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw min i64* @var64, i64 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
@@ -2833,6 +5109,19 @@ define i64 @test_atomic_load_min_i64_seq_cst(i64 %offset) nounwind {
 
 define void @test_atomic_load_min_i32_noret_seq_cst(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_min_i32_noret_seq_cst:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i32_noret_seq_cst:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x8, var32
+; OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var32
+; OUTLINE-ATOMICS-NEXT:  .LBB216_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldaxr w9, [x8]
+; OUTLINE-ATOMICS-NEXT:    cmp w9, w0
+; OUTLINE-ATOMICS-NEXT:    csel w9, w9, w0, le
+; OUTLINE-ATOMICS-NEXT:    stlxr w10, w9, [x8]
+; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB216_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw min i32* @var32, i32 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
@@ -2845,6 +5134,19 @@ define void @test_atomic_load_min_i32_noret_seq_cst(i32 %offset) nounwind {
 
 define void @test_atomic_load_min_i64_noret_seq_cst(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_min_i64_noret_seq_cst:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i64_noret_seq_cst:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x8, var64
+; OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var64
+; OUTLINE-ATOMICS-NEXT:  .LBB217_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldaxr x9, [x8]
+; OUTLINE-ATOMICS-NEXT:    cmp x9, x0
+; OUTLINE-ATOMICS-NEXT:    csel x9, x9, x0, le
+; OUTLINE-ATOMICS-NEXT:    stlxr w10, x9, [x8]
+; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB217_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw min i64* @var64, i64 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
@@ -2857,6 +5159,14 @@ define void @test_atomic_load_min_i64_noret_seq_cst(i64 %offset) nounwind {
 
 define i8 @test_atomic_load_or_i8_acq_rel(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_or_i8_acq_rel:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i8_acq_rel:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var8
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var8
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldset1_acq_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw or i8* @var8, i8 %offset acq_rel
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
@@ -2870,6 +5180,14 @@ define i8 @test_atomic_load_or_i8_acq_rel(i8 %offset) nounwind {
 
 define i16 @test_atomic_load_or_i16_acq_rel(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_or_i16_acq_rel:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i16_acq_rel:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var16
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var16
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldset2_acq_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw or i16* @var16, i16 %offset acq_rel
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
@@ -2883,6 +5201,14 @@ define i16 @test_atomic_load_or_i16_acq_rel(i16 %offset) nounwind {
 
 define i32 @test_atomic_load_or_i32_acq_rel(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_or_i32_acq_rel:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i32_acq_rel:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldset4_acq_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw or i32* @var32, i32 %offset acq_rel
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
@@ -2896,6 +5222,14 @@ define i32 @test_atomic_load_or_i32_acq_rel(i32 %offset) nounwind {
 
 define i64 @test_atomic_load_or_i64_acq_rel(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_or_i64_acq_rel:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i64_acq_rel:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldset8_acq_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw or i64* @var64, i64 %offset acq_rel
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
@@ -2909,6 +5243,14 @@ define i64 @test_atomic_load_or_i64_acq_rel(i64 %offset) nounwind {
 
 define void @test_atomic_load_or_i32_noret_acq_rel(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_or_i32_noret_acq_rel:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i32_noret_acq_rel:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldset4_acq_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw or i32* @var32, i32 %offset acq_rel
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
@@ -2921,6 +5263,14 @@ define void @test_atomic_load_or_i32_noret_acq_rel(i32 %offset) nounwind {
 
 define void @test_atomic_load_or_i64_noret_acq_rel(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_or_i64_noret_acq_rel:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i64_noret_acq_rel:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldset8_acq_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw or i64* @var64, i64 %offset acq_rel
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
@@ -2933,6 +5283,14 @@ define void @test_atomic_load_or_i64_noret_acq_rel(i64 %offset) nounwind {
 
 define i8 @test_atomic_load_or_i8_acquire(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_or_i8_acquire:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i8_acquire:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var8
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var8
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldset1_acq
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw or i8* @var8, i8 %offset acquire
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
@@ -2946,6 +5304,14 @@ define i8 @test_atomic_load_or_i8_acquire(i8 %offset) nounwind {
 
 define i16 @test_atomic_load_or_i16_acquire(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_or_i16_acquire:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i16_acquire:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var16
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var16
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldset2_acq
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw or i16* @var16, i16 %offset acquire
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
@@ -2959,6 +5325,14 @@ define i16 @test_atomic_load_or_i16_acquire(i16 %offset) nounwind {
 
 define i32 @test_atomic_load_or_i32_acquire(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_or_i32_acquire:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i32_acquire:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldset4_acq
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw or i32* @var32, i32 %offset acquire
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
@@ -2972,6 +5346,14 @@ define i32 @test_atomic_load_or_i32_acquire(i32 %offset) nounwind {
 
 define i64 @test_atomic_load_or_i64_acquire(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_or_i64_acquire:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i64_acquire:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldset8_acq
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw or i64* @var64, i64 %offset acquire
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
@@ -2985,6 +5367,14 @@ define i64 @test_atomic_load_or_i64_acquire(i64 %offset) nounwind {
 
 define void @test_atomic_load_or_i32_noret_acquire(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_or_i32_noret_acquire:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i32_noret_acquire:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldset4_acq
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw or i32* @var32, i32 %offset acquire
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
@@ -2997,6 +5387,14 @@ define void @test_atomic_load_or_i32_noret_acquire(i32 %offset) nounwind {
 
 define void @test_atomic_load_or_i64_noret_acquire(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_or_i64_noret_acquire:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i64_noret_acquire:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldset8_acq
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw or i64* @var64, i64 %offset acquire
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
@@ -3009,6 +5407,14 @@ define void @test_atomic_load_or_i64_noret_acquire(i64 %offset) nounwind {
 
 define i8 @test_atomic_load_or_i8_monotonic(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_or_i8_monotonic:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i8_monotonic:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var8
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var8
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldset1_relax
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw or i8* @var8, i8 %offset monotonic
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
@@ -3022,6 +5428,14 @@ define i8 @test_atomic_load_or_i8_monotonic(i8 %offset) nounwind {
 
 define i16 @test_atomic_load_or_i16_monotonic(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_or_i16_monotonic:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i16_monotonic:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var16
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var16
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldset2_relax
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw or i16* @var16, i16 %offset monotonic
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
@@ -3035,6 +5449,14 @@ define i16 @test_atomic_load_or_i16_monotonic(i16 %offset) nounwind {
 
 define i32 @test_atomic_load_or_i32_monotonic(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_or_i32_monotonic:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i32_monotonic:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldset4_relax
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw or i32* @var32, i32 %offset monotonic
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
@@ -3048,6 +5470,14 @@ define i32 @test_atomic_load_or_i32_monotonic(i32 %offset) nounwind {
 
 define i64 @test_atomic_load_or_i64_monotonic(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_or_i64_monotonic:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i64_monotonic:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldset8_relax
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw or i64* @var64, i64 %offset monotonic
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
@@ -3061,6 +5491,14 @@ define i64 @test_atomic_load_or_i64_monotonic(i64 %offset) nounwind {
 
 define void @test_atomic_load_or_i32_noret_monotonic(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_or_i32_noret_monotonic:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i32_noret_monotonic:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldset4_relax
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw or i32* @var32, i32 %offset monotonic
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
@@ -3073,6 +5511,14 @@ define void @test_atomic_load_or_i32_noret_monotonic(i32 %offset) nounwind {
 
 define void @test_atomic_load_or_i64_noret_monotonic(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_or_i64_noret_monotonic:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i64_noret_monotonic:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldset8_relax
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw or i64* @var64, i64 %offset monotonic
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
@@ -3085,6 +5531,14 @@ define void @test_atomic_load_or_i64_noret_monotonic(i64 %offset) nounwind {
 
 define i8 @test_atomic_load_or_i8_release(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_or_i8_release:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i8_release:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var8
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var8
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldset1_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw or i8* @var8, i8 %offset release
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
@@ -3098,6 +5552,14 @@ define i8 @test_atomic_load_or_i8_release(i8 %offset) nounwind {
 
 define i16 @test_atomic_load_or_i16_release(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_or_i16_release:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i16_release:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var16
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var16
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldset2_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw or i16* @var16, i16 %offset release
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
@@ -3111,6 +5573,14 @@ define i16 @test_atomic_load_or_i16_release(i16 %offset) nounwind {
 
 define i32 @test_atomic_load_or_i32_release(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_or_i32_release:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i32_release:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldset4_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw or i32* @var32, i32 %offset release
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
@@ -3124,6 +5594,14 @@ define i32 @test_atomic_load_or_i32_release(i32 %offset) nounwind {
 
 define i64 @test_atomic_load_or_i64_release(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_or_i64_release:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i64_release:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldset8_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw or i64* @var64, i64 %offset release
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
@@ -3137,6 +5615,14 @@ define i64 @test_atomic_load_or_i64_release(i64 %offset) nounwind {
 
 define void @test_atomic_load_or_i32_noret_release(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_or_i32_noret_release:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i32_noret_release:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldset4_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw or i32* @var32, i32 %offset release
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
@@ -3149,6 +5635,14 @@ define void @test_atomic_load_or_i32_noret_release(i32 %offset) nounwind {
 
 define void @test_atomic_load_or_i64_noret_release(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_or_i64_noret_release:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i64_noret_release:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldset8_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw or i64* @var64, i64 %offset release
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
@@ -3161,6 +5655,14 @@ define void @test_atomic_load_or_i64_noret_release(i64 %offset) nounwind {
 
 define i8 @test_atomic_load_or_i8_seq_cst(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_or_i8_seq_cst:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i8_seq_cst:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var8
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var8
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldset1_acq_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw or i8* @var8, i8 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
@@ -3174,6 +5676,14 @@ define i8 @test_atomic_load_or_i8_seq_cst(i8 %offset) nounwind {
 
 define i16 @test_atomic_load_or_i16_seq_cst(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_or_i16_seq_cst:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i16_seq_cst:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var16
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var16
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldset2_acq_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw or i16* @var16, i16 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
@@ -3187,6 +5697,14 @@ define i16 @test_atomic_load_or_i16_seq_cst(i16 %offset) nounwind {
 
 define i32 @test_atomic_load_or_i32_seq_cst(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_or_i32_seq_cst:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i32_seq_cst:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldset4_acq_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw or i32* @var32, i32 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
@@ -3200,6 +5718,14 @@ define i32 @test_atomic_load_or_i32_seq_cst(i32 %offset) nounwind {
 
 define i64 @test_atomic_load_or_i64_seq_cst(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_or_i64_seq_cst:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i64_seq_cst:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldset8_acq_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw or i64* @var64, i64 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
@@ -3213,6 +5739,14 @@ define i64 @test_atomic_load_or_i64_seq_cst(i64 %offset) nounwind {
 
 define void @test_atomic_load_or_i32_noret_seq_cst(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_or_i32_noret_seq_cst:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i32_noret_seq_cst:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldset4_acq_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw or i32* @var32, i32 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
@@ -3225,6 +5759,14 @@ define void @test_atomic_load_or_i32_noret_seq_cst(i32 %offset) nounwind {
 
 define void @test_atomic_load_or_i64_noret_seq_cst(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_or_i64_noret_seq_cst:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i64_noret_seq_cst:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldset8_acq_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw or i64* @var64, i64 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
@@ -3237,6 +5779,15 @@ define void @test_atomic_load_or_i64_noret_seq_cst(i64 %offset) nounwind {
 
 define i8 @test_atomic_load_sub_i8_acq_rel(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_sub_i8_acq_rel:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i8_acq_rel:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    neg w0, w0
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var8
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var8
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd1_acq_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
   %old = atomicrmw sub i8* @var8, i8 %offset acq_rel
 ; CHECK-NOT: dmb
 ; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]]
@@ -3251,6 +5802,15 @@ define i8 @test_atomic_load_sub_i8_acq_rel(i8 %offset) nounwind {
 
 define i16 @test_atomic_load_sub_i16_acq_rel(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_sub_i16_acq_rel:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i16_acq_rel:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    neg w0, w0
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var16
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var16
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd2_acq_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
   %old = atomicrmw sub i16* @var16, i16 %offset acq_rel
 ; CHECK-NOT: dmb
 ; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]]
@@ -3265,6 +5825,15 @@ define i16 @test_atomic_load_sub_i16_acq_rel(i16 %offset) nounwind {
 
 define i32 @test_atomic_load_sub_i32_acq_rel(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_sub_i32_acq_rel:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i32_acq_rel:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    neg w0, w0
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd4_acq_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
   %old = atomicrmw sub i32* @var32, i32 %offset acq_rel
 ; CHECK-NOT: dmb
 ; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]]
@@ -3279,6 +5848,15 @@ define i32 @test_atomic_load_sub_i32_acq_rel(i32 %offset) nounwind {
 
 define i64 @test_atomic_load_sub_i64_acq_rel(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_sub_i64_acq_rel:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i64_acq_rel:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    neg x0, x0
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd8_acq_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
   %old = atomicrmw sub i64* @var64, i64 %offset acq_rel
 ; CHECK-NOT: dmb
 ; CHECK: neg x[[NEG:[0-9]+]], x[[OLD:[0-9]+]]
@@ -3293,6 +5871,15 @@ define i64 @test_atomic_load_sub_i64_acq_rel(i64 %offset) nounwind {
 
 define void @test_atomic_load_sub_i32_noret_acq_rel(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_sub_i32_noret_acq_rel:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i32_noret_acq_rel:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    neg w0, w0
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd4_acq_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
   atomicrmw sub i32* @var32, i32 %offset acq_rel
 ; CHECK-NOT: dmb
 ; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]]
@@ -3307,6 +5894,15 @@ define void @test_atomic_load_sub_i32_noret_acq_rel(i32 %offset) nounwind {
 
 define void @test_atomic_load_sub_i64_noret_acq_rel(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_sub_i64_noret_acq_rel:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i64_noret_acq_rel:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    neg x0, x0
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd8_acq_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
   atomicrmw sub i64* @var64, i64 %offset acq_rel
 ; CHECK-NOT: dmb
 ; CHECK: neg x[[NEG:[0-9]+]], x[[OLD:[0-9]+]]
@@ -3321,6 +5917,15 @@ define void @test_atomic_load_sub_i64_noret_acq_rel(i64 %offset) nounwind {
 
 define i8 @test_atomic_load_sub_i8_acquire(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_sub_i8_acquire:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i8_acquire:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    neg w0, w0
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var8
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var8
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd1_acq
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
   %old = atomicrmw sub i8* @var8, i8 %offset acquire
 ; CHECK-NOT: dmb
 ; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]]
@@ -3335,6 +5940,15 @@ define i8 @test_atomic_load_sub_i8_acquire(i8 %offset) nounwind {
 
 define i16 @test_atomic_load_sub_i16_acquire(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_sub_i16_acquire:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i16_acquire:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    neg w0, w0
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var16
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var16
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd2_acq
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
   %old = atomicrmw sub i16* @var16, i16 %offset acquire
 ; CHECK-NOT: dmb
 ; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]]
@@ -3349,6 +5963,15 @@ define i16 @test_atomic_load_sub_i16_acquire(i16 %offset) nounwind {
 
 define i32 @test_atomic_load_sub_i32_acquire(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_sub_i32_acquire:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i32_acquire:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    neg w0, w0
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd4_acq
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
   %old = atomicrmw sub i32* @var32, i32 %offset acquire
 ; CHECK-NOT: dmb
 ; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]]
@@ -3363,6 +5986,15 @@ define i32 @test_atomic_load_sub_i32_acquire(i32 %offset) nounwind {
 
 define i64 @test_atomic_load_sub_i64_acquire(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_sub_i64_acquire:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i64_acquire:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    neg x0, x0
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd8_acq
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
   %old = atomicrmw sub i64* @var64, i64 %offset acquire
 ; CHECK-NOT: dmb
 ; CHECK: neg x[[NEG:[0-9]+]], x[[OLD:[0-9]+]]
@@ -3377,6 +6009,15 @@ define i64 @test_atomic_load_sub_i64_acquire(i64 %offset) nounwind {
 
 define void @test_atomic_load_sub_i32_noret_acquire(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_sub_i32_noret_acquire:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i32_noret_acquire:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    neg w0, w0
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd4_acq
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
   atomicrmw sub i32* @var32, i32 %offset acquire
 ; CHECK-NOT: dmb
 ; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]]
@@ -3391,6 +6032,15 @@ define void @test_atomic_load_sub_i32_noret_acquire(i32 %offset) nounwind {
 
 define void @test_atomic_load_sub_i64_noret_acquire(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_sub_i64_noret_acquire:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i64_noret_acquire:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    neg x0, x0
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd8_acq
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
   atomicrmw sub i64* @var64, i64 %offset acquire
 ; CHECK-NOT: dmb
 ; CHECK: neg x[[NEG:[0-9]+]], x[[OLD:[0-9]+]]
@@ -3405,6 +6055,15 @@ define void @test_atomic_load_sub_i64_noret_acquire(i64 %offset) nounwind {
 
 define i8 @test_atomic_load_sub_i8_monotonic(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_sub_i8_monotonic:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i8_monotonic:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    neg w0, w0
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var8
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var8
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd1_relax
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
   %old = atomicrmw sub i8* @var8, i8 %offset monotonic
 ; CHECK-NOT: dmb
 ; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]]
@@ -3419,6 +6078,15 @@ define i8 @test_atomic_load_sub_i8_monotonic(i8 %offset) nounwind {
 
 define i16 @test_atomic_load_sub_i16_monotonic(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_sub_i16_monotonic:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i16_monotonic:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    neg w0, w0
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var16
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var16
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd2_relax
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
   %old = atomicrmw sub i16* @var16, i16 %offset monotonic
 ; CHECK-NOT: dmb
 ; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]]
@@ -3433,6 +6101,15 @@ define i16 @test_atomic_load_sub_i16_monotonic(i16 %offset) nounwind {
 
 define i32 @test_atomic_load_sub_i32_monotonic(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_sub_i32_monotonic:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i32_monotonic:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    neg w0, w0
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd4_relax
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
   %old = atomicrmw sub i32* @var32, i32 %offset monotonic
 ; CHECK-NOT: dmb
 ; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]]
@@ -3447,6 +6124,15 @@ define i32 @test_atomic_load_sub_i32_monotonic(i32 %offset) nounwind {
 
 define i64 @test_atomic_load_sub_i64_monotonic(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_sub_i64_monotonic:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i64_monotonic:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    neg x0, x0
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd8_relax
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
   %old = atomicrmw sub i64* @var64, i64 %offset monotonic
 ; CHECK-NOT: dmb
 ; CHECK: neg x[[NEG:[0-9]+]], x[[OLD:[0-9]+]]
@@ -3461,6 +6147,15 @@ define i64 @test_atomic_load_sub_i64_monotonic(i64 %offset) nounwind {
 
 define void @test_atomic_load_sub_i32_noret_monotonic(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_sub_i32_noret_monotonic:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i32_noret_monotonic:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    neg w0, w0
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd4_relax
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
   atomicrmw sub i32* @var32, i32 %offset monotonic
 ; CHECK-NOT: dmb
 ; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]]
@@ -3475,6 +6170,15 @@ define void @test_atomic_load_sub_i32_noret_monotonic(i32 %offset) nounwind {
 
 define void @test_atomic_load_sub_i64_noret_monotonic(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_sub_i64_noret_monotonic:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i64_noret_monotonic:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    neg x0, x0
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd8_relax
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
   atomicrmw sub i64* @var64, i64 %offset monotonic
 ; CHECK-NOT: dmb
 ; CHECK: neg x[[NEG:[0-9]+]], x[[OLD:[0-9]+]]
@@ -3489,6 +6193,15 @@ define void @test_atomic_load_sub_i64_noret_monotonic(i64 %offset) nounwind {
 
 define i8 @test_atomic_load_sub_i8_release(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_sub_i8_release:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i8_release:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    neg w0, w0
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var8
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var8
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd1_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
   %old = atomicrmw sub i8* @var8, i8 %offset release
 ; CHECK-NOT: dmb
 ; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]]
@@ -3503,6 +6216,15 @@ define i8 @test_atomic_load_sub_i8_release(i8 %offset) nounwind {
 
 define i16 @test_atomic_load_sub_i16_release(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_sub_i16_release:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i16_release:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    neg w0, w0
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var16
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var16
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd2_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
   %old = atomicrmw sub i16* @var16, i16 %offset release
 ; CHECK-NOT: dmb
 ; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]]
@@ -3517,6 +6239,15 @@ define i16 @test_atomic_load_sub_i16_release(i16 %offset) nounwind {
 
 define i32 @test_atomic_load_sub_i32_release(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_sub_i32_release:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i32_release:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    neg w0, w0
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd4_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
   %old = atomicrmw sub i32* @var32, i32 %offset release
 ; CHECK-NOT: dmb
 ; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]]
@@ -3531,6 +6262,15 @@ define i32 @test_atomic_load_sub_i32_release(i32 %offset) nounwind {
 
 define i64 @test_atomic_load_sub_i64_release(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_sub_i64_release:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i64_release:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    neg x0, x0
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd8_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
   %old = atomicrmw sub i64* @var64, i64 %offset release
 ; CHECK-NOT: dmb
 ; CHECK: neg x[[NEG:[0-9]+]], x[[OLD:[0-9]+]]
@@ -3545,6 +6285,15 @@ define i64 @test_atomic_load_sub_i64_release(i64 %offset) nounwind {
 
 define void @test_atomic_load_sub_i32_noret_release(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_sub_i32_noret_release:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i32_noret_release:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    neg w0, w0
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd4_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
   atomicrmw sub i32* @var32, i32 %offset release
 ; CHECK-NOT: dmb
 ; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]]
@@ -3559,6 +6308,15 @@ define void @test_atomic_load_sub_i32_noret_release(i32 %offset) nounwind {
 
 define void @test_atomic_load_sub_i64_noret_release(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_sub_i64_noret_release:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i64_noret_release:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    neg x0, x0
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd8_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
   atomicrmw sub i64* @var64, i64 %offset release
 ; CHECK-NOT: dmb
 ; CHECK: neg x[[NEG:[0-9]+]], x[[OLD:[0-9]+]]
@@ -3573,6 +6331,15 @@ define void @test_atomic_load_sub_i64_noret_release(i64 %offset) nounwind {
 
 define i8 @test_atomic_load_sub_i8_seq_cst(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_sub_i8_seq_cst:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i8_seq_cst:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    neg w0, w0
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var8
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var8
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd1_acq_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
   %old = atomicrmw sub i8* @var8, i8 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]]
@@ -3587,6 +6354,15 @@ define i8 @test_atomic_load_sub_i8_seq_cst(i8 %offset) nounwind {
 
 define i16 @test_atomic_load_sub_i16_seq_cst(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_sub_i16_seq_cst:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i16_seq_cst:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    neg w0, w0
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var16
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var16
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd2_acq_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
   %old = atomicrmw sub i16* @var16, i16 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]]
@@ -3601,6 +6377,15 @@ define i16 @test_atomic_load_sub_i16_seq_cst(i16 %offset) nounwind {
 
 define i32 @test_atomic_load_sub_i32_seq_cst(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_sub_i32_seq_cst:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i32_seq_cst:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    neg w0, w0
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd4_acq_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
   %old = atomicrmw sub i32* @var32, i32 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]]
@@ -3615,6 +6400,15 @@ define i32 @test_atomic_load_sub_i32_seq_cst(i32 %offset) nounwind {
 
 define i64 @test_atomic_load_sub_i64_seq_cst(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_sub_i64_seq_cst:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i64_seq_cst:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    neg x0, x0
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd8_acq_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
   %old = atomicrmw sub i64* @var64, i64 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: neg x[[NEG:[0-9]+]], x[[OLD:[0-9]+]]
@@ -3629,6 +6423,15 @@ define i64 @test_atomic_load_sub_i64_seq_cst(i64 %offset) nounwind {
 
 define void @test_atomic_load_sub_i32_noret_seq_cst(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_sub_i32_noret_seq_cst:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i32_noret_seq_cst:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    neg w0, w0
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd4_acq_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
   atomicrmw sub i32* @var32, i32 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]]
@@ -3643,6 +6446,15 @@ define void @test_atomic_load_sub_i32_noret_seq_cst(i32 %offset) nounwind {
 
 define void @test_atomic_load_sub_i64_noret_seq_cst(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_sub_i64_noret_seq_cst:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i64_noret_seq_cst:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    neg x0, x0
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd8_acq_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
   atomicrmw sub i64* @var64, i64 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: neg x[[NEG:[0-9]+]], x[[OLD:[0-9]+]]
@@ -3657,6 +6469,14 @@ define void @test_atomic_load_sub_i64_noret_seq_cst(i64 %offset) nounwind {
 
 define i8 @test_atomic_load_xchg_i8_acq_rel(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xchg_i8_acq_rel:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i8_acq_rel:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var8
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var8
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_swp1_acq_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw xchg i8* @var8, i8 %offset acq_rel
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
@@ -3670,6 +6490,14 @@ define i8 @test_atomic_load_xchg_i8_acq_rel(i8 %offset) nounwind {
 
 define i16 @test_atomic_load_xchg_i16_acq_rel(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xchg_i16_acq_rel:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i16_acq_rel:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var16
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var16
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_swp2_acq_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw xchg i16* @var16, i16 %offset acq_rel
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
@@ -3683,6 +6511,14 @@ define i16 @test_atomic_load_xchg_i16_acq_rel(i16 %offset) nounwind {
 
 define i32 @test_atomic_load_xchg_i32_acq_rel(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xchg_i32_acq_rel:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i32_acq_rel:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_swp4_acq_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw xchg i32* @var32, i32 %offset acq_rel
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
@@ -3696,6 +6532,14 @@ define i32 @test_atomic_load_xchg_i32_acq_rel(i32 %offset) nounwind {
 
 define i64 @test_atomic_load_xchg_i64_acq_rel(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xchg_i64_acq_rel:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i64_acq_rel:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_swp8_acq_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw xchg i64* @var64, i64 %offset acq_rel
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
@@ -3709,6 +6553,14 @@ define i64 @test_atomic_load_xchg_i64_acq_rel(i64 %offset) nounwind {
 
 define void @test_atomic_load_xchg_i32_noret_acq_rel(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xchg_i32_noret_acq_rel:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i32_noret_acq_rel:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_swp4_acq_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw xchg i32* @var32, i32 %offset acq_rel
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
@@ -3722,6 +6574,14 @@ define void @test_atomic_load_xchg_i32_noret_acq_rel(i32 %offset) nounwind {
 
 define void @test_atomic_load_xchg_i64_noret_acq_rel(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xchg_i64_noret_acq_rel:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i64_noret_acq_rel:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_swp8_acq_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw xchg i64* @var64, i64 %offset acq_rel
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
@@ -3735,6 +6595,14 @@ define void @test_atomic_load_xchg_i64_noret_acq_rel(i64 %offset) nounwind {
 
 define i8 @test_atomic_load_xchg_i8_acquire(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xchg_i8_acquire:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i8_acquire:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var8
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var8
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_swp1_acq
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw xchg i8* @var8, i8 %offset acquire
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
@@ -3748,6 +6616,14 @@ define i8 @test_atomic_load_xchg_i8_acquire(i8 %offset) nounwind {
 
 define i16 @test_atomic_load_xchg_i16_acquire(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xchg_i16_acquire:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i16_acquire:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var16
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var16
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_swp2_acq
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw xchg i16* @var16, i16 %offset acquire
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
@@ -3761,6 +6637,14 @@ define i16 @test_atomic_load_xchg_i16_acquire(i16 %offset) nounwind {
 
 define i32 @test_atomic_load_xchg_i32_acquire(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xchg_i32_acquire:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i32_acquire:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_swp4_acq
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw xchg i32* @var32, i32 %offset acquire
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
@@ -3774,6 +6658,14 @@ define i32 @test_atomic_load_xchg_i32_acquire(i32 %offset) nounwind {
 
 define i64 @test_atomic_load_xchg_i64_acquire(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xchg_i64_acquire:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i64_acquire:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_swp8_acq
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw xchg i64* @var64, i64 %offset acquire
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
@@ -3787,6 +6679,14 @@ define i64 @test_atomic_load_xchg_i64_acquire(i64 %offset) nounwind {
 
 define void @test_atomic_load_xchg_i32_noret_acquire(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xchg_i32_noret_acquire:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i32_noret_acquire:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_swp4_acq
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw xchg i32* @var32, i32 %offset acquire
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
@@ -3800,6 +6700,14 @@ define void @test_atomic_load_xchg_i32_noret_acquire(i32 %offset) nounwind {
 
 define void @test_atomic_load_xchg_i64_noret_acquire(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xchg_i64_noret_acquire:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i64_noret_acquire:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_swp8_acq
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw xchg i64* @var64, i64 %offset acquire
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
@@ -3813,6 +6721,14 @@ define void @test_atomic_load_xchg_i64_noret_acquire(i64 %offset) nounwind {
 
 define i8 @test_atomic_load_xchg_i8_monotonic(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xchg_i8_monotonic:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i8_monotonic:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var8
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var8
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_swp1_relax
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw xchg i8* @var8, i8 %offset monotonic
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
@@ -3826,6 +6742,14 @@ define i8 @test_atomic_load_xchg_i8_monotonic(i8 %offset) nounwind {
 
 define i16 @test_atomic_load_xchg_i16_monotonic(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xchg_i16_monotonic:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i16_monotonic:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var16
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var16
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_swp2_relax
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw xchg i16* @var16, i16 %offset monotonic
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
@@ -3839,6 +6763,14 @@ define i16 @test_atomic_load_xchg_i16_monotonic(i16 %offset) nounwind {
 
 define i32 @test_atomic_load_xchg_i32_monotonic(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xchg_i32_monotonic:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i32_monotonic:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_swp4_relax
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw xchg i32* @var32, i32 %offset monotonic
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
@@ -3852,6 +6784,14 @@ define i32 @test_atomic_load_xchg_i32_monotonic(i32 %offset) nounwind {
 
 define i64 @test_atomic_load_xchg_i64_monotonic(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xchg_i64_monotonic:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i64_monotonic:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_swp8_relax
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw xchg i64* @var64, i64 %offset monotonic
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
@@ -3865,6 +6805,14 @@ define i64 @test_atomic_load_xchg_i64_monotonic(i64 %offset) nounwind {
 
 define void @test_atomic_load_xchg_i32_noret_monotonic(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xchg_i32_noret_monotonic:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i32_noret_monotonic:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_swp4_relax
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw xchg i32* @var32, i32 %offset monotonic
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
@@ -3878,6 +6826,14 @@ define void @test_atomic_load_xchg_i32_noret_monotonic(i32 %offset) nounwind {
 
 define void @test_atomic_load_xchg_i64_noret_monotonic(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xchg_i64_noret_monotonic:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i64_noret_monotonic:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_swp8_relax
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw xchg i64* @var64, i64 %offset monotonic
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
@@ -3891,6 +6847,14 @@ define void @test_atomic_load_xchg_i64_noret_monotonic(i64 %offset) nounwind {
 
 define i8 @test_atomic_load_xchg_i8_release(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xchg_i8_release:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i8_release:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var8
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var8
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_swp1_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw xchg i8* @var8, i8 %offset release
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
@@ -3904,6 +6868,14 @@ define i8 @test_atomic_load_xchg_i8_release(i8 %offset) nounwind {
 
 define i16 @test_atomic_load_xchg_i16_release(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xchg_i16_release:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i16_release:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var16
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var16
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_swp2_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw xchg i16* @var16, i16 %offset release
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
@@ -3917,6 +6889,14 @@ define i16 @test_atomic_load_xchg_i16_release(i16 %offset) nounwind {
 
 define i32 @test_atomic_load_xchg_i32_release(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xchg_i32_release:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i32_release:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_swp4_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw xchg i32* @var32, i32 %offset release
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
@@ -3930,6 +6910,14 @@ define i32 @test_atomic_load_xchg_i32_release(i32 %offset) nounwind {
 
 define i64 @test_atomic_load_xchg_i64_release(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xchg_i64_release:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i64_release:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_swp8_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw xchg i64* @var64, i64 %offset release
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
@@ -3943,6 +6931,14 @@ define i64 @test_atomic_load_xchg_i64_release(i64 %offset) nounwind {
 
 define void @test_atomic_load_xchg_i32_noret_release(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xchg_i32_noret_release:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i32_noret_release:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_swp4_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw xchg i32* @var32, i32 %offset release
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
@@ -3956,6 +6952,14 @@ define void @test_atomic_load_xchg_i32_noret_release(i32 %offset) nounwind {
 
 define void @test_atomic_load_xchg_i64_noret_release(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xchg_i64_noret_release:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i64_noret_release:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_swp8_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw xchg i64* @var64, i64 %offset release
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
@@ -3969,6 +6973,14 @@ define void @test_atomic_load_xchg_i64_noret_release(i64 %offset) nounwind {
 
 define i8 @test_atomic_load_xchg_i8_seq_cst(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xchg_i8_seq_cst:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i8_seq_cst:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var8
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var8
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_swp1_acq_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw xchg i8* @var8, i8 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
@@ -3982,6 +6994,14 @@ define i8 @test_atomic_load_xchg_i8_seq_cst(i8 %offset) nounwind {
 
 define i16 @test_atomic_load_xchg_i16_seq_cst(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xchg_i16_seq_cst:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i16_seq_cst:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var16
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var16
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_swp2_acq_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw xchg i16* @var16, i16 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
@@ -3995,6 +7015,14 @@ define i16 @test_atomic_load_xchg_i16_seq_cst(i16 %offset) nounwind {
 
 define i32 @test_atomic_load_xchg_i32_seq_cst(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xchg_i32_seq_cst:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i32_seq_cst:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_swp4_acq_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw xchg i32* @var32, i32 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
@@ -4008,6 +7036,14 @@ define i32 @test_atomic_load_xchg_i32_seq_cst(i32 %offset) nounwind {
 
 define i64 @test_atomic_load_xchg_i64_seq_cst(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xchg_i64_seq_cst:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i64_seq_cst:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_swp8_acq_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw xchg i64* @var64, i64 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
@@ -4021,6 +7057,14 @@ define i64 @test_atomic_load_xchg_i64_seq_cst(i64 %offset) nounwind {
 
 define void @test_atomic_load_xchg_i32_noret_seq_cst(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xchg_i32_noret_seq_cst:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i32_noret_seq_cst:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_swp4_acq_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw xchg i32* @var32, i32 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
@@ -4034,6 +7078,14 @@ define void @test_atomic_load_xchg_i32_noret_seq_cst(i32 %offset) nounwind {
 
 define void @test_atomic_load_xchg_i64_noret_seq_cst(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xchg_i64_noret_seq_cst:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i64_noret_seq_cst:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_swp8_acq_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw xchg i64* @var64, i64 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
@@ -4047,6 +7099,20 @@ define void @test_atomic_load_xchg_i64_noret_seq_cst(i64 %offset) nounwind {
 
 define i8 @test_atomic_load_umax_i8_acq_rel(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umax_i8_acq_rel:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i8_acq_rel:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x9, var8
+; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var8
+; OUTLINE-ATOMICS-NEXT:  .LBB308_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldaxrb w8, [x9]
+; OUTLINE-ATOMICS-NEXT:    cmp w8, w0, uxtb
+; OUTLINE-ATOMICS-NEXT:    csel w10, w8, w0, hi
+; OUTLINE-ATOMICS-NEXT:    stlxrb w11, w10, [x9]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB308_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umax i8* @var8, i8 %offset acq_rel
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
@@ -4060,6 +7126,20 @@ define i8 @test_atomic_load_umax_i8_acq_rel(i8 %offset) nounwind {
 
 define i16 @test_atomic_load_umax_i16_acq_rel(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umax_i16_acq_rel:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i16_acq_rel:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x9, var16
+; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var16
+; OUTLINE-ATOMICS-NEXT:  .LBB309_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldaxrh w8, [x9]
+; OUTLINE-ATOMICS-NEXT:    cmp w8, w0, uxth
+; OUTLINE-ATOMICS-NEXT:    csel w10, w8, w0, hi
+; OUTLINE-ATOMICS-NEXT:    stlxrh w11, w10, [x9]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB309_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umax i16* @var16, i16 %offset acq_rel
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
@@ -4073,6 +7153,20 @@ define i16 @test_atomic_load_umax_i16_acq_rel(i16 %offset) nounwind {
 
 define i32 @test_atomic_load_umax_i32_acq_rel(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umax_i32_acq_rel:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i32_acq_rel:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x9, var32
+; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var32
+; OUTLINE-ATOMICS-NEXT:  .LBB310_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldaxr w8, [x9]
+; OUTLINE-ATOMICS-NEXT:    cmp w8, w0
+; OUTLINE-ATOMICS-NEXT:    csel w10, w8, w0, hi
+; OUTLINE-ATOMICS-NEXT:    stlxr w11, w10, [x9]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB310_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umax i32* @var32, i32 %offset acq_rel
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
@@ -4086,6 +7180,20 @@ define i32 @test_atomic_load_umax_i32_acq_rel(i32 %offset) nounwind {
 
 define i64 @test_atomic_load_umax_i64_acq_rel(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umax_i64_acq_rel:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i64_acq_rel:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x9, var64
+; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var64
+; OUTLINE-ATOMICS-NEXT:  .LBB311_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldaxr x8, [x9]
+; OUTLINE-ATOMICS-NEXT:    cmp x8, x0
+; OUTLINE-ATOMICS-NEXT:    csel x10, x8, x0, hi
+; OUTLINE-ATOMICS-NEXT:    stlxr w11, x10, [x9]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB311_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    mov x0, x8
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umax i64* @var64, i64 %offset acq_rel
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
@@ -4099,6 +7207,19 @@ define i64 @test_atomic_load_umax_i64_acq_rel(i64 %offset) nounwind {
 
 define void @test_atomic_load_umax_i32_noret_acq_rel(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umax_i32_noret_acq_rel:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i32_noret_acq_rel:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x8, var32
+; OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var32
+; OUTLINE-ATOMICS-NEXT:  .LBB312_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldaxr w9, [x8]
+; OUTLINE-ATOMICS-NEXT:    cmp w9, w0
+; OUTLINE-ATOMICS-NEXT:    csel w9, w9, w0, hi
+; OUTLINE-ATOMICS-NEXT:    stlxr w10, w9, [x8]
+; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB312_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw umax i32* @var32, i32 %offset acq_rel
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
@@ -4111,6 +7232,19 @@ define void @test_atomic_load_umax_i32_noret_acq_rel(i32 %offset) nounwind {
 
 define void @test_atomic_load_umax_i64_noret_acq_rel(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umax_i64_noret_acq_rel:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i64_noret_acq_rel:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x8, var64
+; OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var64
+; OUTLINE-ATOMICS-NEXT:  .LBB313_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldaxr x9, [x8]
+; OUTLINE-ATOMICS-NEXT:    cmp x9, x0
+; OUTLINE-ATOMICS-NEXT:    csel x9, x9, x0, hi
+; OUTLINE-ATOMICS-NEXT:    stlxr w10, x9, [x8]
+; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB313_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw umax i64* @var64, i64 %offset acq_rel
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
@@ -4123,6 +7257,20 @@ define void @test_atomic_load_umax_i64_noret_acq_rel(i64 %offset) nounwind {
 
 define i8 @test_atomic_load_umax_i8_acquire(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umax_i8_acquire:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i8_acquire:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x9, var8
+; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var8
+; OUTLINE-ATOMICS-NEXT:  .LBB314_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldaxrb w8, [x9]
+; OUTLINE-ATOMICS-NEXT:    cmp w8, w0, uxtb
+; OUTLINE-ATOMICS-NEXT:    csel w10, w8, w0, hi
+; OUTLINE-ATOMICS-NEXT:    stxrb w11, w10, [x9]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB314_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umax i8* @var8, i8 %offset acquire
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
@@ -4136,6 +7284,20 @@ define i8 @test_atomic_load_umax_i8_acquire(i8 %offset) nounwind {
 
 define i16 @test_atomic_load_umax_i16_acquire(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umax_i16_acquire:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i16_acquire:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x9, var16
+; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var16
+; OUTLINE-ATOMICS-NEXT:  .LBB315_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldaxrh w8, [x9]
+; OUTLINE-ATOMICS-NEXT:    cmp w8, w0, uxth
+; OUTLINE-ATOMICS-NEXT:    csel w10, w8, w0, hi
+; OUTLINE-ATOMICS-NEXT:    stxrh w11, w10, [x9]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB315_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umax i16* @var16, i16 %offset acquire
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
@@ -4149,6 +7311,20 @@ define i16 @test_atomic_load_umax_i16_acquire(i16 %offset) nounwind {
 
 define i32 @test_atomic_load_umax_i32_acquire(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umax_i32_acquire:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i32_acquire:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x9, var32
+; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var32
+; OUTLINE-ATOMICS-NEXT:  .LBB316_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldaxr w8, [x9]
+; OUTLINE-ATOMICS-NEXT:    cmp w8, w0
+; OUTLINE-ATOMICS-NEXT:    csel w10, w8, w0, hi
+; OUTLINE-ATOMICS-NEXT:    stxr w11, w10, [x9]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB316_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umax i32* @var32, i32 %offset acquire
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
@@ -4162,6 +7338,20 @@ define i32 @test_atomic_load_umax_i32_acquire(i32 %offset) nounwind {
 
 define i64 @test_atomic_load_umax_i64_acquire(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umax_i64_acquire:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i64_acquire:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x9, var64
+; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var64
+; OUTLINE-ATOMICS-NEXT:  .LBB317_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldaxr x8, [x9]
+; OUTLINE-ATOMICS-NEXT:    cmp x8, x0
+; OUTLINE-ATOMICS-NEXT:    csel x10, x8, x0, hi
+; OUTLINE-ATOMICS-NEXT:    stxr w11, x10, [x9]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB317_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    mov x0, x8
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umax i64* @var64, i64 %offset acquire
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
@@ -4175,6 +7365,19 @@ define i64 @test_atomic_load_umax_i64_acquire(i64 %offset) nounwind {
 
 define void @test_atomic_load_umax_i32_noret_acquire(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umax_i32_noret_acquire:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i32_noret_acquire:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x8, var32
+; OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var32
+; OUTLINE-ATOMICS-NEXT:  .LBB318_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldaxr w9, [x8]
+; OUTLINE-ATOMICS-NEXT:    cmp w9, w0
+; OUTLINE-ATOMICS-NEXT:    csel w9, w9, w0, hi
+; OUTLINE-ATOMICS-NEXT:    stxr w10, w9, [x8]
+; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB318_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw umax i32* @var32, i32 %offset acquire
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
@@ -4187,6 +7390,19 @@ define void @test_atomic_load_umax_i32_noret_acquire(i32 %offset) nounwind {
 
 define void @test_atomic_load_umax_i64_noret_acquire(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umax_i64_noret_acquire:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i64_noret_acquire:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x8, var64
+; OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var64
+; OUTLINE-ATOMICS-NEXT:  .LBB319_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldaxr x9, [x8]
+; OUTLINE-ATOMICS-NEXT:    cmp x9, x0
+; OUTLINE-ATOMICS-NEXT:    csel x9, x9, x0, hi
+; OUTLINE-ATOMICS-NEXT:    stxr w10, x9, [x8]
+; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB319_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw umax i64* @var64, i64 %offset acquire
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
@@ -4199,6 +7415,20 @@ define void @test_atomic_load_umax_i64_noret_acquire(i64 %offset) nounwind {
 
 define i8 @test_atomic_load_umax_i8_monotonic(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umax_i8_monotonic:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i8_monotonic:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x9, var8
+; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var8
+; OUTLINE-ATOMICS-NEXT:  .LBB320_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldxrb w8, [x9]
+; OUTLINE-ATOMICS-NEXT:    cmp w8, w0, uxtb
+; OUTLINE-ATOMICS-NEXT:    csel w10, w8, w0, hi
+; OUTLINE-ATOMICS-NEXT:    stxrb w11, w10, [x9]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB320_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umax i8* @var8, i8 %offset monotonic
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
@@ -4212,6 +7442,20 @@ define i8 @test_atomic_load_umax_i8_monotonic(i8 %offset) nounwind {
 
 define i16 @test_atomic_load_umax_i16_monotonic(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umax_i16_monotonic:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i16_monotonic:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x9, var16
+; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var16
+; OUTLINE-ATOMICS-NEXT:  .LBB321_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldxrh w8, [x9]
+; OUTLINE-ATOMICS-NEXT:    cmp w8, w0, uxth
+; OUTLINE-ATOMICS-NEXT:    csel w10, w8, w0, hi
+; OUTLINE-ATOMICS-NEXT:    stxrh w11, w10, [x9]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB321_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umax i16* @var16, i16 %offset monotonic
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
@@ -4225,6 +7469,20 @@ define i16 @test_atomic_load_umax_i16_monotonic(i16 %offset) nounwind {
 
 define i32 @test_atomic_load_umax_i32_monotonic(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umax_i32_monotonic:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i32_monotonic:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x9, var32
+; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var32
+; OUTLINE-ATOMICS-NEXT:  .LBB322_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldxr w8, [x9]
+; OUTLINE-ATOMICS-NEXT:    cmp w8, w0
+; OUTLINE-ATOMICS-NEXT:    csel w10, w8, w0, hi
+; OUTLINE-ATOMICS-NEXT:    stxr w11, w10, [x9]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB322_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umax i32* @var32, i32 %offset monotonic
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
@@ -4238,6 +7496,20 @@ define i32 @test_atomic_load_umax_i32_monotonic(i32 %offset) nounwind {
 
 define i64 @test_atomic_load_umax_i64_monotonic(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umax_i64_monotonic:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i64_monotonic:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x9, var64
+; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var64
+; OUTLINE-ATOMICS-NEXT:  .LBB323_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldxr x8, [x9]
+; OUTLINE-ATOMICS-NEXT:    cmp x8, x0
+; OUTLINE-ATOMICS-NEXT:    csel x10, x8, x0, hi
+; OUTLINE-ATOMICS-NEXT:    stxr w11, x10, [x9]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB323_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    mov x0, x8
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umax i64* @var64, i64 %offset monotonic
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
@@ -4251,6 +7523,19 @@ define i64 @test_atomic_load_umax_i64_monotonic(i64 %offset) nounwind {
 
 define void @test_atomic_load_umax_i32_noret_monotonic(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umax_i32_noret_monotonic:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i32_noret_monotonic:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x8, var32
+; OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var32
+; OUTLINE-ATOMICS-NEXT:  .LBB324_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldxr w9, [x8]
+; OUTLINE-ATOMICS-NEXT:    cmp w9, w0
+; OUTLINE-ATOMICS-NEXT:    csel w9, w9, w0, hi
+; OUTLINE-ATOMICS-NEXT:    stxr w10, w9, [x8]
+; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB324_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw umax i32* @var32, i32 %offset monotonic
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
@@ -4263,6 +7548,19 @@ define void @test_atomic_load_umax_i32_noret_monotonic(i32 %offset) nounwind {
 
 define void @test_atomic_load_umax_i64_noret_monotonic(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umax_i64_noret_monotonic:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i64_noret_monotonic:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x8, var64
+; OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var64
+; OUTLINE-ATOMICS-NEXT:  .LBB325_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldxr x9, [x8]
+; OUTLINE-ATOMICS-NEXT:    cmp x9, x0
+; OUTLINE-ATOMICS-NEXT:    csel x9, x9, x0, hi
+; OUTLINE-ATOMICS-NEXT:    stxr w10, x9, [x8]
+; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB325_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw umax i64* @var64, i64 %offset monotonic
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
@@ -4275,6 +7573,20 @@ define void @test_atomic_load_umax_i64_noret_monotonic(i64 %offset) nounwind {
 
 define i8 @test_atomic_load_umax_i8_release(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umax_i8_release:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i8_release:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x9, var8
+; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var8
+; OUTLINE-ATOMICS-NEXT:  .LBB326_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldxrb w8, [x9]
+; OUTLINE-ATOMICS-NEXT:    cmp w8, w0, uxtb
+; OUTLINE-ATOMICS-NEXT:    csel w10, w8, w0, hi
+; OUTLINE-ATOMICS-NEXT:    stlxrb w11, w10, [x9]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB326_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umax i8* @var8, i8 %offset release
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
@@ -4288,6 +7600,20 @@ define i8 @test_atomic_load_umax_i8_release(i8 %offset) nounwind {
 
 define i16 @test_atomic_load_umax_i16_release(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umax_i16_release:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i16_release:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x9, var16
+; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var16
+; OUTLINE-ATOMICS-NEXT:  .LBB327_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldxrh w8, [x9]
+; OUTLINE-ATOMICS-NEXT:    cmp w8, w0, uxth
+; OUTLINE-ATOMICS-NEXT:    csel w10, w8, w0, hi
+; OUTLINE-ATOMICS-NEXT:    stlxrh w11, w10, [x9]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB327_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umax i16* @var16, i16 %offset release
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
@@ -4301,6 +7627,20 @@ define i16 @test_atomic_load_umax_i16_release(i16 %offset) nounwind {
 
 define i32 @test_atomic_load_umax_i32_release(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umax_i32_release:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i32_release:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x9, var32
+; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var32
+; OUTLINE-ATOMICS-NEXT:  .LBB328_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldxr w8, [x9]
+; OUTLINE-ATOMICS-NEXT:    cmp w8, w0
+; OUTLINE-ATOMICS-NEXT:    csel w10, w8, w0, hi
+; OUTLINE-ATOMICS-NEXT:    stlxr w11, w10, [x9]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB328_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umax i32* @var32, i32 %offset release
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
@@ -4314,6 +7654,20 @@ define i32 @test_atomic_load_umax_i32_release(i32 %offset) nounwind {
 
 define i64 @test_atomic_load_umax_i64_release(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umax_i64_release:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i64_release:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x9, var64
+; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var64
+; OUTLINE-ATOMICS-NEXT:  .LBB329_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldxr x8, [x9]
+; OUTLINE-ATOMICS-NEXT:    cmp x8, x0
+; OUTLINE-ATOMICS-NEXT:    csel x10, x8, x0, hi
+; OUTLINE-ATOMICS-NEXT:    stlxr w11, x10, [x9]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB329_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    mov x0, x8
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umax i64* @var64, i64 %offset release
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
@@ -4327,6 +7681,19 @@ define i64 @test_atomic_load_umax_i64_release(i64 %offset) nounwind {
 
 define void @test_atomic_load_umax_i32_noret_release(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umax_i32_noret_release:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i32_noret_release:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x8, var32
+; OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var32
+; OUTLINE-ATOMICS-NEXT:  .LBB330_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldxr w9, [x8]
+; OUTLINE-ATOMICS-NEXT:    cmp w9, w0
+; OUTLINE-ATOMICS-NEXT:    csel w9, w9, w0, hi
+; OUTLINE-ATOMICS-NEXT:    stlxr w10, w9, [x8]
+; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB330_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw umax i32* @var32, i32 %offset release
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
@@ -4339,6 +7706,19 @@ define void @test_atomic_load_umax_i32_noret_release(i32 %offset) nounwind {
 
 define void @test_atomic_load_umax_i64_noret_release(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umax_i64_noret_release:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i64_noret_release:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x8, var64
+; OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var64
+; OUTLINE-ATOMICS-NEXT:  .LBB331_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldxr x9, [x8]
+; OUTLINE-ATOMICS-NEXT:    cmp x9, x0
+; OUTLINE-ATOMICS-NEXT:    csel x9, x9, x0, hi
+; OUTLINE-ATOMICS-NEXT:    stlxr w10, x9, [x8]
+; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB331_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw umax i64* @var64, i64 %offset release
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
@@ -4351,6 +7731,20 @@ define void @test_atomic_load_umax_i64_noret_release(i64 %offset) nounwind {
 
 define i8 @test_atomic_load_umax_i8_seq_cst(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umax_i8_seq_cst:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i8_seq_cst:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x9, var8
+; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var8
+; OUTLINE-ATOMICS-NEXT:  .LBB332_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldaxrb w8, [x9]
+; OUTLINE-ATOMICS-NEXT:    cmp w8, w0, uxtb
+; OUTLINE-ATOMICS-NEXT:    csel w10, w8, w0, hi
+; OUTLINE-ATOMICS-NEXT:    stlxrb w11, w10, [x9]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB332_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umax i8* @var8, i8 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
@@ -4364,6 +7758,20 @@ define i8 @test_atomic_load_umax_i8_seq_cst(i8 %offset) nounwind {
 
 define i16 @test_atomic_load_umax_i16_seq_cst(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umax_i16_seq_cst:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i16_seq_cst:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x9, var16
+; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var16
+; OUTLINE-ATOMICS-NEXT:  .LBB333_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldaxrh w8, [x9]
+; OUTLINE-ATOMICS-NEXT:    cmp w8, w0, uxth
+; OUTLINE-ATOMICS-NEXT:    csel w10, w8, w0, hi
+; OUTLINE-ATOMICS-NEXT:    stlxrh w11, w10, [x9]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB333_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umax i16* @var16, i16 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
@@ -4377,6 +7785,20 @@ define i16 @test_atomic_load_umax_i16_seq_cst(i16 %offset) nounwind {
 
 define i32 @test_atomic_load_umax_i32_seq_cst(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umax_i32_seq_cst:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i32_seq_cst:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x9, var32
+; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var32
+; OUTLINE-ATOMICS-NEXT:  .LBB334_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldaxr w8, [x9]
+; OUTLINE-ATOMICS-NEXT:    cmp w8, w0
+; OUTLINE-ATOMICS-NEXT:    csel w10, w8, w0, hi
+; OUTLINE-ATOMICS-NEXT:    stlxr w11, w10, [x9]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB334_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umax i32* @var32, i32 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
@@ -4390,6 +7812,20 @@ define i32 @test_atomic_load_umax_i32_seq_cst(i32 %offset) nounwind {
 
 define i64 @test_atomic_load_umax_i64_seq_cst(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umax_i64_seq_cst:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i64_seq_cst:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x9, var64
+; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var64
+; OUTLINE-ATOMICS-NEXT:  .LBB335_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldaxr x8, [x9]
+; OUTLINE-ATOMICS-NEXT:    cmp x8, x0
+; OUTLINE-ATOMICS-NEXT:    csel x10, x8, x0, hi
+; OUTLINE-ATOMICS-NEXT:    stlxr w11, x10, [x9]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB335_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    mov x0, x8
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umax i64* @var64, i64 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
@@ -4403,6 +7839,19 @@ define i64 @test_atomic_load_umax_i64_seq_cst(i64 %offset) nounwind {
 
 define void @test_atomic_load_umax_i32_noret_seq_cst(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umax_i32_noret_seq_cst:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i32_noret_seq_cst:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x8, var32
+; OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var32
+; OUTLINE-ATOMICS-NEXT:  .LBB336_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldaxr w9, [x8]
+; OUTLINE-ATOMICS-NEXT:    cmp w9, w0
+; OUTLINE-ATOMICS-NEXT:    csel w9, w9, w0, hi
+; OUTLINE-ATOMICS-NEXT:    stlxr w10, w9, [x8]
+; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB336_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw umax i32* @var32, i32 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
@@ -4415,6 +7864,19 @@ define void @test_atomic_load_umax_i32_noret_seq_cst(i32 %offset) nounwind {
 
 define void @test_atomic_load_umax_i64_noret_seq_cst(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umax_i64_noret_seq_cst:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i64_noret_seq_cst:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x8, var64
+; OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var64
+; OUTLINE-ATOMICS-NEXT:  .LBB337_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldaxr x9, [x8]
+; OUTLINE-ATOMICS-NEXT:    cmp x9, x0
+; OUTLINE-ATOMICS-NEXT:    csel x9, x9, x0, hi
+; OUTLINE-ATOMICS-NEXT:    stlxr w10, x9, [x8]
+; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB337_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw umax i64* @var64, i64 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
@@ -4427,6 +7889,20 @@ define void @test_atomic_load_umax_i64_noret_seq_cst(i64 %offset) nounwind {
 
 define i8 @test_atomic_load_umin_i8_acq_rel(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umin_i8_acq_rel:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i8_acq_rel:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x9, var8
+; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var8
+; OUTLINE-ATOMICS-NEXT:  .LBB338_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldaxrb w8, [x9]
+; OUTLINE-ATOMICS-NEXT:    cmp w8, w0, uxtb
+; OUTLINE-ATOMICS-NEXT:    csel w10, w8, w0, ls
+; OUTLINE-ATOMICS-NEXT:    stlxrb w11, w10, [x9]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB338_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umin i8* @var8, i8 %offset acq_rel
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
@@ -4440,6 +7916,20 @@ define i8 @test_atomic_load_umin_i8_acq_rel(i8 %offset) nounwind {
 
 define i16 @test_atomic_load_umin_i16_acq_rel(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umin_i16_acq_rel:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i16_acq_rel:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x9, var16
+; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var16
+; OUTLINE-ATOMICS-NEXT:  .LBB339_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldaxrh w8, [x9]
+; OUTLINE-ATOMICS-NEXT:    cmp w8, w0, uxth
+; OUTLINE-ATOMICS-NEXT:    csel w10, w8, w0, ls
+; OUTLINE-ATOMICS-NEXT:    stlxrh w11, w10, [x9]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB339_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umin i16* @var16, i16 %offset acq_rel
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
@@ -4453,6 +7943,20 @@ define i16 @test_atomic_load_umin_i16_acq_rel(i16 %offset) nounwind {
 
 define i32 @test_atomic_load_umin_i32_acq_rel(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umin_i32_acq_rel:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i32_acq_rel:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x9, var32
+; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var32
+; OUTLINE-ATOMICS-NEXT:  .LBB340_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldaxr w8, [x9]
+; OUTLINE-ATOMICS-NEXT:    cmp w8, w0
+; OUTLINE-ATOMICS-NEXT:    csel w10, w8, w0, ls
+; OUTLINE-ATOMICS-NEXT:    stlxr w11, w10, [x9]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB340_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umin i32* @var32, i32 %offset acq_rel
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
@@ -4466,6 +7970,20 @@ define i32 @test_atomic_load_umin_i32_acq_rel(i32 %offset) nounwind {
 
 define i64 @test_atomic_load_umin_i64_acq_rel(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umin_i64_acq_rel:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i64_acq_rel:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x9, var64
+; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var64
+; OUTLINE-ATOMICS-NEXT:  .LBB341_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldaxr x8, [x9]
+; OUTLINE-ATOMICS-NEXT:    cmp x8, x0
+; OUTLINE-ATOMICS-NEXT:    csel x10, x8, x0, ls
+; OUTLINE-ATOMICS-NEXT:    stlxr w11, x10, [x9]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB341_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    mov x0, x8
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umin i64* @var64, i64 %offset acq_rel
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
@@ -4479,6 +7997,19 @@ define i64 @test_atomic_load_umin_i64_acq_rel(i64 %offset) nounwind {
 
 define void @test_atomic_load_umin_i32_noret_acq_rel(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umin_i32_noret_acq_rel:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i32_noret_acq_rel:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x8, var32
+; OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var32
+; OUTLINE-ATOMICS-NEXT:  .LBB342_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldaxr w9, [x8]
+; OUTLINE-ATOMICS-NEXT:    cmp w9, w0
+; OUTLINE-ATOMICS-NEXT:    csel w9, w9, w0, ls
+; OUTLINE-ATOMICS-NEXT:    stlxr w10, w9, [x8]
+; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB342_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw umin i32* @var32, i32 %offset acq_rel
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
@@ -4491,6 +8022,19 @@ define void @test_atomic_load_umin_i32_noret_acq_rel(i32 %offset) nounwind {
 
 define void @test_atomic_load_umin_i64_noret_acq_rel(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umin_i64_noret_acq_rel:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i64_noret_acq_rel:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x8, var64
+; OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var64
+; OUTLINE-ATOMICS-NEXT:  .LBB343_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldaxr x9, [x8]
+; OUTLINE-ATOMICS-NEXT:    cmp x9, x0
+; OUTLINE-ATOMICS-NEXT:    csel x9, x9, x0, ls
+; OUTLINE-ATOMICS-NEXT:    stlxr w10, x9, [x8]
+; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB343_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw umin i64* @var64, i64 %offset acq_rel
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
@@ -4503,6 +8047,20 @@ define void @test_atomic_load_umin_i64_noret_acq_rel(i64 %offset) nounwind {
 
 define i8 @test_atomic_load_umin_i8_acquire(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umin_i8_acquire:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i8_acquire:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x9, var8
+; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var8
+; OUTLINE-ATOMICS-NEXT:  .LBB344_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldaxrb w8, [x9]
+; OUTLINE-ATOMICS-NEXT:    cmp w8, w0, uxtb
+; OUTLINE-ATOMICS-NEXT:    csel w10, w8, w0, ls
+; OUTLINE-ATOMICS-NEXT:    stxrb w11, w10, [x9]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB344_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umin i8* @var8, i8 %offset acquire
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
@@ -4516,6 +8074,20 @@ define i8 @test_atomic_load_umin_i8_acquire(i8 %offset) nounwind {
 
 define i16 @test_atomic_load_umin_i16_acquire(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umin_i16_acquire:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i16_acquire:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x9, var16
+; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var16
+; OUTLINE-ATOMICS-NEXT:  .LBB345_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldaxrh w8, [x9]
+; OUTLINE-ATOMICS-NEXT:    cmp w8, w0, uxth
+; OUTLINE-ATOMICS-NEXT:    csel w10, w8, w0, ls
+; OUTLINE-ATOMICS-NEXT:    stxrh w11, w10, [x9]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB345_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umin i16* @var16, i16 %offset acquire
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
@@ -4529,6 +8101,20 @@ define i16 @test_atomic_load_umin_i16_acquire(i16 %offset) nounwind {
 
 define i32 @test_atomic_load_umin_i32_acquire(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umin_i32_acquire:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i32_acquire:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x9, var32
+; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var32
+; OUTLINE-ATOMICS-NEXT:  .LBB346_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldaxr w8, [x9]
+; OUTLINE-ATOMICS-NEXT:    cmp w8, w0
+; OUTLINE-ATOMICS-NEXT:    csel w10, w8, w0, ls
+; OUTLINE-ATOMICS-NEXT:    stxr w11, w10, [x9]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB346_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umin i32* @var32, i32 %offset acquire
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
@@ -4542,6 +8128,20 @@ define i32 @test_atomic_load_umin_i32_acquire(i32 %offset) nounwind {
 
 define i64 @test_atomic_load_umin_i64_acquire(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umin_i64_acquire:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i64_acquire:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x9, var64
+; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var64
+; OUTLINE-ATOMICS-NEXT:  .LBB347_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldaxr x8, [x9]
+; OUTLINE-ATOMICS-NEXT:    cmp x8, x0
+; OUTLINE-ATOMICS-NEXT:    csel x10, x8, x0, ls
+; OUTLINE-ATOMICS-NEXT:    stxr w11, x10, [x9]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB347_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    mov x0, x8
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umin i64* @var64, i64 %offset acquire
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
@@ -4555,6 +8155,19 @@ define i64 @test_atomic_load_umin_i64_acquire(i64 %offset) nounwind {
 
 define void @test_atomic_load_umin_i32_noret_acquire(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umin_i32_noret_acquire:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i32_noret_acquire:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x8, var32
+; OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var32
+; OUTLINE-ATOMICS-NEXT:  .LBB348_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldaxr w9, [x8]
+; OUTLINE-ATOMICS-NEXT:    cmp w9, w0
+; OUTLINE-ATOMICS-NEXT:    csel w9, w9, w0, ls
+; OUTLINE-ATOMICS-NEXT:    stxr w10, w9, [x8]
+; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB348_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw umin i32* @var32, i32 %offset acquire
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
@@ -4567,6 +8180,19 @@ define void @test_atomic_load_umin_i32_noret_acquire(i32 %offset) nounwind {
 
 define void @test_atomic_load_umin_i64_noret_acquire(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umin_i64_noret_acquire:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i64_noret_acquire:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x8, var64
+; OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var64
+; OUTLINE-ATOMICS-NEXT:  .LBB349_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldaxr x9, [x8]
+; OUTLINE-ATOMICS-NEXT:    cmp x9, x0
+; OUTLINE-ATOMICS-NEXT:    csel x9, x9, x0, ls
+; OUTLINE-ATOMICS-NEXT:    stxr w10, x9, [x8]
+; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB349_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw umin i64* @var64, i64 %offset acquire
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
@@ -4579,6 +8205,20 @@ define void @test_atomic_load_umin_i64_noret_acquire(i64 %offset) nounwind {
 
 define i8 @test_atomic_load_umin_i8_monotonic(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umin_i8_monotonic:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i8_monotonic:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x9, var8
+; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var8
+; OUTLINE-ATOMICS-NEXT:  .LBB350_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldxrb w8, [x9]
+; OUTLINE-ATOMICS-NEXT:    cmp w8, w0, uxtb
+; OUTLINE-ATOMICS-NEXT:    csel w10, w8, w0, ls
+; OUTLINE-ATOMICS-NEXT:    stxrb w11, w10, [x9]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB350_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umin i8* @var8, i8 %offset monotonic
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
@@ -4592,6 +8232,20 @@ define i8 @test_atomic_load_umin_i8_monotonic(i8 %offset) nounwind {
 
 define i16 @test_atomic_load_umin_i16_monotonic(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umin_i16_monotonic:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i16_monotonic:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x9, var16
+; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var16
+; OUTLINE-ATOMICS-NEXT:  .LBB351_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldxrh w8, [x9]
+; OUTLINE-ATOMICS-NEXT:    cmp w8, w0, uxth
+; OUTLINE-ATOMICS-NEXT:    csel w10, w8, w0, ls
+; OUTLINE-ATOMICS-NEXT:    stxrh w11, w10, [x9]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB351_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umin i16* @var16, i16 %offset monotonic
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
@@ -4605,6 +8259,20 @@ define i16 @test_atomic_load_umin_i16_monotonic(i16 %offset) nounwind {
 
 define i32 @test_atomic_load_umin_i32_monotonic(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umin_i32_monotonic:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i32_monotonic:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x9, var32
+; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var32
+; OUTLINE-ATOMICS-NEXT:  .LBB352_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldxr w8, [x9]
+; OUTLINE-ATOMICS-NEXT:    cmp w8, w0
+; OUTLINE-ATOMICS-NEXT:    csel w10, w8, w0, ls
+; OUTLINE-ATOMICS-NEXT:    stxr w11, w10, [x9]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB352_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umin i32* @var32, i32 %offset monotonic
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
@@ -4618,6 +8286,20 @@ define i32 @test_atomic_load_umin_i32_monotonic(i32 %offset) nounwind {
 
 define i64 @test_atomic_load_umin_i64_monotonic(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umin_i64_monotonic:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i64_monotonic:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x9, var64
+; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var64
+; OUTLINE-ATOMICS-NEXT:  .LBB353_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldxr x8, [x9]
+; OUTLINE-ATOMICS-NEXT:    cmp x8, x0
+; OUTLINE-ATOMICS-NEXT:    csel x10, x8, x0, ls
+; OUTLINE-ATOMICS-NEXT:    stxr w11, x10, [x9]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB353_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    mov x0, x8
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umin i64* @var64, i64 %offset monotonic
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
@@ -4631,6 +8313,19 @@ define i64 @test_atomic_load_umin_i64_monotonic(i64 %offset) nounwind {
 
 define void @test_atomic_load_umin_i32_noret_monotonic(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umin_i32_noret_monotonic:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i32_noret_monotonic:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x8, var32
+; OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var32
+; OUTLINE-ATOMICS-NEXT:  .LBB354_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldxr w9, [x8]
+; OUTLINE-ATOMICS-NEXT:    cmp w9, w0
+; OUTLINE-ATOMICS-NEXT:    csel w9, w9, w0, ls
+; OUTLINE-ATOMICS-NEXT:    stxr w10, w9, [x8]
+; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB354_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw umin i32* @var32, i32 %offset monotonic
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
@@ -4643,6 +8338,19 @@ define void @test_atomic_load_umin_i32_noret_monotonic(i32 %offset) nounwind {
 
 define void @test_atomic_load_umin_i64_noret_monotonic(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umin_i64_noret_monotonic:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i64_noret_monotonic:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x8, var64
+; OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var64
+; OUTLINE-ATOMICS-NEXT:  .LBB355_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldxr x9, [x8]
+; OUTLINE-ATOMICS-NEXT:    cmp x9, x0
+; OUTLINE-ATOMICS-NEXT:    csel x9, x9, x0, ls
+; OUTLINE-ATOMICS-NEXT:    stxr w10, x9, [x8]
+; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB355_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw umin i64* @var64, i64 %offset monotonic
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
@@ -4655,6 +8363,20 @@ define void @test_atomic_load_umin_i64_noret_monotonic(i64 %offset) nounwind {
 
 define i8 @test_atomic_load_umin_i8_release(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umin_i8_release:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i8_release:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x9, var8
+; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var8
+; OUTLINE-ATOMICS-NEXT:  .LBB356_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldxrb w8, [x9]
+; OUTLINE-ATOMICS-NEXT:    cmp w8, w0, uxtb
+; OUTLINE-ATOMICS-NEXT:    csel w10, w8, w0, ls
+; OUTLINE-ATOMICS-NEXT:    stlxrb w11, w10, [x9]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB356_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umin i8* @var8, i8 %offset release
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
@@ -4668,6 +8390,20 @@ define i8 @test_atomic_load_umin_i8_release(i8 %offset) nounwind {
 
 define i16 @test_atomic_load_umin_i16_release(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umin_i16_release:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i16_release:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x9, var16
+; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var16
+; OUTLINE-ATOMICS-NEXT:  .LBB357_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldxrh w8, [x9]
+; OUTLINE-ATOMICS-NEXT:    cmp w8, w0, uxth
+; OUTLINE-ATOMICS-NEXT:    csel w10, w8, w0, ls
+; OUTLINE-ATOMICS-NEXT:    stlxrh w11, w10, [x9]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB357_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umin i16* @var16, i16 %offset release
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
@@ -4681,6 +8417,20 @@ define i16 @test_atomic_load_umin_i16_release(i16 %offset) nounwind {
 
 define i32 @test_atomic_load_umin_i32_release(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umin_i32_release:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i32_release:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x9, var32
+; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var32
+; OUTLINE-ATOMICS-NEXT:  .LBB358_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldxr w8, [x9]
+; OUTLINE-ATOMICS-NEXT:    cmp w8, w0
+; OUTLINE-ATOMICS-NEXT:    csel w10, w8, w0, ls
+; OUTLINE-ATOMICS-NEXT:    stlxr w11, w10, [x9]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB358_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umin i32* @var32, i32 %offset release
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
@@ -4694,6 +8444,20 @@ define i32 @test_atomic_load_umin_i32_release(i32 %offset) nounwind {
 
 define i64 @test_atomic_load_umin_i64_release(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umin_i64_release:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i64_release:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x9, var64
+; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var64
+; OUTLINE-ATOMICS-NEXT:  .LBB359_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldxr x8, [x9]
+; OUTLINE-ATOMICS-NEXT:    cmp x8, x0
+; OUTLINE-ATOMICS-NEXT:    csel x10, x8, x0, ls
+; OUTLINE-ATOMICS-NEXT:    stlxr w11, x10, [x9]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB359_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    mov x0, x8
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umin i64* @var64, i64 %offset release
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
@@ -4707,6 +8471,19 @@ define i64 @test_atomic_load_umin_i64_release(i64 %offset) nounwind {
 
 define void @test_atomic_load_umin_i32_noret_release(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umin_i32_noret_release:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i32_noret_release:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x8, var32
+; OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var32
+; OUTLINE-ATOMICS-NEXT:  .LBB360_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldxr w9, [x8]
+; OUTLINE-ATOMICS-NEXT:    cmp w9, w0
+; OUTLINE-ATOMICS-NEXT:    csel w9, w9, w0, ls
+; OUTLINE-ATOMICS-NEXT:    stlxr w10, w9, [x8]
+; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB360_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw umin i32* @var32, i32 %offset release
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
@@ -4719,6 +8496,19 @@ define void @test_atomic_load_umin_i32_noret_release(i32 %offset) nounwind {
 
 define void @test_atomic_load_umin_i64_noret_release(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umin_i64_noret_release:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i64_noret_release:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x8, var64
+; OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var64
+; OUTLINE-ATOMICS-NEXT:  .LBB361_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldxr x9, [x8]
+; OUTLINE-ATOMICS-NEXT:    cmp x9, x0
+; OUTLINE-ATOMICS-NEXT:    csel x9, x9, x0, ls
+; OUTLINE-ATOMICS-NEXT:    stlxr w10, x9, [x8]
+; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB361_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw umin i64* @var64, i64 %offset release
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
@@ -4731,6 +8521,20 @@ define void @test_atomic_load_umin_i64_noret_release(i64 %offset) nounwind {
 
 define i8 @test_atomic_load_umin_i8_seq_cst(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umin_i8_seq_cst:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i8_seq_cst:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x9, var8
+; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var8
+; OUTLINE-ATOMICS-NEXT:  .LBB362_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldaxrb w8, [x9]
+; OUTLINE-ATOMICS-NEXT:    cmp w8, w0, uxtb
+; OUTLINE-ATOMICS-NEXT:    csel w10, w8, w0, ls
+; OUTLINE-ATOMICS-NEXT:    stlxrb w11, w10, [x9]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB362_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umin i8* @var8, i8 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
@@ -4744,6 +8548,20 @@ define i8 @test_atomic_load_umin_i8_seq_cst(i8 %offset) nounwind {
 
 define i16 @test_atomic_load_umin_i16_seq_cst(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umin_i16_seq_cst:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i16_seq_cst:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x9, var16
+; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var16
+; OUTLINE-ATOMICS-NEXT:  .LBB363_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldaxrh w8, [x9]
+; OUTLINE-ATOMICS-NEXT:    cmp w8, w0, uxth
+; OUTLINE-ATOMICS-NEXT:    csel w10, w8, w0, ls
+; OUTLINE-ATOMICS-NEXT:    stlxrh w11, w10, [x9]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB363_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umin i16* @var16, i16 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
@@ -4757,6 +8575,20 @@ define i16 @test_atomic_load_umin_i16_seq_cst(i16 %offset) nounwind {
 
 define i32 @test_atomic_load_umin_i32_seq_cst(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umin_i32_seq_cst:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i32_seq_cst:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x9, var32
+; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var32
+; OUTLINE-ATOMICS-NEXT:  .LBB364_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldaxr w8, [x9]
+; OUTLINE-ATOMICS-NEXT:    cmp w8, w0
+; OUTLINE-ATOMICS-NEXT:    csel w10, w8, w0, ls
+; OUTLINE-ATOMICS-NEXT:    stlxr w11, w10, [x9]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB364_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umin i32* @var32, i32 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
@@ -4770,6 +8602,20 @@ define i32 @test_atomic_load_umin_i32_seq_cst(i32 %offset) nounwind {
 
 define i64 @test_atomic_load_umin_i64_seq_cst(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umin_i64_seq_cst:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i64_seq_cst:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x9, var64
+; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var64
+; OUTLINE-ATOMICS-NEXT:  .LBB365_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldaxr x8, [x9]
+; OUTLINE-ATOMICS-NEXT:    cmp x8, x0
+; OUTLINE-ATOMICS-NEXT:    csel x10, x8, x0, ls
+; OUTLINE-ATOMICS-NEXT:    stlxr w11, x10, [x9]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB365_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    mov x0, x8
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umin i64* @var64, i64 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
@@ -4783,6 +8629,19 @@ define i64 @test_atomic_load_umin_i64_seq_cst(i64 %offset) nounwind {
 
 define void @test_atomic_load_umin_i32_noret_seq_cst(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umin_i32_noret_seq_cst:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i32_noret_seq_cst:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x8, var32
+; OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var32
+; OUTLINE-ATOMICS-NEXT:  .LBB366_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldaxr w9, [x8]
+; OUTLINE-ATOMICS-NEXT:    cmp w9, w0
+; OUTLINE-ATOMICS-NEXT:    csel w9, w9, w0, ls
+; OUTLINE-ATOMICS-NEXT:    stlxr w10, w9, [x8]
+; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB366_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw umin i32* @var32, i32 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
@@ -4795,6 +8654,19 @@ define void @test_atomic_load_umin_i32_noret_seq_cst(i32 %offset) nounwind {
 
 define void @test_atomic_load_umin_i64_noret_seq_cst(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umin_i64_noret_seq_cst:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i64_noret_seq_cst:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x8, var64
+; OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var64
+; OUTLINE-ATOMICS-NEXT:  .LBB367_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldaxr x9, [x8]
+; OUTLINE-ATOMICS-NEXT:    cmp x9, x0
+; OUTLINE-ATOMICS-NEXT:    csel x9, x9, x0, ls
+; OUTLINE-ATOMICS-NEXT:    stlxr w10, x9, [x8]
+; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB367_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw umin i64* @var64, i64 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
@@ -4807,6 +8679,14 @@ define void @test_atomic_load_umin_i64_noret_seq_cst(i64 %offset) nounwind {
 
 define i8 @test_atomic_load_xor_i8_acq_rel(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xor_i8_acq_rel:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i8_acq_rel:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var8
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var8
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldeor1_acq_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw xor i8* @var8, i8 %offset acq_rel
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
@@ -4820,6 +8700,14 @@ define i8 @test_atomic_load_xor_i8_acq_rel(i8 %offset) nounwind {
 
 define i16 @test_atomic_load_xor_i16_acq_rel(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xor_i16_acq_rel:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i16_acq_rel:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var16
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var16
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldeor2_acq_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw xor i16* @var16, i16 %offset acq_rel
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
@@ -4833,6 +8721,14 @@ define i16 @test_atomic_load_xor_i16_acq_rel(i16 %offset) nounwind {
 
 define i32 @test_atomic_load_xor_i32_acq_rel(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xor_i32_acq_rel:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i32_acq_rel:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldeor4_acq_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw xor i32* @var32, i32 %offset acq_rel
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
@@ -4846,6 +8742,14 @@ define i32 @test_atomic_load_xor_i32_acq_rel(i32 %offset) nounwind {
 
 define i64 @test_atomic_load_xor_i64_acq_rel(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xor_i64_acq_rel:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i64_acq_rel:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldeor8_acq_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw xor i64* @var64, i64 %offset acq_rel
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
@@ -4859,6 +8763,14 @@ define i64 @test_atomic_load_xor_i64_acq_rel(i64 %offset) nounwind {
 
 define void @test_atomic_load_xor_i32_noret_acq_rel(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xor_i32_noret_acq_rel:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i32_noret_acq_rel:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldeor4_acq_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw xor i32* @var32, i32 %offset acq_rel
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
@@ -4871,6 +8783,14 @@ define void @test_atomic_load_xor_i32_noret_acq_rel(i32 %offset) nounwind {
 
 define void @test_atomic_load_xor_i64_noret_acq_rel(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xor_i64_noret_acq_rel:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i64_noret_acq_rel:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldeor8_acq_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw xor i64* @var64, i64 %offset acq_rel
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
@@ -4883,6 +8803,14 @@ define void @test_atomic_load_xor_i64_noret_acq_rel(i64 %offset) nounwind {
 
 define i8 @test_atomic_load_xor_i8_acquire(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xor_i8_acquire:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i8_acquire:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var8
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var8
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldeor1_acq
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw xor i8* @var8, i8 %offset acquire
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
@@ -4896,6 +8824,14 @@ define i8 @test_atomic_load_xor_i8_acquire(i8 %offset) nounwind {
 
 define i16 @test_atomic_load_xor_i16_acquire(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xor_i16_acquire:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i16_acquire:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var16
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var16
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldeor2_acq
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw xor i16* @var16, i16 %offset acquire
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
@@ -4909,6 +8845,14 @@ define i16 @test_atomic_load_xor_i16_acquire(i16 %offset) nounwind {
 
 define i32 @test_atomic_load_xor_i32_acquire(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xor_i32_acquire:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i32_acquire:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldeor4_acq
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw xor i32* @var32, i32 %offset acquire
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
@@ -4922,6 +8866,14 @@ define i32 @test_atomic_load_xor_i32_acquire(i32 %offset) nounwind {
 
 define i64 @test_atomic_load_xor_i64_acquire(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xor_i64_acquire:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i64_acquire:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldeor8_acq
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw xor i64* @var64, i64 %offset acquire
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
@@ -4935,6 +8887,14 @@ define i64 @test_atomic_load_xor_i64_acquire(i64 %offset) nounwind {
 
 define void @test_atomic_load_xor_i32_noret_acquire(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xor_i32_noret_acquire:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i32_noret_acquire:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldeor4_acq
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw xor i32* @var32, i32 %offset acquire
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
@@ -4947,6 +8907,14 @@ define void @test_atomic_load_xor_i32_noret_acquire(i32 %offset) nounwind {
 
 define void @test_atomic_load_xor_i64_noret_acquire(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xor_i64_noret_acquire:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i64_noret_acquire:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldeor8_acq
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw xor i64* @var64, i64 %offset acquire
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
@@ -4959,6 +8927,14 @@ define void @test_atomic_load_xor_i64_noret_acquire(i64 %offset) nounwind {
 
 define i8 @test_atomic_load_xor_i8_monotonic(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xor_i8_monotonic:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i8_monotonic:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var8
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var8
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldeor1_relax
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw xor i8* @var8, i8 %offset monotonic
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
@@ -4972,6 +8948,14 @@ define i8 @test_atomic_load_xor_i8_monotonic(i8 %offset) nounwind {
 
 define i16 @test_atomic_load_xor_i16_monotonic(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xor_i16_monotonic:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i16_monotonic:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var16
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var16
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldeor2_relax
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw xor i16* @var16, i16 %offset monotonic
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
@@ -4985,6 +8969,14 @@ define i16 @test_atomic_load_xor_i16_monotonic(i16 %offset) nounwind {
 
 define i32 @test_atomic_load_xor_i32_monotonic(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xor_i32_monotonic:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i32_monotonic:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldeor4_relax
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw xor i32* @var32, i32 %offset monotonic
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
@@ -4998,6 +8990,14 @@ define i32 @test_atomic_load_xor_i32_monotonic(i32 %offset) nounwind {
 
 define i64 @test_atomic_load_xor_i64_monotonic(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xor_i64_monotonic:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i64_monotonic:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldeor8_relax
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw xor i64* @var64, i64 %offset monotonic
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
@@ -5011,6 +9011,14 @@ define i64 @test_atomic_load_xor_i64_monotonic(i64 %offset) nounwind {
 
 define void @test_atomic_load_xor_i32_noret_monotonic(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xor_i32_noret_monotonic:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i32_noret_monotonic:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldeor4_relax
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw xor i32* @var32, i32 %offset monotonic
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
@@ -5023,6 +9031,14 @@ define void @test_atomic_load_xor_i32_noret_monotonic(i32 %offset) nounwind {
 
 define void @test_atomic_load_xor_i64_noret_monotonic(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xor_i64_noret_monotonic:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i64_noret_monotonic:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldeor8_relax
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw xor i64* @var64, i64 %offset monotonic
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
@@ -5035,6 +9051,14 @@ define void @test_atomic_load_xor_i64_noret_monotonic(i64 %offset) nounwind {
 
 define i8 @test_atomic_load_xor_i8_release(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xor_i8_release:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i8_release:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var8
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var8
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldeor1_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw xor i8* @var8, i8 %offset release
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
@@ -5048,6 +9072,14 @@ define i8 @test_atomic_load_xor_i8_release(i8 %offset) nounwind {
 
 define i16 @test_atomic_load_xor_i16_release(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xor_i16_release:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i16_release:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var16
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var16
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldeor2_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw xor i16* @var16, i16 %offset release
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
@@ -5061,6 +9093,14 @@ define i16 @test_atomic_load_xor_i16_release(i16 %offset) nounwind {
 
 define i32 @test_atomic_load_xor_i32_release(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xor_i32_release:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i32_release:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldeor4_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw xor i32* @var32, i32 %offset release
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
@@ -5074,6 +9114,14 @@ define i32 @test_atomic_load_xor_i32_release(i32 %offset) nounwind {
 
 define i64 @test_atomic_load_xor_i64_release(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xor_i64_release:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i64_release:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldeor8_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw xor i64* @var64, i64 %offset release
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
@@ -5087,6 +9135,14 @@ define i64 @test_atomic_load_xor_i64_release(i64 %offset) nounwind {
 
 define void @test_atomic_load_xor_i32_noret_release(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xor_i32_noret_release:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i32_noret_release:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldeor4_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw xor i32* @var32, i32 %offset release
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
@@ -5099,6 +9155,14 @@ define void @test_atomic_load_xor_i32_noret_release(i32 %offset) nounwind {
 
 define void @test_atomic_load_xor_i64_noret_release(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xor_i64_noret_release:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i64_noret_release:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldeor8_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw xor i64* @var64, i64 %offset release
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
@@ -5111,6 +9175,14 @@ define void @test_atomic_load_xor_i64_noret_release(i64 %offset) nounwind {
 
 define i8 @test_atomic_load_xor_i8_seq_cst(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xor_i8_seq_cst:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i8_seq_cst:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var8
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var8
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldeor1_acq_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw xor i8* @var8, i8 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
@@ -5124,6 +9196,14 @@ define i8 @test_atomic_load_xor_i8_seq_cst(i8 %offset) nounwind {
 
 define i16 @test_atomic_load_xor_i16_seq_cst(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xor_i16_seq_cst:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i16_seq_cst:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var16
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var16
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldeor2_acq_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw xor i16* @var16, i16 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
@@ -5137,6 +9217,14 @@ define i16 @test_atomic_load_xor_i16_seq_cst(i16 %offset) nounwind {
 
 define i32 @test_atomic_load_xor_i32_seq_cst(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xor_i32_seq_cst:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i32_seq_cst:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldeor4_acq_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw xor i32* @var32, i32 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
@@ -5150,6 +9238,14 @@ define i32 @test_atomic_load_xor_i32_seq_cst(i32 %offset) nounwind {
 
 define i64 @test_atomic_load_xor_i64_seq_cst(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xor_i64_seq_cst:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i64_seq_cst:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldeor8_acq_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw xor i64* @var64, i64 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
@@ -5163,6 +9259,14 @@ define i64 @test_atomic_load_xor_i64_seq_cst(i64 %offset) nounwind {
 
 define void @test_atomic_load_xor_i32_noret_seq_cst(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xor_i32_noret_seq_cst:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i32_noret_seq_cst:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldeor4_acq_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw xor i32* @var32, i32 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
@@ -5175,6 +9279,14 @@ define void @test_atomic_load_xor_i32_noret_seq_cst(i32 %offset) nounwind {
 
 define void @test_atomic_load_xor_i64_noret_seq_cst(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xor_i64_noret_seq_cst:
+; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i64_noret_seq_cst:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldeor8_acq_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw xor i64* @var64, i64 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64

diff  --git a/llvm/test/CodeGen/AArch64/atomic-ops-not-barriers.ll b/llvm/test/CodeGen/AArch64/atomic-ops-not-barriers.ll
index 44e777161e8e..25fb9b841b25 100644
--- a/llvm/test/CodeGen/AArch64/atomic-ops-not-barriers.ll
+++ b/llvm/test/CodeGen/AArch64/atomic-ops-not-barriers.ll
@@ -1,6 +1,8 @@
 ; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs -mattr=+outline-atomics < %s | FileCheck %s --check-prefix=OUTLINE-ATOMICS
 
 define i32 @foo(i32* %var, i1 %cond) {
+; OUTLINE-ATOMICS: bl __aarch64_ldadd4_relax
 ; CHECK-LABEL: foo:
   br i1 %cond, label %atomic_ver, label %simple_ver
 simple_ver:

diff  --git a/llvm/test/CodeGen/AArch64/atomic-ops.ll b/llvm/test/CodeGen/AArch64/atomic-ops.ll
index b763e065200d..1a52e31ab043 100644
--- a/llvm/test/CodeGen/AArch64/atomic-ops.ll
+++ b/llvm/test/CodeGen/AArch64/atomic-ops.ll
@@ -1,5 +1,6 @@
 ; RUN: llc -mtriple=aarch64-none-linux-gnu -disable-post-ra -verify-machineinstrs < %s | FileCheck %s
 ; RUN: llc -mtriple=aarch64-none-linux-gnu -disable-post-ra -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK-REG
+; RUN: llc -mtriple=aarch64-none-linux-gnu -disable-post-ra -verify-machineinstrs -mattr=+outline-atomics < %s | FileCheck %s --check-prefix=OUTLINE_ATOMICS
 
 
 ; Point of CHECK-REG is to make sure UNPREDICTABLE instructions aren't created
@@ -14,6 +15,14 @@
 
 define i8 @test_atomic_load_add_i8(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_add_i8:
+; OUTLINE_ATOMICS-LABEL: test_atomic_load_add_i8:
+; OUTLINE_ATOMICS:       // %bb.0:
+; OUTLINE_ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE_ATOMICS-NEXT:    adrp x1, var8
+; OUTLINE_ATOMICS-NEXT:    add x1, x1, :lo12:var8
+; OUTLINE_ATOMICS-NEXT:    bl __aarch64_ldadd1_acq_rel
+; OUTLINE_ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE_ATOMICS-NEXT:    ret
    %old = atomicrmw add i8* @var8, i8 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
@@ -34,6 +43,14 @@ define i8 @test_atomic_load_add_i8(i8 %offset) nounwind {
 
 define i16 @test_atomic_load_add_i16(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_add_i16:
+; OUTLINE_ATOMICS-LABEL: test_atomic_load_add_i16:
+; OUTLINE_ATOMICS:       // %bb.0:
+; OUTLINE_ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE_ATOMICS-NEXT:    adrp x1, var16
+; OUTLINE_ATOMICS-NEXT:    add x1, x1, :lo12:var16
+; OUTLINE_ATOMICS-NEXT:    bl __aarch64_ldadd2_acq
+; OUTLINE_ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE_ATOMICS-NEXT:    ret
    %old = atomicrmw add i16* @var16, i16 %offset acquire
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
@@ -54,6 +71,14 @@ define i16 @test_atomic_load_add_i16(i16 %offset) nounwind {
 
 define i32 @test_atomic_load_add_i32(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_add_i32:
+; OUTLINE_ATOMICS-LABEL: test_atomic_load_add_i32:
+; OUTLINE_ATOMICS:       // %bb.0:
+; OUTLINE_ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE_ATOMICS-NEXT:    adrp x1, var32
+; OUTLINE_ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; OUTLINE_ATOMICS-NEXT:    bl __aarch64_ldadd4_rel
+; OUTLINE_ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE_ATOMICS-NEXT:    ret
    %old = atomicrmw add i32* @var32, i32 %offset release
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
@@ -74,6 +99,14 @@ define i32 @test_atomic_load_add_i32(i32 %offset) nounwind {
 
 define i64 @test_atomic_load_add_i64(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_add_i64:
+; OUTLINE_ATOMICS-LABEL: test_atomic_load_add_i64:
+; OUTLINE_ATOMICS:       // %bb.0:
+; OUTLINE_ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE_ATOMICS-NEXT:    adrp x1, var64
+; OUTLINE_ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; OUTLINE_ATOMICS-NEXT:    bl __aarch64_ldadd8_relax
+; OUTLINE_ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE_ATOMICS-NEXT:    ret
    %old = atomicrmw add i64* @var64, i64 %offset monotonic
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
@@ -94,6 +127,15 @@ define i64 @test_atomic_load_add_i64(i64 %offset) nounwind {
 
 define i8 @test_atomic_load_sub_i8(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_sub_i8:
+; OUTLINE_ATOMICS-LABEL: test_atomic_load_sub_i8:
+; OUTLINE_ATOMICS:       // %bb.0:
+; OUTLINE_ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE_ATOMICS-NEXT:    neg w0, w0
+; OUTLINE_ATOMICS-NEXT:    adrp x1, var8
+; OUTLINE_ATOMICS-NEXT:    add x1, x1, :lo12:var8
+; OUTLINE_ATOMICS-NEXT:    bl __aarch64_ldadd1_relax
+; OUTLINE_ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE_ATOMICS-NEXT:    ret
    %old = atomicrmw sub i8* @var8, i8 %offset monotonic
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
@@ -114,6 +156,15 @@ define i8 @test_atomic_load_sub_i8(i8 %offset) nounwind {
 
 define i16 @test_atomic_load_sub_i16(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_sub_i16:
+; OUTLINE_ATOMICS-LABEL: test_atomic_load_sub_i16:
+; OUTLINE_ATOMICS:       // %bb.0:
+; OUTLINE_ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE_ATOMICS-NEXT:    neg w0, w0
+; OUTLINE_ATOMICS-NEXT:    adrp x1, var16
+; OUTLINE_ATOMICS-NEXT:    add x1, x1, :lo12:var16
+; OUTLINE_ATOMICS-NEXT:    bl __aarch64_ldadd2_rel
+; OUTLINE_ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE_ATOMICS-NEXT:    ret
    %old = atomicrmw sub i16* @var16, i16 %offset release
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
@@ -134,6 +185,15 @@ define i16 @test_atomic_load_sub_i16(i16 %offset) nounwind {
 
 define i32 @test_atomic_load_sub_i32(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_sub_i32:
+; OUTLINE_ATOMICS-LABEL: test_atomic_load_sub_i32:
+; OUTLINE_ATOMICS:       // %bb.0:
+; OUTLINE_ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE_ATOMICS-NEXT:    neg w0, w0
+; OUTLINE_ATOMICS-NEXT:    adrp x1, var32
+; OUTLINE_ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; OUTLINE_ATOMICS-NEXT:    bl __aarch64_ldadd4_acq
+; OUTLINE_ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE_ATOMICS-NEXT:    ret
    %old = atomicrmw sub i32* @var32, i32 %offset acquire
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
@@ -154,6 +214,15 @@ define i32 @test_atomic_load_sub_i32(i32 %offset) nounwind {
 
 define i64 @test_atomic_load_sub_i64(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_sub_i64:
+; OUTLINE_ATOMICS-LABEL: test_atomic_load_sub_i64:
+; OUTLINE_ATOMICS:       // %bb.0:
+; OUTLINE_ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE_ATOMICS-NEXT:    neg x0, x0
+; OUTLINE_ATOMICS-NEXT:    adrp x1, var64
+; OUTLINE_ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; OUTLINE_ATOMICS-NEXT:    bl __aarch64_ldadd8_acq_rel
+; OUTLINE_ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE_ATOMICS-NEXT:    ret
    %old = atomicrmw sub i64* @var64, i64 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
@@ -174,6 +243,15 @@ define i64 @test_atomic_load_sub_i64(i64 %offset) nounwind {
 
 define i8 @test_atomic_load_and_i8(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_and_i8:
+; OUTLINE_ATOMICS-LABEL: test_atomic_load_and_i8:
+; OUTLINE_ATOMICS:       // %bb.0:
+; OUTLINE_ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE_ATOMICS-NEXT:    mvn w0, w0
+; OUTLINE_ATOMICS-NEXT:    adrp x1, var8
+; OUTLINE_ATOMICS-NEXT:    add x1, x1, :lo12:var8
+; OUTLINE_ATOMICS-NEXT:    bl __aarch64_ldclr1_rel
+; OUTLINE_ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE_ATOMICS-NEXT:    ret
    %old = atomicrmw and i8* @var8, i8 %offset release
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
@@ -194,6 +272,15 @@ define i8 @test_atomic_load_and_i8(i8 %offset) nounwind {
 
 define i16 @test_atomic_load_and_i16(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_and_i16:
+; OUTLINE_ATOMICS-LABEL: test_atomic_load_and_i16:
+; OUTLINE_ATOMICS:       // %bb.0:
+; OUTLINE_ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE_ATOMICS-NEXT:    mvn w0, w0
+; OUTLINE_ATOMICS-NEXT:    adrp x1, var16
+; OUTLINE_ATOMICS-NEXT:    add x1, x1, :lo12:var16
+; OUTLINE_ATOMICS-NEXT:    bl __aarch64_ldclr2_relax
+; OUTLINE_ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE_ATOMICS-NEXT:    ret
    %old = atomicrmw and i16* @var16, i16 %offset monotonic
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
@@ -214,6 +301,15 @@ define i16 @test_atomic_load_and_i16(i16 %offset) nounwind {
 
 define i32 @test_atomic_load_and_i32(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_and_i32:
+; OUTLINE_ATOMICS-LABEL: test_atomic_load_and_i32:
+; OUTLINE_ATOMICS:       // %bb.0:
+; OUTLINE_ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE_ATOMICS-NEXT:    mvn w0, w0
+; OUTLINE_ATOMICS-NEXT:    adrp x1, var32
+; OUTLINE_ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; OUTLINE_ATOMICS-NEXT:    bl __aarch64_ldclr4_acq_rel
+; OUTLINE_ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE_ATOMICS-NEXT:    ret
    %old = atomicrmw and i32* @var32, i32 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
@@ -234,6 +330,15 @@ define i32 @test_atomic_load_and_i32(i32 %offset) nounwind {
 
 define i64 @test_atomic_load_and_i64(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_and_i64:
+; OUTLINE_ATOMICS-LABEL: test_atomic_load_and_i64:
+; OUTLINE_ATOMICS:       // %bb.0:
+; OUTLINE_ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE_ATOMICS-NEXT:    mvn x0, x0
+; OUTLINE_ATOMICS-NEXT:    adrp x1, var64
+; OUTLINE_ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; OUTLINE_ATOMICS-NEXT:    bl __aarch64_ldclr8_acq
+; OUTLINE_ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE_ATOMICS-NEXT:    ret
    %old = atomicrmw and i64* @var64, i64 %offset acquire
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
@@ -254,6 +359,14 @@ define i64 @test_atomic_load_and_i64(i64 %offset) nounwind {
 
 define i8 @test_atomic_load_or_i8(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_or_i8:
+; OUTLINE_ATOMICS-LABEL: test_atomic_load_or_i8:
+; OUTLINE_ATOMICS:       // %bb.0:
+; OUTLINE_ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE_ATOMICS-NEXT:    adrp x1, var8
+; OUTLINE_ATOMICS-NEXT:    add x1, x1, :lo12:var8
+; OUTLINE_ATOMICS-NEXT:    bl __aarch64_ldset1_acq_rel
+; OUTLINE_ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE_ATOMICS-NEXT:    ret
    %old = atomicrmw or i8* @var8, i8 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
@@ -274,6 +387,14 @@ define i8 @test_atomic_load_or_i8(i8 %offset) nounwind {
 
 define i16 @test_atomic_load_or_i16(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_or_i16:
+; OUTLINE_ATOMICS-LABEL: test_atomic_load_or_i16:
+; OUTLINE_ATOMICS:       // %bb.0:
+; OUTLINE_ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE_ATOMICS-NEXT:    adrp x1, var16
+; OUTLINE_ATOMICS-NEXT:    add x1, x1, :lo12:var16
+; OUTLINE_ATOMICS-NEXT:    bl __aarch64_ldset2_relax
+; OUTLINE_ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE_ATOMICS-NEXT:    ret
    %old = atomicrmw or i16* @var16, i16 %offset monotonic
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
@@ -294,6 +415,14 @@ define i16 @test_atomic_load_or_i16(i16 %offset) nounwind {
 
 define i32 @test_atomic_load_or_i32(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_or_i32:
+; OUTLINE_ATOMICS-LABEL: test_atomic_load_or_i32:
+; OUTLINE_ATOMICS:       // %bb.0:
+; OUTLINE_ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE_ATOMICS-NEXT:    adrp x1, var32
+; OUTLINE_ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; OUTLINE_ATOMICS-NEXT:    bl __aarch64_ldset4_acq
+; OUTLINE_ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE_ATOMICS-NEXT:    ret
    %old = atomicrmw or i32* @var32, i32 %offset acquire
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
@@ -314,6 +443,14 @@ define i32 @test_atomic_load_or_i32(i32 %offset) nounwind {
 
 define i64 @test_atomic_load_or_i64(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_or_i64:
+; OUTLINE_ATOMICS-LABEL: test_atomic_load_or_i64:
+; OUTLINE_ATOMICS:       // %bb.0:
+; OUTLINE_ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE_ATOMICS-NEXT:    adrp x1, var64
+; OUTLINE_ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; OUTLINE_ATOMICS-NEXT:    bl __aarch64_ldset8_rel
+; OUTLINE_ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE_ATOMICS-NEXT:    ret
    %old = atomicrmw or i64* @var64, i64 %offset release
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
@@ -334,6 +471,14 @@ define i64 @test_atomic_load_or_i64(i64 %offset) nounwind {
 
 define i8 @test_atomic_load_xor_i8(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xor_i8:
+; OUTLINE_ATOMICS-LABEL: test_atomic_load_xor_i8:
+; OUTLINE_ATOMICS:       // %bb.0:
+; OUTLINE_ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE_ATOMICS-NEXT:    adrp x1, var8
+; OUTLINE_ATOMICS-NEXT:    add x1, x1, :lo12:var8
+; OUTLINE_ATOMICS-NEXT:    bl __aarch64_ldeor1_acq
+; OUTLINE_ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE_ATOMICS-NEXT:    ret
    %old = atomicrmw xor i8* @var8, i8 %offset acquire
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
@@ -354,6 +499,14 @@ define i8 @test_atomic_load_xor_i8(i8 %offset) nounwind {
 
 define i16 @test_atomic_load_xor_i16(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xor_i16:
+; OUTLINE_ATOMICS-LABEL: test_atomic_load_xor_i16:
+; OUTLINE_ATOMICS:       // %bb.0:
+; OUTLINE_ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE_ATOMICS-NEXT:    adrp x1, var16
+; OUTLINE_ATOMICS-NEXT:    add x1, x1, :lo12:var16
+; OUTLINE_ATOMICS-NEXT:    bl __aarch64_ldeor2_rel
+; OUTLINE_ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE_ATOMICS-NEXT:    ret
    %old = atomicrmw xor i16* @var16, i16 %offset release
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
@@ -374,6 +527,14 @@ define i16 @test_atomic_load_xor_i16(i16 %offset) nounwind {
 
 define i32 @test_atomic_load_xor_i32(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xor_i32:
+; OUTLINE_ATOMICS-LABEL: test_atomic_load_xor_i32:
+; OUTLINE_ATOMICS:       // %bb.0:
+; OUTLINE_ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE_ATOMICS-NEXT:    adrp x1, var32
+; OUTLINE_ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; OUTLINE_ATOMICS-NEXT:    bl __aarch64_ldeor4_acq_rel
+; OUTLINE_ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE_ATOMICS-NEXT:    ret
    %old = atomicrmw xor i32* @var32, i32 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
@@ -394,6 +555,14 @@ define i32 @test_atomic_load_xor_i32(i32 %offset) nounwind {
 
 define i64 @test_atomic_load_xor_i64(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xor_i64:
+; OUTLINE_ATOMICS-LABEL: test_atomic_load_xor_i64:
+; OUTLINE_ATOMICS:       // %bb.0:
+; OUTLINE_ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE_ATOMICS-NEXT:    adrp x1, var64
+; OUTLINE_ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; OUTLINE_ATOMICS-NEXT:    bl __aarch64_ldeor8_relax
+; OUTLINE_ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE_ATOMICS-NEXT:    ret
    %old = atomicrmw xor i64* @var64, i64 %offset monotonic
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
@@ -414,6 +583,14 @@ define i64 @test_atomic_load_xor_i64(i64 %offset) nounwind {
 
 define i8 @test_atomic_load_xchg_i8(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xchg_i8:
+; OUTLINE_ATOMICS-LABEL: test_atomic_load_xchg_i8:
+; OUTLINE_ATOMICS:       // %bb.0:
+; OUTLINE_ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE_ATOMICS-NEXT:    adrp x1, var8
+; OUTLINE_ATOMICS-NEXT:    add x1, x1, :lo12:var8
+; OUTLINE_ATOMICS-NEXT:    bl __aarch64_swp1_relax
+; OUTLINE_ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE_ATOMICS-NEXT:    ret
    %old = atomicrmw xchg i8* @var8, i8 %offset monotonic
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
@@ -433,6 +610,14 @@ define i8 @test_atomic_load_xchg_i8(i8 %offset) nounwind {
 
 define i16 @test_atomic_load_xchg_i16(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xchg_i16:
+; OUTLINE_ATOMICS-LABEL: test_atomic_load_xchg_i16:
+; OUTLINE_ATOMICS:       // %bb.0:
+; OUTLINE_ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE_ATOMICS-NEXT:    adrp x1, var16
+; OUTLINE_ATOMICS-NEXT:    add x1, x1, :lo12:var16
+; OUTLINE_ATOMICS-NEXT:    bl __aarch64_swp2_acq_rel
+; OUTLINE_ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE_ATOMICS-NEXT:    ret
    %old = atomicrmw xchg i16* @var16, i16 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
@@ -453,6 +638,14 @@ define i16 @test_atomic_load_xchg_i16(i16 %offset) nounwind {
 define i32 @test_atomic_load_xchg_i32(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xchg_i32:
 ; CHECK: mov {{[xw]}}8, w[[OLD:[0-9]+]]
+; OUTLINE_ATOMICS-LABEL: test_atomic_load_xchg_i32:
+; OUTLINE_ATOMICS:       // %bb.0:
+; OUTLINE_ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE_ATOMICS-NEXT:    adrp x1, var32
+; OUTLINE_ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; OUTLINE_ATOMICS-NEXT:    bl __aarch64_swp4_rel
+; OUTLINE_ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE_ATOMICS-NEXT:    ret
    %old = atomicrmw xchg i32* @var32, i32 %offset release
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
@@ -470,6 +663,14 @@ define i32 @test_atomic_load_xchg_i32(i32 %offset) nounwind {
 
 define i64 @test_atomic_load_xchg_i64(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xchg_i64:
+; OUTLINE_ATOMICS-LABEL: test_atomic_load_xchg_i64:
+; OUTLINE_ATOMICS:       // %bb.0:
+; OUTLINE_ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE_ATOMICS-NEXT:    adrp x1, var64
+; OUTLINE_ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; OUTLINE_ATOMICS-NEXT:    bl __aarch64_swp8_acq
+; OUTLINE_ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE_ATOMICS-NEXT:    ret
    %old = atomicrmw xchg i64* @var64, i64 %offset acquire
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
@@ -490,6 +691,21 @@ define i64 @test_atomic_load_xchg_i64(i64 %offset) nounwind {
 
 define i8 @test_atomic_load_min_i8(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_min_i8:
+; OUTLINE_ATOMICS-LABEL: test_atomic_load_min_i8:
+; OUTLINE_ATOMICS:       // %bb.0:
+; OUTLINE_ATOMICS-NEXT:    adrp x9, var8
+; OUTLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var8
+; OUTLINE_ATOMICS-NEXT:  .LBB24_1: // %atomicrmw.start
+; OUTLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE_ATOMICS-NEXT:    ldaxrb w10, [x9]
+; OUTLINE_ATOMICS-NEXT:    sxtb w8, w10
+; OUTLINE_ATOMICS-NEXT:    cmp w8, w0, sxtb
+; OUTLINE_ATOMICS-NEXT:    csel w10, w10, w0, le
+; OUTLINE_ATOMICS-NEXT:    stxrb w11, w10, [x9]
+; OUTLINE_ATOMICS-NEXT:    cbnz w11, .LBB24_1
+; OUTLINE_ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE_ATOMICS-NEXT:    mov w0, w8
+; OUTLINE_ATOMICS-NEXT:    ret
    %old = atomicrmw min i8* @var8, i8 %offset acquire
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
@@ -514,6 +730,21 @@ define i8 @test_atomic_load_min_i8(i8 %offset) nounwind {
 
 define i16 @test_atomic_load_min_i16(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_min_i16:
+; OUTLINE_ATOMICS-LABEL: test_atomic_load_min_i16:
+; OUTLINE_ATOMICS:       // %bb.0:
+; OUTLINE_ATOMICS-NEXT:    adrp x9, var16
+; OUTLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var16
+; OUTLINE_ATOMICS-NEXT:  .LBB25_1: // %atomicrmw.start
+; OUTLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE_ATOMICS-NEXT:    ldxrh w10, [x9]
+; OUTLINE_ATOMICS-NEXT:    sxth w8, w10
+; OUTLINE_ATOMICS-NEXT:    cmp w8, w0, sxth
+; OUTLINE_ATOMICS-NEXT:    csel w10, w10, w0, le
+; OUTLINE_ATOMICS-NEXT:    stlxrh w11, w10, [x9]
+; OUTLINE_ATOMICS-NEXT:    cbnz w11, .LBB25_1
+; OUTLINE_ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE_ATOMICS-NEXT:    mov w0, w8
+; OUTLINE_ATOMICS-NEXT:    ret
    %old = atomicrmw min i16* @var16, i16 %offset release
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
@@ -539,6 +770,20 @@ define i16 @test_atomic_load_min_i16(i16 %offset) nounwind {
 
 define i32 @test_atomic_load_min_i32(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_min_i32:
+; OUTLINE_ATOMICS-LABEL: test_atomic_load_min_i32:
+; OUTLINE_ATOMICS:       // %bb.0:
+; OUTLINE_ATOMICS-NEXT:    adrp x9, var32
+; OUTLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var32
+; OUTLINE_ATOMICS-NEXT:  .LBB26_1: // %atomicrmw.start
+; OUTLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE_ATOMICS-NEXT:    ldxr w8, [x9]
+; OUTLINE_ATOMICS-NEXT:    cmp w8, w0
+; OUTLINE_ATOMICS-NEXT:    csel w10, w8, w0, le
+; OUTLINE_ATOMICS-NEXT:    stxr w11, w10, [x9]
+; OUTLINE_ATOMICS-NEXT:    cbnz w11, .LBB26_1
+; OUTLINE_ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE_ATOMICS-NEXT:    mov w0, w8
+; OUTLINE_ATOMICS-NEXT:    ret
    %old = atomicrmw min i32* @var32, i32 %offset monotonic
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
@@ -563,6 +808,20 @@ define i32 @test_atomic_load_min_i32(i32 %offset) nounwind {
 
 define i64 @test_atomic_load_min_i64(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_min_i64:
+; OUTLINE_ATOMICS-LABEL: test_atomic_load_min_i64:
+; OUTLINE_ATOMICS:       // %bb.0:
+; OUTLINE_ATOMICS-NEXT:    adrp x9, var64
+; OUTLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var64
+; OUTLINE_ATOMICS-NEXT:  .LBB27_1: // %atomicrmw.start
+; OUTLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE_ATOMICS-NEXT:    ldaxr x8, [x9]
+; OUTLINE_ATOMICS-NEXT:    cmp x8, x0
+; OUTLINE_ATOMICS-NEXT:    csel x10, x8, x0, le
+; OUTLINE_ATOMICS-NEXT:    stlxr w11, x10, [x9]
+; OUTLINE_ATOMICS-NEXT:    cbnz w11, .LBB27_1
+; OUTLINE_ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE_ATOMICS-NEXT:    mov x0, x8
+; OUTLINE_ATOMICS-NEXT:    ret
    %old = atomicrmw min i64* @var64, i64 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
@@ -587,6 +846,21 @@ define i64 @test_atomic_load_min_i64(i64 %offset) nounwind {
 
 define i8 @test_atomic_load_max_i8(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_max_i8:
+; OUTLINE_ATOMICS-LABEL: test_atomic_load_max_i8:
+; OUTLINE_ATOMICS:       // %bb.0:
+; OUTLINE_ATOMICS-NEXT:    adrp x9, var8
+; OUTLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var8
+; OUTLINE_ATOMICS-NEXT:  .LBB28_1: // %atomicrmw.start
+; OUTLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE_ATOMICS-NEXT:    ldaxrb w10, [x9]
+; OUTLINE_ATOMICS-NEXT:    sxtb w8, w10
+; OUTLINE_ATOMICS-NEXT:    cmp w8, w0, sxtb
+; OUTLINE_ATOMICS-NEXT:    csel w10, w10, w0, gt
+; OUTLINE_ATOMICS-NEXT:    stlxrb w11, w10, [x9]
+; OUTLINE_ATOMICS-NEXT:    cbnz w11, .LBB28_1
+; OUTLINE_ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE_ATOMICS-NEXT:    mov w0, w8
+; OUTLINE_ATOMICS-NEXT:    ret
    %old = atomicrmw max i8* @var8, i8 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
@@ -612,6 +886,21 @@ define i8 @test_atomic_load_max_i8(i8 %offset) nounwind {
 
 define i16 @test_atomic_load_max_i16(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_max_i16:
+; OUTLINE_ATOMICS-LABEL: test_atomic_load_max_i16:
+; OUTLINE_ATOMICS:       // %bb.0:
+; OUTLINE_ATOMICS-NEXT:    adrp x9, var16
+; OUTLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var16
+; OUTLINE_ATOMICS-NEXT:  .LBB29_1: // %atomicrmw.start
+; OUTLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE_ATOMICS-NEXT:    ldaxrh w10, [x9]
+; OUTLINE_ATOMICS-NEXT:    sxth w8, w10
+; OUTLINE_ATOMICS-NEXT:    cmp w8, w0, sxth
+; OUTLINE_ATOMICS-NEXT:    csel w10, w10, w0, gt
+; OUTLINE_ATOMICS-NEXT:    stxrh w11, w10, [x9]
+; OUTLINE_ATOMICS-NEXT:    cbnz w11, .LBB29_1
+; OUTLINE_ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE_ATOMICS-NEXT:    mov w0, w8
+; OUTLINE_ATOMICS-NEXT:    ret
    %old = atomicrmw max i16* @var16, i16 %offset acquire
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
@@ -637,6 +926,20 @@ define i16 @test_atomic_load_max_i16(i16 %offset) nounwind {
 
 define i32 @test_atomic_load_max_i32(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_max_i32:
+; OUTLINE_ATOMICS-LABEL: test_atomic_load_max_i32:
+; OUTLINE_ATOMICS:       // %bb.0:
+; OUTLINE_ATOMICS-NEXT:    adrp x9, var32
+; OUTLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var32
+; OUTLINE_ATOMICS-NEXT:  .LBB30_1: // %atomicrmw.start
+; OUTLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE_ATOMICS-NEXT:    ldxr w8, [x9]
+; OUTLINE_ATOMICS-NEXT:    cmp w8, w0
+; OUTLINE_ATOMICS-NEXT:    csel w10, w8, w0, gt
+; OUTLINE_ATOMICS-NEXT:    stlxr w11, w10, [x9]
+; OUTLINE_ATOMICS-NEXT:    cbnz w11, .LBB30_1
+; OUTLINE_ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE_ATOMICS-NEXT:    mov w0, w8
+; OUTLINE_ATOMICS-NEXT:    ret
    %old = atomicrmw max i32* @var32, i32 %offset release
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
@@ -661,6 +964,20 @@ define i32 @test_atomic_load_max_i32(i32 %offset) nounwind {
 
 define i64 @test_atomic_load_max_i64(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_max_i64:
+; OUTLINE_ATOMICS-LABEL: test_atomic_load_max_i64:
+; OUTLINE_ATOMICS:       // %bb.0:
+; OUTLINE_ATOMICS-NEXT:    adrp x9, var64
+; OUTLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var64
+; OUTLINE_ATOMICS-NEXT:  .LBB31_1: // %atomicrmw.start
+; OUTLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE_ATOMICS-NEXT:    ldxr x8, [x9]
+; OUTLINE_ATOMICS-NEXT:    cmp x8, x0
+; OUTLINE_ATOMICS-NEXT:    csel x10, x8, x0, gt
+; OUTLINE_ATOMICS-NEXT:    stxr w11, x10, [x9]
+; OUTLINE_ATOMICS-NEXT:    cbnz w11, .LBB31_1
+; OUTLINE_ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE_ATOMICS-NEXT:    mov x0, x8
+; OUTLINE_ATOMICS-NEXT:    ret
    %old = atomicrmw max i64* @var64, i64 %offset monotonic
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
@@ -685,6 +1002,20 @@ define i64 @test_atomic_load_max_i64(i64 %offset) nounwind {
 
 define i8 @test_atomic_load_umin_i8(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umin_i8:
+; OUTLINE_ATOMICS-LABEL: test_atomic_load_umin_i8:
+; OUTLINE_ATOMICS:       // %bb.0:
+; OUTLINE_ATOMICS-NEXT:    adrp x9, var8
+; OUTLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var8
+; OUTLINE_ATOMICS-NEXT:  .LBB32_1: // %atomicrmw.start
+; OUTLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE_ATOMICS-NEXT:    ldxrb w8, [x9]
+; OUTLINE_ATOMICS-NEXT:    cmp w8, w0, uxtb
+; OUTLINE_ATOMICS-NEXT:    csel w10, w8, w0, ls
+; OUTLINE_ATOMICS-NEXT:    stxrb w11, w10, [x9]
+; OUTLINE_ATOMICS-NEXT:    cbnz w11, .LBB32_1
+; OUTLINE_ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE_ATOMICS-NEXT:    mov w0, w8
+; OUTLINE_ATOMICS-NEXT:    ret
    %old = atomicrmw umin i8* @var8, i8 %offset monotonic
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
@@ -709,6 +1040,20 @@ define i8 @test_atomic_load_umin_i8(i8 %offset) nounwind {
 
 define i16 @test_atomic_load_umin_i16(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umin_i16:
+; OUTLINE_ATOMICS-LABEL: test_atomic_load_umin_i16:
+; OUTLINE_ATOMICS:       // %bb.0:
+; OUTLINE_ATOMICS-NEXT:    adrp x9, var16
+; OUTLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var16
+; OUTLINE_ATOMICS-NEXT:  .LBB33_1: // %atomicrmw.start
+; OUTLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE_ATOMICS-NEXT:    ldaxrh w8, [x9]
+; OUTLINE_ATOMICS-NEXT:    cmp w8, w0, uxth
+; OUTLINE_ATOMICS-NEXT:    csel w10, w8, w0, ls
+; OUTLINE_ATOMICS-NEXT:    stxrh w11, w10, [x9]
+; OUTLINE_ATOMICS-NEXT:    cbnz w11, .LBB33_1
+; OUTLINE_ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE_ATOMICS-NEXT:    mov w0, w8
+; OUTLINE_ATOMICS-NEXT:    ret
    %old = atomicrmw umin i16* @var16, i16 %offset acquire
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
@@ -733,6 +1078,20 @@ define i16 @test_atomic_load_umin_i16(i16 %offset) nounwind {
 
 define i32 @test_atomic_load_umin_i32(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umin_i32:
+; OUTLINE_ATOMICS-LABEL: test_atomic_load_umin_i32:
+; OUTLINE_ATOMICS:       // %bb.0:
+; OUTLINE_ATOMICS-NEXT:    adrp x9, var32
+; OUTLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var32
+; OUTLINE_ATOMICS-NEXT:  .LBB34_1: // %atomicrmw.start
+; OUTLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE_ATOMICS-NEXT:    ldaxr w8, [x9]
+; OUTLINE_ATOMICS-NEXT:    cmp w8, w0
+; OUTLINE_ATOMICS-NEXT:    csel w10, w8, w0, ls
+; OUTLINE_ATOMICS-NEXT:    stlxr w11, w10, [x9]
+; OUTLINE_ATOMICS-NEXT:    cbnz w11, .LBB34_1
+; OUTLINE_ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE_ATOMICS-NEXT:    mov w0, w8
+; OUTLINE_ATOMICS-NEXT:    ret
    %old = atomicrmw umin i32* @var32, i32 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
@@ -757,6 +1116,20 @@ define i32 @test_atomic_load_umin_i32(i32 %offset) nounwind {
 
 define i64 @test_atomic_load_umin_i64(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umin_i64:
+; OUTLINE_ATOMICS-LABEL: test_atomic_load_umin_i64:
+; OUTLINE_ATOMICS:       // %bb.0:
+; OUTLINE_ATOMICS-NEXT:    adrp x9, var64
+; OUTLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var64
+; OUTLINE_ATOMICS-NEXT:  .LBB35_1: // %atomicrmw.start
+; OUTLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE_ATOMICS-NEXT:    ldaxr x8, [x9]
+; OUTLINE_ATOMICS-NEXT:    cmp x8, x0
+; OUTLINE_ATOMICS-NEXT:    csel x10, x8, x0, ls
+; OUTLINE_ATOMICS-NEXT:    stlxr w11, x10, [x9]
+; OUTLINE_ATOMICS-NEXT:    cbnz w11, .LBB35_1
+; OUTLINE_ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE_ATOMICS-NEXT:    mov x0, x8
+; OUTLINE_ATOMICS-NEXT:    ret
    %old = atomicrmw umin i64* @var64, i64 %offset acq_rel
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
@@ -781,6 +1154,20 @@ define i64 @test_atomic_load_umin_i64(i64 %offset) nounwind {
 
 define i8 @test_atomic_load_umax_i8(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umax_i8:
+; OUTLINE_ATOMICS-LABEL: test_atomic_load_umax_i8:
+; OUTLINE_ATOMICS:       // %bb.0:
+; OUTLINE_ATOMICS-NEXT:    adrp x9, var8
+; OUTLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var8
+; OUTLINE_ATOMICS-NEXT:  .LBB36_1: // %atomicrmw.start
+; OUTLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE_ATOMICS-NEXT:    ldaxrb w8, [x9]
+; OUTLINE_ATOMICS-NEXT:    cmp w8, w0, uxtb
+; OUTLINE_ATOMICS-NEXT:    csel w10, w8, w0, hi
+; OUTLINE_ATOMICS-NEXT:    stlxrb w11, w10, [x9]
+; OUTLINE_ATOMICS-NEXT:    cbnz w11, .LBB36_1
+; OUTLINE_ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE_ATOMICS-NEXT:    mov w0, w8
+; OUTLINE_ATOMICS-NEXT:    ret
    %old = atomicrmw umax i8* @var8, i8 %offset acq_rel
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
@@ -805,6 +1192,20 @@ define i8 @test_atomic_load_umax_i8(i8 %offset) nounwind {
 
 define i16 @test_atomic_load_umax_i16(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umax_i16:
+; OUTLINE_ATOMICS-LABEL: test_atomic_load_umax_i16:
+; OUTLINE_ATOMICS:       // %bb.0:
+; OUTLINE_ATOMICS-NEXT:    adrp x9, var16
+; OUTLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var16
+; OUTLINE_ATOMICS-NEXT:  .LBB37_1: // %atomicrmw.start
+; OUTLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE_ATOMICS-NEXT:    ldxrh w8, [x9]
+; OUTLINE_ATOMICS-NEXT:    cmp w8, w0, uxth
+; OUTLINE_ATOMICS-NEXT:    csel w10, w8, w0, hi
+; OUTLINE_ATOMICS-NEXT:    stxrh w11, w10, [x9]
+; OUTLINE_ATOMICS-NEXT:    cbnz w11, .LBB37_1
+; OUTLINE_ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE_ATOMICS-NEXT:    mov w0, w8
+; OUTLINE_ATOMICS-NEXT:    ret
    %old = atomicrmw umax i16* @var16, i16 %offset monotonic
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
@@ -829,6 +1230,20 @@ define i16 @test_atomic_load_umax_i16(i16 %offset) nounwind {
 
 define i32 @test_atomic_load_umax_i32(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umax_i32:
+; OUTLINE_ATOMICS-LABEL: test_atomic_load_umax_i32:
+; OUTLINE_ATOMICS:       // %bb.0:
+; OUTLINE_ATOMICS-NEXT:    adrp x9, var32
+; OUTLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var32
+; OUTLINE_ATOMICS-NEXT:  .LBB38_1: // %atomicrmw.start
+; OUTLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE_ATOMICS-NEXT:    ldaxr w8, [x9]
+; OUTLINE_ATOMICS-NEXT:    cmp w8, w0
+; OUTLINE_ATOMICS-NEXT:    csel w10, w8, w0, hi
+; OUTLINE_ATOMICS-NEXT:    stlxr w11, w10, [x9]
+; OUTLINE_ATOMICS-NEXT:    cbnz w11, .LBB38_1
+; OUTLINE_ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE_ATOMICS-NEXT:    mov w0, w8
+; OUTLINE_ATOMICS-NEXT:    ret
    %old = atomicrmw umax i32* @var32, i32 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
@@ -853,6 +1268,20 @@ define i32 @test_atomic_load_umax_i32(i32 %offset) nounwind {
 
 define i64 @test_atomic_load_umax_i64(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umax_i64:
+; OUTLINE_ATOMICS-LABEL: test_atomic_load_umax_i64:
+; OUTLINE_ATOMICS:       // %bb.0:
+; OUTLINE_ATOMICS-NEXT:    adrp x9, var64
+; OUTLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var64
+; OUTLINE_ATOMICS-NEXT:  .LBB39_1: // %atomicrmw.start
+; OUTLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE_ATOMICS-NEXT:    ldxr x8, [x9]
+; OUTLINE_ATOMICS-NEXT:    cmp x8, x0
+; OUTLINE_ATOMICS-NEXT:    csel x10, x8, x0, hi
+; OUTLINE_ATOMICS-NEXT:    stlxr w11, x10, [x9]
+; OUTLINE_ATOMICS-NEXT:    cbnz w11, .LBB39_1
+; OUTLINE_ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE_ATOMICS-NEXT:    mov x0, x8
+; OUTLINE_ATOMICS-NEXT:    ret
    %old = atomicrmw umax i64* @var64, i64 %offset release
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
@@ -877,6 +1306,14 @@ define i64 @test_atomic_load_umax_i64(i64 %offset) nounwind {
 
 define i8 @test_atomic_cmpxchg_i8(i8 %wanted, i8 %new) nounwind {
 ; CHECK-LABEL: test_atomic_cmpxchg_i8:
+; OUTLINE_ATOMICS-LABEL: test_atomic_cmpxchg_i8:
+; OUTLINE_ATOMICS:       // %bb.0:
+; OUTLINE_ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE_ATOMICS-NEXT:    adrp x2, var8
+; OUTLINE_ATOMICS-NEXT:    add x2, x2, :lo12:var8
+; OUTLINE_ATOMICS-NEXT:    bl __aarch64_cas1_acq
+; OUTLINE_ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE_ATOMICS-NEXT:    ret
    %pair = cmpxchg i8* @var8, i8 %wanted, i8 %new acquire acquire
    %old = extractvalue { i8, i1 } %pair, 0
 
@@ -902,6 +1339,14 @@ define i8 @test_atomic_cmpxchg_i8(i8 %wanted, i8 %new) nounwind {
 
 define i16 @test_atomic_cmpxchg_i16(i16 %wanted, i16 %new) nounwind {
 ; CHECK-LABEL: test_atomic_cmpxchg_i16:
+; OUTLINE_ATOMICS-LABEL: test_atomic_cmpxchg_i16:
+; OUTLINE_ATOMICS:       // %bb.0:
+; OUTLINE_ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE_ATOMICS-NEXT:    adrp x2, var16
+; OUTLINE_ATOMICS-NEXT:    add x2, x2, :lo12:var16
+; OUTLINE_ATOMICS-NEXT:    bl __aarch64_cas2_acq_rel
+; OUTLINE_ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE_ATOMICS-NEXT:    ret
    %pair = cmpxchg i16* @var16, i16 %wanted, i16 %new seq_cst seq_cst
    %old = extractvalue { i16, i1 } %pair, 0
 
@@ -927,6 +1372,14 @@ define i16 @test_atomic_cmpxchg_i16(i16 %wanted, i16 %new) nounwind {
 
 define i32 @test_atomic_cmpxchg_i32(i32 %wanted, i32 %new) nounwind {
 ; CHECK-LABEL: test_atomic_cmpxchg_i32:
+; OUTLINE_ATOMICS-LABEL: test_atomic_cmpxchg_i32:
+; OUTLINE_ATOMICS:       // %bb.0:
+; OUTLINE_ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE_ATOMICS-NEXT:    adrp x2, var32
+; OUTLINE_ATOMICS-NEXT:    add x2, x2, :lo12:var32
+; OUTLINE_ATOMICS-NEXT:    bl __aarch64_cas4_rel
+; OUTLINE_ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE_ATOMICS-NEXT:    ret
    %pair = cmpxchg i32* @var32, i32 %wanted, i32 %new release monotonic
    %old = extractvalue { i32, i1 } %pair, 0
 
@@ -950,6 +1403,16 @@ define i32 @test_atomic_cmpxchg_i32(i32 %wanted, i32 %new) nounwind {
 
 define void @test_atomic_cmpxchg_i64(i64 %wanted, i64 %new) nounwind {
 ; CHECK-LABEL: test_atomic_cmpxchg_i64:
+; OUTLINE_ATOMICS-LABEL: test_atomic_cmpxchg_i64:
+; OUTLINE_ATOMICS:       // %bb.0:
+; OUTLINE_ATOMICS-NEXT:    stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
+; OUTLINE_ATOMICS-NEXT:    adrp x19, var64
+; OUTLINE_ATOMICS-NEXT:    add x19, x19, :lo12:var64
+; OUTLINE_ATOMICS-NEXT:    mov x2, x19
+; OUTLINE_ATOMICS-NEXT:    bl __aarch64_cas8_relax
+; OUTLINE_ATOMICS-NEXT:    str x0, [x19]
+; OUTLINE_ATOMICS-NEXT:    ldp x30, x19, [sp], #16 // 16-byte Folded Reload
+; OUTLINE_ATOMICS-NEXT:    ret
    %pair = cmpxchg i64* @var64, i64 %wanted, i64 %new monotonic monotonic
    %old = extractvalue { i64, i1 } %pair, 0
 
@@ -977,6 +1440,11 @@ define void @test_atomic_cmpxchg_i64(i64 %wanted, i64 %new) nounwind {
 
 define i8 @test_atomic_load_monotonic_i8() nounwind {
 ; CHECK-LABEL: test_atomic_load_monotonic_i8:
+; OUTLINE_ATOMICS-LABEL: test_atomic_load_monotonic_i8:
+; OUTLINE_ATOMICS:       // %bb.0:
+; OUTLINE_ATOMICS-NEXT:    adrp x8, var8
+; OUTLINE_ATOMICS-NEXT:    ldrb w0, [x8, :lo12:var8]
+; OUTLINE_ATOMICS-NEXT:    ret
   %val = load atomic i8, i8* @var8 monotonic, align 1
 ; CHECK-NOT: dmb
 ; CHECK: adrp x[[HIADDR:[0-9]+]], var8
@@ -988,6 +1456,10 @@ define i8 @test_atomic_load_monotonic_i8() nounwind {
 
 define i8 @test_atomic_load_monotonic_regoff_i8(i64 %base, i64 %off) nounwind {
 ; CHECK-LABEL: test_atomic_load_monotonic_regoff_i8:
+; OUTLINE_ATOMICS-LABEL: test_atomic_load_monotonic_regoff_i8:
+; OUTLINE_ATOMICS:       // %bb.0:
+; OUTLINE_ATOMICS-NEXT:    ldrb w0, [x0, x1]
+; OUTLINE_ATOMICS-NEXT:    ret
   %addr_int = add i64 %base, %off
   %addr = inttoptr i64 %addr_int to i8*
 
@@ -1001,6 +1473,12 @@ define i8 @test_atomic_load_monotonic_regoff_i8(i64 %base, i64 %off) nounwind {
 
 define i8 @test_atomic_load_acquire_i8() nounwind {
 ; CHECK-LABEL: test_atomic_load_acquire_i8:
+; OUTLINE_ATOMICS-LABEL: test_atomic_load_acquire_i8:
+; OUTLINE_ATOMICS:       // %bb.0:
+; OUTLINE_ATOMICS-NEXT:    adrp x8, var8
+; OUTLINE_ATOMICS-NEXT:    add x8, x8, :lo12:var8
+; OUTLINE_ATOMICS-NEXT:    ldarb w0, [x8]
+; OUTLINE_ATOMICS-NEXT:    ret
   %val = load atomic i8, i8* @var8 acquire, align 1
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
@@ -1014,6 +1492,12 @@ define i8 @test_atomic_load_acquire_i8() nounwind {
 
 define i8 @test_atomic_load_seq_cst_i8() nounwind {
 ; CHECK-LABEL: test_atomic_load_seq_cst_i8:
+; OUTLINE_ATOMICS-LABEL: test_atomic_load_seq_cst_i8:
+; OUTLINE_ATOMICS:       // %bb.0:
+; OUTLINE_ATOMICS-NEXT:    adrp x8, var8
+; OUTLINE_ATOMICS-NEXT:    add x8, x8, :lo12:var8
+; OUTLINE_ATOMICS-NEXT:    ldarb w0, [x8]
+; OUTLINE_ATOMICS-NEXT:    ret
   %val = load atomic i8, i8* @var8 seq_cst, align 1
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[HIADDR:x[0-9]+]], var8
@@ -1027,6 +1511,11 @@ define i8 @test_atomic_load_seq_cst_i8() nounwind {
 
 define i16 @test_atomic_load_monotonic_i16() nounwind {
 ; CHECK-LABEL: test_atomic_load_monotonic_i16:
+; OUTLINE_ATOMICS-LABEL: test_atomic_load_monotonic_i16:
+; OUTLINE_ATOMICS:       // %bb.0:
+; OUTLINE_ATOMICS-NEXT:    adrp x8, var16
+; OUTLINE_ATOMICS-NEXT:    ldrh w0, [x8, :lo12:var16]
+; OUTLINE_ATOMICS-NEXT:    ret
   %val = load atomic i16, i16* @var16 monotonic, align 2
 ; CHECK-NOT: dmb
 ; CHECK: adrp x[[HIADDR:[0-9]+]], var16
@@ -1039,6 +1528,10 @@ define i16 @test_atomic_load_monotonic_i16() nounwind {
 
 define i32 @test_atomic_load_monotonic_regoff_i32(i64 %base, i64 %off) nounwind {
 ; CHECK-LABEL: test_atomic_load_monotonic_regoff_i32:
+; OUTLINE_ATOMICS-LABEL: test_atomic_load_monotonic_regoff_i32:
+; OUTLINE_ATOMICS:       // %bb.0:
+; OUTLINE_ATOMICS-NEXT:    ldr w0, [x0, x1]
+; OUTLINE_ATOMICS-NEXT:    ret
   %addr_int = add i64 %base, %off
   %addr = inttoptr i64 %addr_int to i32*
 
@@ -1052,6 +1545,12 @@ define i32 @test_atomic_load_monotonic_regoff_i32(i64 %base, i64 %off) nounwind
 
 define i64 @test_atomic_load_seq_cst_i64() nounwind {
 ; CHECK-LABEL: test_atomic_load_seq_cst_i64:
+; OUTLINE_ATOMICS-LABEL: test_atomic_load_seq_cst_i64:
+; OUTLINE_ATOMICS:       // %bb.0:
+; OUTLINE_ATOMICS-NEXT:    adrp x8, var64
+; OUTLINE_ATOMICS-NEXT:    add x8, x8, :lo12:var64
+; OUTLINE_ATOMICS-NEXT:    ldar x0, [x8]
+; OUTLINE_ATOMICS-NEXT:    ret
   %val = load atomic i64, i64* @var64 seq_cst, align 8
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[HIADDR:x[0-9]+]], var64
@@ -1065,6 +1564,11 @@ define i64 @test_atomic_load_seq_cst_i64() nounwind {
 
 define void @test_atomic_store_monotonic_i8(i8 %val) nounwind {
 ; CHECK-LABEL: test_atomic_store_monotonic_i8:
+; OUTLINE_ATOMICS-LABEL: test_atomic_store_monotonic_i8:
+; OUTLINE_ATOMICS:       // %bb.0:
+; OUTLINE_ATOMICS-NEXT:    adrp x8, var8
+; OUTLINE_ATOMICS-NEXT:    strb w0, [x8, :lo12:var8]
+; OUTLINE_ATOMICS-NEXT:    ret
   store atomic i8 %val, i8* @var8 monotonic, align 1
 ; CHECK: adrp x[[HIADDR:[0-9]+]], var8
 ; CHECK: strb w0, [x[[HIADDR]], {{#?}}:lo12:var8]
@@ -1074,7 +1578,10 @@ define void @test_atomic_store_monotonic_i8(i8 %val) nounwind {
 
 define void @test_atomic_store_monotonic_regoff_i8(i64 %base, i64 %off, i8 %val) nounwind {
 ; CHECK-LABEL: test_atomic_store_monotonic_regoff_i8:
-
+; OUTLINE_ATOMICS-LABEL: test_atomic_store_monotonic_regoff_i8:
+; OUTLINE_ATOMICS:       // %bb.0:
+; OUTLINE_ATOMICS-NEXT:    strb w2, [x0, x1]
+; OUTLINE_ATOMICS-NEXT:    ret
   %addr_int = add i64 %base, %off
   %addr = inttoptr i64 %addr_int to i8*
 
@@ -1085,6 +1592,12 @@ define void @test_atomic_store_monotonic_regoff_i8(i64 %base, i64 %off, i8 %val)
 }
 define void @test_atomic_store_release_i8(i8 %val) nounwind {
 ; CHECK-LABEL: test_atomic_store_release_i8:
+; OUTLINE_ATOMICS-LABEL: test_atomic_store_release_i8:
+; OUTLINE_ATOMICS:       // %bb.0:
+; OUTLINE_ATOMICS-NEXT:    adrp x8, var8
+; OUTLINE_ATOMICS-NEXT:    add x8, x8, :lo12:var8
+; OUTLINE_ATOMICS-NEXT:    stlrb w0, [x8]
+; OUTLINE_ATOMICS-NEXT:    ret
   store atomic i8 %val, i8* @var8 release, align 1
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[HIADDR:x[0-9]+]], var8
@@ -1098,6 +1611,12 @@ define void @test_atomic_store_release_i8(i8 %val) nounwind {
 
 define void @test_atomic_store_seq_cst_i8(i8 %val) nounwind {
 ; CHECK-LABEL: test_atomic_store_seq_cst_i8:
+; OUTLINE_ATOMICS-LABEL: test_atomic_store_seq_cst_i8:
+; OUTLINE_ATOMICS:       // %bb.0:
+; OUTLINE_ATOMICS-NEXT:    adrp x8, var8
+; OUTLINE_ATOMICS-NEXT:    add x8, x8, :lo12:var8
+; OUTLINE_ATOMICS-NEXT:    stlrb w0, [x8]
+; OUTLINE_ATOMICS-NEXT:    ret
   store atomic i8 %val, i8* @var8 seq_cst, align 1
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[HIADDR:x[0-9]+]], var8
@@ -1112,6 +1631,11 @@ define void @test_atomic_store_seq_cst_i8(i8 %val) nounwind {
 
 define void @test_atomic_store_monotonic_i16(i16 %val) nounwind {
 ; CHECK-LABEL: test_atomic_store_monotonic_i16:
+; OUTLINE_ATOMICS-LABEL: test_atomic_store_monotonic_i16:
+; OUTLINE_ATOMICS:       // %bb.0:
+; OUTLINE_ATOMICS-NEXT:    adrp x8, var16
+; OUTLINE_ATOMICS-NEXT:    strh w0, [x8, :lo12:var16]
+; OUTLINE_ATOMICS-NEXT:    ret
   store atomic i16 %val, i16* @var16 monotonic, align 2
 ; CHECK-NOT: dmb
 ; CHECK: adrp x[[HIADDR:[0-9]+]], var16
@@ -1123,7 +1647,10 @@ define void @test_atomic_store_monotonic_i16(i16 %val) nounwind {
 
 define void @test_atomic_store_monotonic_regoff_i32(i64 %base, i64 %off, i32 %val) nounwind {
 ; CHECK-LABEL: test_atomic_store_monotonic_regoff_i32:
-
+; OUTLINE_ATOMICS-LABEL: test_atomic_store_monotonic_regoff_i32:
+; OUTLINE_ATOMICS:       // %bb.0:
+; OUTLINE_ATOMICS-NEXT:    str w2, [x0, x1]
+; OUTLINE_ATOMICS-NEXT:    ret
   %addr_int = add i64 %base, %off
   %addr = inttoptr i64 %addr_int to i32*
 
@@ -1137,6 +1664,12 @@ define void @test_atomic_store_monotonic_regoff_i32(i64 %base, i64 %off, i32 %va
 
 define void @test_atomic_store_release_i64(i64 %val) nounwind {
 ; CHECK-LABEL: test_atomic_store_release_i64:
+; OUTLINE_ATOMICS-LABEL: test_atomic_store_release_i64:
+; OUTLINE_ATOMICS:       // %bb.0:
+; OUTLINE_ATOMICS-NEXT:    adrp x8, var64
+; OUTLINE_ATOMICS-NEXT:    add x8, x8, :lo12:var64
+; OUTLINE_ATOMICS-NEXT:    stlr x0, [x8]
+; OUTLINE_ATOMICS-NEXT:    ret
   store atomic i64 %val, i64* @var64 release, align 8
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[HIADDR:x[0-9]+]], var64

diff  --git a/llvm/test/CodeGen/AArch64/cmpxchg-O0.ll b/llvm/test/CodeGen/AArch64/cmpxchg-O0.ll
index 43e36dd88209..def41594ede3 100644
--- a/llvm/test/CodeGen/AArch64/cmpxchg-O0.ll
+++ b/llvm/test/CodeGen/AArch64/cmpxchg-O0.ll
@@ -1,6 +1,8 @@
 ; RUN: llc -verify-machineinstrs -mtriple=aarch64-linux-gnu -O0 -fast-isel=0 -global-isel=false %s -o - | FileCheck -enable-var-scope %s
+; RUN: llc -verify-machineinstrs -mtriple=aarch64-linux-gnu -O0 -fast-isel=0 -global-isel=false -mattr=+outline-atomics %s -o - | FileCheck -enable-var-scope %s --check-prefix=OUTLINE-ATOMICS
 
 define { i8, i1 } @test_cmpxchg_8(i8* %addr, i8 %desired, i8 %new) nounwind {
+; OUTLINE-ATOMICS: bl __aarch64_cas1_acq_rel
 ; CHECK-LABEL: test_cmpxchg_8:
 ; CHECK:     mov [[ADDR:x[0-9]+]], x0
 ; CHECK: [[RETRY:.LBB[0-9]+_[0-9]+]]:
@@ -17,6 +19,7 @@ define { i8, i1 } @test_cmpxchg_8(i8* %addr, i8 %desired, i8 %new) nounwind {
 }
 
 define { i16, i1 } @test_cmpxchg_16(i16* %addr, i16 %desired, i16 %new) nounwind {
+; OUTLINE-ATOMICS: bl __aarch64_cas2_acq_rel
 ; CHECK-LABEL: test_cmpxchg_16:
 ; CHECK:     mov [[ADDR:x[0-9]+]], x0
 ; CHECK: [[RETRY:.LBB[0-9]+_[0-9]+]]:
@@ -33,6 +36,7 @@ define { i16, i1 } @test_cmpxchg_16(i16* %addr, i16 %desired, i16 %new) nounwind
 }
 
 define { i32, i1 } @test_cmpxchg_32(i32* %addr, i32 %desired, i32 %new) nounwind {
+; OUTLINE-ATOMICS: bl __aarch64_cas4_acq_rel
 ; CHECK-LABEL: test_cmpxchg_32:
 ; CHECK:     mov [[ADDR:x[0-9]+]], x0
 ; CHECK: [[RETRY:.LBB[0-9]+_[0-9]+]]:
@@ -49,6 +53,7 @@ define { i32, i1 } @test_cmpxchg_32(i32* %addr, i32 %desired, i32 %new) nounwind
 }
 
 define { i64, i1 } @test_cmpxchg_64(i64* %addr, i64 %desired, i64 %new) nounwind {
+; OUTLINE-ATOMICS: bl __aarch64_cas8_acq_rel
 ; CHECK-LABEL: test_cmpxchg_64:
 ; CHECK:     mov [[ADDR:x[0-9]+]], x0
 ; CHECK: [[RETRY:.LBB[0-9]+_[0-9]+]]:
@@ -65,6 +70,7 @@ define { i64, i1 } @test_cmpxchg_64(i64* %addr, i64 %desired, i64 %new) nounwind
 }
 
 define { i128, i1 } @test_cmpxchg_128(i128* %addr, i128 %desired, i128 %new) nounwind {
+; OUTLINE-ATOMICS: bl __aarch64_cas16_acq_rel
 ; CHECK-LABEL: test_cmpxchg_128:
 ; CHECK:     mov [[ADDR:x[0-9]+]], x0
 ; CHECK: [[RETRY:.LBB[0-9]+_[0-9]+]]:
@@ -86,6 +92,7 @@ define { i128, i1 } @test_cmpxchg_128(i128* %addr, i128 %desired, i128 %new) nou
 ; was false.
 @var128 = global i128 0
 define {i128, i1} @test_cmpxchg_128_unsplit(i128* %addr) {
+; OUTLINE-ATOMICS: bl __aarch64_cas16_acq_rel
 ; CHECK-LABEL: test_cmpxchg_128_unsplit:
 ; CHECK:     mov [[ADDR:x[0-9]+]], x0
 ; CHECK:     add x[[VAR128:[0-9]+]], {{x[0-9]+}}, :lo12:var128

diff  --git a/llvm/test/CodeGen/AArch64/cmpxchg-idioms.ll b/llvm/test/CodeGen/AArch64/cmpxchg-idioms.ll
index 3bafd8c3a35f..2b356c5981a8 100644
--- a/llvm/test/CodeGen/AArch64/cmpxchg-idioms.ll
+++ b/llvm/test/CodeGen/AArch64/cmpxchg-idioms.ll
@@ -1,6 +1,8 @@
 ; RUN: llc -mtriple=aarch64-apple-ios7.0 -o - %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-apple-ios7.0 -mattr=+outline-atomics -o - %s | FileCheck %s --check-prefix=OUTLINE-ATOMICS
 
 define i32 @test_return(i32* %p, i32 %oldval, i32 %newval) {
+; OUTLINE-ATOMICS: bl ___aarch64_cas4_acq_rel
 ; CHECK-LABEL: test_return:
 
 ; CHECK: [[LOOP:LBB[0-9]+_[0-9]+]]:
@@ -27,6 +29,7 @@ define i32 @test_return(i32* %p, i32 %oldval, i32 %newval) {
 }
 
 define i1 @test_return_bool(i8* %value, i8 %oldValue, i8 %newValue) {
+; OUTLINE-ATOMICS: bl ___aarch64_cas1_acq_rel
 ; CHECK-LABEL: test_return_bool:
 
 ; CHECK: [[LOOP:LBB[0-9]+_[0-9]+]]:
@@ -55,6 +58,7 @@ define i1 @test_return_bool(i8* %value, i8 %oldValue, i8 %newValue) {
 }
 
 define void @test_conditional(i32* %p, i32 %oldval, i32 %newval) {
+; OUTLINE-ATOMICS: bl ___aarch64_cas4_acq_rel
 ; CHECK-LABEL: test_conditional:
 
 ; CHECK: [[LOOP:LBB[0-9]+_[0-9]+]]:
@@ -92,6 +96,7 @@ declare void @bar()
 declare void @baz()
 
 define i1 @test_conditional2(i32 %a, i32 %b, i32* %c) {
+; OUTLINE-ATOMICS: bl ___aarch64_cas4_acq_rel
 ; CHECK-LABEL: test_conditional2:
 ; CHECK: [[LOOP:LBB[0-9]+_[0-9]+]]:
 ; CHECK: ldaxr [[LOADED:w[0-9]+]], [x19]

diff  --git a/llvm/test/CodeGen/AArch64/cmpxchg-lse-even-regs.ll b/llvm/test/CodeGen/AArch64/cmpxchg-lse-even-regs.ll
index 9c6d8cc20533..ab083b3c91d9 100644
--- a/llvm/test/CodeGen/AArch64/cmpxchg-lse-even-regs.ll
+++ b/llvm/test/CodeGen/AArch64/cmpxchg-lse-even-regs.ll
@@ -1,4 +1,5 @@
 ; RUN: llc -mtriple arm64-apple-ios -mattr=+lse %s -o - | FileCheck %s
+; RUN: llc -mtriple arm64-apple-ios -mattr=+lse -mattr=+outline-atomics %s -o - | FileCheck %s
 
 ; Only "even,even+1" pairs are valid for CASP instructions. Make sure LLVM
 ; doesn't allocate odd ones and that it can copy them around properly. N.b. we

diff  --git a/llvm/test/Transforms/AtomicExpand/AArch64/expand-atomicrmw-xchg-fp.ll b/llvm/test/Transforms/AtomicExpand/AArch64/expand-atomicrmw-xchg-fp.ll
index d3d36eb45039..ece757f215a0 100644
--- a/llvm/test/Transforms/AtomicExpand/AArch64/expand-atomicrmw-xchg-fp.ll
+++ b/llvm/test/Transforms/AtomicExpand/AArch64/expand-atomicrmw-xchg-fp.ll
@@ -1,5 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt -S -mtriple=aarch64-- -atomic-expand %s | FileCheck %s
+; RUN: opt -S -mtriple=aarch64-- -mattr=+outline-atomics -atomic-expand %s | FileCheck %s --check-prefix=OUTLINE-ATOMICS
 
 define void @atomic_swap_f16(half* %ptr, half %val) nounwind {
 ; CHECK-LABEL: @atomic_swap_f16(
@@ -15,6 +16,10 @@ define void @atomic_swap_f16(half* %ptr, half %val) nounwind {
 ; CHECK-NEXT:    br i1 [[TRYAGAIN]], label [[ATOMICRMW_START]], label [[ATOMICRMW_END:%.*]]
 ; CHECK:       atomicrmw.end:
 ; CHECK-NEXT:    ret void
+;
+; OUTLINE-ATOMICS-LABEL: @atomic_swap_f16(
+; OUTLINE-ATOMICS-NEXT:    [[T1:%.*]] = atomicrmw xchg half* [[PTR:%.*]], half [[VAL:%.*]] acquire
+; OUTLINE-ATOMICS-NEXT:    ret void
 ;
   %t1 = atomicrmw xchg half* %ptr, half %val acquire
   ret void
@@ -34,6 +39,10 @@ define void @atomic_swap_f32(float* %ptr, float %val) nounwind {
 ; CHECK-NEXT:    br i1 [[TRYAGAIN]], label [[ATOMICRMW_START]], label [[ATOMICRMW_END:%.*]]
 ; CHECK:       atomicrmw.end:
 ; CHECK-NEXT:    ret void
+;
+; OUTLINE-ATOMICS-LABEL: @atomic_swap_f32(
+; OUTLINE-ATOMICS-NEXT:    [[T1:%.*]] = atomicrmw xchg float* [[PTR:%.*]], float [[VAL:%.*]] acquire
+; OUTLINE-ATOMICS-NEXT:    ret void
 ;
   %t1 = atomicrmw xchg float* %ptr, float %val acquire
   ret void
@@ -51,6 +60,10 @@ define void @atomic_swap_f64(double* %ptr, double %val) nounwind {
 ; CHECK-NEXT:    br i1 [[TRYAGAIN]], label [[ATOMICRMW_START]], label [[ATOMICRMW_END:%.*]]
 ; CHECK:       atomicrmw.end:
 ; CHECK-NEXT:    ret void
+;
+; OUTLINE-ATOMICS-LABEL: @atomic_swap_f64(
+; OUTLINE-ATOMICS-NEXT:    [[T1:%.*]] = atomicrmw xchg double* [[PTR:%.*]], double [[VAL:%.*]] acquire
+; OUTLINE-ATOMICS-NEXT:    ret void
 ;
   %t1 = atomicrmw xchg double* %ptr, double %val acquire
   ret void


        


More information about the llvm-commits mailing list