[llvm] [WoA] Remove extra barriers after ARM LSE instructions with MSVC (PR #169596)

Usman Nadeem via llvm-commits llvm-commits at lists.llvm.org
Wed Nov 26 00:08:40 PST 2025


https://github.com/UsmanNadeem updated https://github.com/llvm/llvm-project/pull/169596

>From 076e7ea4f31ea97e86817c7b1e90501096cca838 Mon Sep 17 00:00:00 2001
From: Usman Nadeem <mnadeem at qti.qualcomm.com>
Date: Tue, 25 Nov 2025 17:20:46 -0800
Subject: [PATCH 1/2] [WoA] Remove extra barriers after ARM LSE instructions
 with MSVC

https://github.com/llvm/llvm-project/commit/c9821abfc023fba684c8ef8589c49cba8083f579 added
extra fences after sequentially consistent stores for compatibility
with MSVC's seq_cst loads (ldr+dmb). These extra fences should not be
needed for ARM LSE instructions that have both acquire and release
semantics: such an instruction acts as a two-way barrier, which should
be enough for sequential consistency.

Fixes https://github.com/llvm/llvm-project/issues/162345

Change-Id: I9148c73d0dcf3bf1b18a0915f96cac71ac1800f2
---
 llvm/include/llvm/CodeGen/TargetLowering.h    |     3 +-
 llvm/lib/CodeGen/AtomicExpandPass.cpp         |    23 +-
 .../Target/AArch64/AArch64ISelLowering.cpp    |    40 +-
 llvm/lib/Target/AArch64/AArch64ISelLowering.h |     3 +-
 llvm/test/CodeGen/AArch64/atomic-ops-lse.ll   | 10180 ++++++++++++----
 llvm/test/CodeGen/AArch64/atomic-ops-msvc.ll  |  1019 --
 llvm/test/CodeGen/AArch64/atomic-ops.ll       |  1349 +-
 7 files changed, 8944 insertions(+), 3673 deletions(-)
 delete mode 100644 llvm/test/CodeGen/AArch64/atomic-ops-msvc.ll

diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index 7df5d8a09f0f6..d901fa2f20055 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -2245,8 +2245,7 @@ class LLVM_ABI TargetLoweringBase {
 
   /// Whether AtomicExpandPass should automatically insert a trailing fence
   /// without reducing the ordering for this atomic. Defaults to false.
-  virtual bool
-  shouldInsertTrailingFenceForAtomicStore(const Instruction *I) const {
+  virtual bool storeNeedsSeqCstTrailingFence(Instruction *I) const {
     return false;
   }
 
diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp
index d9bc042d6807e..aec008c570e2a 100644
--- a/llvm/lib/CodeGen/AtomicExpandPass.cpp
+++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp
@@ -345,21 +345,13 @@ bool AtomicExpandImpl::processAtomicInstr(Instruction *I) {
     if (FenceOrdering != AtomicOrdering::Monotonic) {
       MadeChange |= bracketInstWithFences(I, FenceOrdering);
     }
-  } else if (I->hasAtomicStore() &&
-             TLI->shouldInsertTrailingFenceForAtomicStore(I)) {
-    auto FenceOrdering = AtomicOrdering::Monotonic;
-    if (SI)
-      FenceOrdering = SI->getOrdering();
-    else if (RMWI)
-      FenceOrdering = RMWI->getOrdering();
-    else if (CASI && TLI->shouldExpandAtomicCmpXchgInIR(CASI) !=
-                         TargetLoweringBase::AtomicExpansionKind::LLSC)
-      // LLSC is handled in expandAtomicCmpXchg().
-      FenceOrdering = CASI->getSuccessOrdering();
-
+  } else if (TLI->storeNeedsSeqCstTrailingFence(I) &&
+             !(CASI && TLI->shouldExpandAtomicCmpXchgInIR(CASI) ==
+                           TargetLoweringBase::AtomicExpansionKind::LLSC)) {
+    // CmpXchg LLSC is handled in expandAtomicCmpXchg().
     IRBuilder Builder(I);
-    if (auto TrailingFence =
-            TLI->emitTrailingFence(Builder, I, FenceOrdering)) {
+    if (auto TrailingFence = TLI->emitTrailingFence(
+            Builder, I, AtomicOrdering::SequentiallyConsistent)) {
       TrailingFence->moveAfter(I);
       MadeChange = true;
     }
@@ -1511,8 +1503,7 @@ bool AtomicExpandImpl::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
   // Make sure later instructions don't get reordered with a fence if
   // necessary.
   Builder.SetInsertPoint(SuccessBB);
-  if (ShouldInsertFencesForAtomic ||
-      TLI->shouldInsertTrailingFenceForAtomicStore(CI))
+  if (ShouldInsertFencesForAtomic || TLI->storeNeedsSeqCstTrailingFence(CI))
     TLI->emitTrailingFence(Builder, CI, SuccessOrder);
   Builder.CreateBr(ExitBB);
 
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 83ce39fa314d1..a99413fe03431 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -29446,8 +29446,8 @@ bool AArch64TargetLowering::shouldInsertFencesForAtomic(
   return false;
 }
 
-bool AArch64TargetLowering::shouldInsertTrailingFenceForAtomicStore(
-    const Instruction *I) const {
+bool AArch64TargetLowering::storeNeedsSeqCstTrailingFence(
+    Instruction *I) const {
   // Store-Release instructions only provide seq_cst guarantees when paired with
   // Load-Acquire instructions. MSVC CRT does not use these instructions to
   // implement seq_cst loads and stores, so we need additional explicit fences
@@ -29455,19 +29455,31 @@ bool AArch64TargetLowering::shouldInsertTrailingFenceForAtomicStore(
   if (!Subtarget->getTargetTriple().isWindowsMSVCEnvironment())
     return false;
 
-  switch (I->getOpcode()) {
-  default:
+  if (auto *SI = dyn_cast<StoreInst>(I))
+    return SI->getOrdering() == AtomicOrdering::SequentiallyConsistent;
+
+  auto *CAS = dyn_cast<AtomicCmpXchgInst>(I);
+  auto *RMW = dyn_cast<AtomicRMWInst>(I);
+  // Not a store.
+  if (!CAS && !RMW)
     return false;
-  case Instruction::AtomicCmpXchg:
-    return cast<AtomicCmpXchgInst>(I)->getSuccessOrdering() ==
-           AtomicOrdering::SequentiallyConsistent;
-  case Instruction::AtomicRMW:
-    return cast<AtomicRMWInst>(I)->getOrdering() ==
-           AtomicOrdering::SequentiallyConsistent;
-  case Instruction::Store:
-    return cast<StoreInst>(I)->getOrdering() ==
-           AtomicOrdering::SequentiallyConsistent;
-  }
+
+  // Fence only needed for seq_cst.
+  if (CAS &&
+      CAS->getSuccessOrdering() != AtomicOrdering::SequentiallyConsistent)
+    return false;
+  if (RMW && RMW->getOrdering() != AtomicOrdering::SequentiallyConsistent)
+    return false;
+
+  // We do not need a fence only if we have LSE and are not expanding.
+  TargetLoweringBase::AtomicExpansionKind ExpandKind =
+      CAS ? shouldExpandAtomicCmpXchgInIR(CAS) : shouldExpandAtomicRMWInIR(RMW);
+  if (ExpandKind == AtomicExpansionKind::None && Subtarget->hasLSE())
+    return false;
+  if (RMW && ExpandKind == AtomicExpansionKind::CmpXChg && Subtarget->hasLSE())
+    return false;
+
+  return true;
 }
 
 // Loads and stores less than 128-bits are already atomic; ones above that
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index ca08eb40c956a..8a99fcad212c2 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -349,8 +349,7 @@ class AArch64TargetLowering : public TargetLowering {
   bool isOpSuitableForLSE128(const Instruction *I) const;
   bool isOpSuitableForRCPC3(const Instruction *I) const;
   bool shouldInsertFencesForAtomic(const Instruction *I) const override;
-  bool
-  shouldInsertTrailingFenceForAtomicStore(const Instruction *I) const override;
+  bool storeNeedsSeqCstTrailingFence(Instruction *I) const override;
 
   TargetLoweringBase::AtomicExpansionKind
   shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
diff --git a/llvm/test/CodeGen/AArch64/atomic-ops-lse.ll b/llvm/test/CodeGen/AArch64/atomic-ops-lse.ll
index 70f3b5cc488ea..e784042ebb1d3 100644
--- a/llvm/test/CodeGen/AArch64/atomic-ops-lse.ll
+++ b/llvm/test/CodeGen/AArch64/atomic-ops-lse.ll
@@ -1,13 +1,21 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
 ; RUN: llc -mtriple=aarch64-none-linux-gnu -disable-post-ra -verify-machineinstrs -mattr=+lse -aarch64-enable-sink-fold=true < %s | FileCheck %s
 ; RUN: llc -mtriple=aarch64-none-linux-gnu -disable-post-ra -verify-machineinstrs -mattr=+lse -mattr=+outline-atomics -aarch64-enable-sink-fold=true < %s | FileCheck %s
 ; RUN: llc -mtriple=aarch64-none-linux-gnu -disable-post-ra -verify-machineinstrs -mattr=+outline-atomics -aarch64-enable-sink-fold=true < %s | FileCheck %s --check-prefix=OUTLINE-ATOMICS
 ; RUN: llc -mtriple=aarch64_be-none-linux-gnu -disable-post-ra -verify-machineinstrs -mattr=+lse -aarch64-enable-sink-fold=true < %s | FileCheck %s
-; RUN: llc -mtriple=aarch64-none-linux-gnu -disable-post-ra -verify-machineinstrs -mattr=+lse -aarch64-enable-sink-fold=true < %s | FileCheck %s --check-prefix=CHECK-REG
+; RUN: llc -mtriple=aarch64-none-linux-gnu -disable-post-ra -verify-machineinstrs -mattr=+lse -aarch64-enable-sink-fold=true < %s | FileCheck %s --check-prefix="CHECK-REG" --allow-unused-prefixes --implicit-check-not="stlxrb {{w|x}}[[NEW:[0-9]+]], {{w|x}}[[NEW:[0-9]+]]], [x{{[0-9]+}}]"
 
-; Point of CHECK-REG is to make sure UNPREDICTABLE instructions aren't created
+; Point of implicit-check-not is to make sure UNPREDICTABLE instructions aren't created
 ; (i.e. reusing a register for status & data in store exclusive).
-; CHECK-REG-NOT: stlxrb w[[NEW:[0-9]+]], w[[NEW]], [x{{[0-9]+}}]
-; CHECK-REG-NOT: stlxrb w[[NEW:[0-9]+]], x[[NEW]], [x{{[0-9]+}}]
+; CHECK-REG: {{.*}}
+
+; RUN: llc -mtriple=aarch64-windows-pc-msvc -disable-post-ra -verify-machineinstrs \
+; RUN:   -mattr=+lse -aarch64-enable-sink-fold=true < %s | FileCheck %s --implicit-check-not="dmb"
+; RUN: llc -mtriple=aarch64-windows-pc-msvc -disable-post-ra -verify-machineinstrs \
+; RUN:  -mattr=+lse -mattr=+outline-atomics -aarch64-enable-sink-fold=true < %s | FileCheck %s --implicit-check-not="dmb"
+; RUN: llc -mtriple=aarch64-windows-pc-msvc -disable-post-ra -verify-machineinstrs \
+; RUN:   -mattr=+outline-atomics -aarch64-enable-sink-fold=true < %s | FileCheck %s --check-prefixes=MSVC-OUTLINE-ATOMICS
+
 
 @var8 = dso_local global i8 0
 @var16 = dso_local global i16 0
@@ -17,6 +25,12 @@
 
 define dso_local i8 @test_atomic_load_add_i8(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_add_i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var8
+; CHECK-NEXT:    add x8, x8, :lo12:var8
+; CHECK-NEXT:    ldaddalb w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i8:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -25,19 +39,30 @@ define dso_local i8 @test_atomic_load_add_i8(i8 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd1_acq_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i8:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var8
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var8
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd1_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw add ptr @var8, i8 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
 
-; CHECK: ldaddalb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i8 %old
 }
 
 define dso_local i16 @test_atomic_load_add_i16(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_add_i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var16
+; CHECK-NEXT:    add x8, x8, :lo12:var16
+; CHECK-NEXT:    ldaddalh w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i16:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -46,19 +71,30 @@ define dso_local i16 @test_atomic_load_add_i16(i16 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd2_acq_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i16:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var16
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var16
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd2_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw add ptr @var16, i16 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
 
-; CHECK: ldaddalh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i16 %old
 }
 
 define dso_local i32 @test_atomic_load_add_i32(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_add_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var32
+; CHECK-NEXT:    add x8, x8, :lo12:var32
+; CHECK-NEXT:    ldaddal w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i32:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -67,19 +103,30 @@ define dso_local i32 @test_atomic_load_add_i32(i32 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd4_acq_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i32:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd4_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw add ptr @var32, i32 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
 
-; CHECK: ldaddal w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i32 %old
 }
 
 define dso_local i64 @test_atomic_load_add_i64(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_add_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var64
+; CHECK-NEXT:    add x8, x8, :lo12:var64
+; CHECK-NEXT:    ldaddal x0, x0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i64:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -88,19 +135,30 @@ define dso_local i64 @test_atomic_load_add_i64(i64 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd8_acq_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i64:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd8_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw add ptr @var64, i64 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
 
-; CHECK: ldaddal x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i64 %old
 }
 
 define dso_local void @test_atomic_load_add_i32_noret(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_add_i32_noret:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var32
+; CHECK-NEXT:    add x8, x8, :lo12:var32
+; CHECK-NEXT:    ldaddal w0, w8, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i32_noret:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -109,18 +167,29 @@ define dso_local void @test_atomic_load_add_i32_noret(i32 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd4_acq_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i32_noret:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd4_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw add ptr @var32, i32 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
 
-; CHECK: ldaddal w0, w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
   ret void
 }
 
 define dso_local void @test_atomic_load_add_i64_noret(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_add_i64_noret:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var64
+; CHECK-NEXT:    add x8, x8, :lo12:var64
+; CHECK-NEXT:    ldaddal x0, x8, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i64_noret:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -129,18 +198,29 @@ define dso_local void @test_atomic_load_add_i64_noret(i64 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd8_acq_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i64_noret:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd8_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw add ptr @var64, i64 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
 
-; CHECK: ldaddal x0, x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
   ret void
 }
 
 define dso_local i8 @test_atomic_load_or_i8(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_or_i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var8
+; CHECK-NEXT:    add x8, x8, :lo12:var8
+; CHECK-NEXT:    ldsetalb w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i8:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -149,19 +229,30 @@ define dso_local i8 @test_atomic_load_or_i8(i8 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldset1_acq_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i8:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var8
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var8
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldset1_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw or ptr @var8, i8 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
 
-; CHECK: ldsetalb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i8 %old
 }
 
 define dso_local i16 @test_atomic_load_or_i16(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_or_i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var16
+; CHECK-NEXT:    add x8, x8, :lo12:var16
+; CHECK-NEXT:    ldsetalh w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i16:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -170,19 +261,30 @@ define dso_local i16 @test_atomic_load_or_i16(i16 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldset2_acq_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i16:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var16
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var16
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldset2_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw or ptr @var16, i16 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
 
-; CHECK: ldsetalh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i16 %old
 }
 
 define dso_local i32 @test_atomic_load_or_i32(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_or_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var32
+; CHECK-NEXT:    add x8, x8, :lo12:var32
+; CHECK-NEXT:    ldsetal w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i32:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -191,19 +293,30 @@ define dso_local i32 @test_atomic_load_or_i32(i32 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldset4_acq_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i32:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldset4_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw or ptr @var32, i32 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
 
-; CHECK: ldsetal w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i32 %old
 }
 
 define dso_local i64 @test_atomic_load_or_i64(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_or_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var64
+; CHECK-NEXT:    add x8, x8, :lo12:var64
+; CHECK-NEXT:    ldsetal x0, x0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i64:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -212,19 +325,30 @@ define dso_local i64 @test_atomic_load_or_i64(i64 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldset8_acq_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i64:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldset8_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw or ptr @var64, i64 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
 
-; CHECK: ldsetal x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i64 %old
 }
 
 define dso_local void @test_atomic_load_or_i32_noret(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_or_i32_noret:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var32
+; CHECK-NEXT:    add x8, x8, :lo12:var32
+; CHECK-NEXT:    ldsetal w0, w8, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i32_noret:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -233,18 +357,29 @@ define dso_local void @test_atomic_load_or_i32_noret(i32 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldset4_acq_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i32_noret:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldset4_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw or ptr @var32, i32 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
 
-; CHECK: ldsetal w0, w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
   ret void
 }
 
 define dso_local void @test_atomic_load_or_i64_noret(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_or_i64_noret:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var64
+; CHECK-NEXT:    add x8, x8, :lo12:var64
+; CHECK-NEXT:    ldsetal x0, x8, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i64_noret:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -253,18 +388,29 @@ define dso_local void @test_atomic_load_or_i64_noret(i64 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldset8_acq_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i64_noret:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldset8_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw or ptr @var64, i64 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
 
-; CHECK: ldsetal x0, x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
   ret void
 }
 
 define dso_local i8 @test_atomic_load_xor_i8(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xor_i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var8
+; CHECK-NEXT:    add x8, x8, :lo12:var8
+; CHECK-NEXT:    ldeoralb w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i8:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -273,19 +419,30 @@ define dso_local i8 @test_atomic_load_xor_i8(i8 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldeor1_acq_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i8:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var8
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var8
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldeor1_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw xor ptr @var8, i8 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
 
-; CHECK: ldeoralb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i8 %old
 }
 
 define dso_local i16 @test_atomic_load_xor_i16(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xor_i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var16
+; CHECK-NEXT:    add x8, x8, :lo12:var16
+; CHECK-NEXT:    ldeoralh w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i16:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -294,19 +451,30 @@ define dso_local i16 @test_atomic_load_xor_i16(i16 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldeor2_acq_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i16:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var16
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var16
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldeor2_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw xor ptr @var16, i16 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
 
-; CHECK: ldeoralh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i16 %old
 }
 
 define dso_local i32 @test_atomic_load_xor_i32(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xor_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var32
+; CHECK-NEXT:    add x8, x8, :lo12:var32
+; CHECK-NEXT:    ldeoral w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i32:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -315,19 +483,30 @@ define dso_local i32 @test_atomic_load_xor_i32(i32 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldeor4_acq_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i32:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldeor4_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw xor ptr @var32, i32 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
 
-; CHECK: ldeoral w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i32 %old
 }
 
 define dso_local i64 @test_atomic_load_xor_i64(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xor_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var64
+; CHECK-NEXT:    add x8, x8, :lo12:var64
+; CHECK-NEXT:    ldeoral x0, x0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i64:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -336,19 +515,30 @@ define dso_local i64 @test_atomic_load_xor_i64(i64 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldeor8_acq_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i64:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldeor8_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw xor ptr @var64, i64 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
 
-; CHECK: ldeoral x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i64 %old
 }
 
 define dso_local void @test_atomic_load_xor_i32_noret(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xor_i32_noret:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var32
+; CHECK-NEXT:    add x8, x8, :lo12:var32
+; CHECK-NEXT:    ldeoral w0, w8, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i32_noret:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -357,18 +547,29 @@ define dso_local void @test_atomic_load_xor_i32_noret(i32 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldeor4_acq_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i32_noret:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldeor4_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw xor ptr @var32, i32 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
 
-; CHECK: ldeoral w0, w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
   ret void
 }
 
 define dso_local void @test_atomic_load_xor_i64_noret(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xor_i64_noret:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var64
+; CHECK-NEXT:    add x8, x8, :lo12:var64
+; CHECK-NEXT:    ldeoral x0, x8, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i64_noret:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -377,658 +578,1077 @@ define dso_local void @test_atomic_load_xor_i64_noret(i64 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldeor8_acq_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i64_noret:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldeor8_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw xor ptr @var64, i64 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
 
-; CHECK: ldeoral x0, x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
   ret void
 }
 
 define dso_local i8 @test_atomic_load_min_i8(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_min_i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var8
+; CHECK-NEXT:    add x8, x8, :lo12:var8
+; CHECK-NEXT:    ldsminalb w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i8:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x9, var8
 ; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var8
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB18_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldaxrb w10, [x9]
 ; OUTLINE-ATOMICS-NEXT:    sxtb w8, w10
 ; OUTLINE-ATOMICS-NEXT:    cmp w8, w0, sxtb
 ; OUTLINE-ATOMICS-NEXT:    csel w10, w10, w0, le
 ; OUTLINE-ATOMICS-NEXT:    stlxrb w11, w10, [x9]
-; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB18_1
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
 ; OUTLINE-ATOMICS-NEXT:    mov w0, w8
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i8:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x9, var8
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var8
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB18_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldaxrb w10, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    sxtb w8, w10
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp w8, w0, sxtb
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel w10, w10, w0, le
+; MSVC-OUTLINE-ATOMICS-NEXT:    stlxrb w11, w10, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB18_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw min ptr @var8, i8 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
 
-; CHECK: ldsminalb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i8 %old
 }
 
 define dso_local i16 @test_atomic_load_min_i16(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_min_i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var16
+; CHECK-NEXT:    add x8, x8, :lo12:var16
+; CHECK-NEXT:    ldsminalh w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i16:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x9, var16
 ; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var16
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB19_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldaxrh w10, [x9]
 ; OUTLINE-ATOMICS-NEXT:    sxth w8, w10
 ; OUTLINE-ATOMICS-NEXT:    cmp w8, w0, sxth
 ; OUTLINE-ATOMICS-NEXT:    csel w10, w10, w0, le
 ; OUTLINE-ATOMICS-NEXT:    stlxrh w11, w10, [x9]
-; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB19_1
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
 ; OUTLINE-ATOMICS-NEXT:    mov w0, w8
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i16:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x9, var16
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var16
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB19_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldaxrh w10, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    sxth w8, w10
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp w8, w0, sxth
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel w10, w10, w0, le
+; MSVC-OUTLINE-ATOMICS-NEXT:    stlxrh w11, w10, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB19_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw min ptr @var16, i16 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
 
-; CHECK: ldsminalh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i16 %old
 }
 
 define dso_local i32 @test_atomic_load_min_i32(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_min_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var32
+; CHECK-NEXT:    add x8, x8, :lo12:var32
+; CHECK-NEXT:    ldsminal w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i32:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x9, var32
 ; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var32
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB20_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldaxr w8, [x9]
 ; OUTLINE-ATOMICS-NEXT:    cmp w8, w0
 ; OUTLINE-ATOMICS-NEXT:    csel w10, w8, w0, le
 ; OUTLINE-ATOMICS-NEXT:    stlxr w11, w10, [x9]
-; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB20_1
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
 ; OUTLINE-ATOMICS-NEXT:    mov w0, w8
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i32:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x9, var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var32
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB20_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldaxr w8, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp w8, w0
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel w10, w8, w0, le
+; MSVC-OUTLINE-ATOMICS-NEXT:    stlxr w11, w10, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB20_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw min ptr @var32, i32 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
 
-; CHECK: ldsminal w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i32 %old
 }
 
 define dso_local i64 @test_atomic_load_min_i64(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_min_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var64
+; CHECK-NEXT:    add x8, x8, :lo12:var64
+; CHECK-NEXT:    ldsminal x0, x0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i64:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x9, var64
 ; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var64
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB21_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldaxr x8, [x9]
 ; OUTLINE-ATOMICS-NEXT:    cmp x8, x0
 ; OUTLINE-ATOMICS-NEXT:    csel x10, x8, x0, le
 ; OUTLINE-ATOMICS-NEXT:    stlxr w11, x10, [x9]
-; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB21_1
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
 ; OUTLINE-ATOMICS-NEXT:    mov x0, x8
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i64:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    mov x8, x0
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x9, var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var64
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB21_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldaxr x0, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp x0, x8
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel x10, x0, x8, le
+; MSVC-OUTLINE-ATOMICS-NEXT:    stlxr w11, x10, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB21_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw min ptr @var64, i64 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
 
-; CHECK: ldsminal x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i64 %old
 }
 
 define dso_local void @test_atomic_load_min_i32_noret(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_min_i32_noret:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var32
+; CHECK-NEXT:    add x8, x8, :lo12:var32
+; CHECK-NEXT:    ldsminal w0, w8, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i32_noret:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x8, var32
 ; OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var32
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB22_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldaxr w9, [x8]
 ; OUTLINE-ATOMICS-NEXT:    cmp w9, w0
 ; OUTLINE-ATOMICS-NEXT:    csel w9, w9, w0, le
 ; OUTLINE-ATOMICS-NEXT:    stlxr w10, w9, [x8]
-; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB[[LOOPSTART]]
-; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
-; OUTLINE-ATOMICS-NEXT:    ret
+; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB22_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i32_noret:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x8, var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var32
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB22_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldaxr w9, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp w9, w0
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel w9, w9, w0, le
+; MSVC-OUTLINE-ATOMICS-NEXT:    stlxr w10, w9, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB22_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw min ptr @var32, i32 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
 
-; CHECK: ldsminal w0, w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
   ret void
 }
 
 define dso_local void @test_atomic_load_min_i64_noret(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_min_i64_noret:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var64
+; CHECK-NEXT:    add x8, x8, :lo12:var64
+; CHECK-NEXT:    ldsminal x0, x8, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i64_noret:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x8, var64
 ; OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var64
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB23_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldaxr x9, [x8]
 ; OUTLINE-ATOMICS-NEXT:    cmp x9, x0
 ; OUTLINE-ATOMICS-NEXT:    csel x9, x9, x0, le
 ; OUTLINE-ATOMICS-NEXT:    stlxr w10, x9, [x8]
-; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB[[LOOPSTART]]
-; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
-; OUTLINE-ATOMICS-NEXT:    ret
+; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB23_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i64_noret:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x8, var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var64
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB23_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldaxr x9, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp x9, x0
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel x9, x9, x0, le
+; MSVC-OUTLINE-ATOMICS-NEXT:    stlxr w10, x9, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB23_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw min ptr @var64, i64 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
 
-; CHECK: ldsminal x0, x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
   ret void
 }
 
 define dso_local i8 @test_atomic_load_umin_i8(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umin_i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var8
+; CHECK-NEXT:    add x8, x8, :lo12:var8
+; CHECK-NEXT:    lduminalb w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i8:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x8, var8
 ; OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var8
 ; OUTLINE-ATOMICS-NEXT:    and w9, w0, #0xff
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB24_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldaxrb w0, [x8]
 ; OUTLINE-ATOMICS-NEXT:    cmp w0, w9
 ; OUTLINE-ATOMICS-NEXT:    csel w10, w0, w9, ls
 ; OUTLINE-ATOMICS-NEXT:    stlxrb w11, w10, [x8]
-; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB24_1
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
 ; OUTLINE-ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i8:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x8, var8
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var8
+; MSVC-OUTLINE-ATOMICS-NEXT:    and w9, w0, #0xff
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB24_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldaxrb w0, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp w0, w9
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel w10, w0, w9, ls
+; MSVC-OUTLINE-ATOMICS-NEXT:    stlxrb w11, w10, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB24_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umin ptr @var8, i8 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
 
-; CHECK: lduminalb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i8 %old
 }
 
 define dso_local i16 @test_atomic_load_umin_i16(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umin_i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var16
+; CHECK-NEXT:    add x8, x8, :lo12:var16
+; CHECK-NEXT:    lduminalh w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i16:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x8, var16
 ; OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var16
 ; OUTLINE-ATOMICS-NEXT:    and w9, w0, #0xffff
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB25_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldaxrh w0, [x8]
 ; OUTLINE-ATOMICS-NEXT:    cmp w0, w9
 ; OUTLINE-ATOMICS-NEXT:    csel w10, w0, w9, ls
 ; OUTLINE-ATOMICS-NEXT:    stlxrh w11, w10, [x8]
-; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB25_1
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
 ; OUTLINE-ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i16:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x8, var16
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var16
+; MSVC-OUTLINE-ATOMICS-NEXT:    and w9, w0, #0xffff
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB25_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldaxrh w0, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp w0, w9
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel w10, w0, w9, ls
+; MSVC-OUTLINE-ATOMICS-NEXT:    stlxrh w11, w10, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB25_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umin ptr @var16, i16 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
 
-; CHECK: lduminalh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i16 %old
 }
 
 define dso_local i32 @test_atomic_load_umin_i32(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umin_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var32
+; CHECK-NEXT:    add x8, x8, :lo12:var32
+; CHECK-NEXT:    lduminal w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i32:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x9, var32
 ; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var32
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB26_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldaxr w8, [x9]
 ; OUTLINE-ATOMICS-NEXT:    cmp w8, w0
 ; OUTLINE-ATOMICS-NEXT:    csel w10, w8, w0, ls
 ; OUTLINE-ATOMICS-NEXT:    stlxr w11, w10, [x9]
-; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB26_1
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
 ; OUTLINE-ATOMICS-NEXT:    mov w0, w8
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i32:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x9, var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var32
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB26_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldaxr w8, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp w8, w0
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel w10, w8, w0, ls
+; MSVC-OUTLINE-ATOMICS-NEXT:    stlxr w11, w10, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB26_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umin ptr @var32, i32 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
 
-; CHECK: lduminal w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i32 %old
 }
 
 define dso_local i64 @test_atomic_load_umin_i64(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umin_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var64
+; CHECK-NEXT:    add x8, x8, :lo12:var64
+; CHECK-NEXT:    lduminal x0, x0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i64:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x9, var64
 ; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var64
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB27_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldaxr x8, [x9]
 ; OUTLINE-ATOMICS-NEXT:    cmp x8, x0
 ; OUTLINE-ATOMICS-NEXT:    csel x10, x8, x0, ls
 ; OUTLINE-ATOMICS-NEXT:    stlxr w11, x10, [x9]
-; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB27_1
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
 ; OUTLINE-ATOMICS-NEXT:    mov x0, x8
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i64:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    mov x8, x0
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x9, var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var64
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB27_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldaxr x0, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp x0, x8
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel x10, x0, x8, ls
+; MSVC-OUTLINE-ATOMICS-NEXT:    stlxr w11, x10, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB27_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umin ptr @var64, i64 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
 
-; CHECK: lduminal x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i64 %old
 }
 
 define dso_local void @test_atomic_load_umin_i32_noret(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umin_i32_noret:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var32
+; CHECK-NEXT:    add x8, x8, :lo12:var32
+; CHECK-NEXT:    lduminal w0, w8, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i32_noret:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x8, var32
 ; OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var32
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB28_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldaxr w9, [x8]
 ; OUTLINE-ATOMICS-NEXT:    cmp w9, w0
 ; OUTLINE-ATOMICS-NEXT:    csel w9, w9, w0, ls
 ; OUTLINE-ATOMICS-NEXT:    stlxr w10, w9, [x8]
-; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB[[LOOPSTART]]
-; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
-; OUTLINE-ATOMICS-NEXT:    ret
+; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB28_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i32_noret:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x8, var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var32
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB28_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldaxr w9, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp w9, w0
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel w9, w9, w0, ls
+; MSVC-OUTLINE-ATOMICS-NEXT:    stlxr w10, w9, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB28_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw umin ptr @var32, i32 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
 
-; CHECK: lduminal w0, w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
   ret void
 }
 
 define dso_local void @test_atomic_load_umin_i64_noret(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umin_i64_noret:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var64
+; CHECK-NEXT:    add x8, x8, :lo12:var64
+; CHECK-NEXT:    lduminal x0, x8, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i64_noret:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x8, var64
 ; OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var64
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB29_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldaxr x9, [x8]
 ; OUTLINE-ATOMICS-NEXT:    cmp x9, x0
 ; OUTLINE-ATOMICS-NEXT:    csel x9, x9, x0, ls
 ; OUTLINE-ATOMICS-NEXT:    stlxr w10, x9, [x8]
-; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB[[LOOPSTART]]
-; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
-; OUTLINE-ATOMICS-NEXT:    ret
+; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB29_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i64_noret:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x8, var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var64
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB29_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldaxr x9, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp x9, x0
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel x9, x9, x0, ls
+; MSVC-OUTLINE-ATOMICS-NEXT:    stlxr w10, x9, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB29_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw umin ptr @var64, i64 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
 
-; CHECK: lduminal x0, x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
   ret void
 }
 
 define dso_local i8 @test_atomic_load_max_i8(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_max_i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var8
+; CHECK-NEXT:    add x8, x8, :lo12:var8
+; CHECK-NEXT:    ldsmaxalb w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i8:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x9, var8
 ; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var8
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB30_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldaxrb w10, [x9]
 ; OUTLINE-ATOMICS-NEXT:    sxtb w8, w10
 ; OUTLINE-ATOMICS-NEXT:    cmp w8, w0, sxtb
 ; OUTLINE-ATOMICS-NEXT:    csel w10, w10, w0, gt
 ; OUTLINE-ATOMICS-NEXT:    stlxrb w11, w10, [x9]
-; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB30_1
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
 ; OUTLINE-ATOMICS-NEXT:    mov w0, w8
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i8:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x9, var8
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var8
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB30_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldaxrb w10, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    sxtb w8, w10
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp w8, w0, sxtb
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel w10, w10, w0, gt
+; MSVC-OUTLINE-ATOMICS-NEXT:    stlxrb w11, w10, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB30_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw max ptr @var8, i8 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
 
-; CHECK: ldsmaxalb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i8 %old
 }
 
 define dso_local i16 @test_atomic_load_max_i16(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_max_i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var16
+; CHECK-NEXT:    add x8, x8, :lo12:var16
+; CHECK-NEXT:    ldsmaxalh w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i16:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x9, var16
 ; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var16
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB31_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldaxrh w10, [x9]
 ; OUTLINE-ATOMICS-NEXT:    sxth w8, w10
 ; OUTLINE-ATOMICS-NEXT:    cmp w8, w0, sxth
 ; OUTLINE-ATOMICS-NEXT:    csel w10, w10, w0, gt
 ; OUTLINE-ATOMICS-NEXT:    stlxrh w11, w10, [x9]
-; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB31_1
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
 ; OUTLINE-ATOMICS-NEXT:    mov w0, w8
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i16:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x9, var16
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var16
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB31_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldaxrh w10, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    sxth w8, w10
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp w8, w0, sxth
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel w10, w10, w0, gt
+; MSVC-OUTLINE-ATOMICS-NEXT:    stlxrh w11, w10, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB31_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw max ptr @var16, i16 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
 
-; CHECK: ldsmaxalh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i16 %old
 }
 
 define dso_local i32 @test_atomic_load_max_i32(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_max_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var32
+; CHECK-NEXT:    add x8, x8, :lo12:var32
+; CHECK-NEXT:    ldsmaxal w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i32:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x9, var32
 ; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var32
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB32_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldaxr w8, [x9]
 ; OUTLINE-ATOMICS-NEXT:    cmp w8, w0
 ; OUTLINE-ATOMICS-NEXT:    csel w10, w8, w0, gt
 ; OUTLINE-ATOMICS-NEXT:    stlxr w11, w10, [x9]
-; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB32_1
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
 ; OUTLINE-ATOMICS-NEXT:    mov w0, w8
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i32:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x9, var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var32
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB32_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldaxr w8, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp w8, w0
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel w10, w8, w0, gt
+; MSVC-OUTLINE-ATOMICS-NEXT:    stlxr w11, w10, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB32_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw max ptr @var32, i32 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
 
-; CHECK: ldsmaxal w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i32 %old
 }
 
 define dso_local i64 @test_atomic_load_max_i64(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_max_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var64
+; CHECK-NEXT:    add x8, x8, :lo12:var64
+; CHECK-NEXT:    ldsmaxal x0, x0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i64:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x9, var64
 ; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var64
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB33_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldaxr x8, [x9]
 ; OUTLINE-ATOMICS-NEXT:    cmp x8, x0
 ; OUTLINE-ATOMICS-NEXT:    csel x10, x8, x0, gt
 ; OUTLINE-ATOMICS-NEXT:    stlxr w11, x10, [x9]
-; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB33_1
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
 ; OUTLINE-ATOMICS-NEXT:    mov x0, x8
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i64:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    mov x8, x0
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x9, var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var64
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB33_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldaxr x0, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp x0, x8
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel x10, x0, x8, gt
+; MSVC-OUTLINE-ATOMICS-NEXT:    stlxr w11, x10, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB33_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw max ptr @var64, i64 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
 
-; CHECK: ldsmaxal x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i64 %old
 }
 
 define dso_local void @test_atomic_load_max_i32_noret(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_max_i32_noret:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var32
+; CHECK-NEXT:    add x8, x8, :lo12:var32
+; CHECK-NEXT:    ldsmaxal w0, w8, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i32_noret:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x8, var32
 ; OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var32
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB34_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldaxr w9, [x8]
 ; OUTLINE-ATOMICS-NEXT:    cmp w9, w0
 ; OUTLINE-ATOMICS-NEXT:    csel w9, w9, w0, gt
 ; OUTLINE-ATOMICS-NEXT:    stlxr w10, w9, [x8]
-; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB[[LOOPSTART]]
-; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
-; OUTLINE-ATOMICS-NEXT:    ret
+; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB34_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i32_noret:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x8, var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var32
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB34_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldaxr w9, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp w9, w0
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel w9, w9, w0, gt
+; MSVC-OUTLINE-ATOMICS-NEXT:    stlxr w10, w9, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB34_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw max ptr @var32, i32 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
 
-; CHECK: ldsmaxal w0, w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
   ret void
 }
 
 define dso_local void @test_atomic_load_max_i64_noret(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_max_i64_noret:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var64
+; CHECK-NEXT:    add x8, x8, :lo12:var64
+; CHECK-NEXT:    ldsmaxal x0, x8, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i64_noret:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x8, var64
 ; OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var64
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB35_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldaxr x9, [x8]
 ; OUTLINE-ATOMICS-NEXT:    cmp x9, x0
 ; OUTLINE-ATOMICS-NEXT:    csel x9, x9, x0, gt
 ; OUTLINE-ATOMICS-NEXT:    stlxr w10, x9, [x8]
-; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB[[LOOPSTART]]
-; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
-; OUTLINE-ATOMICS-NEXT:    ret
+; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB35_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i64_noret:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x8, var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var64
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB35_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldaxr x9, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp x9, x0
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel x9, x9, x0, gt
+; MSVC-OUTLINE-ATOMICS-NEXT:    stlxr w10, x9, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB35_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw max ptr @var64, i64 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
 
-; CHECK: ldsmaxal x0, x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
   ret void
 }
 
 define dso_local i8 @test_atomic_load_umax_i8(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umax_i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var8
+; CHECK-NEXT:    add x8, x8, :lo12:var8
+; CHECK-NEXT:    ldumaxalb w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i8:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x8, var8
 ; OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var8
 ; OUTLINE-ATOMICS-NEXT:    and w9, w0, #0xff
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB36_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldaxrb w0, [x8]
 ; OUTLINE-ATOMICS-NEXT:    cmp w0, w9
 ; OUTLINE-ATOMICS-NEXT:    csel w10, w0, w9, hi
 ; OUTLINE-ATOMICS-NEXT:    stlxrb w11, w10, [x8]
-; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB36_1
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
 ; OUTLINE-ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i8:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x8, var8
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var8
+; MSVC-OUTLINE-ATOMICS-NEXT:    and w9, w0, #0xff
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB36_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldaxrb w0, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp w0, w9
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel w10, w0, w9, hi
+; MSVC-OUTLINE-ATOMICS-NEXT:    stlxrb w11, w10, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB36_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umax ptr @var8, i8 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
 
-; CHECK: ldumaxalb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i8 %old
 }
 
 define dso_local i16 @test_atomic_load_umax_i16(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umax_i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var16
+; CHECK-NEXT:    add x8, x8, :lo12:var16
+; CHECK-NEXT:    ldumaxalh w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i16:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x8, var16
 ; OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var16
 ; OUTLINE-ATOMICS-NEXT:    and w9, w0, #0xffff
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB37_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldaxrh w0, [x8]
 ; OUTLINE-ATOMICS-NEXT:    cmp w0, w9
 ; OUTLINE-ATOMICS-NEXT:    csel w10, w0, w9, hi
 ; OUTLINE-ATOMICS-NEXT:    stlxrh w11, w10, [x8]
-; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB37_1
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
 ; OUTLINE-ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i16:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x8, var16
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var16
+; MSVC-OUTLINE-ATOMICS-NEXT:    and w9, w0, #0xffff
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB37_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldaxrh w0, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp w0, w9
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel w10, w0, w9, hi
+; MSVC-OUTLINE-ATOMICS-NEXT:    stlxrh w11, w10, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB37_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umax ptr @var16, i16 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
 
-; CHECK: ldumaxalh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i16 %old
 }
 
 define dso_local i32 @test_atomic_load_umax_i32(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umax_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var32
+; CHECK-NEXT:    add x8, x8, :lo12:var32
+; CHECK-NEXT:    ldumaxal w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i32:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x9, var32
 ; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var32
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB38_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldaxr w8, [x9]
 ; OUTLINE-ATOMICS-NEXT:    cmp w8, w0
 ; OUTLINE-ATOMICS-NEXT:    csel w10, w8, w0, hi
 ; OUTLINE-ATOMICS-NEXT:    stlxr w11, w10, [x9]
-; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB38_1
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
 ; OUTLINE-ATOMICS-NEXT:    mov w0, w8
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i32:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x9, var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var32
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB38_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldaxr w8, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp w8, w0
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel w10, w8, w0, hi
+; MSVC-OUTLINE-ATOMICS-NEXT:    stlxr w11, w10, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB38_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umax ptr @var32, i32 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
 
-; CHECK: ldumaxal w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i32 %old
 }
 
 define dso_local i64 @test_atomic_load_umax_i64(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umax_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var64
+; CHECK-NEXT:    add x8, x8, :lo12:var64
+; CHECK-NEXT:    ldumaxal x0, x0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i64:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x9, var64
 ; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var64
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB39_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldaxr x8, [x9]
 ; OUTLINE-ATOMICS-NEXT:    cmp x8, x0
 ; OUTLINE-ATOMICS-NEXT:    csel x10, x8, x0, hi
 ; OUTLINE-ATOMICS-NEXT:    stlxr w11, x10, [x9]
-; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB39_1
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
 ; OUTLINE-ATOMICS-NEXT:    mov x0, x8
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i64:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    mov x8, x0
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x9, var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var64
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB39_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldaxr x0, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp x0, x8
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel x10, x0, x8, hi
+; MSVC-OUTLINE-ATOMICS-NEXT:    stlxr w11, x10, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB39_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umax ptr @var64, i64 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
 
-; CHECK: ldumaxal x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i64 %old
 }
 
 define dso_local void @test_atomic_load_umax_i32_noret(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umax_i32_noret:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var32
+; CHECK-NEXT:    add x8, x8, :lo12:var32
+; CHECK-NEXT:    ldumaxal w0, w8, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i32_noret:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x8, var32
 ; OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var32
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB40_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldaxr w9, [x8]
 ; OUTLINE-ATOMICS-NEXT:    cmp w9, w0
 ; OUTLINE-ATOMICS-NEXT:    csel w9, w9, w0, hi
 ; OUTLINE-ATOMICS-NEXT:    stlxr w10, w9, [x8]
-; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB[[LOOPSTART]]
-; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
-; OUTLINE-ATOMICS-NEXT:    ret
+; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB40_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i32_noret:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x8, var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var32
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB40_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldaxr w9, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp w9, w0
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel w9, w9, w0, hi
+; MSVC-OUTLINE-ATOMICS-NEXT:    stlxr w10, w9, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB40_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw umax ptr @var32, i32 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
 
-; CHECK: ldumaxal w0, w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
   ret void
 }
 
 define dso_local void @test_atomic_load_umax_i64_noret(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umax_i64_noret:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var64
+; CHECK-NEXT:    add x8, x8, :lo12:var64
+; CHECK-NEXT:    ldumaxal x0, x8, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i64_noret:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x8, var64
 ; OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var64
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB41_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldaxr x9, [x8]
 ; OUTLINE-ATOMICS-NEXT:    cmp x9, x0
 ; OUTLINE-ATOMICS-NEXT:    csel x9, x9, x0, hi
 ; OUTLINE-ATOMICS-NEXT:    stlxr w10, x9, [x8]
-; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB[[LOOPSTART]]
-; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
-; OUTLINE-ATOMICS-NEXT:    ret
+; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB41_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i64_noret:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x8, var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var64
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB41_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldaxr x9, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp x9, x0
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel x9, x9, x0, hi
+; MSVC-OUTLINE-ATOMICS-NEXT:    stlxr w10, x9, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB41_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw umax ptr @var64, i64 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
 
-; CHECK: ldumaxal x0, x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
   ret void
 }
 
 define dso_local i8 @test_atomic_load_xchg_i8(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xchg_i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var8
+; CHECK-NEXT:    add x8, x8, :lo12:var8
+; CHECK-NEXT:    swpalb w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i8:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -1037,19 +1657,30 @@ define dso_local i8 @test_atomic_load_xchg_i8(i8 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_swp1_acq_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i8:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var8
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var8
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_swp1_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw xchg ptr @var8, i8 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
 
-; CHECK: swpalb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i8 %old
 }
 
 define dso_local i16 @test_atomic_load_xchg_i16(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xchg_i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var16
+; CHECK-NEXT:    add x8, x8, :lo12:var16
+; CHECK-NEXT:    swpalh w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i16:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -1058,19 +1689,30 @@ define dso_local i16 @test_atomic_load_xchg_i16(i16 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_swp2_acq_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i16:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var16
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var16
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_swp2_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw xchg ptr @var16, i16 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
 
-; CHECK: swpalh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i16 %old
 }
 
 define dso_local i32 @test_atomic_load_xchg_i32(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xchg_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var32
+; CHECK-NEXT:    add x8, x8, :lo12:var32
+; CHECK-NEXT:    swpal w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i32:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -1079,19 +1721,30 @@ define dso_local i32 @test_atomic_load_xchg_i32(i32 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_swp4_acq_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i32:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_swp4_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw xchg ptr @var32, i32 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
 
-; CHECK: swpal w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i32 %old
 }
 
 define dso_local i64 @test_atomic_load_xchg_i64(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xchg_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var64
+; CHECK-NEXT:    add x8, x8, :lo12:var64
+; CHECK-NEXT:    swpal x0, x0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i64:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -1100,19 +1753,30 @@ define dso_local i64 @test_atomic_load_xchg_i64(i64 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_swp8_acq_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i64:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_swp8_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw xchg ptr @var64, i64 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
 
-; CHECK: swpal x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i64 %old
 }
 
 define dso_local void @test_atomic_load_xchg_i32_noret(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xchg_i32_noret:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var32
+; CHECK-NEXT:    add x8, x8, :lo12:var32
+; CHECK-NEXT:    swpal w0, w8, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i32_noret:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -1121,19 +1785,30 @@ define dso_local void @test_atomic_load_xchg_i32_noret(i32 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_swp4_acq_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i32_noret:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_swp4_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw xchg ptr @var32, i32 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
 
-; CHECK: swpal w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret void
 }
 
 define dso_local void @test_atomic_load_xchg_i64_noret(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xchg_i64_noret:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var64
+; CHECK-NEXT:    add x8, x8, :lo12:var64
+; CHECK-NEXT:    swpal x0, x8, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i64_noret:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -1142,19 +1817,30 @@ define dso_local void @test_atomic_load_xchg_i64_noret(i64 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_swp8_acq_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i64_noret:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_swp8_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw xchg ptr @var64, i64 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
 
-; CHECK: swpal x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret void
 }
 
 define dso_local i8 @test_atomic_cmpxchg_i8(i8 %wanted, i8 %new) nounwind {
 ; CHECK-LABEL: test_atomic_cmpxchg_i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var8
+; CHECK-NEXT:    add x8, x8, :lo12:var8
+; CHECK-NEXT:    casab w0, w1, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i8:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -1163,20 +1849,33 @@ define dso_local i8 @test_atomic_cmpxchg_i8(i8 %wanted, i8 %new) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_cas1_acq
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i8:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x2, var8
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x2, x2, :lo12:var8
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_cas1_acq
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %pair = cmpxchg ptr @var8, i8 %wanted, i8 %new acquire acquire
    %old = extractvalue { i8, i1 } %pair, 0
 
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
-; CHECK-NEXT: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
-; CHECK-NEXT: casab w0, w1, [x[[ADDR]]]
-; CHECK-NEXT: ret
 
    ret i8 %old
 }
 
 define dso_local i1 @test_atomic_cmpxchg_i8_1(i8 %wanted, i8 %new) nounwind {
 ; CHECK-LABEL: test_atomic_cmpxchg_i8_1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    adrp x9, var8
+; CHECK-NEXT:    add x9, x9, :lo12:var8
+; CHECK-NEXT:    casab w8, w1, [x9]
+; CHECK-NEXT:    cmp w8, w0, uxtb
+; CHECK-NEXT:    cset w0, eq
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i8_1:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
@@ -1188,22 +1887,33 @@ define dso_local i1 @test_atomic_cmpxchg_i8_1(i8 %wanted, i8 %new) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    cset w0, eq
 ; OUTLINE-ATOMICS-NEXT:    ldp x30, x19, [sp], #16 // 16-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i8_1:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    mov w19, w0
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x2, var8
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x2, x2, :lo12:var8
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_cas1_acq
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp w0, w19, uxtb
+; MSVC-OUTLINE-ATOMICS-NEXT:    cset w0, eq
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldp x30, x19, [sp], #16 // 16-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %pair = cmpxchg ptr @var8, i8 %wanted, i8 %new acquire acquire
    %success = extractvalue { i8, i1 } %pair, 1
 
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
 
-; CHECK: casab w[[NEW:[0-9]+]], w1, [x[[ADDR]]]
-; CHECK-NEXT: cmp w[[NEW]], w0, uxtb
-; CHECK-NEXT: cset w0, eq
-; CHECK-NEXT: ret
    ret i1 %success
 }
 
 define dso_local i16 @test_atomic_cmpxchg_i16(i16 %wanted, i16 %new) nounwind {
 ; CHECK-LABEL: test_atomic_cmpxchg_i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var16
+; CHECK-NEXT:    add x8, x8, :lo12:var16
+; CHECK-NEXT:    casah w0, w1, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i16:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -1212,20 +1922,33 @@ define dso_local i16 @test_atomic_cmpxchg_i16(i16 %wanted, i16 %new) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_cas2_acq
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i16:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x2, var16
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x2, x2, :lo12:var16
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_cas2_acq
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %pair = cmpxchg ptr @var16, i16 %wanted, i16 %new acquire acquire
    %old = extractvalue { i16, i1 } %pair, 0
 
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
-; CHECK-NEXT: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
-; CHECK-NEXT: casah w0, w1, [x[[ADDR]]]
-; CHECK-NEXT: ret
 
    ret i16 %old
 }
 
 define dso_local i1 @test_atomic_cmpxchg_i16_1(i16 %wanted, i16 %new) nounwind {
 ; CHECK-LABEL: test_atomic_cmpxchg_i16_1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    adrp x9, var16
+; CHECK-NEXT:    add x9, x9, :lo12:var16
+; CHECK-NEXT:    casah w8, w1, [x9]
+; CHECK-NEXT:    cmp w8, w0, uxth
+; CHECK-NEXT:    cset w0, eq
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i16_1:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
@@ -1237,23 +1960,34 @@ define dso_local i1 @test_atomic_cmpxchg_i16_1(i16 %wanted, i16 %new) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    cset w0, eq
 ; OUTLINE-ATOMICS-NEXT:    ldp x30, x19, [sp], #16 // 16-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i16_1:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    mov w19, w0
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x2, var16
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x2, x2, :lo12:var16
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_cas2_acq
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp w0, w19, uxth
+; MSVC-OUTLINE-ATOMICS-NEXT:    cset w0, eq
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldp x30, x19, [sp], #16 // 16-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %pair = cmpxchg ptr @var16, i16 %wanted, i16 %new acquire acquire
    %success = extractvalue { i16, i1 } %pair, 1
 
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
-; CHECK-NEXT: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
 
-; CHECK: casah w[[NEW:[0-9]+]], w1, [x[[ADDR]]]
-; CHECK-NEXT: cmp w[[NEW]], w0, uxth
-; CHECK-NEXT: cset w0, eq
-; CHECK-NEXT: ret
 
    ret i1 %success
 }
 
 define dso_local i32 @test_atomic_cmpxchg_i32(i32 %wanted, i32 %new) nounwind {
 ; CHECK-LABEL: test_atomic_cmpxchg_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var32
+; CHECK-NEXT:    add x8, x8, :lo12:var32
+; CHECK-NEXT:    casa w0, w1, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i32:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -1262,21 +1996,31 @@ define dso_local i32 @test_atomic_cmpxchg_i32(i32 %wanted, i32 %new) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_cas4_acq
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i32:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x2, var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x2, x2, :lo12:var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_cas4_acq
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %pair = cmpxchg ptr @var32, i32 %wanted, i32 %new acquire acquire
    %old = extractvalue { i32, i1 } %pair, 0
 
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
 
-; CHECK: casa w0, w1, [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i32 %old
 }
 
 define dso_local i32 @test_atomic_cmpxchg_i32_monotonic_acquire(i32 %wanted, i32 %new) nounwind {
 ; CHECK-LABEL: test_atomic_cmpxchg_i32_monotonic_acquire:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var32
+; CHECK-NEXT:    add x8, x8, :lo12:var32
+; CHECK-NEXT:    casa w0, w1, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i32_monotonic_acquire:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -1285,21 +2029,31 @@ define dso_local i32 @test_atomic_cmpxchg_i32_monotonic_acquire(i32 %wanted, i32
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_cas4_acq
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i32_monotonic_acquire:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x2, var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x2, x2, :lo12:var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_cas4_acq
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %pair = cmpxchg ptr @var32, i32 %wanted, i32 %new monotonic acquire
    %old = extractvalue { i32, i1 } %pair, 0
 
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
 
-; CHECK: casa w0, w1, [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i32 %old
 }
 
 define dso_local i64 @test_atomic_cmpxchg_i64(i64 %wanted, i64 %new) nounwind {
 ; CHECK-LABEL: test_atomic_cmpxchg_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var64
+; CHECK-NEXT:    add x8, x8, :lo12:var64
+; CHECK-NEXT:    casa x0, x1, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i64:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -1308,21 +2062,35 @@ define dso_local i64 @test_atomic_cmpxchg_i64(i64 %wanted, i64 %new) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_cas8_acq
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i64:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x2, var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x2, x2, :lo12:var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_cas8_acq
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %pair = cmpxchg ptr @var64, i64 %wanted, i64 %new acquire acquire
    %old = extractvalue { i64, i1 } %pair, 0
 
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
 
-; CHECK: casa x0, x1, [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i64 %old
 }
 
 define dso_local i128 @test_atomic_cmpxchg_i128(i128 %wanted, i128 %new) nounwind {
 ; CHECK-LABEL: test_atomic_cmpxchg_i128:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $x3 killed $x3 killed $x2_x3 def $x2_x3
+; CHECK-NEXT:    // kill: def $x1 killed $x1 killed $x0_x1 def $x0_x1
+; CHECK-NEXT:    // kill: def $x2 killed $x2 killed $x2_x3 def $x2_x3
+; CHECK-NEXT:    // kill: def $x0 killed $x0 killed $x0_x1 def $x0_x1
+; CHECK-NEXT:    adrp x8, var128
+; CHECK-NEXT:    add x8, x8, :lo12:var128
+; CHECK-NEXT:    caspa x0, x1, x2, x3, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i128:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -1331,21 +2099,35 @@ define dso_local i128 @test_atomic_cmpxchg_i128(i128 %wanted, i128 %new) nounwin
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_cas16_acq
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i128:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x4, var128
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x4, x4, :lo12:var128
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_cas16_acq
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %pair = cmpxchg ptr @var128, i128 %wanted, i128 %new acquire acquire
    %old = extractvalue { i128, i1 } %pair, 0
 
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var128
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var128
 
-; CHECK: caspa x0, x1, x2, x3, [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i128 %old
 }
 
 define dso_local i128 @test_atomic_cmpxchg_i128_monotonic_seqcst(i128 %wanted, i128 %new) nounwind {
 ; CHECK-LABEL: test_atomic_cmpxchg_i128_monotonic_seqcst:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $x3 killed $x3 killed $x2_x3 def $x2_x3
+; CHECK-NEXT:    // kill: def $x1 killed $x1 killed $x0_x1 def $x0_x1
+; CHECK-NEXT:    // kill: def $x2 killed $x2 killed $x2_x3 def $x2_x3
+; CHECK-NEXT:    // kill: def $x0 killed $x0 killed $x0_x1 def $x0_x1
+; CHECK-NEXT:    adrp x8, var128
+; CHECK-NEXT:    add x8, x8, :lo12:var128
+; CHECK-NEXT:    caspal x0, x1, x2, x3, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i128_monotonic_seqcst:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -1354,21 +2136,35 @@ define dso_local i128 @test_atomic_cmpxchg_i128_monotonic_seqcst(i128 %wanted, i
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_cas16_acq_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i128_monotonic_seqcst:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x4, var128
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x4, x4, :lo12:var128
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_cas16_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %pair = cmpxchg ptr @var128, i128 %wanted, i128 %new monotonic seq_cst
    %old = extractvalue { i128, i1 } %pair, 0
 
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var128
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var128
 
-; CHECK: caspal x0, x1, x2, x3, [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i128 %old
 }
 
 define dso_local i128 @test_atomic_cmpxchg_i128_release_acquire(i128 %wanted, i128 %new) nounwind {
 ; CHECK-LABEL: test_atomic_cmpxchg_i128_release_acquire:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $x3 killed $x3 killed $x2_x3 def $x2_x3
+; CHECK-NEXT:    // kill: def $x1 killed $x1 killed $x0_x1 def $x0_x1
+; CHECK-NEXT:    // kill: def $x2 killed $x2 killed $x2_x3 def $x2_x3
+; CHECK-NEXT:    // kill: def $x0 killed $x0 killed $x0_x1 def $x0_x1
+; CHECK-NEXT:    adrp x8, var128
+; CHECK-NEXT:    add x8, x8, :lo12:var128
+; CHECK-NEXT:    caspal x0, x1, x2, x3, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i128_release_acquire:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -1377,21 +2173,32 @@ define dso_local i128 @test_atomic_cmpxchg_i128_release_acquire(i128 %wanted, i1
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_cas16_acq_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i128_release_acquire:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x4, var128
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x4, x4, :lo12:var128
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_cas16_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %pair = cmpxchg ptr @var128, i128 %wanted, i128 %new release acquire
    %old = extractvalue { i128, i1 } %pair, 0
 
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var128
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var128
 
-; CHECK: caspal x0, x1, x2, x3, [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i128 %old
 }
 
 define dso_local i8 @test_atomic_load_sub_i8(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_sub_i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    neg w8, w0
+; CHECK-NEXT:    adrp x9, var8
+; CHECK-NEXT:    add x9, x9, :lo12:var8
+; CHECK-NEXT:    ldaddalb w8, w0, [x9]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i8:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -1401,20 +2208,32 @@ define dso_local i8 @test_atomic_load_sub_i8(i8 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd1_acq_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i8:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    neg w0, w0
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var8
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var8
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd1_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
   %old = atomicrmw sub ptr @var8, i8 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]]
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
 
-; CHECK: ldaddalb w[[NEG]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
   ret i8 %old
 }
 
 define dso_local i16 @test_atomic_load_sub_i16(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_sub_i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    neg w8, w0
+; CHECK-NEXT:    adrp x9, var16
+; CHECK-NEXT:    add x9, x9, :lo12:var16
+; CHECK-NEXT:    ldaddalh w8, w0, [x9]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i16:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -1424,20 +2243,32 @@ define dso_local i16 @test_atomic_load_sub_i16(i16 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd2_acq_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i16:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    neg w0, w0
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var16
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var16
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd2_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
   %old = atomicrmw sub ptr @var16, i16 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]]
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
 
-; CHECK: ldaddalh w[[NEG]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
   ret i16 %old
 }
 
 define dso_local i32 @test_atomic_load_sub_i32(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_sub_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    neg w8, w0
+; CHECK-NEXT:    adrp x9, var32
+; CHECK-NEXT:    add x9, x9, :lo12:var32
+; CHECK-NEXT:    ldaddal w8, w0, [x9]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i32:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -1447,20 +2278,32 @@ define dso_local i32 @test_atomic_load_sub_i32(i32 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd4_acq_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i32:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    neg w0, w0
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd4_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
   %old = atomicrmw sub ptr @var32, i32 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]]
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
 
-; CHECK: ldaddal w[[NEG]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
   ret i32 %old
 }
 
 define dso_local i64 @test_atomic_load_sub_i64(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_sub_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    neg x8, x0
+; CHECK-NEXT:    adrp x9, var64
+; CHECK-NEXT:    add x9, x9, :lo12:var64
+; CHECK-NEXT:    ldaddal x8, x0, [x9]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i64:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -1470,20 +2313,32 @@ define dso_local i64 @test_atomic_load_sub_i64(i64 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd8_acq_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i64:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    neg x0, x0
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd8_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
   %old = atomicrmw sub ptr @var64, i64 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: neg x[[NEG:[0-9]+]], x[[OLD:[0-9]+]]
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
 
-; CHECK: ldaddal x[[NEG]], x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
   ret i64 %old
 }
 
 define dso_local void @test_atomic_load_sub_i32_noret(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_sub_i32_noret:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    neg w8, w0
+; CHECK-NEXT:    adrp x9, var32
+; CHECK-NEXT:    add x9, x9, :lo12:var32
+; CHECK-NEXT:    ldaddal w8, w8, [x9]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i32_noret:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -1493,20 +2348,32 @@ define dso_local void @test_atomic_load_sub_i32_noret(i32 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd4_acq_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i32_noret:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    neg w0, w0
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd4_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
   atomicrmw sub ptr @var32, i32 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]]
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
 
-; CHECK: ldaddal w[[NEG]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
   ret void
 }
 
 define dso_local void @test_atomic_load_sub_i64_noret(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_sub_i64_noret:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    neg x8, x0
+; CHECK-NEXT:    adrp x9, var64
+; CHECK-NEXT:    add x9, x9, :lo12:var64
+; CHECK-NEXT:    ldaddal x8, x8, [x9]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i64_noret:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -1516,112 +2383,171 @@ define dso_local void @test_atomic_load_sub_i64_noret(i64 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd8_acq_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i64_noret:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    neg x0, x0
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd8_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
   atomicrmw sub ptr @var64, i64 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: neg x[[NEG:[0-9]+]], x[[OLD:[0-9]+]]
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
 
-; CHECK: ldaddal x[[NEG]], x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
   ret void
 }
 
 define dso_local i8 @test_atomic_load_sub_i8_neg_imm() nounwind {
 ; CHECK-LABEL: test_atomic_load_sub_i8_neg_imm:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #1 // =0x1
+; CHECK-NEXT:    adrp x9, var8
+; CHECK-NEXT:    add x9, x9, :lo12:var8
+; CHECK-NEXT:    ldaddalb w8, w0, [x9]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i8_neg_imm:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
 ; OUTLINE-ATOMICS-NEXT:    adrp x1, var8
 ; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var8
-; OUTLINE-ATOMICS-NEXT:    mov w0, #1
+; OUTLINE-ATOMICS-NEXT:    mov w0, #1 // =0x1
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd1_acq_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i8_neg_imm:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var8
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var8
+; MSVC-OUTLINE-ATOMICS-NEXT:    mov w0, #1 // =0x1
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd1_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
   %old = atomicrmw sub ptr @var8, i8 -1 seq_cst
 
-; CHECK-NOT: dmb
-; CHECK: mov w[[IMM:[0-9]+]], #1
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
-; CHECK: ldaddalb w[[IMM]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
   ret i8 %old
 }
 
 define dso_local i16 @test_atomic_load_sub_i16_neg_imm() nounwind {
 ; CHECK-LABEL: test_atomic_load_sub_i16_neg_imm:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #1 // =0x1
+; CHECK-NEXT:    adrp x9, var16
+; CHECK-NEXT:    add x9, x9, :lo12:var16
+; CHECK-NEXT:    ldaddalh w8, w0, [x9]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i16_neg_imm:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
 ; OUTLINE-ATOMICS-NEXT:    adrp x1, var16
 ; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var16
-; OUTLINE-ATOMICS-NEXT:    mov w0, #1
+; OUTLINE-ATOMICS-NEXT:    mov w0, #1 // =0x1
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd2_acq_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i16_neg_imm:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var16
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var16
+; MSVC-OUTLINE-ATOMICS-NEXT:    mov w0, #1 // =0x1
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd2_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
   %old = atomicrmw sub ptr @var16, i16 -1 seq_cst
 
-; CHECK-NOT: dmb
-; CHECK: mov w[[IMM:[0-9]+]], #1
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
-; CHECK: ldaddalh w[[IMM]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
   ret i16 %old
 }
 
 define dso_local i32 @test_atomic_load_sub_i32_neg_imm() nounwind {
 ; CHECK-LABEL: test_atomic_load_sub_i32_neg_imm:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #1 // =0x1
+; CHECK-NEXT:    adrp x9, var32
+; CHECK-NEXT:    add x9, x9, :lo12:var32
+; CHECK-NEXT:    ldaddal w8, w0, [x9]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i32_neg_imm:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
 ; OUTLINE-ATOMICS-NEXT:    adrp x1, var32
 ; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
-; OUTLINE-ATOMICS-NEXT:    mov w0, #1
+; OUTLINE-ATOMICS-NEXT:    mov w0, #1 // =0x1
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd4_acq_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i32_neg_imm:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    mov w0, #1 // =0x1
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd4_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
   %old = atomicrmw sub ptr @var32, i32 -1 seq_cst
 
-; CHECK-NOT: dmb
-; CHECK: mov w[[IMM:[0-9]+]], #1
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
-; CHECK: ldaddal w[[IMM]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
   ret i32 %old
 }
 
 define dso_local i64 @test_atomic_load_sub_i64_neg_imm() nounwind {
 ; CHECK-LABEL: test_atomic_load_sub_i64_neg_imm:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #1 // =0x1
+; CHECK-NEXT:    adrp x9, var64
+; CHECK-NEXT:    add x9, x9, :lo12:var64
+; CHECK-NEXT:    ldaddal x8, x0, [x9]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i64_neg_imm:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
 ; OUTLINE-ATOMICS-NEXT:    adrp x1, var64
 ; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
-; OUTLINE-ATOMICS-NEXT:    mov w0, #1
+; OUTLINE-ATOMICS-NEXT:    mov w0, #1 // =0x1
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd8_acq_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i64_neg_imm:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    mov w0, #1 // =0x1
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd8_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
   %old = atomicrmw sub ptr @var64, i64 -1 seq_cst
 
-; CHECK-NOT: dmb
-; CHECK: mov w[[IMM:[0-9]+]], #1
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
-; CHECK: ldaddal x[[IMM]], x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
   ret i64 %old
 }
 
 define dso_local i8 @test_atomic_load_sub_i8_neg_arg(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_sub_i8_neg_arg:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var8
+; CHECK-NEXT:    add x8, x8, :lo12:var8
+; CHECK-NEXT:    ldaddalb w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i8_neg_arg:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -1630,20 +2556,31 @@ define dso_local i8 @test_atomic_load_sub_i8_neg_arg(i8 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd1_acq_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i8_neg_arg:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var8
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var8
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd1_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
   %neg = sub i8 0, %offset
   %old = atomicrmw sub ptr @var8, i8 %neg seq_cst
 
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
-; CHECK: ldaddalb w0, w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
   ret i8 %old
 }
 
 define dso_local i16 @test_atomic_load_sub_i16_neg_arg(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_sub_i16_neg_arg:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var16
+; CHECK-NEXT:    add x8, x8, :lo12:var16
+; CHECK-NEXT:    ldaddalh w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i16_neg_arg:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -1652,20 +2589,31 @@ define dso_local i16 @test_atomic_load_sub_i16_neg_arg(i16 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd2_acq_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i16_neg_arg:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var16
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var16
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd2_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
   %neg = sub i16 0, %offset
   %old = atomicrmw sub ptr @var16, i16 %neg seq_cst
 
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
-; CHECK: ldaddalh w0, w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
   ret i16 %old
 }
 
 define dso_local i32 @test_atomic_load_sub_i32_neg_arg(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_sub_i32_neg_arg:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var32
+; CHECK-NEXT:    add x8, x8, :lo12:var32
+; CHECK-NEXT:    ldaddal w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i32_neg_arg:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -1674,20 +2622,31 @@ define dso_local i32 @test_atomic_load_sub_i32_neg_arg(i32 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd4_acq_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i32_neg_arg:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd4_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
   %neg = sub i32 0, %offset
   %old = atomicrmw sub ptr @var32, i32 %neg seq_cst
 
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
-; CHECK: ldaddal w0, w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
   ret i32 %old
 }
 
 define dso_local i64 @test_atomic_load_sub_i64_neg_arg(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_sub_i64_neg_arg:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var64
+; CHECK-NEXT:    add x8, x8, :lo12:var64
+; CHECK-NEXT:    ldaddal x0, x0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i64_neg_arg:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -1696,20 +2655,32 @@ define dso_local i64 @test_atomic_load_sub_i64_neg_arg(i64 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd8_acq_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i64_neg_arg:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd8_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
   %neg = sub i64 0, %offset
   %old = atomicrmw sub ptr @var64, i64 %neg seq_cst
 
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
-; CHECK: ldaddal x0, x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
   ret i64 %old
 }
 
 define dso_local i8 @test_atomic_load_and_i8(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_and_i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mvn w8, w0
+; CHECK-NEXT:    adrp x9, var8
+; CHECK-NEXT:    add x9, x9, :lo12:var8
+; CHECK-NEXT:    ldclralb w8, w0, [x9]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i8:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -1719,19 +2690,31 @@ define dso_local i8 @test_atomic_load_and_i8(i8 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldclr1_acq_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i8:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var8
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var8
+; MSVC-OUTLINE-ATOMICS-NEXT:    mvn w0, w0
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldclr1_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
   %old = atomicrmw and ptr @var8, i8 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]]
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
 
-; CHECK: ldclralb w[[NOT]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
   ret i8 %old
 }
 
 define dso_local i16 @test_atomic_load_and_i16(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_and_i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mvn w8, w0
+; CHECK-NEXT:    adrp x9, var16
+; CHECK-NEXT:    add x9, x9, :lo12:var16
+; CHECK-NEXT:    ldclralh w8, w0, [x9]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i16:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -1741,19 +2724,31 @@ define dso_local i16 @test_atomic_load_and_i16(i16 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldclr2_acq_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i16:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var16
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var16
+; MSVC-OUTLINE-ATOMICS-NEXT:    mvn w0, w0
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldclr2_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
   %old = atomicrmw and ptr @var16, i16 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]]
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
 
-; CHECK: ldclralh w[[NOT]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
   ret i16 %old
 }
 
 define dso_local i32 @test_atomic_load_and_i32(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_and_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mvn w8, w0
+; CHECK-NEXT:    adrp x9, var32
+; CHECK-NEXT:    add x9, x9, :lo12:var32
+; CHECK-NEXT:    ldclral w8, w0, [x9]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i32:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -1763,19 +2758,31 @@ define dso_local i32 @test_atomic_load_and_i32(i32 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldclr4_acq_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i32:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    mvn w0, w0
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldclr4_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
   %old = atomicrmw and ptr @var32, i32 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]]
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
 
-; CHECK: ldclral w[[NOT]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
   ret i32 %old
 }
 
 define dso_local i64 @test_atomic_load_and_i64(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_and_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mvn x8, x0
+; CHECK-NEXT:    adrp x9, var64
+; CHECK-NEXT:    add x9, x9, :lo12:var64
+; CHECK-NEXT:    ldclral x8, x0, [x9]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i64:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -1785,103 +2792,162 @@ define dso_local i64 @test_atomic_load_and_i64(i64 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldclr8_acq_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i64:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    mvn x0, x0
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldclr8_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
   %old = atomicrmw and ptr @var64, i64 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: mvn x[[NOT:[0-9]+]], x[[OLD:[0-9]+]]
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
 
-; CHECK: ldclral x[[NOT]], x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
   ret i64 %old
 }
 
 define dso_local i8 @test_atomic_load_and_i8_inv_imm() nounwind {
 ; CHECK-LABEL: test_atomic_load_and_i8_inv_imm:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #1 // =0x1
+; CHECK-NEXT:    adrp x9, var8
+; CHECK-NEXT:    add x9, x9, :lo12:var8
+; CHECK-NEXT:    ldclralb w8, w0, [x9]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i8_inv_imm:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
 ; OUTLINE-ATOMICS-NEXT:    adrp x1, var8
 ; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var8
-; OUTLINE-ATOMICS-NEXT:    mov w0, #1
+; OUTLINE-ATOMICS-NEXT:    mov w0, #1 // =0x1
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldclr1_acq_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i8_inv_imm:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var8
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var8
+; MSVC-OUTLINE-ATOMICS-NEXT:    mov w0, #1 // =0x1
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldclr1_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
   %old = atomicrmw and ptr @var8, i8 -2 seq_cst
-; CHECK-NOT: dmb
-; CHECK: mov w[[CONST:[0-9]+]], #1
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
-; CHECK: ldclralb w[[CONST]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
   ret i8 %old
 }
 
 define dso_local i16 @test_atomic_load_and_i16_inv_imm() nounwind {
 ; CHECK-LABEL: test_atomic_load_and_i16_inv_imm:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #1 // =0x1
+; CHECK-NEXT:    adrp x9, var16
+; CHECK-NEXT:    add x9, x9, :lo12:var16
+; CHECK-NEXT:    ldclralh w8, w0, [x9]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i16_inv_imm:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
 ; OUTLINE-ATOMICS-NEXT:    adrp x1, var16
 ; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var16
-; OUTLINE-ATOMICS-NEXT:    mov w0, #1
+; OUTLINE-ATOMICS-NEXT:    mov w0, #1 // =0x1
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldclr2_acq_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i16_inv_imm:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var16
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var16
+; MSVC-OUTLINE-ATOMICS-NEXT:    mov w0, #1 // =0x1
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldclr2_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
   %old = atomicrmw and ptr @var16, i16 -2 seq_cst
-; CHECK-NOT: dmb
-; CHECK: mov w[[CONST:[0-9]+]], #1
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
-; CHECK: ldclralh w[[CONST]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
   ret i16 %old
 }
 
 define dso_local i32 @test_atomic_load_and_i32_inv_imm() nounwind {
 ; CHECK-LABEL: test_atomic_load_and_i32_inv_imm:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #1 // =0x1
+; CHECK-NEXT:    adrp x9, var32
+; CHECK-NEXT:    add x9, x9, :lo12:var32
+; CHECK-NEXT:    ldclral w8, w0, [x9]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i32_inv_imm:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
 ; OUTLINE-ATOMICS-NEXT:    adrp x1, var32
 ; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
-; OUTLINE-ATOMICS-NEXT:    mov w0, #1
+; OUTLINE-ATOMICS-NEXT:    mov w0, #1 // =0x1
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldclr4_acq_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i32_inv_imm:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    mov w0, #1 // =0x1
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldclr4_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
   %old = atomicrmw and ptr @var32, i32 -2 seq_cst
-; CHECK-NOT: dmb
-; CHECK: mov w[[CONST:[0-9]+]], #1
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
-; CHECK: ldclral w[[CONST]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
   ret i32 %old
 }
 
 define dso_local i64 @test_atomic_load_and_i64_inv_imm() nounwind {
 ; CHECK-LABEL: test_atomic_load_and_i64_inv_imm:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #1 // =0x1
+; CHECK-NEXT:    adrp x9, var64
+; CHECK-NEXT:    add x9, x9, :lo12:var64
+; CHECK-NEXT:    ldclral x8, x0, [x9]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i64_inv_imm:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
 ; OUTLINE-ATOMICS-NEXT:    adrp x1, var64
 ; OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
-; OUTLINE-ATOMICS-NEXT:    mov w0, #1
+; OUTLINE-ATOMICS-NEXT:    mov w0, #1 // =0x1
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldclr8_acq_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i64_inv_imm:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    mov w0, #1 // =0x1
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldclr8_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
   %old = atomicrmw and ptr @var64, i64 -2 seq_cst
-; CHECK-NOT: dmb
-; CHECK: mov w[[CONST:[0-9]+]], #1
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
-; CHECK: ldclral x[[CONST]], x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
   ret i64 %old
 }
 
 define dso_local i8 @test_atomic_load_and_i8_inv_arg(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_and_i8_inv_arg:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var8
+; CHECK-NEXT:    add x8, x8, :lo12:var8
+; CHECK-NEXT:    ldclralb w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i8_inv_arg:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -1890,18 +2956,29 @@ define dso_local i8 @test_atomic_load_and_i8_inv_arg(i8 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldclr1_acq_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i8_inv_arg:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var8
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var8
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldclr1_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
   %inv = xor i8 %offset, -1
   %old = atomicrmw and ptr @var8, i8 %inv seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
-; CHECK: ldclralb w0, w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
   ret i8 %old
 }
 
 define dso_local i16 @test_atomic_load_and_i16_inv_arg(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_and_i16_inv_arg:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var16
+; CHECK-NEXT:    add x8, x8, :lo12:var16
+; CHECK-NEXT:    ldclralh w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i16_inv_arg:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -1910,18 +2987,29 @@ define dso_local i16 @test_atomic_load_and_i16_inv_arg(i16 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldclr2_acq_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i16_inv_arg:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var16
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var16
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldclr2_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
   %inv = xor i16 %offset, -1
   %old = atomicrmw and ptr @var16, i16 %inv seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
-; CHECK: ldclralh w0, w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
   ret i16 %old
 }
 
 define dso_local i32 @test_atomic_load_and_i32_inv_arg(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_and_i32_inv_arg:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var32
+; CHECK-NEXT:    add x8, x8, :lo12:var32
+; CHECK-NEXT:    ldclral w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i32_inv_arg:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -1930,18 +3018,29 @@ define dso_local i32 @test_atomic_load_and_i32_inv_arg(i32 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldclr4_acq_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i32_inv_arg:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldclr4_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
   %inv = xor i32 %offset, -1
   %old = atomicrmw and ptr @var32, i32 %inv seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
-; CHECK: ldclral w0, w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
   ret i32 %old
 }
 
 define dso_local i64 @test_atomic_load_and_i64_inv_arg(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_and_i64_inv_arg:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var64
+; CHECK-NEXT:    add x8, x8, :lo12:var64
+; CHECK-NEXT:    ldclral x0, x0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i64_inv_arg:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -1950,18 +3049,30 @@ define dso_local i64 @test_atomic_load_and_i64_inv_arg(i64 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldclr8_acq_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i64_inv_arg:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldclr8_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
   %inv = xor i64 %offset, -1
   %old = atomicrmw and ptr @var64, i64 %inv seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
-; CHECK: ldclral x0, x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
   ret i64 %old
 }
 
 define dso_local void @test_atomic_load_and_i32_noret(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_and_i32_noret:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mvn w8, w0
+; CHECK-NEXT:    adrp x9, var32
+; CHECK-NEXT:    add x9, x9, :lo12:var32
+; CHECK-NEXT:    ldclral w8, w8, [x9]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i32_noret:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -1971,19 +3082,31 @@ define dso_local void @test_atomic_load_and_i32_noret(i32 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldclr4_acq_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i32_noret:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    mvn w0, w0
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldclr4_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
   atomicrmw and ptr @var32, i32 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]]
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
 
-; CHECK: ldclral w[[NOT]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
   ret void
 }
 
 define dso_local void @test_atomic_load_and_i64_noret(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_and_i64_noret:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mvn x8, x0
+; CHECK-NEXT:    adrp x9, var64
+; CHECK-NEXT:    add x9, x9, :lo12:var64
+; CHECK-NEXT:    ldclral x8, x8, [x9]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i64_noret:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -1993,19 +3116,30 @@ define dso_local void @test_atomic_load_and_i64_noret(i64 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldclr8_acq_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i64_noret:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    mvn x0, x0
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldclr8_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
   atomicrmw and ptr @var64, i64 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: mvn x[[NOT:[0-9]+]], x[[OLD:[0-9]+]]
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
 
-; CHECK: ldclral x[[NOT]], x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
   ret void
 }
 
 define dso_local i8 @test_atomic_load_add_i8_acq_rel(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_add_i8_acq_rel:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var8
+; CHECK-NEXT:    add x8, x8, :lo12:var8
+; CHECK-NEXT:    ldaddalb w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i8_acq_rel:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -2014,19 +3148,29 @@ define dso_local i8 @test_atomic_load_add_i8_acq_rel(i8 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd1_acq_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i8_acq_rel:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var8
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var8
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd1_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw add ptr @var8, i8 %offset acq_rel
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
 
-; CHECK: ldaddalb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i8 %old
 }
 
 define dso_local i16 @test_atomic_load_add_i16_acq_rel(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_add_i16_acq_rel:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var16
+; CHECK-NEXT:    add x8, x8, :lo12:var16
+; CHECK-NEXT:    ldaddalh w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i16_acq_rel:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -2035,19 +3179,29 @@ define dso_local i16 @test_atomic_load_add_i16_acq_rel(i16 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd2_acq_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i16_acq_rel:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var16
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var16
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd2_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw add ptr @var16, i16 %offset acq_rel
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
 
-; CHECK: ldaddalh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i16 %old
 }
 
 define dso_local i32 @test_atomic_load_add_i32_acq_rel(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_add_i32_acq_rel:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var32
+; CHECK-NEXT:    add x8, x8, :lo12:var32
+; CHECK-NEXT:    ldaddal w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i32_acq_rel:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -2056,19 +3210,29 @@ define dso_local i32 @test_atomic_load_add_i32_acq_rel(i32 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd4_acq_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i32_acq_rel:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd4_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw add ptr @var32, i32 %offset acq_rel
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
 
-; CHECK: ldaddal w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i32 %old
 }
 
 define dso_local i64 @test_atomic_load_add_i64_acq_rel(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_add_i64_acq_rel:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var64
+; CHECK-NEXT:    add x8, x8, :lo12:var64
+; CHECK-NEXT:    ldaddal x0, x0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i64_acq_rel:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -2077,19 +3241,29 @@ define dso_local i64 @test_atomic_load_add_i64_acq_rel(i64 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd8_acq_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i64_acq_rel:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd8_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw add ptr @var64, i64 %offset acq_rel
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
 
-; CHECK: ldaddal x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i64 %old
 }
 
 define dso_local void @test_atomic_load_add_i32_noret_acq_rel(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_add_i32_noret_acq_rel:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var32
+; CHECK-NEXT:    add x8, x8, :lo12:var32
+; CHECK-NEXT:    ldaddal w0, w8, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i32_noret_acq_rel:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -2098,18 +3272,28 @@ define dso_local void @test_atomic_load_add_i32_noret_acq_rel(i32 %offset) nounw
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd4_acq_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i32_noret_acq_rel:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd4_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw add ptr @var32, i32 %offset acq_rel
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
 
-; CHECK: ldaddal w0, w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
   ret void
 }
 
 define dso_local void @test_atomic_load_add_i64_noret_acq_rel(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_add_i64_noret_acq_rel:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var64
+; CHECK-NEXT:    add x8, x8, :lo12:var64
+; CHECK-NEXT:    ldaddal x0, x8, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i64_noret_acq_rel:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -2118,18 +3302,28 @@ define dso_local void @test_atomic_load_add_i64_noret_acq_rel(i64 %offset) nounw
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd8_acq_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i64_noret_acq_rel:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd8_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw add ptr @var64, i64 %offset acq_rel
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
 
-; CHECK: ldaddal x0, x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
   ret void
 }
 
 define dso_local i8 @test_atomic_load_add_i8_acquire(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_add_i8_acquire:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var8
+; CHECK-NEXT:    add x8, x8, :lo12:var8
+; CHECK-NEXT:    ldaddab w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i8_acquire:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -2138,19 +3332,29 @@ define dso_local i8 @test_atomic_load_add_i8_acquire(i8 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd1_acq
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i8_acquire:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var8
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var8
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd1_acq
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw add ptr @var8, i8 %offset acquire
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
 
-; CHECK: ldaddab w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i8 %old
 }
 
 define dso_local i16 @test_atomic_load_add_i16_acquire(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_add_i16_acquire:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var16
+; CHECK-NEXT:    add x8, x8, :lo12:var16
+; CHECK-NEXT:    ldaddah w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i16_acquire:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -2159,19 +3363,29 @@ define dso_local i16 @test_atomic_load_add_i16_acquire(i16 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd2_acq
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i16_acquire:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var16
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var16
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd2_acq
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw add ptr @var16, i16 %offset acquire
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
 
-; CHECK: ldaddah w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i16 %old
 }
 
 define dso_local i32 @test_atomic_load_add_i32_acquire(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_add_i32_acquire:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var32
+; CHECK-NEXT:    add x8, x8, :lo12:var32
+; CHECK-NEXT:    ldadda w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i32_acquire:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -2180,19 +3394,29 @@ define dso_local i32 @test_atomic_load_add_i32_acquire(i32 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd4_acq
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i32_acquire:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd4_acq
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw add ptr @var32, i32 %offset acquire
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
 
-; CHECK: ldadda w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i32 %old
 }
 
 define dso_local i64 @test_atomic_load_add_i64_acquire(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_add_i64_acquire:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var64
+; CHECK-NEXT:    add x8, x8, :lo12:var64
+; CHECK-NEXT:    ldadda x0, x0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i64_acquire:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -2201,19 +3425,29 @@ define dso_local i64 @test_atomic_load_add_i64_acquire(i64 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd8_acq
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i64_acquire:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd8_acq
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw add ptr @var64, i64 %offset acquire
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
 
-; CHECK: ldadda x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i64 %old
 }
 
 define dso_local void @test_atomic_load_add_i32_noret_acquire(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_add_i32_noret_acquire:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var32
+; CHECK-NEXT:    add x8, x8, :lo12:var32
+; CHECK-NEXT:    ldadda w0, w8, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i32_noret_acquire:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -2222,18 +3456,28 @@ define dso_local void @test_atomic_load_add_i32_noret_acquire(i32 %offset) nounw
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd4_acq
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i32_noret_acquire:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd4_acq
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw add ptr @var32, i32 %offset acquire
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
 
-; CHECK: ldadda w0, w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
   ret void
 }
 
 define dso_local void @test_atomic_load_add_i64_noret_acquire(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_add_i64_noret_acquire:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var64
+; CHECK-NEXT:    add x8, x8, :lo12:var64
+; CHECK-NEXT:    ldadda x0, x8, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i64_noret_acquire:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -2242,18 +3486,28 @@ define dso_local void @test_atomic_load_add_i64_noret_acquire(i64 %offset) nounw
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd8_acq
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i64_noret_acquire:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd8_acq
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw add ptr @var64, i64 %offset acquire
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
 
-; CHECK: ldadda x0, x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
   ret void
 }
 
 define dso_local i8 @test_atomic_load_add_i8_monotonic(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_add_i8_monotonic:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var8
+; CHECK-NEXT:    add x8, x8, :lo12:var8
+; CHECK-NEXT:    ldaddb w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i8_monotonic:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -2262,19 +3516,29 @@ define dso_local i8 @test_atomic_load_add_i8_monotonic(i8 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd1_relax
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i8_monotonic:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var8
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var8
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd1_relax
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw add ptr @var8, i8 %offset monotonic
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
 
-; CHECK: ldaddb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i8 %old
 }
 
 define dso_local i16 @test_atomic_load_add_i16_monotonic(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_add_i16_monotonic:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var16
+; CHECK-NEXT:    add x8, x8, :lo12:var16
+; CHECK-NEXT:    ldaddh w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i16_monotonic:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -2283,19 +3547,29 @@ define dso_local i16 @test_atomic_load_add_i16_monotonic(i16 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd2_relax
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i16_monotonic:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var16
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var16
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd2_relax
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw add ptr @var16, i16 %offset monotonic
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
 
-; CHECK: ldaddh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i16 %old
 }
 
 define dso_local i32 @test_atomic_load_add_i32_monotonic(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_add_i32_monotonic:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var32
+; CHECK-NEXT:    add x8, x8, :lo12:var32
+; CHECK-NEXT:    ldadd w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i32_monotonic:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -2304,19 +3578,29 @@ define dso_local i32 @test_atomic_load_add_i32_monotonic(i32 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd4_relax
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i32_monotonic:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd4_relax
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw add ptr @var32, i32 %offset monotonic
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
 
-; CHECK: ldadd w[[OLD:[0-9]+]], w[[NEW:[0-9,a-z]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i32 %old
 }
 
 define dso_local i64 @test_atomic_load_add_i64_monotonic(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_add_i64_monotonic:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var64
+; CHECK-NEXT:    add x8, x8, :lo12:var64
+; CHECK-NEXT:    ldadd x0, x0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i64_monotonic:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -2325,19 +3609,29 @@ define dso_local i64 @test_atomic_load_add_i64_monotonic(i64 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd8_relax
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i64_monotonic:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd8_relax
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw add ptr @var64, i64 %offset monotonic
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
 
-; CHECK: ldadd x[[OLD:[0-9]+]], x[[NEW:[0-9,a-z]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i64 %old
 }
 
 define dso_local void @test_atomic_load_add_i32_noret_monotonic(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_add_i32_noret_monotonic:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var32
+; CHECK-NEXT:    add x8, x8, :lo12:var32
+; CHECK-NEXT:    ldadd w0, w8, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i32_noret_monotonic:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -2346,18 +3640,28 @@ define dso_local void @test_atomic_load_add_i32_noret_monotonic(i32 %offset) nou
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd4_relax
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i32_noret_monotonic:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd4_relax
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw add ptr @var32, i32 %offset monotonic
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
 
-; CHECK: ldadd w{{[0-9]+}}, w{{[1-9][0-9]*}}, [x[[ADDR]]]
-; CHECK-NOT: dmb
   ret void
 }
 
 define dso_local void @test_atomic_load_add_i64_noret_monotonic(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_add_i64_noret_monotonic:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var64
+; CHECK-NEXT:    add x8, x8, :lo12:var64
+; CHECK-NEXT:    ldadd x0, x8, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i64_noret_monotonic:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -2366,18 +3670,28 @@ define dso_local void @test_atomic_load_add_i64_noret_monotonic(i64 %offset) nou
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd8_relax
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i64_noret_monotonic:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd8_relax
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw add ptr @var64, i64 %offset monotonic
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
 
-; CHECK: ldadd x{{[0-9]}}, x{{[1-9][0-9]*}}, [x[[ADDR]]]
-; CHECK-NOT: dmb
   ret void
 }
 
 define dso_local i8 @test_atomic_load_add_i8_release(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_add_i8_release:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var8
+; CHECK-NEXT:    add x8, x8, :lo12:var8
+; CHECK-NEXT:    ldaddlb w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i8_release:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -2386,19 +3700,29 @@ define dso_local i8 @test_atomic_load_add_i8_release(i8 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd1_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i8_release:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var8
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var8
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd1_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw add ptr @var8, i8 %offset release
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
 
-; CHECK: ldaddlb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i8 %old
 }
 
 define dso_local i16 @test_atomic_load_add_i16_release(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_add_i16_release:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var16
+; CHECK-NEXT:    add x8, x8, :lo12:var16
+; CHECK-NEXT:    ldaddlh w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i16_release:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -2407,19 +3731,29 @@ define dso_local i16 @test_atomic_load_add_i16_release(i16 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd2_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i16_release:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var16
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var16
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd2_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw add ptr @var16, i16 %offset release
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
 
-; CHECK: ldaddlh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i16 %old
 }
 
 define dso_local i32 @test_atomic_load_add_i32_release(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_add_i32_release:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var32
+; CHECK-NEXT:    add x8, x8, :lo12:var32
+; CHECK-NEXT:    ldaddl w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i32_release:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -2428,19 +3762,29 @@ define dso_local i32 @test_atomic_load_add_i32_release(i32 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd4_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i32_release:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd4_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw add ptr @var32, i32 %offset release
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
 
-; CHECK: ldaddl w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i32 %old
 }
 
 define dso_local i64 @test_atomic_load_add_i64_release(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_add_i64_release:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var64
+; CHECK-NEXT:    add x8, x8, :lo12:var64
+; CHECK-NEXT:    ldaddl x0, x0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i64_release:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -2449,19 +3793,29 @@ define dso_local i64 @test_atomic_load_add_i64_release(i64 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd8_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i64_release:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd8_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw add ptr @var64, i64 %offset release
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
 
-; CHECK: ldaddl x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i64 %old
 }
 
 define dso_local void @test_atomic_load_add_i32_noret_release(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_add_i32_noret_release:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var32
+; CHECK-NEXT:    add x8, x8, :lo12:var32
+; CHECK-NEXT:    ldaddl w0, w8, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i32_noret_release:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -2470,18 +3824,28 @@ define dso_local void @test_atomic_load_add_i32_noret_release(i32 %offset) nounw
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd4_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i32_noret_release:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd4_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw add ptr @var32, i32 %offset release
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
 
-; CHECK: ldaddl w{{[0-9]+}}, w{{[1-9][0-9]*}}, [x[[ADDR]]]
-; CHECK-NOT: dmb
   ret void
 }
 
 define dso_local void @test_atomic_load_add_i64_noret_release(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_add_i64_noret_release:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var64
+; CHECK-NEXT:    add x8, x8, :lo12:var64
+; CHECK-NEXT:    ldaddl x0, x8, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i64_noret_release:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -2490,18 +3854,28 @@ define dso_local void @test_atomic_load_add_i64_noret_release(i64 %offset) nounw
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd8_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i64_noret_release:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd8_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw add ptr @var64, i64 %offset release
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
 
-; CHECK: ldaddl x{{[0-9]+}}, x{{[1-9][0-9]*}}, [x[[ADDR]]]
-; CHECK-NOT: dmb
   ret void
 }
 
 define dso_local i8 @test_atomic_load_add_i8_seq_cst(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_add_i8_seq_cst:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var8
+; CHECK-NEXT:    add x8, x8, :lo12:var8
+; CHECK-NEXT:    ldaddalb w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i8_seq_cst:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -2510,19 +3884,30 @@ define dso_local i8 @test_atomic_load_add_i8_seq_cst(i8 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd1_acq_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i8_seq_cst:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var8
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var8
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd1_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw add ptr @var8, i8 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
 
-; CHECK: ldaddalb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i8 %old
 }
 
 define dso_local i16 @test_atomic_load_add_i16_seq_cst(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_add_i16_seq_cst:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var16
+; CHECK-NEXT:    add x8, x8, :lo12:var16
+; CHECK-NEXT:    ldaddalh w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i16_seq_cst:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -2531,19 +3916,30 @@ define dso_local i16 @test_atomic_load_add_i16_seq_cst(i16 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd2_acq_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i16_seq_cst:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var16
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var16
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd2_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw add ptr @var16, i16 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
 
-; CHECK: ldaddalh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i16 %old
 }
 
 define dso_local i32 @test_atomic_load_add_i32_seq_cst(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_add_i32_seq_cst:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var32
+; CHECK-NEXT:    add x8, x8, :lo12:var32
+; CHECK-NEXT:    ldaddal w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i32_seq_cst:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -2552,19 +3948,30 @@ define dso_local i32 @test_atomic_load_add_i32_seq_cst(i32 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd4_acq_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i32_seq_cst:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd4_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw add ptr @var32, i32 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
 
-; CHECK: ldaddal w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i32 %old
 }
 
 define dso_local i64 @test_atomic_load_add_i64_seq_cst(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_add_i64_seq_cst:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var64
+; CHECK-NEXT:    add x8, x8, :lo12:var64
+; CHECK-NEXT:    ldaddal x0, x0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i64_seq_cst:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -2573,19 +3980,30 @@ define dso_local i64 @test_atomic_load_add_i64_seq_cst(i64 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd8_acq_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i64_seq_cst:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd8_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw add ptr @var64, i64 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
 
-; CHECK: ldaddal x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i64 %old
 }
 
 define dso_local void @test_atomic_load_add_i32_noret_seq_cst(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_add_i32_noret_seq_cst:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var32
+; CHECK-NEXT:    add x8, x8, :lo12:var32
+; CHECK-NEXT:    ldaddal w0, w8, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i32_noret_seq_cst:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -2594,18 +4012,29 @@ define dso_local void @test_atomic_load_add_i32_noret_seq_cst(i32 %offset) nounw
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd4_acq_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i32_noret_seq_cst:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd4_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw add ptr @var32, i32 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
 
-; CHECK: ldaddal w0, w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
   ret void
 }
 
 define dso_local void @test_atomic_load_add_i64_noret_seq_cst(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_add_i64_noret_seq_cst:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var64
+; CHECK-NEXT:    add x8, x8, :lo12:var64
+; CHECK-NEXT:    ldaddal x0, x8, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i64_noret_seq_cst:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -2614,18 +4043,30 @@ define dso_local void @test_atomic_load_add_i64_noret_seq_cst(i64 %offset) nounw
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd8_acq_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i64_noret_seq_cst:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd8_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw add ptr @var64, i64 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
 
-; CHECK: ldaddal x0, x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
   ret void
 }
 
 define dso_local i8 @test_atomic_load_and_i8_acq_rel(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_and_i8_acq_rel:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mvn w8, w0
+; CHECK-NEXT:    adrp x9, var8
+; CHECK-NEXT:    add x9, x9, :lo12:var8
+; CHECK-NEXT:    ldclralb w8, w0, [x9]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i8_acq_rel:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -2635,19 +4076,30 @@ define dso_local i8 @test_atomic_load_and_i8_acq_rel(i8 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldclr1_acq_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i8_acq_rel:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var8
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var8
+; MSVC-OUTLINE-ATOMICS-NEXT:    mvn w0, w0
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldclr1_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
   %old = atomicrmw and ptr @var8, i8 %offset acq_rel
-; CHECK-NOT: dmb
-; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]]
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
 
-; CHECK: ldclralb w[[NOT]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
   ret i8 %old
 }
 
 define dso_local i16 @test_atomic_load_and_i16_acq_rel(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_and_i16_acq_rel:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mvn w8, w0
+; CHECK-NEXT:    adrp x9, var16
+; CHECK-NEXT:    add x9, x9, :lo12:var16
+; CHECK-NEXT:    ldclralh w8, w0, [x9]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i16_acq_rel:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -2657,19 +4109,30 @@ define dso_local i16 @test_atomic_load_and_i16_acq_rel(i16 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldclr2_acq_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i16_acq_rel:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var16
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var16
+; MSVC-OUTLINE-ATOMICS-NEXT:    mvn w0, w0
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldclr2_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
   %old = atomicrmw and ptr @var16, i16 %offset acq_rel
-; CHECK-NOT: dmb
-; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]]
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
 
-; CHECK: ldclralh w[[NOT]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
   ret i16 %old
 }
 
 define dso_local i32 @test_atomic_load_and_i32_acq_rel(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_and_i32_acq_rel:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mvn w8, w0
+; CHECK-NEXT:    adrp x9, var32
+; CHECK-NEXT:    add x9, x9, :lo12:var32
+; CHECK-NEXT:    ldclral w8, w0, [x9]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i32_acq_rel:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -2679,19 +4142,30 @@ define dso_local i32 @test_atomic_load_and_i32_acq_rel(i32 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldclr4_acq_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i32_acq_rel:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    mvn w0, w0
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldclr4_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
   %old = atomicrmw and ptr @var32, i32 %offset acq_rel
-; CHECK-NOT: dmb
-; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]]
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
 
-; CHECK: ldclral w[[NOT]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
   ret i32 %old
 }
 
 define dso_local i64 @test_atomic_load_and_i64_acq_rel(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_and_i64_acq_rel:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mvn x8, x0
+; CHECK-NEXT:    adrp x9, var64
+; CHECK-NEXT:    add x9, x9, :lo12:var64
+; CHECK-NEXT:    ldclral x8, x0, [x9]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i64_acq_rel:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -2701,19 +4175,30 @@ define dso_local i64 @test_atomic_load_and_i64_acq_rel(i64 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldclr8_acq_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i64_acq_rel:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    mvn x0, x0
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldclr8_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
   %old = atomicrmw and ptr @var64, i64 %offset acq_rel
-; CHECK-NOT: dmb
-; CHECK: mvn x[[NOT:[0-9]+]], x[[OLD:[0-9]+]]
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
 
-; CHECK: ldclral x[[NOT]], x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
   ret i64 %old
 }
 
 define dso_local void @test_atomic_load_and_i32_noret_acq_rel(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_and_i32_noret_acq_rel:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mvn w8, w0
+; CHECK-NEXT:    adrp x9, var32
+; CHECK-NEXT:    add x9, x9, :lo12:var32
+; CHECK-NEXT:    ldclral w8, w8, [x9]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i32_noret_acq_rel:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -2723,19 +4208,30 @@ define dso_local void @test_atomic_load_and_i32_noret_acq_rel(i32 %offset) nounw
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldclr4_acq_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i32_noret_acq_rel:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    mvn w0, w0
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldclr4_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
   atomicrmw and ptr @var32, i32 %offset acq_rel
-; CHECK-NOT: dmb
-; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]]
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
 
-; CHECK: ldclral w[[NOT]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
   ret void
 }
 
 define dso_local void @test_atomic_load_and_i64_noret_acq_rel(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_and_i64_noret_acq_rel:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mvn x8, x0
+; CHECK-NEXT:    adrp x9, var64
+; CHECK-NEXT:    add x9, x9, :lo12:var64
+; CHECK-NEXT:    ldclral x8, x8, [x9]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i64_noret_acq_rel:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -2745,19 +4241,30 @@ define dso_local void @test_atomic_load_and_i64_noret_acq_rel(i64 %offset) nounw
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldclr8_acq_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i64_noret_acq_rel:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    mvn x0, x0
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldclr8_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
   atomicrmw and ptr @var64, i64 %offset acq_rel
-; CHECK-NOT: dmb
-; CHECK: mvn x[[NOT:[0-9]+]], x[[OLD:[0-9]+]]
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
 
-; CHECK: ldclral x[[NOT]], x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
   ret void
 }
 
 define dso_local i8 @test_atomic_load_and_i8_acquire(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_and_i8_acquire:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mvn w8, w0
+; CHECK-NEXT:    adrp x9, var8
+; CHECK-NEXT:    add x9, x9, :lo12:var8
+; CHECK-NEXT:    ldclrab w8, w0, [x9]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i8_acquire:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -2767,19 +4274,30 @@ define dso_local i8 @test_atomic_load_and_i8_acquire(i8 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldclr1_acq
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i8_acquire:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var8
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var8
+; MSVC-OUTLINE-ATOMICS-NEXT:    mvn w0, w0
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldclr1_acq
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
   %old = atomicrmw and ptr @var8, i8 %offset acquire
-; CHECK-NOT: dmb
-; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]]
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
 
-; CHECK: ldclrab w[[NOT]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
   ret i8 %old
 }
 
 define dso_local i16 @test_atomic_load_and_i16_acquire(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_and_i16_acquire:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mvn w8, w0
+; CHECK-NEXT:    adrp x9, var16
+; CHECK-NEXT:    add x9, x9, :lo12:var16
+; CHECK-NEXT:    ldclrah w8, w0, [x9]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i16_acquire:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -2789,19 +4307,30 @@ define dso_local i16 @test_atomic_load_and_i16_acquire(i16 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldclr2_acq
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i16_acquire:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var16
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var16
+; MSVC-OUTLINE-ATOMICS-NEXT:    mvn w0, w0
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldclr2_acq
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
   %old = atomicrmw and ptr @var16, i16 %offset acquire
-; CHECK-NOT: dmb
-; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]]
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
 
-; CHECK: ldclrah w[[NOT]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
   ret i16 %old
 }
 
 define dso_local i32 @test_atomic_load_and_i32_acquire(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_and_i32_acquire:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mvn w8, w0
+; CHECK-NEXT:    adrp x9, var32
+; CHECK-NEXT:    add x9, x9, :lo12:var32
+; CHECK-NEXT:    ldclra w8, w0, [x9]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i32_acquire:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -2811,19 +4340,30 @@ define dso_local i32 @test_atomic_load_and_i32_acquire(i32 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldclr4_acq
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i32_acquire:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    mvn w0, w0
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldclr4_acq
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
   %old = atomicrmw and ptr @var32, i32 %offset acquire
-; CHECK-NOT: dmb
-; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]]
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
 
-; CHECK: ldclra w[[NOT]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
   ret i32 %old
 }
 
 define dso_local i64 @test_atomic_load_and_i64_acquire(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_and_i64_acquire:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mvn x8, x0
+; CHECK-NEXT:    adrp x9, var64
+; CHECK-NEXT:    add x9, x9, :lo12:var64
+; CHECK-NEXT:    ldclra x8, x0, [x9]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i64_acquire:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -2833,19 +4373,30 @@ define dso_local i64 @test_atomic_load_and_i64_acquire(i64 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldclr8_acq
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i64_acquire:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    mvn x0, x0
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldclr8_acq
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
   %old = atomicrmw and ptr @var64, i64 %offset acquire
-; CHECK-NOT: dmb
-; CHECK: mvn x[[NOT:[0-9]+]], x[[OLD:[0-9]+]]
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
 
-; CHECK: ldclra x[[NOT]], x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
   ret i64 %old
 }
 
 define dso_local void @test_atomic_load_and_i32_noret_acquire(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_and_i32_noret_acquire:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mvn w8, w0
+; CHECK-NEXT:    adrp x9, var32
+; CHECK-NEXT:    add x9, x9, :lo12:var32
+; CHECK-NEXT:    ldclra w8, w8, [x9]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i32_noret_acquire:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -2855,19 +4406,30 @@ define dso_local void @test_atomic_load_and_i32_noret_acquire(i32 %offset) nounw
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldclr4_acq
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i32_noret_acquire:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    mvn w0, w0
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldclr4_acq
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
   atomicrmw and ptr @var32, i32 %offset acquire
-; CHECK-NOT: dmb
-; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]]
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
 
-; CHECK: ldclra w[[NOT]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
   ret void
 }
 
 define dso_local void @test_atomic_load_and_i64_noret_acquire(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_and_i64_noret_acquire:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mvn x8, x0
+; CHECK-NEXT:    adrp x9, var64
+; CHECK-NEXT:    add x9, x9, :lo12:var64
+; CHECK-NEXT:    ldclra x8, x8, [x9]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i64_noret_acquire:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -2877,19 +4439,30 @@ define dso_local void @test_atomic_load_and_i64_noret_acquire(i64 %offset) nounw
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldclr8_acq
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i64_noret_acquire:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    mvn x0, x0
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldclr8_acq
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
   atomicrmw and ptr @var64, i64 %offset acquire
-; CHECK-NOT: dmb
-; CHECK: mvn x[[NOT:[0-9]+]], x[[OLD:[0-9]+]]
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
 
-; CHECK: ldclra x[[NOT]], x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
   ret void
 }
 
 define dso_local i8 @test_atomic_load_and_i8_monotonic(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_and_i8_monotonic:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mvn w8, w0
+; CHECK-NEXT:    adrp x9, var8
+; CHECK-NEXT:    add x9, x9, :lo12:var8
+; CHECK-NEXT:    ldclrb w8, w0, [x9]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i8_monotonic:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -2899,19 +4472,30 @@ define dso_local i8 @test_atomic_load_and_i8_monotonic(i8 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldclr1_relax
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i8_monotonic:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var8
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var8
+; MSVC-OUTLINE-ATOMICS-NEXT:    mvn w0, w0
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldclr1_relax
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
   %old = atomicrmw and ptr @var8, i8 %offset monotonic
-; CHECK-NOT: dmb
-; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]]
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
 
-; CHECK: ldclrb w[[NOT]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
   ret i8 %old
 }
 
 define dso_local i16 @test_atomic_load_and_i16_monotonic(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_and_i16_monotonic:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mvn w8, w0
+; CHECK-NEXT:    adrp x9, var16
+; CHECK-NEXT:    add x9, x9, :lo12:var16
+; CHECK-NEXT:    ldclrh w8, w0, [x9]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i16_monotonic:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -2921,19 +4505,30 @@ define dso_local i16 @test_atomic_load_and_i16_monotonic(i16 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldclr2_relax
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i16_monotonic:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var16
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var16
+; MSVC-OUTLINE-ATOMICS-NEXT:    mvn w0, w0
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldclr2_relax
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
   %old = atomicrmw and ptr @var16, i16 %offset monotonic
-; CHECK-NOT: dmb
-; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]]
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
 
-; CHECK: ldclrh w[[NOT]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
   ret i16 %old
 }
 
 define dso_local i32 @test_atomic_load_and_i32_monotonic(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_and_i32_monotonic:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mvn w8, w0
+; CHECK-NEXT:    adrp x9, var32
+; CHECK-NEXT:    add x9, x9, :lo12:var32
+; CHECK-NEXT:    ldclr w8, w0, [x9]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i32_monotonic:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -2943,19 +4538,30 @@ define dso_local i32 @test_atomic_load_and_i32_monotonic(i32 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldclr4_relax
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i32_monotonic:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    mvn w0, w0
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldclr4_relax
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
   %old = atomicrmw and ptr @var32, i32 %offset monotonic
-; CHECK-NOT: dmb
-; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]]
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
 
-; CHECK: ldclr w[[NOT]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
   ret i32 %old
 }
 
 define dso_local i64 @test_atomic_load_and_i64_monotonic(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_and_i64_monotonic:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mvn x8, x0
+; CHECK-NEXT:    adrp x9, var64
+; CHECK-NEXT:    add x9, x9, :lo12:var64
+; CHECK-NEXT:    ldclr x8, x0, [x9]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i64_monotonic:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -2965,19 +4571,30 @@ define dso_local i64 @test_atomic_load_and_i64_monotonic(i64 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldclr8_relax
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i64_monotonic:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    mvn x0, x0
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldclr8_relax
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
   %old = atomicrmw and ptr @var64, i64 %offset monotonic
-; CHECK-NOT: dmb
-; CHECK: mvn x[[NOT:[0-9]+]], x[[OLD:[0-9]+]]
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
 
-; CHECK: ldclr x[[NOT]], x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
   ret i64 %old
 }
 
 define dso_local void @test_atomic_load_and_i32_noret_monotonic(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_and_i32_noret_monotonic:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mvn w8, w0
+; CHECK-NEXT:    adrp x9, var32
+; CHECK-NEXT:    add x9, x9, :lo12:var32
+; CHECK-NEXT:    ldclr w8, w8, [x9]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i32_noret_monotonic:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -2987,19 +4604,30 @@ define dso_local void @test_atomic_load_and_i32_noret_monotonic(i32 %offset) nou
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldclr4_relax
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i32_noret_monotonic:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    mvn w0, w0
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldclr4_relax
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
   atomicrmw and ptr @var32, i32 %offset monotonic
-; CHECK-NOT: dmb
-; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]]
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
 
-; CHECK: ldclr w{{[0-9]+}}, w[[NEW:[1-9][0-9]*]], [x[[ADDR]]]
-; CHECK-NOT: dmb
   ret void
 }
 
 define dso_local void @test_atomic_load_and_i64_noret_monotonic(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_and_i64_noret_monotonic:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mvn x8, x0
+; CHECK-NEXT:    adrp x9, var64
+; CHECK-NEXT:    add x9, x9, :lo12:var64
+; CHECK-NEXT:    ldclr x8, x8, [x9]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i64_noret_monotonic:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -3009,19 +4637,30 @@ define dso_local void @test_atomic_load_and_i64_noret_monotonic(i64 %offset) nou
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldclr8_relax
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i64_noret_monotonic:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    mvn x0, x0
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldclr8_relax
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
   atomicrmw and ptr @var64, i64 %offset monotonic
-; CHECK-NOT: dmb
-; CHECK: mvn x[[NOT:[0-9]+]], x[[OLD:[0-9]+]]
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
 
-; CHECK: ldclr x{{[0-9]+}}, x[[NEW:[1-9][0-9]*]], [x[[ADDR]]]
-; CHECK-NOT: dmb
   ret void
 }
 
 define dso_local i8 @test_atomic_load_and_i8_release(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_and_i8_release:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mvn w8, w0
+; CHECK-NEXT:    adrp x9, var8
+; CHECK-NEXT:    add x9, x9, :lo12:var8
+; CHECK-NEXT:    ldclrlb w8, w0, [x9]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i8_release:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -3031,19 +4670,30 @@ define dso_local i8 @test_atomic_load_and_i8_release(i8 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldclr1_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i8_release:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var8
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var8
+; MSVC-OUTLINE-ATOMICS-NEXT:    mvn w0, w0
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldclr1_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
   %old = atomicrmw and ptr @var8, i8 %offset release
-; CHECK-NOT: dmb
-; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]]
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
 
-; CHECK: ldclrlb w[[NOT]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
   ret i8 %old
 }
 
 define dso_local i16 @test_atomic_load_and_i16_release(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_and_i16_release:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mvn w8, w0
+; CHECK-NEXT:    adrp x9, var16
+; CHECK-NEXT:    add x9, x9, :lo12:var16
+; CHECK-NEXT:    ldclrlh w8, w0, [x9]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i16_release:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -3053,19 +4703,30 @@ define dso_local i16 @test_atomic_load_and_i16_release(i16 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldclr2_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i16_release:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var16
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var16
+; MSVC-OUTLINE-ATOMICS-NEXT:    mvn w0, w0
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldclr2_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
   %old = atomicrmw and ptr @var16, i16 %offset release
-; CHECK-NOT: dmb
-; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]]
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
 
-; CHECK: ldclrlh w[[NOT]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
   ret i16 %old
 }
 
 define dso_local i32 @test_atomic_load_and_i32_release(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_and_i32_release:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mvn w8, w0
+; CHECK-NEXT:    adrp x9, var32
+; CHECK-NEXT:    add x9, x9, :lo12:var32
+; CHECK-NEXT:    ldclrl w8, w0, [x9]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i32_release:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -3075,19 +4736,30 @@ define dso_local i32 @test_atomic_load_and_i32_release(i32 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldclr4_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i32_release:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    mvn w0, w0
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldclr4_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
   %old = atomicrmw and ptr @var32, i32 %offset release
-; CHECK-NOT: dmb
-; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]]
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
 
-; CHECK: ldclrl w[[NOT]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
   ret i32 %old
 }
 
 define dso_local i64 @test_atomic_load_and_i64_release(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_and_i64_release:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mvn x8, x0
+; CHECK-NEXT:    adrp x9, var64
+; CHECK-NEXT:    add x9, x9, :lo12:var64
+; CHECK-NEXT:    ldclrl x8, x0, [x9]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i64_release:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -3097,19 +4769,30 @@ define dso_local i64 @test_atomic_load_and_i64_release(i64 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldclr8_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i64_release:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    mvn x0, x0
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldclr8_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
   %old = atomicrmw and ptr @var64, i64 %offset release
-; CHECK-NOT: dmb
-; CHECK: mvn x[[NOT:[0-9]+]], x[[OLD:[0-9]+]]
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
 
-; CHECK: ldclrl x[[NOT]], x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
   ret i64 %old
 }
 
 define dso_local void @test_atomic_load_and_i32_noret_release(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_and_i32_noret_release:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mvn w8, w0
+; CHECK-NEXT:    adrp x9, var32
+; CHECK-NEXT:    add x9, x9, :lo12:var32
+; CHECK-NEXT:    ldclrl w8, w8, [x9]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i32_noret_release:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -3119,19 +4802,30 @@ define dso_local void @test_atomic_load_and_i32_noret_release(i32 %offset) nounw
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldclr4_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i32_noret_release:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    mvn w0, w0
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldclr4_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
   atomicrmw and ptr @var32, i32 %offset release
-; CHECK-NOT: dmb
-; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]]
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
 
-; CHECK: ldclrl w{{[0-9]*}}, w[[NEW:[1-9][0-9]*]], [x[[ADDR]]]
-; CHECK-NOT: dmb
   ret void
 }
 
 define dso_local void @test_atomic_load_and_i64_noret_release(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_and_i64_noret_release:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mvn x8, x0
+; CHECK-NEXT:    adrp x9, var64
+; CHECK-NEXT:    add x9, x9, :lo12:var64
+; CHECK-NEXT:    ldclrl x8, x8, [x9]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i64_noret_release:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -3141,19 +4835,30 @@ define dso_local void @test_atomic_load_and_i64_noret_release(i64 %offset) nounw
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldclr8_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i64_noret_release:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    mvn x0, x0
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldclr8_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
   atomicrmw and ptr @var64, i64 %offset release
-; CHECK-NOT: dmb
-; CHECK: mvn x[[NOT:[0-9]+]], x[[OLD:[0-9]+]]
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
 
-; CHECK: ldclrl x{{[0-9]*}}, x[[NEW:[1-9][0-9]*]], [x[[ADDR]]]
-; CHECK-NOT: dmb
   ret void
 }
 
 define dso_local i8 @test_atomic_load_and_i8_seq_cst(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_and_i8_seq_cst:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mvn w8, w0
+; CHECK-NEXT:    adrp x9, var8
+; CHECK-NEXT:    add x9, x9, :lo12:var8
+; CHECK-NEXT:    ldclralb w8, w0, [x9]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i8_seq_cst:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -3163,19 +4868,31 @@ define dso_local i8 @test_atomic_load_and_i8_seq_cst(i8 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldclr1_acq_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i8_seq_cst:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var8
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var8
+; MSVC-OUTLINE-ATOMICS-NEXT:    mvn w0, w0
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldclr1_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
   %old = atomicrmw and ptr @var8, i8 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]]
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
 
-; CHECK: ldclralb w[[NOT]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
   ret i8 %old
 }
 
 define dso_local i16 @test_atomic_load_and_i16_seq_cst(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_and_i16_seq_cst:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mvn w8, w0
+; CHECK-NEXT:    adrp x9, var16
+; CHECK-NEXT:    add x9, x9, :lo12:var16
+; CHECK-NEXT:    ldclralh w8, w0, [x9]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i16_seq_cst:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -3185,19 +4902,31 @@ define dso_local i16 @test_atomic_load_and_i16_seq_cst(i16 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldclr2_acq_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i16_seq_cst:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var16
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var16
+; MSVC-OUTLINE-ATOMICS-NEXT:    mvn w0, w0
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldclr2_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
   %old = atomicrmw and ptr @var16, i16 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]]
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
 
-; CHECK: ldclralh w[[NOT]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
   ret i16 %old
 }
 
 define dso_local i32 @test_atomic_load_and_i32_seq_cst(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_and_i32_seq_cst:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mvn w8, w0
+; CHECK-NEXT:    adrp x9, var32
+; CHECK-NEXT:    add x9, x9, :lo12:var32
+; CHECK-NEXT:    ldclral w8, w0, [x9]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i32_seq_cst:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -3207,19 +4936,31 @@ define dso_local i32 @test_atomic_load_and_i32_seq_cst(i32 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldclr4_acq_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i32_seq_cst:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    mvn w0, w0
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldclr4_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
   %old = atomicrmw and ptr @var32, i32 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]]
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
 
-; CHECK: ldclral w[[NOT]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
   ret i32 %old
 }
 
 define dso_local i64 @test_atomic_load_and_i64_seq_cst(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_and_i64_seq_cst:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mvn x8, x0
+; CHECK-NEXT:    adrp x9, var64
+; CHECK-NEXT:    add x9, x9, :lo12:var64
+; CHECK-NEXT:    ldclral x8, x0, [x9]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i64_seq_cst:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -3229,19 +4970,31 @@ define dso_local i64 @test_atomic_load_and_i64_seq_cst(i64 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldclr8_acq_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i64_seq_cst:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    mvn x0, x0
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldclr8_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
   %old = atomicrmw and ptr @var64, i64 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: mvn x[[NOT:[0-9]+]], x[[OLD:[0-9]+]]
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
 
-; CHECK: ldclral x[[NOT]], x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
   ret i64 %old
 }
 
 define dso_local void @test_atomic_load_and_i32_noret_seq_cst(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_and_i32_noret_seq_cst:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mvn w8, w0
+; CHECK-NEXT:    adrp x9, var32
+; CHECK-NEXT:    add x9, x9, :lo12:var32
+; CHECK-NEXT:    ldclral w8, w8, [x9]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i32_noret_seq_cst:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -3251,19 +5004,31 @@ define dso_local void @test_atomic_load_and_i32_noret_seq_cst(i32 %offset) nounw
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldclr4_acq_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i32_noret_seq_cst:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    mvn w0, w0
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldclr4_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
   atomicrmw and ptr @var32, i32 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]]
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
 
-; CHECK: ldclral w[[NOT]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
   ret void
 }
 
 define dso_local void @test_atomic_load_and_i64_noret_seq_cst(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_and_i64_noret_seq_cst:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mvn x8, x0
+; CHECK-NEXT:    adrp x9, var64
+; CHECK-NEXT:    add x9, x9, :lo12:var64
+; CHECK-NEXT:    ldclral x8, x8, [x9]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i64_noret_seq_cst:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -3273,19 +5038,30 @@ define dso_local void @test_atomic_load_and_i64_noret_seq_cst(i64 %offset) nounw
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldclr8_acq_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i64_noret_seq_cst:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    mvn x0, x0
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldclr8_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
   atomicrmw and ptr @var64, i64 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: mvn x[[NOT:[0-9]+]], x[[OLD:[0-9]+]]
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
 
-; CHECK: ldclral x[[NOT]], x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
   ret void
 }
 
 define dso_local i8 @test_atomic_cmpxchg_i8_acquire(i8 %wanted, i8 %new) nounwind {
 ; CHECK-LABEL: test_atomic_cmpxchg_i8_acquire:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var8
+; CHECK-NEXT:    add x8, x8, :lo12:var8
+; CHECK-NEXT:    casab w0, w1, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i8_acquire:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -3294,21 +5070,31 @@ define dso_local i8 @test_atomic_cmpxchg_i8_acquire(i8 %wanted, i8 %new) nounwin
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_cas1_acq
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i8_acquire:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x2, var8
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x2, x2, :lo12:var8
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_cas1_acq
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %pair = cmpxchg ptr @var8, i8 %wanted, i8 %new acquire acquire
    %old = extractvalue { i8, i1 } %pair, 0
 
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
 
-; CHECK: casab w[[NEW:[0-9]+]], w[[OLD:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i8 %old
 }
 
 define dso_local i16 @test_atomic_cmpxchg_i16_acquire(i16 %wanted, i16 %new) nounwind {
 ; CHECK-LABEL: test_atomic_cmpxchg_i16_acquire:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var16
+; CHECK-NEXT:    add x8, x8, :lo12:var16
+; CHECK-NEXT:    casah w0, w1, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i16_acquire:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -3317,21 +5103,31 @@ define dso_local i16 @test_atomic_cmpxchg_i16_acquire(i16 %wanted, i16 %new) nou
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_cas2_acq
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i16_acquire:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x2, var16
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x2, x2, :lo12:var16
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_cas2_acq
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %pair = cmpxchg ptr @var16, i16 %wanted, i16 %new acquire acquire
    %old = extractvalue { i16, i1 } %pair, 0
 
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
 
-; CHECK: casah w0, w1, [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i16 %old
 }
 
 define dso_local i32 @test_atomic_cmpxchg_i32_acquire(i32 %wanted, i32 %new) nounwind {
 ; CHECK-LABEL: test_atomic_cmpxchg_i32_acquire:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var32
+; CHECK-NEXT:    add x8, x8, :lo12:var32
+; CHECK-NEXT:    casa w0, w1, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i32_acquire:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -3340,21 +5136,31 @@ define dso_local i32 @test_atomic_cmpxchg_i32_acquire(i32 %wanted, i32 %new) nou
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_cas4_acq
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i32_acquire:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x2, var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x2, x2, :lo12:var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_cas4_acq
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %pair = cmpxchg ptr @var32, i32 %wanted, i32 %new acquire acquire
    %old = extractvalue { i32, i1 } %pair, 0
 
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
 
-; CHECK: casa w0, w1, [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i32 %old
 }
 
 define dso_local i64 @test_atomic_cmpxchg_i64_acquire(i64 %wanted, i64 %new) nounwind {
 ; CHECK-LABEL: test_atomic_cmpxchg_i64_acquire:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var64
+; CHECK-NEXT:    add x8, x8, :lo12:var64
+; CHECK-NEXT:    casa x0, x1, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i64_acquire:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -3363,21 +5169,35 @@ define dso_local i64 @test_atomic_cmpxchg_i64_acquire(i64 %wanted, i64 %new) nou
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_cas8_acq
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i64_acquire:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x2, var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x2, x2, :lo12:var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_cas8_acq
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %pair = cmpxchg ptr @var64, i64 %wanted, i64 %new acquire acquire
    %old = extractvalue { i64, i1 } %pair, 0
 
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
 
-; CHECK: casa x0, x1, [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i64 %old
 }
 
 define dso_local i128 @test_atomic_cmpxchg_i128_acquire(i128 %wanted, i128 %new) nounwind {
 ; CHECK-LABEL: test_atomic_cmpxchg_i128_acquire:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $x3 killed $x3 killed $x2_x3 def $x2_x3
+; CHECK-NEXT:    // kill: def $x1 killed $x1 killed $x0_x1 def $x0_x1
+; CHECK-NEXT:    // kill: def $x2 killed $x2 killed $x2_x3 def $x2_x3
+; CHECK-NEXT:    // kill: def $x0 killed $x0 killed $x0_x1 def $x0_x1
+; CHECK-NEXT:    adrp x8, var128
+; CHECK-NEXT:    add x8, x8, :lo12:var128
+; CHECK-NEXT:    caspa x0, x1, x2, x3, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i128_acquire:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -3386,21 +5206,31 @@ define dso_local i128 @test_atomic_cmpxchg_i128_acquire(i128 %wanted, i128 %new)
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_cas16_acq
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i128_acquire:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x4, var128
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x4, x4, :lo12:var128
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_cas16_acq
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %pair = cmpxchg ptr @var128, i128 %wanted, i128 %new acquire acquire
    %old = extractvalue { i128, i1 } %pair, 0
 
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var128
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var128
 
-; CHECK: caspa x0, x1, x2, x3, [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i128 %old
 }
 
 define dso_local i8 @test_atomic_cmpxchg_i8_monotonic(i8 %wanted, i8 %new) nounwind {
 ; CHECK-LABEL: test_atomic_cmpxchg_i8_monotonic:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var8
+; CHECK-NEXT:    add x8, x8, :lo12:var8
+; CHECK-NEXT:    casb w0, w1, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i8_monotonic:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -3409,21 +5239,31 @@ define dso_local i8 @test_atomic_cmpxchg_i8_monotonic(i8 %wanted, i8 %new) nounw
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_cas1_relax
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i8_monotonic:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x2, var8
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x2, x2, :lo12:var8
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_cas1_relax
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %pair = cmpxchg ptr @var8, i8 %wanted, i8 %new monotonic monotonic
    %old = extractvalue { i8, i1 } %pair, 0
 
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
 
-; CHECK: casb w[[NEW:[0-9]+]], w[[OLD:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i8 %old
 }
 
 define dso_local i16 @test_atomic_cmpxchg_i16_monotonic(i16 %wanted, i16 %new) nounwind {
 ; CHECK-LABEL: test_atomic_cmpxchg_i16_monotonic:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var16
+; CHECK-NEXT:    add x8, x8, :lo12:var16
+; CHECK-NEXT:    cash w0, w1, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i16_monotonic:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -3432,21 +5272,31 @@ define dso_local i16 @test_atomic_cmpxchg_i16_monotonic(i16 %wanted, i16 %new) n
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_cas2_relax
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i16_monotonic:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x2, var16
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x2, x2, :lo12:var16
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_cas2_relax
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %pair = cmpxchg ptr @var16, i16 %wanted, i16 %new monotonic monotonic
    %old = extractvalue { i16, i1 } %pair, 0
 
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
 
-; CHECK: cash w0, w1, [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i16 %old
 }
 
 define dso_local i32 @test_atomic_cmpxchg_i32_monotonic(i32 %wanted, i32 %new) nounwind {
 ; CHECK-LABEL: test_atomic_cmpxchg_i32_monotonic:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var32
+; CHECK-NEXT:    add x8, x8, :lo12:var32
+; CHECK-NEXT:    cas w0, w1, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i32_monotonic:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -3455,21 +5305,31 @@ define dso_local i32 @test_atomic_cmpxchg_i32_monotonic(i32 %wanted, i32 %new) n
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_cas4_relax
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i32_monotonic:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x2, var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x2, x2, :lo12:var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_cas4_relax
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %pair = cmpxchg ptr @var32, i32 %wanted, i32 %new monotonic monotonic
    %old = extractvalue { i32, i1 } %pair, 0
 
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
 
-; CHECK: cas w0, w1, [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i32 %old
 }
 
 define dso_local i64 @test_atomic_cmpxchg_i64_monotonic(i64 %wanted, i64 %new) nounwind {
 ; CHECK-LABEL: test_atomic_cmpxchg_i64_monotonic:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var64
+; CHECK-NEXT:    add x8, x8, :lo12:var64
+; CHECK-NEXT:    cas x0, x1, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i64_monotonic:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -3478,21 +5338,35 @@ define dso_local i64 @test_atomic_cmpxchg_i64_monotonic(i64 %wanted, i64 %new) n
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_cas8_relax
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i64_monotonic:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x2, var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x2, x2, :lo12:var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_cas8_relax
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %pair = cmpxchg ptr @var64, i64 %wanted, i64 %new monotonic monotonic
    %old = extractvalue { i64, i1 } %pair, 0
 
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
 
-; CHECK: cas x0, x1, [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i64 %old
 }
 
 define dso_local i128 @test_atomic_cmpxchg_i128_monotonic(i128 %wanted, i128 %new) nounwind {
 ; CHECK-LABEL: test_atomic_cmpxchg_i128_monotonic:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $x3 killed $x3 killed $x2_x3 def $x2_x3
+; CHECK-NEXT:    // kill: def $x1 killed $x1 killed $x0_x1 def $x0_x1
+; CHECK-NEXT:    // kill: def $x2 killed $x2 killed $x2_x3 def $x2_x3
+; CHECK-NEXT:    // kill: def $x0 killed $x0 killed $x0_x1 def $x0_x1
+; CHECK-NEXT:    adrp x8, var128
+; CHECK-NEXT:    add x8, x8, :lo12:var128
+; CHECK-NEXT:    casp x0, x1, x2, x3, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i128_monotonic:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -3501,21 +5375,31 @@ define dso_local i128 @test_atomic_cmpxchg_i128_monotonic(i128 %wanted, i128 %ne
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_cas16_relax
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i128_monotonic:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x4, var128
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x4, x4, :lo12:var128
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_cas16_relax
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %pair = cmpxchg ptr @var128, i128 %wanted, i128 %new monotonic monotonic
    %old = extractvalue { i128, i1 } %pair, 0
 
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var128
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var128
 
-; CHECK: casp x0, x1, x2, x3, [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i128 %old
 }
 
 define dso_local i8 @test_atomic_cmpxchg_i8_seq_cst(i8 %wanted, i8 %new) nounwind {
 ; CHECK-LABEL: test_atomic_cmpxchg_i8_seq_cst:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var8
+; CHECK-NEXT:    add x8, x8, :lo12:var8
+; CHECK-NEXT:    casalb w0, w1, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i8_seq_cst:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -3524,21 +5408,32 @@ define dso_local i8 @test_atomic_cmpxchg_i8_seq_cst(i8 %wanted, i8 %new) nounwin
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_cas1_acq_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i8_seq_cst:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x2, var8
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x2, x2, :lo12:var8
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_cas1_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %pair = cmpxchg ptr @var8, i8 %wanted, i8 %new seq_cst seq_cst
    %old = extractvalue { i8, i1 } %pair, 0
 
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
 
-; CHECK: casalb w[[NEW:[0-9]+]], w[[OLD:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i8 %old
 }
 
 define dso_local i16 @test_atomic_cmpxchg_i16_seq_cst(i16 %wanted, i16 %new) nounwind {
 ; CHECK-LABEL: test_atomic_cmpxchg_i16_seq_cst:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var16
+; CHECK-NEXT:    add x8, x8, :lo12:var16
+; CHECK-NEXT:    casalh w0, w1, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i16_seq_cst:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -3547,21 +5442,32 @@ define dso_local i16 @test_atomic_cmpxchg_i16_seq_cst(i16 %wanted, i16 %new) nou
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_cas2_acq_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i16_seq_cst:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x2, var16
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x2, x2, :lo12:var16
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_cas2_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %pair = cmpxchg ptr @var16, i16 %wanted, i16 %new seq_cst seq_cst
    %old = extractvalue { i16, i1 } %pair, 0
 
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
 
-; CHECK: casalh w0, w1, [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i16 %old
 }
 
 define dso_local i32 @test_atomic_cmpxchg_i32_seq_cst(i32 %wanted, i32 %new) nounwind {
 ; CHECK-LABEL: test_atomic_cmpxchg_i32_seq_cst:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var32
+; CHECK-NEXT:    add x8, x8, :lo12:var32
+; CHECK-NEXT:    casal w0, w1, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i32_seq_cst:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -3570,21 +5476,32 @@ define dso_local i32 @test_atomic_cmpxchg_i32_seq_cst(i32 %wanted, i32 %new) nou
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_cas4_acq_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i32_seq_cst:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x2, var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x2, x2, :lo12:var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_cas4_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %pair = cmpxchg ptr @var32, i32 %wanted, i32 %new seq_cst seq_cst
    %old = extractvalue { i32, i1 } %pair, 0
 
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
 
-; CHECK: casal w0, w1, [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i32 %old
 }
 
 define dso_local i32 @test_atomic_cmpxchg_i32_monotonic_seq_cst(i32 %wanted, i32 %new) nounwind {
 ; CHECK-LABEL: test_atomic_cmpxchg_i32_monotonic_seq_cst:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var32
+; CHECK-NEXT:    add x8, x8, :lo12:var32
+; CHECK-NEXT:    casal w0, w1, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i32_monotonic_seq_cst:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -3593,21 +5510,31 @@ define dso_local i32 @test_atomic_cmpxchg_i32_monotonic_seq_cst(i32 %wanted, i32
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_cas4_acq_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i32_monotonic_seq_cst:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x2, var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x2, x2, :lo12:var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_cas4_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %pair = cmpxchg ptr @var32, i32 %wanted, i32 %new monotonic seq_cst
    %old = extractvalue { i32, i1 } %pair, 0
 
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
 
-; CHECK: casal w0, w1, [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i32 %old
 }
 
 define dso_local i32 @test_atomic_cmpxchg_i32_release_acquire(i32 %wanted, i32 %new) nounwind {
 ; CHECK-LABEL: test_atomic_cmpxchg_i32_release_acquire:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var32
+; CHECK-NEXT:    add x8, x8, :lo12:var32
+; CHECK-NEXT:    casal w0, w1, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i32_release_acquire:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -3616,21 +5543,31 @@ define dso_local i32 @test_atomic_cmpxchg_i32_release_acquire(i32 %wanted, i32 %
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_cas4_acq_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i32_release_acquire:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x2, var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x2, x2, :lo12:var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_cas4_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %pair = cmpxchg ptr @var32, i32 %wanted, i32 %new release acquire
    %old = extractvalue { i32, i1 } %pair, 0
 
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
 
-; CHECK: casal w0, w1, [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i32 %old
 }
 
 define dso_local i64 @test_atomic_cmpxchg_i64_seq_cst(i64 %wanted, i64 %new) nounwind {
 ; CHECK-LABEL: test_atomic_cmpxchg_i64_seq_cst:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var64
+; CHECK-NEXT:    add x8, x8, :lo12:var64
+; CHECK-NEXT:    casal x0, x1, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i64_seq_cst:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -3639,21 +5576,36 @@ define dso_local i64 @test_atomic_cmpxchg_i64_seq_cst(i64 %wanted, i64 %new) nou
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_cas8_acq_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i64_seq_cst:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x2, var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x2, x2, :lo12:var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_cas8_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %pair = cmpxchg ptr @var64, i64 %wanted, i64 %new seq_cst seq_cst
    %old = extractvalue { i64, i1 } %pair, 0
 
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
 
-; CHECK: casal x0, x1, [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i64 %old
 }
 
 define dso_local i128 @test_atomic_cmpxchg_i128_seq_cst(i128 %wanted, i128 %new) nounwind {
 ; CHECK-LABEL: test_atomic_cmpxchg_i128_seq_cst:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $x3 killed $x3 killed $x2_x3 def $x2_x3
+; CHECK-NEXT:    // kill: def $x1 killed $x1 killed $x0_x1 def $x0_x1
+; CHECK-NEXT:    // kill: def $x2 killed $x2 killed $x2_x3 def $x2_x3
+; CHECK-NEXT:    // kill: def $x0 killed $x0 killed $x0_x1 def $x0_x1
+; CHECK-NEXT:    adrp x8, var128
+; CHECK-NEXT:    add x8, x8, :lo12:var128
+; CHECK-NEXT:    caspal x0, x1, x2, x3, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i128_seq_cst:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -3662,1621 +5614,2604 @@ define dso_local i128 @test_atomic_cmpxchg_i128_seq_cst(i128 %wanted, i128 %new)
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_cas16_acq_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i128_seq_cst:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x4, var128
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x4, x4, :lo12:var128
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_cas16_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %pair = cmpxchg ptr @var128, i128 %wanted, i128 %new seq_cst seq_cst
    %old = extractvalue { i128, i1 } %pair, 0
 
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var128
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var128
 
-; CHECK: caspal x0, x1, x2, x3, [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i128 %old
 }
 
 define dso_local i8 @test_atomic_load_max_i8_acq_rel(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_max_i8_acq_rel:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var8
+; CHECK-NEXT:    add x8, x8, :lo12:var8
+; CHECK-NEXT:    ldsmaxalb w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i8_acq_rel:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x9, var8
 ; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var8
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB163_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldaxrb w10, [x9]
 ; OUTLINE-ATOMICS-NEXT:    sxtb w8, w10
 ; OUTLINE-ATOMICS-NEXT:    cmp w8, w0, sxtb
 ; OUTLINE-ATOMICS-NEXT:    csel w10, w10, w0, gt
 ; OUTLINE-ATOMICS-NEXT:    stlxrb w11, w10, [x9]
-; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB163_1
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
 ; OUTLINE-ATOMICS-NEXT:    mov w0, w8
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i8_acq_rel:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x9, var8
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var8
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB163_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldaxrb w10, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    sxtb w8, w10
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp w8, w0, sxtb
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel w10, w10, w0, gt
+; MSVC-OUTLINE-ATOMICS-NEXT:    stlxrb w11, w10, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB163_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw max ptr @var8, i8 %offset acq_rel
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
 
-; CHECK: ldsmaxalb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i8 %old
 }
 
 define dso_local i16 @test_atomic_load_max_i16_acq_rel(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_max_i16_acq_rel:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var16
+; CHECK-NEXT:    add x8, x8, :lo12:var16
+; CHECK-NEXT:    ldsmaxalh w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i16_acq_rel:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x9, var16
 ; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var16
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB164_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldaxrh w10, [x9]
 ; OUTLINE-ATOMICS-NEXT:    sxth w8, w10
 ; OUTLINE-ATOMICS-NEXT:    cmp w8, w0, sxth
 ; OUTLINE-ATOMICS-NEXT:    csel w10, w10, w0, gt
 ; OUTLINE-ATOMICS-NEXT:    stlxrh w11, w10, [x9]
-; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB164_1
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
 ; OUTLINE-ATOMICS-NEXT:    mov w0, w8
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i16_acq_rel:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x9, var16
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var16
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB164_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldaxrh w10, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    sxth w8, w10
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp w8, w0, sxth
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel w10, w10, w0, gt
+; MSVC-OUTLINE-ATOMICS-NEXT:    stlxrh w11, w10, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB164_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw max ptr @var16, i16 %offset acq_rel
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
 
-; CHECK: ldsmaxalh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i16 %old
 }
 
 define dso_local i32 @test_atomic_load_max_i32_acq_rel(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_max_i32_acq_rel:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var32
+; CHECK-NEXT:    add x8, x8, :lo12:var32
+; CHECK-NEXT:    ldsmaxal w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i32_acq_rel:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x9, var32
 ; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var32
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB165_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldaxr w8, [x9]
 ; OUTLINE-ATOMICS-NEXT:    cmp w8, w0
 ; OUTLINE-ATOMICS-NEXT:    csel w10, w8, w0, gt
 ; OUTLINE-ATOMICS-NEXT:    stlxr w11, w10, [x9]
-; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB165_1
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
 ; OUTLINE-ATOMICS-NEXT:    mov w0, w8
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i32_acq_rel:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x9, var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var32
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB165_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldaxr w8, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp w8, w0
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel w10, w8, w0, gt
+; MSVC-OUTLINE-ATOMICS-NEXT:    stlxr w11, w10, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB165_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw max ptr @var32, i32 %offset acq_rel
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
 
-; CHECK: ldsmaxal w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i32 %old
 }
 
 define dso_local i64 @test_atomic_load_max_i64_acq_rel(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_max_i64_acq_rel:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var64
+; CHECK-NEXT:    add x8, x8, :lo12:var64
+; CHECK-NEXT:    ldsmaxal x0, x0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i64_acq_rel:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x9, var64
 ; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var64
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB166_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldaxr x8, [x9]
 ; OUTLINE-ATOMICS-NEXT:    cmp x8, x0
 ; OUTLINE-ATOMICS-NEXT:    csel x10, x8, x0, gt
 ; OUTLINE-ATOMICS-NEXT:    stlxr w11, x10, [x9]
-; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB166_1
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
 ; OUTLINE-ATOMICS-NEXT:    mov x0, x8
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i64_acq_rel:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x9, var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var64
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB166_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldaxr x8, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp x8, x0
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel x10, x8, x0, gt
+; MSVC-OUTLINE-ATOMICS-NEXT:    stlxr w11, x10, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB166_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    mov x0, x8
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw max ptr @var64, i64 %offset acq_rel
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
 
-; CHECK: ldsmaxal x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i64 %old
 }
 
 define dso_local void @test_atomic_load_max_i32_noret_acq_rel(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_max_i32_noret_acq_rel:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var32
+; CHECK-NEXT:    add x8, x8, :lo12:var32
+; CHECK-NEXT:    ldsmaxal w0, w8, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i32_noret_acq_rel:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x8, var32
 ; OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var32
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB167_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldaxr w9, [x8]
 ; OUTLINE-ATOMICS-NEXT:    cmp w9, w0
 ; OUTLINE-ATOMICS-NEXT:    csel w9, w9, w0, gt
 ; OUTLINE-ATOMICS-NEXT:    stlxr w10, w9, [x8]
-; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB[[LOOPSTART]]
-; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
-; OUTLINE-ATOMICS-NEXT:    ret
+; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB167_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i32_noret_acq_rel:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x8, var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var32
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB167_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldaxr w9, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp w9, w0
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel w9, w9, w0, gt
+; MSVC-OUTLINE-ATOMICS-NEXT:    stlxr w10, w9, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB167_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw max ptr @var32, i32 %offset acq_rel
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
 
-; CHECK: ldsmaxal w0, w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
   ret void
 }
 
 define dso_local void @test_atomic_load_max_i64_noret_acq_rel(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_max_i64_noret_acq_rel:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var64
+; CHECK-NEXT:    add x8, x8, :lo12:var64
+; CHECK-NEXT:    ldsmaxal x0, x8, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i64_noret_acq_rel:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x8, var64
 ; OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var64
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB168_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldaxr x9, [x8]
 ; OUTLINE-ATOMICS-NEXT:    cmp x9, x0
 ; OUTLINE-ATOMICS-NEXT:    csel x9, x9, x0, gt
 ; OUTLINE-ATOMICS-NEXT:    stlxr w10, x9, [x8]
-; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB[[LOOPSTART]]
-; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
-; OUTLINE-ATOMICS-NEXT:    ret
+; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB168_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i64_noret_acq_rel:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x8, var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var64
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB168_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldaxr x9, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp x9, x0
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel x9, x9, x0, gt
+; MSVC-OUTLINE-ATOMICS-NEXT:    stlxr w10, x9, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB168_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw max ptr @var64, i64 %offset acq_rel
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
 
-; CHECK: ldsmaxal x0, x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
   ret void
 }
 
 define dso_local i8 @test_atomic_load_max_i8_acquire(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_max_i8_acquire:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var8
+; CHECK-NEXT:    add x8, x8, :lo12:var8
+; CHECK-NEXT:    ldsmaxab w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i8_acquire:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x9, var8
 ; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var8
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB169_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldaxrb w10, [x9]
 ; OUTLINE-ATOMICS-NEXT:    sxtb w8, w10
 ; OUTLINE-ATOMICS-NEXT:    cmp w8, w0, sxtb
 ; OUTLINE-ATOMICS-NEXT:    csel w10, w10, w0, gt
 ; OUTLINE-ATOMICS-NEXT:    stxrb w11, w10, [x9]
-; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB169_1
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
 ; OUTLINE-ATOMICS-NEXT:    mov w0, w8
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i8_acquire:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x9, var8
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var8
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB169_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldaxrb w10, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    sxtb w8, w10
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp w8, w0, sxtb
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel w10, w10, w0, gt
+; MSVC-OUTLINE-ATOMICS-NEXT:    stxrb w11, w10, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB169_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw max ptr @var8, i8 %offset acquire
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
 
-; CHECK: ldsmaxab w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i8 %old
 }
 
 define dso_local i16 @test_atomic_load_max_i16_acquire(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_max_i16_acquire:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var16
+; CHECK-NEXT:    add x8, x8, :lo12:var16
+; CHECK-NEXT:    ldsmaxah w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i16_acquire:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x9, var16
 ; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var16
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB170_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldaxrh w10, [x9]
 ; OUTLINE-ATOMICS-NEXT:    sxth w8, w10
 ; OUTLINE-ATOMICS-NEXT:    cmp w8, w0, sxth
 ; OUTLINE-ATOMICS-NEXT:    csel w10, w10, w0, gt
 ; OUTLINE-ATOMICS-NEXT:    stxrh w11, w10, [x9]
-; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB170_1
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
 ; OUTLINE-ATOMICS-NEXT:    mov w0, w8
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i16_acquire:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x9, var16
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var16
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB170_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldaxrh w10, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    sxth w8, w10
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp w8, w0, sxth
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel w10, w10, w0, gt
+; MSVC-OUTLINE-ATOMICS-NEXT:    stxrh w11, w10, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB170_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw max ptr @var16, i16 %offset acquire
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
 
-; CHECK: ldsmaxah w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i16 %old
 }
 
 define dso_local i32 @test_atomic_load_max_i32_acquire(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_max_i32_acquire:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var32
+; CHECK-NEXT:    add x8, x8, :lo12:var32
+; CHECK-NEXT:    ldsmaxa w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i32_acquire:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x9, var32
 ; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var32
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB171_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldaxr w8, [x9]
 ; OUTLINE-ATOMICS-NEXT:    cmp w8, w0
 ; OUTLINE-ATOMICS-NEXT:    csel w10, w8, w0, gt
 ; OUTLINE-ATOMICS-NEXT:    stxr w11, w10, [x9]
-; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB171_1
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
 ; OUTLINE-ATOMICS-NEXT:    mov w0, w8
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i32_acquire:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x9, var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var32
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB171_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldaxr w8, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp w8, w0
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel w10, w8, w0, gt
+; MSVC-OUTLINE-ATOMICS-NEXT:    stxr w11, w10, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB171_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw max ptr @var32, i32 %offset acquire
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
 
-; CHECK: ldsmaxa w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i32 %old
 }
 
 define dso_local i64 @test_atomic_load_max_i64_acquire(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_max_i64_acquire:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var64
+; CHECK-NEXT:    add x8, x8, :lo12:var64
+; CHECK-NEXT:    ldsmaxa x0, x0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i64_acquire:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x9, var64
 ; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var64
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB172_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldaxr x8, [x9]
 ; OUTLINE-ATOMICS-NEXT:    cmp x8, x0
 ; OUTLINE-ATOMICS-NEXT:    csel x10, x8, x0, gt
 ; OUTLINE-ATOMICS-NEXT:    stxr w11, x10, [x9]
-; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB172_1
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
 ; OUTLINE-ATOMICS-NEXT:    mov x0, x8
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i64_acquire:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x9, var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var64
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB172_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldaxr x8, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp x8, x0
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel x10, x8, x0, gt
+; MSVC-OUTLINE-ATOMICS-NEXT:    stxr w11, x10, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB172_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    mov x0, x8
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw max ptr @var64, i64 %offset acquire
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
 
-; CHECK: ldsmaxa x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i64 %old
 }
 
 define dso_local void @test_atomic_load_max_i32_noret_acquire(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_max_i32_noret_acquire:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var32
+; CHECK-NEXT:    add x8, x8, :lo12:var32
+; CHECK-NEXT:    ldsmaxa w0, w8, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i32_noret_acquire:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x8, var32
 ; OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var32
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB173_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldaxr w9, [x8]
 ; OUTLINE-ATOMICS-NEXT:    cmp w9, w0
 ; OUTLINE-ATOMICS-NEXT:    csel w9, w9, w0, gt
 ; OUTLINE-ATOMICS-NEXT:    stxr w10, w9, [x8]
-; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB[[LOOPSTART]]
-; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
-; OUTLINE-ATOMICS-NEXT:    ret
+; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB173_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i32_noret_acquire:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x8, var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var32
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB173_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldaxr w9, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp w9, w0
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel w9, w9, w0, gt
+; MSVC-OUTLINE-ATOMICS-NEXT:    stxr w10, w9, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB173_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw max ptr @var32, i32 %offset acquire
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
 
-; CHECK: ldsmaxa w0, w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
   ret void
 }
 
 define dso_local void @test_atomic_load_max_i64_noret_acquire(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_max_i64_noret_acquire:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var64
+; CHECK-NEXT:    add x8, x8, :lo12:var64
+; CHECK-NEXT:    ldsmaxa x0, x8, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i64_noret_acquire:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x8, var64
 ; OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var64
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB174_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldaxr x9, [x8]
 ; OUTLINE-ATOMICS-NEXT:    cmp x9, x0
 ; OUTLINE-ATOMICS-NEXT:    csel x9, x9, x0, gt
 ; OUTLINE-ATOMICS-NEXT:    stxr w10, x9, [x8]
-; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB[[LOOPSTART]]
-; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
-; OUTLINE-ATOMICS-NEXT:    ret
+; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB174_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i64_noret_acquire:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x8, var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var64
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB174_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldaxr x9, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp x9, x0
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel x9, x9, x0, gt
+; MSVC-OUTLINE-ATOMICS-NEXT:    stxr w10, x9, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB174_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw max ptr @var64, i64 %offset acquire
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
 
-; CHECK: ldsmaxa x0, x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
   ret void
 }
 
 define dso_local i8 @test_atomic_load_max_i8_monotonic(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_max_i8_monotonic:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var8
+; CHECK-NEXT:    add x8, x8, :lo12:var8
+; CHECK-NEXT:    ldsmaxb w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i8_monotonic:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x9, var8
 ; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var8
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB175_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldxrb w10, [x9]
 ; OUTLINE-ATOMICS-NEXT:    sxtb w8, w10
 ; OUTLINE-ATOMICS-NEXT:    cmp w8, w0, sxtb
 ; OUTLINE-ATOMICS-NEXT:    csel w10, w10, w0, gt
 ; OUTLINE-ATOMICS-NEXT:    stxrb w11, w10, [x9]
-; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB175_1
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
 ; OUTLINE-ATOMICS-NEXT:    mov w0, w8
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i8_monotonic:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x9, var8
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var8
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB175_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldxrb w10, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    sxtb w8, w10
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp w8, w0, sxtb
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel w10, w10, w0, gt
+; MSVC-OUTLINE-ATOMICS-NEXT:    stxrb w11, w10, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB175_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw max ptr @var8, i8 %offset monotonic
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
 
-; CHECK: ldsmaxb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i8 %old
 }
 
 define dso_local i16 @test_atomic_load_max_i16_monotonic(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_max_i16_monotonic:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var16
+; CHECK-NEXT:    add x8, x8, :lo12:var16
+; CHECK-NEXT:    ldsmaxh w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i16_monotonic:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x9, var16
 ; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var16
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB176_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldxrh w10, [x9]
 ; OUTLINE-ATOMICS-NEXT:    sxth w8, w10
 ; OUTLINE-ATOMICS-NEXT:    cmp w8, w0, sxth
 ; OUTLINE-ATOMICS-NEXT:    csel w10, w10, w0, gt
 ; OUTLINE-ATOMICS-NEXT:    stxrh w11, w10, [x9]
-; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB176_1
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
 ; OUTLINE-ATOMICS-NEXT:    mov w0, w8
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i16_monotonic:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x9, var16
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var16
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB176_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldxrh w10, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    sxth w8, w10
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp w8, w0, sxth
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel w10, w10, w0, gt
+; MSVC-OUTLINE-ATOMICS-NEXT:    stxrh w11, w10, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB176_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw max ptr @var16, i16 %offset monotonic
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
 
-; CHECK: ldsmaxh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i16 %old
 }
 
 define dso_local i32 @test_atomic_load_max_i32_monotonic(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_max_i32_monotonic:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var32
+; CHECK-NEXT:    add x8, x8, :lo12:var32
+; CHECK-NEXT:    ldsmax w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i32_monotonic:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x9, var32
 ; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var32
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB177_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldxr w8, [x9]
 ; OUTLINE-ATOMICS-NEXT:    cmp w8, w0
 ; OUTLINE-ATOMICS-NEXT:    csel w10, w8, w0, gt
 ; OUTLINE-ATOMICS-NEXT:    stxr w11, w10, [x9]
-; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB177_1
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
 ; OUTLINE-ATOMICS-NEXT:    mov w0, w8
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i32_monotonic:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x9, var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var32
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB177_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldxr w8, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp w8, w0
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel w10, w8, w0, gt
+; MSVC-OUTLINE-ATOMICS-NEXT:    stxr w11, w10, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB177_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw max ptr @var32, i32 %offset monotonic
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
 
-; CHECK: ldsmax w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i32 %old
 }
 
 define dso_local i64 @test_atomic_load_max_i64_monotonic(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_max_i64_monotonic:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var64
+; CHECK-NEXT:    add x8, x8, :lo12:var64
+; CHECK-NEXT:    ldsmax x0, x0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i64_monotonic:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x9, var64
 ; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var64
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB178_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldxr x8, [x9]
 ; OUTLINE-ATOMICS-NEXT:    cmp x8, x0
 ; OUTLINE-ATOMICS-NEXT:    csel x10, x8, x0, gt
 ; OUTLINE-ATOMICS-NEXT:    stxr w11, x10, [x9]
-; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB178_1
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
 ; OUTLINE-ATOMICS-NEXT:    mov x0, x8
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i64_monotonic:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x9, var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var64
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB178_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldxr x8, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp x8, x0
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel x10, x8, x0, gt
+; MSVC-OUTLINE-ATOMICS-NEXT:    stxr w11, x10, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB178_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    mov x0, x8
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw max ptr @var64, i64 %offset monotonic
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
 
-; CHECK: ldsmax x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i64 %old
 }
 
 define dso_local void @test_atomic_load_max_i32_noret_monotonic(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_max_i32_noret_monotonic:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var32
+; CHECK-NEXT:    add x8, x8, :lo12:var32
+; CHECK-NEXT:    ldsmax w0, w8, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i32_noret_monotonic:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x8, var32
 ; OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var32
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB179_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldxr w9, [x8]
 ; OUTLINE-ATOMICS-NEXT:    cmp w9, w0
 ; OUTLINE-ATOMICS-NEXT:    csel w9, w9, w0, gt
 ; OUTLINE-ATOMICS-NEXT:    stxr w10, w9, [x8]
-; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB[[LOOPSTART]]
-; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
-; OUTLINE-ATOMICS-NEXT:    ret
+; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB179_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i32_noret_monotonic:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x8, var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var32
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB179_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldxr w9, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp w9, w0
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel w9, w9, w0, gt
+; MSVC-OUTLINE-ATOMICS-NEXT:    stxr w10, w9, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB179_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw max ptr @var32, i32 %offset monotonic
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
 
-; CHECK: ldsmax w{{[0-9]+}}, w{{[1-9][0-9]*}}, [x[[ADDR]]]
-; CHECK-NOT: dmb
   ret void
 }
 
 define dso_local void @test_atomic_load_max_i64_noret_monotonic(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_max_i64_noret_monotonic:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var64
+; CHECK-NEXT:    add x8, x8, :lo12:var64
+; CHECK-NEXT:    ldsmax x0, x8, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i64_noret_monotonic:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x8, var64
 ; OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var64
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB180_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldxr x9, [x8]
 ; OUTLINE-ATOMICS-NEXT:    cmp x9, x0
 ; OUTLINE-ATOMICS-NEXT:    csel x9, x9, x0, gt
 ; OUTLINE-ATOMICS-NEXT:    stxr w10, x9, [x8]
-; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB[[LOOPSTART]]
-; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
-; OUTLINE-ATOMICS-NEXT:    ret
+; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB180_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i64_noret_monotonic:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x8, var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var64
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB180_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldxr x9, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp x9, x0
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel x9, x9, x0, gt
+; MSVC-OUTLINE-ATOMICS-NEXT:    stxr w10, x9, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB180_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw max ptr @var64, i64 %offset monotonic
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
 
-; CHECK: ldsmax x{{[0-9]+}}, x{{[1-9][0-9]*}}, [x[[ADDR]]]
-; CHECK-NOT: dmb
   ret void
 }
 
 define dso_local i8 @test_atomic_load_max_i8_release(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_max_i8_release:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var8
+; CHECK-NEXT:    add x8, x8, :lo12:var8
+; CHECK-NEXT:    ldsmaxlb w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i8_release:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x9, var8
 ; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var8
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB181_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldxrb w10, [x9]
 ; OUTLINE-ATOMICS-NEXT:    sxtb w8, w10
 ; OUTLINE-ATOMICS-NEXT:    cmp w8, w0, sxtb
 ; OUTLINE-ATOMICS-NEXT:    csel w10, w10, w0, gt
 ; OUTLINE-ATOMICS-NEXT:    stlxrb w11, w10, [x9]
-; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB181_1
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
 ; OUTLINE-ATOMICS-NEXT:    mov w0, w8
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i8_release:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x9, var8
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var8
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB181_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldxrb w10, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    sxtb w8, w10
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp w8, w0, sxtb
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel w10, w10, w0, gt
+; MSVC-OUTLINE-ATOMICS-NEXT:    stlxrb w11, w10, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB181_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw max ptr @var8, i8 %offset release
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
 
-; CHECK: ldsmaxlb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i8 %old
 }
 
 define dso_local i16 @test_atomic_load_max_i16_release(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_max_i16_release:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var16
+; CHECK-NEXT:    add x8, x8, :lo12:var16
+; CHECK-NEXT:    ldsmaxlh w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i16_release:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x9, var16
 ; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var16
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB182_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldxrh w10, [x9]
 ; OUTLINE-ATOMICS-NEXT:    sxth w8, w10
 ; OUTLINE-ATOMICS-NEXT:    cmp w8, w0, sxth
 ; OUTLINE-ATOMICS-NEXT:    csel w10, w10, w0, gt
 ; OUTLINE-ATOMICS-NEXT:    stlxrh w11, w10, [x9]
-; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB182_1
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
 ; OUTLINE-ATOMICS-NEXT:    mov w0, w8
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i16_release:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x9, var16
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var16
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB182_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldxrh w10, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    sxth w8, w10
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp w8, w0, sxth
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel w10, w10, w0, gt
+; MSVC-OUTLINE-ATOMICS-NEXT:    stlxrh w11, w10, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB182_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw max ptr @var16, i16 %offset release
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
 
-; CHECK: ldsmaxlh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i16 %old
 }
 
 define dso_local i32 @test_atomic_load_max_i32_release(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_max_i32_release:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var32
+; CHECK-NEXT:    add x8, x8, :lo12:var32
+; CHECK-NEXT:    ldsmaxl w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i32_release:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x9, var32
 ; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var32
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB183_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldxr w8, [x9]
 ; OUTLINE-ATOMICS-NEXT:    cmp w8, w0
 ; OUTLINE-ATOMICS-NEXT:    csel w10, w8, w0, gt
 ; OUTLINE-ATOMICS-NEXT:    stlxr w11, w10, [x9]
-; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB183_1
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
 ; OUTLINE-ATOMICS-NEXT:    mov w0, w8
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i32_release:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x9, var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var32
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB183_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldxr w8, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp w8, w0
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel w10, w8, w0, gt
+; MSVC-OUTLINE-ATOMICS-NEXT:    stlxr w11, w10, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB183_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw max ptr @var32, i32 %offset release
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
 
-; CHECK: ldsmaxl w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i32 %old
 }
 
 define dso_local i64 @test_atomic_load_max_i64_release(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_max_i64_release:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var64
+; CHECK-NEXT:    add x8, x8, :lo12:var64
+; CHECK-NEXT:    ldsmaxl x0, x0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i64_release:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x9, var64
 ; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var64
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB184_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldxr x8, [x9]
 ; OUTLINE-ATOMICS-NEXT:    cmp x8, x0
 ; OUTLINE-ATOMICS-NEXT:    csel x10, x8, x0, gt
 ; OUTLINE-ATOMICS-NEXT:    stlxr w11, x10, [x9]
-; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB184_1
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
 ; OUTLINE-ATOMICS-NEXT:    mov x0, x8
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i64_release:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x9, var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var64
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB184_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldxr x8, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp x8, x0
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel x10, x8, x0, gt
+; MSVC-OUTLINE-ATOMICS-NEXT:    stlxr w11, x10, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB184_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    mov x0, x8
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw max ptr @var64, i64 %offset release
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
 
-; CHECK: ldsmaxl x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i64 %old
 }
 
 define dso_local void @test_atomic_load_max_i32_noret_release(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_max_i32_noret_release:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var32
+; CHECK-NEXT:    add x8, x8, :lo12:var32
+; CHECK-NEXT:    ldsmaxl w0, w8, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i32_noret_release:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x8, var32
 ; OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var32
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB185_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldxr w9, [x8]
 ; OUTLINE-ATOMICS-NEXT:    cmp w9, w0
 ; OUTLINE-ATOMICS-NEXT:    csel w9, w9, w0, gt
 ; OUTLINE-ATOMICS-NEXT:    stlxr w10, w9, [x8]
-; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB[[LOOPSTART]]
-; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
-; OUTLINE-ATOMICS-NEXT:    ret
+; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB185_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i32_noret_release:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x8, var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var32
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB185_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldxr w9, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp w9, w0
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel w9, w9, w0, gt
+; MSVC-OUTLINE-ATOMICS-NEXT:    stlxr w10, w9, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB185_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw max ptr @var32, i32 %offset release
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
 
-; CHECK: ldsmaxl w{{[0-9]+}}, w{{[1-9][0-9]*}}, [x[[ADDR]]]
-; CHECK-NOT: dmb
   ret void
 }
 
 define dso_local void @test_atomic_load_max_i64_noret_release(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_max_i64_noret_release:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var64
+; CHECK-NEXT:    add x8, x8, :lo12:var64
+; CHECK-NEXT:    ldsmaxl x0, x8, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i64_noret_release:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x8, var64
 ; OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var64
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB186_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldxr x9, [x8]
 ; OUTLINE-ATOMICS-NEXT:    cmp x9, x0
 ; OUTLINE-ATOMICS-NEXT:    csel x9, x9, x0, gt
 ; OUTLINE-ATOMICS-NEXT:    stlxr w10, x9, [x8]
-; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB[[LOOPSTART]]
-; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
-; OUTLINE-ATOMICS-NEXT:    ret
+; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB186_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i64_noret_release:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x8, var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var64
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB186_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldxr x9, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp x9, x0
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel x9, x9, x0, gt
+; MSVC-OUTLINE-ATOMICS-NEXT:    stlxr w10, x9, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB186_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw max ptr @var64, i64 %offset release
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
 
-; CHECK: ldsmaxl x{{[0-9]+}}, x{{[1-9][0-9]*}}, [x[[ADDR]]]
-; CHECK-NOT: dmb
   ret void
 }
 
 define dso_local i8 @test_atomic_load_max_i8_seq_cst(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_max_i8_seq_cst:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var8
+; CHECK-NEXT:    add x8, x8, :lo12:var8
+; CHECK-NEXT:    ldsmaxalb w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i8_seq_cst:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x9, var8
 ; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var8
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB187_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldaxrb w10, [x9]
 ; OUTLINE-ATOMICS-NEXT:    sxtb w8, w10
 ; OUTLINE-ATOMICS-NEXT:    cmp w8, w0, sxtb
 ; OUTLINE-ATOMICS-NEXT:    csel w10, w10, w0, gt
 ; OUTLINE-ATOMICS-NEXT:    stlxrb w11, w10, [x9]
-; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB187_1
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
 ; OUTLINE-ATOMICS-NEXT:    mov w0, w8
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i8_seq_cst:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x9, var8
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var8
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB187_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldaxrb w10, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    sxtb w8, w10
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp w8, w0, sxtb
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel w10, w10, w0, gt
+; MSVC-OUTLINE-ATOMICS-NEXT:    stlxrb w11, w10, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB187_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw max ptr @var8, i8 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
 
-; CHECK: ldsmaxalb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i8 %old
 }
 
 define dso_local i16 @test_atomic_load_max_i16_seq_cst(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_max_i16_seq_cst:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var16
+; CHECK-NEXT:    add x8, x8, :lo12:var16
+; CHECK-NEXT:    ldsmaxalh w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i16_seq_cst:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x9, var16
 ; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var16
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB188_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldaxrh w10, [x9]
 ; OUTLINE-ATOMICS-NEXT:    sxth w8, w10
 ; OUTLINE-ATOMICS-NEXT:    cmp w8, w0, sxth
 ; OUTLINE-ATOMICS-NEXT:    csel w10, w10, w0, gt
 ; OUTLINE-ATOMICS-NEXT:    stlxrh w11, w10, [x9]
-; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB188_1
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
 ; OUTLINE-ATOMICS-NEXT:    mov w0, w8
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i16_seq_cst:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x9, var16
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var16
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB188_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldaxrh w10, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    sxth w8, w10
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp w8, w0, sxth
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel w10, w10, w0, gt
+; MSVC-OUTLINE-ATOMICS-NEXT:    stlxrh w11, w10, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB188_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw max ptr @var16, i16 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
 
-; CHECK: ldsmaxalh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i16 %old
 }
 
 define dso_local i32 @test_atomic_load_max_i32_seq_cst(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_max_i32_seq_cst:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var32
+; CHECK-NEXT:    add x8, x8, :lo12:var32
+; CHECK-NEXT:    ldsmaxal w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i32_seq_cst:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x9, var32
 ; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var32
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB189_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldaxr w8, [x9]
 ; OUTLINE-ATOMICS-NEXT:    cmp w8, w0
 ; OUTLINE-ATOMICS-NEXT:    csel w10, w8, w0, gt
 ; OUTLINE-ATOMICS-NEXT:    stlxr w11, w10, [x9]
-; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB189_1
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
 ; OUTLINE-ATOMICS-NEXT:    mov w0, w8
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i32_seq_cst:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x9, var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var32
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB189_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldaxr w8, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp w8, w0
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel w10, w8, w0, gt
+; MSVC-OUTLINE-ATOMICS-NEXT:    stlxr w11, w10, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB189_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw max ptr @var32, i32 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
 
-; CHECK: ldsmaxal w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i32 %old
 }
 
 define dso_local i64 @test_atomic_load_max_i64_seq_cst(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_max_i64_seq_cst:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var64
+; CHECK-NEXT:    add x8, x8, :lo12:var64
+; CHECK-NEXT:    ldsmaxal x0, x0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i64_seq_cst:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x9, var64
 ; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var64
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB190_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldaxr x8, [x9]
 ; OUTLINE-ATOMICS-NEXT:    cmp x8, x0
 ; OUTLINE-ATOMICS-NEXT:    csel x10, x8, x0, gt
 ; OUTLINE-ATOMICS-NEXT:    stlxr w11, x10, [x9]
-; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB190_1
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
 ; OUTLINE-ATOMICS-NEXT:    mov x0, x8
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i64_seq_cst:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    mov x8, x0
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x9, var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var64
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB190_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldaxr x0, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp x0, x8
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel x10, x0, x8, gt
+; MSVC-OUTLINE-ATOMICS-NEXT:    stlxr w11, x10, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB190_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw max ptr @var64, i64 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
 
-; CHECK: ldsmaxal x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i64 %old
 }
 
 define dso_local void @test_atomic_load_max_i32_noret_seq_cst(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_max_i32_noret_seq_cst:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var32
+; CHECK-NEXT:    add x8, x8, :lo12:var32
+; CHECK-NEXT:    ldsmaxal w0, w8, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i32_noret_seq_cst:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x8, var32
 ; OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var32
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB191_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldaxr w9, [x8]
 ; OUTLINE-ATOMICS-NEXT:    cmp w9, w0
 ; OUTLINE-ATOMICS-NEXT:    csel w9, w9, w0, gt
 ; OUTLINE-ATOMICS-NEXT:    stlxr w10, w9, [x8]
-; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB[[LOOPSTART]]
-; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
-; OUTLINE-ATOMICS-NEXT:    ret
+; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB191_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i32_noret_seq_cst:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x8, var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var32
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB191_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldaxr w9, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp w9, w0
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel w9, w9, w0, gt
+; MSVC-OUTLINE-ATOMICS-NEXT:    stlxr w10, w9, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB191_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw max ptr @var32, i32 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
 
-; CHECK: ldsmaxal w0, w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
   ret void
 }
 
 define dso_local void @test_atomic_load_max_i64_noret_seq_cst(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_max_i64_noret_seq_cst:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var64
+; CHECK-NEXT:    add x8, x8, :lo12:var64
+; CHECK-NEXT:    ldsmaxal x0, x8, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i64_noret_seq_cst:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x8, var64
 ; OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var64
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB192_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldaxr x9, [x8]
 ; OUTLINE-ATOMICS-NEXT:    cmp x9, x0
 ; OUTLINE-ATOMICS-NEXT:    csel x9, x9, x0, gt
 ; OUTLINE-ATOMICS-NEXT:    stlxr w10, x9, [x8]
-; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB[[LOOPSTART]]
-; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
-; OUTLINE-ATOMICS-NEXT:    ret
+; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB192_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i64_noret_seq_cst:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x8, var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var64
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB192_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldaxr x9, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp x9, x0
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel x9, x9, x0, gt
+; MSVC-OUTLINE-ATOMICS-NEXT:    stlxr w10, x9, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB192_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw max ptr @var64, i64 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
 
-; CHECK: ldsmaxal x0, x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
   ret void
 }
 
 define dso_local i8 @test_atomic_load_min_i8_acq_rel(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_min_i8_acq_rel:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var8
+; CHECK-NEXT:    add x8, x8, :lo12:var8
+; CHECK-NEXT:    ldsminalb w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i8_acq_rel:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x9, var8
 ; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var8
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB193_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldaxrb w10, [x9]
 ; OUTLINE-ATOMICS-NEXT:    sxtb w8, w10
 ; OUTLINE-ATOMICS-NEXT:    cmp w8, w0, sxtb
 ; OUTLINE-ATOMICS-NEXT:    csel w10, w10, w0, le
 ; OUTLINE-ATOMICS-NEXT:    stlxrb w11, w10, [x9]
-; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB193_1
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
 ; OUTLINE-ATOMICS-NEXT:    mov w0, w8
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i8_acq_rel:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x9, var8
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var8
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB193_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldaxrb w10, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    sxtb w8, w10
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp w8, w0, sxtb
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel w10, w10, w0, le
+; MSVC-OUTLINE-ATOMICS-NEXT:    stlxrb w11, w10, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB193_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw min ptr @var8, i8 %offset acq_rel
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
 
-; CHECK: ldsminalb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i8 %old
 }
 
 define dso_local i16 @test_atomic_load_min_i16_acq_rel(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_min_i16_acq_rel:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var16
+; CHECK-NEXT:    add x8, x8, :lo12:var16
+; CHECK-NEXT:    ldsminalh w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i16_acq_rel:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x9, var16
 ; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var16
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB194_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldaxrh w10, [x9]
 ; OUTLINE-ATOMICS-NEXT:    sxth w8, w10
 ; OUTLINE-ATOMICS-NEXT:    cmp w8, w0, sxth
 ; OUTLINE-ATOMICS-NEXT:    csel w10, w10, w0, le
 ; OUTLINE-ATOMICS-NEXT:    stlxrh w11, w10, [x9]
-; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB194_1
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
 ; OUTLINE-ATOMICS-NEXT:    mov w0, w8
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i16_acq_rel:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x9, var16
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var16
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB194_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldaxrh w10, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    sxth w8, w10
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp w8, w0, sxth
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel w10, w10, w0, le
+; MSVC-OUTLINE-ATOMICS-NEXT:    stlxrh w11, w10, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB194_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw min ptr @var16, i16 %offset acq_rel
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
 
-; CHECK: ldsminalh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i16 %old
 }
 
 define dso_local i32 @test_atomic_load_min_i32_acq_rel(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_min_i32_acq_rel:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var32
+; CHECK-NEXT:    add x8, x8, :lo12:var32
+; CHECK-NEXT:    ldsminal w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i32_acq_rel:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x9, var32
 ; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var32
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB195_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldaxr w8, [x9]
 ; OUTLINE-ATOMICS-NEXT:    cmp w8, w0
 ; OUTLINE-ATOMICS-NEXT:    csel w10, w8, w0, le
 ; OUTLINE-ATOMICS-NEXT:    stlxr w11, w10, [x9]
-; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB195_1
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
 ; OUTLINE-ATOMICS-NEXT:    mov w0, w8
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i32_acq_rel:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x9, var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var32
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB195_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldaxr w8, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp w8, w0
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel w10, w8, w0, le
+; MSVC-OUTLINE-ATOMICS-NEXT:    stlxr w11, w10, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB195_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw min ptr @var32, i32 %offset acq_rel
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
 
-; CHECK: ldsminal w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i32 %old
 }
 
 define dso_local i64 @test_atomic_load_min_i64_acq_rel(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_min_i64_acq_rel:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var64
+; CHECK-NEXT:    add x8, x8, :lo12:var64
+; CHECK-NEXT:    ldsminal x0, x0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i64_acq_rel:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x9, var64
 ; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var64
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB196_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldaxr x8, [x9]
 ; OUTLINE-ATOMICS-NEXT:    cmp x8, x0
 ; OUTLINE-ATOMICS-NEXT:    csel x10, x8, x0, le
 ; OUTLINE-ATOMICS-NEXT:    stlxr w11, x10, [x9]
-; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB196_1
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
 ; OUTLINE-ATOMICS-NEXT:    mov x0, x8
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i64_acq_rel:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x9, var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var64
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB196_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldaxr x8, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp x8, x0
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel x10, x8, x0, le
+; MSVC-OUTLINE-ATOMICS-NEXT:    stlxr w11, x10, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB196_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    mov x0, x8
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw min ptr @var64, i64 %offset acq_rel
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
 
-; CHECK: ldsminal x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i64 %old
 }
 
 define dso_local void @test_atomic_load_min_i32_noret_acq_rel(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_min_i32_noret_acq_rel:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var32
+; CHECK-NEXT:    add x8, x8, :lo12:var32
+; CHECK-NEXT:    ldsminal w0, w8, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i32_noret_acq_rel:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x8, var32
 ; OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var32
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB197_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldaxr w9, [x8]
 ; OUTLINE-ATOMICS-NEXT:    cmp w9, w0
 ; OUTLINE-ATOMICS-NEXT:    csel w9, w9, w0, le
 ; OUTLINE-ATOMICS-NEXT:    stlxr w10, w9, [x8]
-; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB[[LOOPSTART]]
-; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
-; OUTLINE-ATOMICS-NEXT:    ret
+; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB197_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i32_noret_acq_rel:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x8, var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var32
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB197_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldaxr w9, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp w9, w0
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel w9, w9, w0, le
+; MSVC-OUTLINE-ATOMICS-NEXT:    stlxr w10, w9, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB197_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw min ptr @var32, i32 %offset acq_rel
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
 
-; CHECK: ldsminal w0, w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
   ret void
 }
 
 define dso_local void @test_atomic_load_min_i64_noret_acq_rel(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_min_i64_noret_acq_rel:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var64
+; CHECK-NEXT:    add x8, x8, :lo12:var64
+; CHECK-NEXT:    ldsminal x0, x8, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i64_noret_acq_rel:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x8, var64
 ; OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var64
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB198_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldaxr x9, [x8]
 ; OUTLINE-ATOMICS-NEXT:    cmp x9, x0
 ; OUTLINE-ATOMICS-NEXT:    csel x9, x9, x0, le
 ; OUTLINE-ATOMICS-NEXT:    stlxr w10, x9, [x8]
-; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB[[LOOPSTART]]
-; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
-; OUTLINE-ATOMICS-NEXT:    ret
+; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB198_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i64_noret_acq_rel:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x8, var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var64
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB198_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldaxr x9, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp x9, x0
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel x9, x9, x0, le
+; MSVC-OUTLINE-ATOMICS-NEXT:    stlxr w10, x9, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB198_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw min ptr @var64, i64 %offset acq_rel
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
 
-; CHECK: ldsminal x0, x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
   ret void
 }
 
 define dso_local i8 @test_atomic_load_min_i8_acquire(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_min_i8_acquire:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var8
+; CHECK-NEXT:    add x8, x8, :lo12:var8
+; CHECK-NEXT:    ldsminab w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i8_acquire:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x9, var8
 ; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var8
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB199_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldaxrb w10, [x9]
 ; OUTLINE-ATOMICS-NEXT:    sxtb w8, w10
 ; OUTLINE-ATOMICS-NEXT:    cmp w8, w0, sxtb
 ; OUTLINE-ATOMICS-NEXT:    csel w10, w10, w0, le
 ; OUTLINE-ATOMICS-NEXT:    stxrb w11, w10, [x9]
-; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB199_1
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
 ; OUTLINE-ATOMICS-NEXT:    mov w0, w8
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i8_acquire:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x9, var8
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var8
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB199_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldaxrb w10, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    sxtb w8, w10
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp w8, w0, sxtb
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel w10, w10, w0, le
+; MSVC-OUTLINE-ATOMICS-NEXT:    stxrb w11, w10, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB199_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw min ptr @var8, i8 %offset acquire
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
 
-; CHECK: ldsminab w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i8 %old
 }
 
 define dso_local i16 @test_atomic_load_min_i16_acquire(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_min_i16_acquire:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var16
+; CHECK-NEXT:    add x8, x8, :lo12:var16
+; CHECK-NEXT:    ldsminah w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i16_acquire:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x9, var16
 ; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var16
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB200_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldaxrh w10, [x9]
 ; OUTLINE-ATOMICS-NEXT:    sxth w8, w10
 ; OUTLINE-ATOMICS-NEXT:    cmp w8, w0, sxth
 ; OUTLINE-ATOMICS-NEXT:    csel w10, w10, w0, le
 ; OUTLINE-ATOMICS-NEXT:    stxrh w11, w10, [x9]
-; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB200_1
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
 ; OUTLINE-ATOMICS-NEXT:    mov w0, w8
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i16_acquire:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x9, var16
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var16
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB200_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldaxrh w10, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    sxth w8, w10
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp w8, w0, sxth
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel w10, w10, w0, le
+; MSVC-OUTLINE-ATOMICS-NEXT:    stxrh w11, w10, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB200_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw min ptr @var16, i16 %offset acquire
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
 
-; CHECK: ldsminah w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i16 %old
 }
 
 define dso_local i32 @test_atomic_load_min_i32_acquire(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_min_i32_acquire:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var32
+; CHECK-NEXT:    add x8, x8, :lo12:var32
+; CHECK-NEXT:    ldsmina w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i32_acquire:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x9, var32
 ; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var32
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB201_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldaxr w8, [x9]
 ; OUTLINE-ATOMICS-NEXT:    cmp w8, w0
 ; OUTLINE-ATOMICS-NEXT:    csel w10, w8, w0, le
 ; OUTLINE-ATOMICS-NEXT:    stxr w11, w10, [x9]
-; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB201_1
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
 ; OUTLINE-ATOMICS-NEXT:    mov w0, w8
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i32_acquire:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x9, var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var32
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB201_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldaxr w8, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp w8, w0
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel w10, w8, w0, le
+; MSVC-OUTLINE-ATOMICS-NEXT:    stxr w11, w10, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB201_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw min ptr @var32, i32 %offset acquire
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
 
-; CHECK: ldsmina w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i32 %old
 }
 
 define dso_local i64 @test_atomic_load_min_i64_acquire(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_min_i64_acquire:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var64
+; CHECK-NEXT:    add x8, x8, :lo12:var64
+; CHECK-NEXT:    ldsmina x0, x0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i64_acquire:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x9, var64
 ; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var64
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB202_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldaxr x8, [x9]
 ; OUTLINE-ATOMICS-NEXT:    cmp x8, x0
 ; OUTLINE-ATOMICS-NEXT:    csel x10, x8, x0, le
 ; OUTLINE-ATOMICS-NEXT:    stxr w11, x10, [x9]
-; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB202_1
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
 ; OUTLINE-ATOMICS-NEXT:    mov x0, x8
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i64_acquire:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x9, var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var64
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB202_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldaxr x8, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp x8, x0
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel x10, x8, x0, le
+; MSVC-OUTLINE-ATOMICS-NEXT:    stxr w11, x10, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB202_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    mov x0, x8
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw min ptr @var64, i64 %offset acquire
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
 
-; CHECK: ldsmina x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i64 %old
 }
 
 define dso_local void @test_atomic_load_min_i32_noret_acquire(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_min_i32_noret_acquire:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var32
+; CHECK-NEXT:    add x8, x8, :lo12:var32
+; CHECK-NEXT:    ldsmina w0, w8, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i32_noret_acquire:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x8, var32
 ; OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var32
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB203_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldaxr w9, [x8]
 ; OUTLINE-ATOMICS-NEXT:    cmp w9, w0
 ; OUTLINE-ATOMICS-NEXT:    csel w9, w9, w0, le
 ; OUTLINE-ATOMICS-NEXT:    stxr w10, w9, [x8]
-; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB[[LOOPSTART]]
-; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
-; OUTLINE-ATOMICS-NEXT:    ret
+; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB203_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i32_noret_acquire:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x8, var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var32
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB203_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldaxr w9, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp w9, w0
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel w9, w9, w0, le
+; MSVC-OUTLINE-ATOMICS-NEXT:    stxr w10, w9, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB203_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw min ptr @var32, i32 %offset acquire
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
 
-; CHECK: ldsmina w0, w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
   ret void
 }
 
 define dso_local void @test_atomic_load_min_i64_noret_acquire(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_min_i64_noret_acquire:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var64
+; CHECK-NEXT:    add x8, x8, :lo12:var64
+; CHECK-NEXT:    ldsmina x0, x8, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i64_noret_acquire:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x8, var64
 ; OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var64
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB204_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldaxr x9, [x8]
 ; OUTLINE-ATOMICS-NEXT:    cmp x9, x0
 ; OUTLINE-ATOMICS-NEXT:    csel x9, x9, x0, le
 ; OUTLINE-ATOMICS-NEXT:    stxr w10, x9, [x8]
-; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB[[LOOPSTART]]
-; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
-; OUTLINE-ATOMICS-NEXT:    ret
+; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB204_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i64_noret_acquire:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x8, var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var64
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB204_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldaxr x9, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp x9, x0
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel x9, x9, x0, le
+; MSVC-OUTLINE-ATOMICS-NEXT:    stxr w10, x9, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB204_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw min ptr @var64, i64 %offset acquire
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
 
-; CHECK: ldsmina x0, x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
   ret void
 }
 
 define dso_local i8 @test_atomic_load_min_i8_monotonic(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_min_i8_monotonic:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var8
+; CHECK-NEXT:    add x8, x8, :lo12:var8
+; CHECK-NEXT:    ldsminb w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i8_monotonic:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x9, var8
 ; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var8
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB205_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldxrb w10, [x9]
 ; OUTLINE-ATOMICS-NEXT:    sxtb w8, w10
 ; OUTLINE-ATOMICS-NEXT:    cmp w8, w0, sxtb
 ; OUTLINE-ATOMICS-NEXT:    csel w10, w10, w0, le
 ; OUTLINE-ATOMICS-NEXT:    stxrb w11, w10, [x9]
-; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB205_1
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
 ; OUTLINE-ATOMICS-NEXT:    mov w0, w8
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i8_monotonic:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x9, var8
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var8
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB205_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldxrb w10, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    sxtb w8, w10
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp w8, w0, sxtb
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel w10, w10, w0, le
+; MSVC-OUTLINE-ATOMICS-NEXT:    stxrb w11, w10, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB205_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw min ptr @var8, i8 %offset monotonic
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
 
-; CHECK: ldsminb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i8 %old
 }
 
 define dso_local i16 @test_atomic_load_min_i16_monotonic(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_min_i16_monotonic:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var16
+; CHECK-NEXT:    add x8, x8, :lo12:var16
+; CHECK-NEXT:    ldsminh w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i16_monotonic:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x9, var16
 ; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var16
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB206_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldxrh w10, [x9]
 ; OUTLINE-ATOMICS-NEXT:    sxth w8, w10
 ; OUTLINE-ATOMICS-NEXT:    cmp w8, w0, sxth
 ; OUTLINE-ATOMICS-NEXT:    csel w10, w10, w0, le
 ; OUTLINE-ATOMICS-NEXT:    stxrh w11, w10, [x9]
-; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB206_1
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
 ; OUTLINE-ATOMICS-NEXT:    mov w0, w8
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i16_monotonic:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x9, var16
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var16
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB206_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldxrh w10, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    sxth w8, w10
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp w8, w0, sxth
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel w10, w10, w0, le
+; MSVC-OUTLINE-ATOMICS-NEXT:    stxrh w11, w10, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB206_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw min ptr @var16, i16 %offset monotonic
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
 
-; CHECK: ldsminh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i16 %old
 }
 
 define dso_local i32 @test_atomic_load_min_i32_monotonic(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_min_i32_monotonic:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var32
+; CHECK-NEXT:    add x8, x8, :lo12:var32
+; CHECK-NEXT:    ldsmin w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i32_monotonic:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x9, var32
 ; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var32
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB207_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldxr w8, [x9]
 ; OUTLINE-ATOMICS-NEXT:    cmp w8, w0
 ; OUTLINE-ATOMICS-NEXT:    csel w10, w8, w0, le
 ; OUTLINE-ATOMICS-NEXT:    stxr w11, w10, [x9]
-; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB207_1
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
 ; OUTLINE-ATOMICS-NEXT:    mov w0, w8
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i32_monotonic:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x9, var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var32
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB207_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldxr w8, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp w8, w0
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel w10, w8, w0, le
+; MSVC-OUTLINE-ATOMICS-NEXT:    stxr w11, w10, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB207_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw min ptr @var32, i32 %offset monotonic
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
 
-; CHECK: ldsmin w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i32 %old
 }
 
 define dso_local i64 @test_atomic_load_min_i64_monotonic(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_min_i64_monotonic:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var64
+; CHECK-NEXT:    add x8, x8, :lo12:var64
+; CHECK-NEXT:    ldsmin x0, x0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i64_monotonic:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x9, var64
 ; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var64
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB208_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldxr x8, [x9]
 ; OUTLINE-ATOMICS-NEXT:    cmp x8, x0
 ; OUTLINE-ATOMICS-NEXT:    csel x10, x8, x0, le
 ; OUTLINE-ATOMICS-NEXT:    stxr w11, x10, [x9]
-; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB208_1
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
 ; OUTLINE-ATOMICS-NEXT:    mov x0, x8
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i64_monotonic:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x9, var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var64
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB208_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldxr x8, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp x8, x0
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel x10, x8, x0, le
+; MSVC-OUTLINE-ATOMICS-NEXT:    stxr w11, x10, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB208_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    mov x0, x8
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw min ptr @var64, i64 %offset monotonic
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
 
-; CHECK: ldsmin x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i64 %old
 }
 
 define dso_local void @test_atomic_load_min_i32_noret_monotonic(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_min_i32_noret_monotonic:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var32
+; CHECK-NEXT:    add x8, x8, :lo12:var32
+; CHECK-NEXT:    ldsmin w0, w8, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i32_noret_monotonic:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x8, var32
 ; OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var32
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB209_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldxr w9, [x8]
 ; OUTLINE-ATOMICS-NEXT:    cmp w9, w0
 ; OUTLINE-ATOMICS-NEXT:    csel w9, w9, w0, le
 ; OUTLINE-ATOMICS-NEXT:    stxr w10, w9, [x8]
-; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB[[LOOPSTART]]
-; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
-; OUTLINE-ATOMICS-NEXT:    ret
+; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB209_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i32_noret_monotonic:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x8, var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var32
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB209_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldxr w9, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp w9, w0
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel w9, w9, w0, le
+; MSVC-OUTLINE-ATOMICS-NEXT:    stxr w10, w9, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB209_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw min ptr @var32, i32 %offset monotonic
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
 
-; CHECK: ldsmin w{{[0-9]+}}, w{{[1-9][0-9]*}}, [x[[ADDR]]]
-; CHECK-NOT: dmb
   ret void
 }
 
 define dso_local void @test_atomic_load_min_i64_noret_monotonic(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_min_i64_noret_monotonic:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var64
+; CHECK-NEXT:    add x8, x8, :lo12:var64
+; CHECK-NEXT:    ldsmin x0, x8, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i64_noret_monotonic:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x8, var64
 ; OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var64
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB210_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldxr x9, [x8]
 ; OUTLINE-ATOMICS-NEXT:    cmp x9, x0
 ; OUTLINE-ATOMICS-NEXT:    csel x9, x9, x0, le
 ; OUTLINE-ATOMICS-NEXT:    stxr w10, x9, [x8]
-; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB[[LOOPSTART]]
-; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
-; OUTLINE-ATOMICS-NEXT:    ret
+; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB210_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i64_noret_monotonic:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x8, var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var64
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB210_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldxr x9, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp x9, x0
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel x9, x9, x0, le
+; MSVC-OUTLINE-ATOMICS-NEXT:    stxr w10, x9, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB210_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw min ptr @var64, i64 %offset monotonic
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
 
-; CHECK: ldsmin x{{[0-9]+}}, x{{[1-9][0-9]*}}, [x[[ADDR]]]
-; CHECK-NOT: dmb
   ret void
 }
 
 define dso_local i8 @test_atomic_load_min_i8_release(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_min_i8_release:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var8
+; CHECK-NEXT:    add x8, x8, :lo12:var8
+; CHECK-NEXT:    ldsminlb w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i8_release:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x9, var8
 ; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var8
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB211_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldxrb w10, [x9]
 ; OUTLINE-ATOMICS-NEXT:    sxtb w8, w10
 ; OUTLINE-ATOMICS-NEXT:    cmp w8, w0, sxtb
 ; OUTLINE-ATOMICS-NEXT:    csel w10, w10, w0, le
 ; OUTLINE-ATOMICS-NEXT:    stlxrb w11, w10, [x9]
-; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB211_1
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
 ; OUTLINE-ATOMICS-NEXT:    mov w0, w8
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i8_release:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x9, var8
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var8
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB211_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldxrb w10, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    sxtb w8, w10
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp w8, w0, sxtb
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel w10, w10, w0, le
+; MSVC-OUTLINE-ATOMICS-NEXT:    stlxrb w11, w10, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB211_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw min ptr @var8, i8 %offset release
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
 
-; CHECK: ldsminlb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i8 %old
 }
 
 define dso_local i16 @test_atomic_load_min_i16_release(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_min_i16_release:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var16
+; CHECK-NEXT:    add x8, x8, :lo12:var16
+; CHECK-NEXT:    ldsminlh w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i16_release:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x9, var16
 ; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var16
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB212_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldxrh w10, [x9]
 ; OUTLINE-ATOMICS-NEXT:    sxth w8, w10
 ; OUTLINE-ATOMICS-NEXT:    cmp w8, w0, sxth
 ; OUTLINE-ATOMICS-NEXT:    csel w10, w10, w0, le
 ; OUTLINE-ATOMICS-NEXT:    stlxrh w11, w10, [x9]
-; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB212_1
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
 ; OUTLINE-ATOMICS-NEXT:    mov w0, w8
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i16_release:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x9, var16
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var16
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB212_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldxrh w10, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    sxth w8, w10
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp w8, w0, sxth
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel w10, w10, w0, le
+; MSVC-OUTLINE-ATOMICS-NEXT:    stlxrh w11, w10, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB212_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw min ptr @var16, i16 %offset release
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
 
-; CHECK: ldsminlh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i16 %old
 }
 
 define dso_local i32 @test_atomic_load_min_i32_release(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_min_i32_release:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var32
+; CHECK-NEXT:    add x8, x8, :lo12:var32
+; CHECK-NEXT:    ldsminl w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i32_release:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x9, var32
 ; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var32
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB213_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldxr w8, [x9]
 ; OUTLINE-ATOMICS-NEXT:    cmp w8, w0
 ; OUTLINE-ATOMICS-NEXT:    csel w10, w8, w0, le
 ; OUTLINE-ATOMICS-NEXT:    stlxr w11, w10, [x9]
-; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB213_1
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
 ; OUTLINE-ATOMICS-NEXT:    mov w0, w8
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i32_release:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x9, var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var32
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB213_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldxr w8, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp w8, w0
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel w10, w8, w0, le
+; MSVC-OUTLINE-ATOMICS-NEXT:    stlxr w11, w10, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB213_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw min ptr @var32, i32 %offset release
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
 
-; CHECK: ldsminl w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i32 %old
 }
 
 define dso_local i64 @test_atomic_load_min_i64_release(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_min_i64_release:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var64
+; CHECK-NEXT:    add x8, x8, :lo12:var64
+; CHECK-NEXT:    ldsminl x0, x0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i64_release:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x9, var64
 ; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var64
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB214_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldxr x8, [x9]
 ; OUTLINE-ATOMICS-NEXT:    cmp x8, x0
 ; OUTLINE-ATOMICS-NEXT:    csel x10, x8, x0, le
 ; OUTLINE-ATOMICS-NEXT:    stlxr w11, x10, [x9]
-; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB214_1
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
 ; OUTLINE-ATOMICS-NEXT:    mov x0, x8
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i64_release:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x9, var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var64
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB214_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldxr x8, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp x8, x0
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel x10, x8, x0, le
+; MSVC-OUTLINE-ATOMICS-NEXT:    stlxr w11, x10, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB214_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    mov x0, x8
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw min ptr @var64, i64 %offset release
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
 
-; CHECK: ldsminl x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i64 %old
 }
 
 define dso_local void @test_atomic_load_min_i32_noret_release(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_min_i32_noret_release:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var32
+; CHECK-NEXT:    add x8, x8, :lo12:var32
+; CHECK-NEXT:    ldsminl w0, w8, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i32_noret_release:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x8, var32
 ; OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var32
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB215_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldxr w9, [x8]
 ; OUTLINE-ATOMICS-NEXT:    cmp w9, w0
 ; OUTLINE-ATOMICS-NEXT:    csel w9, w9, w0, le
 ; OUTLINE-ATOMICS-NEXT:    stlxr w10, w9, [x8]
-; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB[[LOOPSTART]]
-; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
-; OUTLINE-ATOMICS-NEXT:    ret
+; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB215_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i32_noret_release:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x8, var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var32
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB215_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldxr w9, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp w9, w0
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel w9, w9, w0, le
+; MSVC-OUTLINE-ATOMICS-NEXT:    stlxr w10, w9, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB215_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw min ptr @var32, i32 %offset release
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
 
-; CHECK: ldsminl w{{[0-9]+}}, w{{[1-9][0-9]*}}, [x[[ADDR]]]
-; CHECK-NOT: dmb
   ret void
 }
 
 define dso_local void @test_atomic_load_min_i64_noret_release(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_min_i64_noret_release:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var64
+; CHECK-NEXT:    add x8, x8, :lo12:var64
+; CHECK-NEXT:    ldsminl x0, x8, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i64_noret_release:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x8, var64
 ; OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var64
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB216_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldxr x9, [x8]
 ; OUTLINE-ATOMICS-NEXT:    cmp x9, x0
 ; OUTLINE-ATOMICS-NEXT:    csel x9, x9, x0, le
 ; OUTLINE-ATOMICS-NEXT:    stlxr w10, x9, [x8]
-; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB[[LOOPSTART]]
-; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
-; OUTLINE-ATOMICS-NEXT:    ret
+; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB216_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i64_noret_release:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x8, var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var64
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB216_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldxr x9, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp x9, x0
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel x9, x9, x0, le
+; MSVC-OUTLINE-ATOMICS-NEXT:    stlxr w10, x9, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB216_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw min ptr @var64, i64 %offset release
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
 
-; CHECK: ldsminl x{{[0-9]+}}, x{{[1-9][0-9]*}}, [x[[ADDR]]]
-; CHECK-NOT: dmb
   ret void
 }
 
 define dso_local i8 @test_atomic_load_min_i8_seq_cst(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_min_i8_seq_cst:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var8
+; CHECK-NEXT:    add x8, x8, :lo12:var8
+; CHECK-NEXT:    ldsminalb w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i8_seq_cst:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x9, var8
 ; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var8
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB217_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldaxrb w10, [x9]
 ; OUTLINE-ATOMICS-NEXT:    sxtb w8, w10
 ; OUTLINE-ATOMICS-NEXT:    cmp w8, w0, sxtb
 ; OUTLINE-ATOMICS-NEXT:    csel w10, w10, w0, le
 ; OUTLINE-ATOMICS-NEXT:    stlxrb w11, w10, [x9]
-; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB217_1
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
 ; OUTLINE-ATOMICS-NEXT:    mov w0, w8
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i8_seq_cst:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x9, var8
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var8
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB217_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldaxrb w10, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    sxtb w8, w10
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp w8, w0, sxtb
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel w10, w10, w0, le
+; MSVC-OUTLINE-ATOMICS-NEXT:    stlxrb w11, w10, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB217_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw min ptr @var8, i8 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
 
-; CHECK: ldsminalb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i8 %old
 }
 
 define dso_local i16 @test_atomic_load_min_i16_seq_cst(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_min_i16_seq_cst:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var16
+; CHECK-NEXT:    add x8, x8, :lo12:var16
+; CHECK-NEXT:    ldsminalh w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i16_seq_cst:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x9, var16
 ; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var16
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB218_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldaxrh w10, [x9]
 ; OUTLINE-ATOMICS-NEXT:    sxth w8, w10
 ; OUTLINE-ATOMICS-NEXT:    cmp w8, w0, sxth
 ; OUTLINE-ATOMICS-NEXT:    csel w10, w10, w0, le
 ; OUTLINE-ATOMICS-NEXT:    stlxrh w11, w10, [x9]
-; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB218_1
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
 ; OUTLINE-ATOMICS-NEXT:    mov w0, w8
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i16_seq_cst:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x9, var16
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var16
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB218_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldaxrh w10, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    sxth w8, w10
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp w8, w0, sxth
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel w10, w10, w0, le
+; MSVC-OUTLINE-ATOMICS-NEXT:    stlxrh w11, w10, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB218_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw min ptr @var16, i16 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
 
-; CHECK: ldsminalh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i16 %old
 }
 
 define dso_local i32 @test_atomic_load_min_i32_seq_cst(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_min_i32_seq_cst:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var32
+; CHECK-NEXT:    add x8, x8, :lo12:var32
+; CHECK-NEXT:    ldsminal w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i32_seq_cst:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x9, var32
 ; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var32
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB219_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldaxr w8, [x9]
 ; OUTLINE-ATOMICS-NEXT:    cmp w8, w0
 ; OUTLINE-ATOMICS-NEXT:    csel w10, w8, w0, le
 ; OUTLINE-ATOMICS-NEXT:    stlxr w11, w10, [x9]
-; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB219_1
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
 ; OUTLINE-ATOMICS-NEXT:    mov w0, w8
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i32_seq_cst:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x9, var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var32
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB219_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldaxr w8, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp w8, w0
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel w10, w8, w0, le
+; MSVC-OUTLINE-ATOMICS-NEXT:    stlxr w11, w10, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB219_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw min ptr @var32, i32 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
 
-; CHECK: ldsminal w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i32 %old
 }
 
 define dso_local i64 @test_atomic_load_min_i64_seq_cst(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_min_i64_seq_cst:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var64
+; CHECK-NEXT:    add x8, x8, :lo12:var64
+; CHECK-NEXT:    ldsminal x0, x0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i64_seq_cst:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x9, var64
 ; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var64
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB220_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldaxr x8, [x9]
 ; OUTLINE-ATOMICS-NEXT:    cmp x8, x0
 ; OUTLINE-ATOMICS-NEXT:    csel x10, x8, x0, le
 ; OUTLINE-ATOMICS-NEXT:    stlxr w11, x10, [x9]
-; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB220_1
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
 ; OUTLINE-ATOMICS-NEXT:    mov x0, x8
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i64_seq_cst:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    mov x8, x0
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x9, var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var64
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB220_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldaxr x0, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp x0, x8
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel x10, x0, x8, le
+; MSVC-OUTLINE-ATOMICS-NEXT:    stlxr w11, x10, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB220_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw min ptr @var64, i64 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
 
-; CHECK: ldsminal x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i64 %old
 }
 
 define dso_local void @test_atomic_load_min_i32_noret_seq_cst(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_min_i32_noret_seq_cst:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var32
+; CHECK-NEXT:    add x8, x8, :lo12:var32
+; CHECK-NEXT:    ldsminal w0, w8, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i32_noret_seq_cst:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x8, var32
 ; OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var32
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB221_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldaxr w9, [x8]
 ; OUTLINE-ATOMICS-NEXT:    cmp w9, w0
 ; OUTLINE-ATOMICS-NEXT:    csel w9, w9, w0, le
 ; OUTLINE-ATOMICS-NEXT:    stlxr w10, w9, [x8]
-; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB[[LOOPSTART]]
-; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
-; OUTLINE-ATOMICS-NEXT:    ret
+; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB221_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i32_noret_seq_cst:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x8, var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var32
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB221_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldaxr w9, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp w9, w0
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel w9, w9, w0, le
+; MSVC-OUTLINE-ATOMICS-NEXT:    stlxr w10, w9, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB221_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw min ptr @var32, i32 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
 
-; CHECK: ldsminal w0, w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
   ret void
 }
 
 define dso_local void @test_atomic_load_min_i64_noret_seq_cst(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_min_i64_noret_seq_cst:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var64
+; CHECK-NEXT:    add x8, x8, :lo12:var64
+; CHECK-NEXT:    ldsminal x0, x8, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i64_noret_seq_cst:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x8, var64
 ; OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var64
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB222_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldaxr x9, [x8]
 ; OUTLINE-ATOMICS-NEXT:    cmp x9, x0
 ; OUTLINE-ATOMICS-NEXT:    csel x9, x9, x0, le
 ; OUTLINE-ATOMICS-NEXT:    stlxr w10, x9, [x8]
-; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB[[LOOPSTART]]
-; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
-; OUTLINE-ATOMICS-NEXT:    ret
+; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB222_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i64_noret_seq_cst:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x8, var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var64
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB222_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldaxr x9, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp x9, x0
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel x9, x9, x0, le
+; MSVC-OUTLINE-ATOMICS-NEXT:    stlxr w10, x9, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB222_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw min ptr @var64, i64 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
 
-; CHECK: ldsminal x0, x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
   ret void
 }
 
 define dso_local i8 @test_atomic_load_or_i8_acq_rel(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_or_i8_acq_rel:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var8
+; CHECK-NEXT:    add x8, x8, :lo12:var8
+; CHECK-NEXT:    ldsetalb w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i8_acq_rel:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -5285,19 +8220,29 @@ define dso_local i8 @test_atomic_load_or_i8_acq_rel(i8 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldset1_acq_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i8_acq_rel:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var8
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var8
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldset1_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw or ptr @var8, i8 %offset acq_rel
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
 
-; CHECK: ldsetalb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i8 %old
 }
 
 define dso_local i16 @test_atomic_load_or_i16_acq_rel(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_or_i16_acq_rel:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var16
+; CHECK-NEXT:    add x8, x8, :lo12:var16
+; CHECK-NEXT:    ldsetalh w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i16_acq_rel:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -5306,19 +8251,29 @@ define dso_local i16 @test_atomic_load_or_i16_acq_rel(i16 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldset2_acq_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i16_acq_rel:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var16
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var16
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldset2_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw or ptr @var16, i16 %offset acq_rel
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
 
-; CHECK: ldsetalh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i16 %old
 }
 
 define dso_local i32 @test_atomic_load_or_i32_acq_rel(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_or_i32_acq_rel:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var32
+; CHECK-NEXT:    add x8, x8, :lo12:var32
+; CHECK-NEXT:    ldsetal w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i32_acq_rel:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -5327,19 +8282,29 @@ define dso_local i32 @test_atomic_load_or_i32_acq_rel(i32 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldset4_acq_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i32_acq_rel:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldset4_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw or ptr @var32, i32 %offset acq_rel
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
 
-; CHECK: ldsetal w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i32 %old
 }
 
 define dso_local i64 @test_atomic_load_or_i64_acq_rel(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_or_i64_acq_rel:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var64
+; CHECK-NEXT:    add x8, x8, :lo12:var64
+; CHECK-NEXT:    ldsetal x0, x0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i64_acq_rel:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -5348,19 +8313,29 @@ define dso_local i64 @test_atomic_load_or_i64_acq_rel(i64 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldset8_acq_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i64_acq_rel:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldset8_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw or ptr @var64, i64 %offset acq_rel
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
 
-; CHECK: ldsetal x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i64 %old
 }
 
 define dso_local void @test_atomic_load_or_i32_noret_acq_rel(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_or_i32_noret_acq_rel:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var32
+; CHECK-NEXT:    add x8, x8, :lo12:var32
+; CHECK-NEXT:    ldsetal w0, w8, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i32_noret_acq_rel:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -5369,18 +8344,28 @@ define dso_local void @test_atomic_load_or_i32_noret_acq_rel(i32 %offset) nounwi
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldset4_acq_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i32_noret_acq_rel:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldset4_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw or ptr @var32, i32 %offset acq_rel
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
 
-; CHECK: ldsetal w0, w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
   ret void
 }
 
 define dso_local void @test_atomic_load_or_i64_noret_acq_rel(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_or_i64_noret_acq_rel:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var64
+; CHECK-NEXT:    add x8, x8, :lo12:var64
+; CHECK-NEXT:    ldsetal x0, x8, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i64_noret_acq_rel:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -5389,18 +8374,28 @@ define dso_local void @test_atomic_load_or_i64_noret_acq_rel(i64 %offset) nounwi
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldset8_acq_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i64_noret_acq_rel:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldset8_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw or ptr @var64, i64 %offset acq_rel
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
 
-; CHECK: ldsetal x0, x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
   ret void
 }
 
 define dso_local i8 @test_atomic_load_or_i8_acquire(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_or_i8_acquire:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var8
+; CHECK-NEXT:    add x8, x8, :lo12:var8
+; CHECK-NEXT:    ldsetab w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i8_acquire:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -5409,19 +8404,29 @@ define dso_local i8 @test_atomic_load_or_i8_acquire(i8 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldset1_acq
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i8_acquire:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var8
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var8
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldset1_acq
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw or ptr @var8, i8 %offset acquire
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
 
-; CHECK: ldsetab w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i8 %old
 }
 
 define dso_local i16 @test_atomic_load_or_i16_acquire(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_or_i16_acquire:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var16
+; CHECK-NEXT:    add x8, x8, :lo12:var16
+; CHECK-NEXT:    ldsetah w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i16_acquire:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -5430,19 +8435,29 @@ define dso_local i16 @test_atomic_load_or_i16_acquire(i16 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldset2_acq
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i16_acquire:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var16
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var16
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldset2_acq
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw or ptr @var16, i16 %offset acquire
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
 
-; CHECK: ldsetah w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i16 %old
 }
 
 define dso_local i32 @test_atomic_load_or_i32_acquire(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_or_i32_acquire:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var32
+; CHECK-NEXT:    add x8, x8, :lo12:var32
+; CHECK-NEXT:    ldseta w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i32_acquire:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -5451,19 +8466,29 @@ define dso_local i32 @test_atomic_load_or_i32_acquire(i32 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldset4_acq
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i32_acquire:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldset4_acq
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw or ptr @var32, i32 %offset acquire
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
 
-; CHECK: ldseta w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i32 %old
 }
 
 define dso_local i64 @test_atomic_load_or_i64_acquire(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_or_i64_acquire:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var64
+; CHECK-NEXT:    add x8, x8, :lo12:var64
+; CHECK-NEXT:    ldseta x0, x0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i64_acquire:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -5472,19 +8497,29 @@ define dso_local i64 @test_atomic_load_or_i64_acquire(i64 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldset8_acq
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i64_acquire:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldset8_acq
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw or ptr @var64, i64 %offset acquire
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
 
-; CHECK: ldseta x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i64 %old
 }
 
 define dso_local void @test_atomic_load_or_i32_noret_acquire(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_or_i32_noret_acquire:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var32
+; CHECK-NEXT:    add x8, x8, :lo12:var32
+; CHECK-NEXT:    ldseta w0, w8, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i32_noret_acquire:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -5493,18 +8528,28 @@ define dso_local void @test_atomic_load_or_i32_noret_acquire(i32 %offset) nounwi
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldset4_acq
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i32_noret_acquire:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldset4_acq
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw or ptr @var32, i32 %offset acquire
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
 
-; CHECK: ldseta w0, w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
   ret void
 }
 
 define dso_local void @test_atomic_load_or_i64_noret_acquire(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_or_i64_noret_acquire:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var64
+; CHECK-NEXT:    add x8, x8, :lo12:var64
+; CHECK-NEXT:    ldseta x0, x8, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i64_noret_acquire:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -5513,18 +8558,28 @@ define dso_local void @test_atomic_load_or_i64_noret_acquire(i64 %offset) nounwi
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldset8_acq
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i64_noret_acquire:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldset8_acq
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw or ptr @var64, i64 %offset acquire
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
 
-; CHECK: ldseta x0, x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
   ret void
 }
 
 define dso_local i8 @test_atomic_load_or_i8_monotonic(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_or_i8_monotonic:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var8
+; CHECK-NEXT:    add x8, x8, :lo12:var8
+; CHECK-NEXT:    ldsetb w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i8_monotonic:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -5533,19 +8588,29 @@ define dso_local i8 @test_atomic_load_or_i8_monotonic(i8 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldset1_relax
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i8_monotonic:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var8
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var8
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldset1_relax
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw or ptr @var8, i8 %offset monotonic
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
 
-; CHECK: ldsetb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i8 %old
 }
 
 define dso_local i16 @test_atomic_load_or_i16_monotonic(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_or_i16_monotonic:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var16
+; CHECK-NEXT:    add x8, x8, :lo12:var16
+; CHECK-NEXT:    ldseth w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i16_monotonic:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -5554,19 +8619,29 @@ define dso_local i16 @test_atomic_load_or_i16_monotonic(i16 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldset2_relax
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i16_monotonic:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var16
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var16
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldset2_relax
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw or ptr @var16, i16 %offset monotonic
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
 
-; CHECK: ldseth w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i16 %old
 }
 
 define dso_local i32 @test_atomic_load_or_i32_monotonic(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_or_i32_monotonic:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var32
+; CHECK-NEXT:    add x8, x8, :lo12:var32
+; CHECK-NEXT:    ldset w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i32_monotonic:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -5575,19 +8650,29 @@ define dso_local i32 @test_atomic_load_or_i32_monotonic(i32 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldset4_relax
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i32_monotonic:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldset4_relax
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw or ptr @var32, i32 %offset monotonic
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
 
-; CHECK: ldset w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i32 %old
 }
 
 define dso_local i64 @test_atomic_load_or_i64_monotonic(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_or_i64_monotonic:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var64
+; CHECK-NEXT:    add x8, x8, :lo12:var64
+; CHECK-NEXT:    ldset x0, x0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i64_monotonic:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -5596,19 +8681,29 @@ define dso_local i64 @test_atomic_load_or_i64_monotonic(i64 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldset8_relax
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i64_monotonic:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldset8_relax
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw or ptr @var64, i64 %offset monotonic
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
 
-; CHECK: ldset x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i64 %old
 }
 
 define dso_local void @test_atomic_load_or_i32_noret_monotonic(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_or_i32_noret_monotonic:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var32
+; CHECK-NEXT:    add x8, x8, :lo12:var32
+; CHECK-NEXT:    ldset w0, w8, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i32_noret_monotonic:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -5617,18 +8712,28 @@ define dso_local void @test_atomic_load_or_i32_noret_monotonic(i32 %offset) noun
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldset4_relax
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i32_noret_monotonic:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldset4_relax
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw or ptr @var32, i32 %offset monotonic
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
 
-; CHECK: ldset w{{[0-9]+}}, w{{[1-9][0-9]*}}, [x[[ADDR]]]
-; CHECK-NOT: dmb
   ret void
 }
 
 define dso_local void @test_atomic_load_or_i64_noret_monotonic(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_or_i64_noret_monotonic:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var64
+; CHECK-NEXT:    add x8, x8, :lo12:var64
+; CHECK-NEXT:    ldset x0, x8, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i64_noret_monotonic:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -5637,18 +8742,28 @@ define dso_local void @test_atomic_load_or_i64_noret_monotonic(i64 %offset) noun
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldset8_relax
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i64_noret_monotonic:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldset8_relax
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw or ptr @var64, i64 %offset monotonic
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
 
-; CHECK: ldset x{{[0-9]+}}, x{{[1-9][0-9]*}}, [x[[ADDR]]]
-; CHECK-NOT: dmb
   ret void
 }
 
 define dso_local i8 @test_atomic_load_or_i8_release(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_or_i8_release:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var8
+; CHECK-NEXT:    add x8, x8, :lo12:var8
+; CHECK-NEXT:    ldsetlb w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i8_release:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -5657,19 +8772,29 @@ define dso_local i8 @test_atomic_load_or_i8_release(i8 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldset1_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i8_release:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var8
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var8
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldset1_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw or ptr @var8, i8 %offset release
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
 
-; CHECK: ldsetlb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i8 %old
 }
 
 define dso_local i16 @test_atomic_load_or_i16_release(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_or_i16_release:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var16
+; CHECK-NEXT:    add x8, x8, :lo12:var16
+; CHECK-NEXT:    ldsetlh w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i16_release:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -5678,19 +8803,29 @@ define dso_local i16 @test_atomic_load_or_i16_release(i16 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldset2_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i16_release:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var16
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var16
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldset2_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw or ptr @var16, i16 %offset release
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
 
-; CHECK: ldsetlh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i16 %old
 }
 
 define dso_local i32 @test_atomic_load_or_i32_release(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_or_i32_release:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var32
+; CHECK-NEXT:    add x8, x8, :lo12:var32
+; CHECK-NEXT:    ldsetl w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i32_release:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -5699,19 +8834,29 @@ define dso_local i32 @test_atomic_load_or_i32_release(i32 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldset4_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i32_release:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldset4_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw or ptr @var32, i32 %offset release
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
 
-; CHECK: ldsetl w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i32 %old
 }
 
 define dso_local i64 @test_atomic_load_or_i64_release(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_or_i64_release:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var64
+; CHECK-NEXT:    add x8, x8, :lo12:var64
+; CHECK-NEXT:    ldsetl x0, x0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i64_release:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -5720,19 +8865,29 @@ define dso_local i64 @test_atomic_load_or_i64_release(i64 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldset8_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i64_release:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldset8_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw or ptr @var64, i64 %offset release
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
 
-; CHECK: ldsetl x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i64 %old
 }
 
 define dso_local void @test_atomic_load_or_i32_noret_release(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_or_i32_noret_release:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var32
+; CHECK-NEXT:    add x8, x8, :lo12:var32
+; CHECK-NEXT:    ldsetl w0, w8, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i32_noret_release:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -5741,18 +8896,28 @@ define dso_local void @test_atomic_load_or_i32_noret_release(i32 %offset) nounwi
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldset4_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i32_noret_release:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldset4_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw or ptr @var32, i32 %offset release
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
 
-; CHECK: ldsetl w{{[0-9]+}}, w{{[1-9][0-9]*}}, [x[[ADDR]]]
-; CHECK-NOT: dmb
   ret void
 }
 
 define dso_local void @test_atomic_load_or_i64_noret_release(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_or_i64_noret_release:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var64
+; CHECK-NEXT:    add x8, x8, :lo12:var64
+; CHECK-NEXT:    ldsetl x0, x8, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i64_noret_release:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -5761,18 +8926,28 @@ define dso_local void @test_atomic_load_or_i64_noret_release(i64 %offset) nounwi
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldset8_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i64_noret_release:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldset8_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw or ptr @var64, i64 %offset release
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
 
-; CHECK: ldsetl x{{[0-9]+}}, x{{[1-9][0-9]*}}, [x[[ADDR]]]
-; CHECK-NOT: dmb
   ret void
 }
 
 define dso_local i8 @test_atomic_load_or_i8_seq_cst(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_or_i8_seq_cst:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var8
+; CHECK-NEXT:    add x8, x8, :lo12:var8
+; CHECK-NEXT:    ldsetalb w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i8_seq_cst:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -5781,19 +8956,30 @@ define dso_local i8 @test_atomic_load_or_i8_seq_cst(i8 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldset1_acq_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i8_seq_cst:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var8
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var8
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldset1_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw or ptr @var8, i8 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
 
-; CHECK: ldsetalb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i8 %old
 }
 
 define dso_local i16 @test_atomic_load_or_i16_seq_cst(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_or_i16_seq_cst:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var16
+; CHECK-NEXT:    add x8, x8, :lo12:var16
+; CHECK-NEXT:    ldsetalh w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i16_seq_cst:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -5802,19 +8988,30 @@ define dso_local i16 @test_atomic_load_or_i16_seq_cst(i16 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldset2_acq_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i16_seq_cst:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var16
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var16
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldset2_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw or ptr @var16, i16 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
 
-; CHECK: ldsetalh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i16 %old
 }
 
 define dso_local i32 @test_atomic_load_or_i32_seq_cst(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_or_i32_seq_cst:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var32
+; CHECK-NEXT:    add x8, x8, :lo12:var32
+; CHECK-NEXT:    ldsetal w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i32_seq_cst:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -5823,19 +9020,30 @@ define dso_local i32 @test_atomic_load_or_i32_seq_cst(i32 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldset4_acq_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i32_seq_cst:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldset4_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw or ptr @var32, i32 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
 
-; CHECK: ldsetal w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i32 %old
 }
 
 define dso_local i64 @test_atomic_load_or_i64_seq_cst(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_or_i64_seq_cst:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var64
+; CHECK-NEXT:    add x8, x8, :lo12:var64
+; CHECK-NEXT:    ldsetal x0, x0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i64_seq_cst:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -5844,19 +9052,30 @@ define dso_local i64 @test_atomic_load_or_i64_seq_cst(i64 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldset8_acq_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i64_seq_cst:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldset8_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw or ptr @var64, i64 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
 
-; CHECK: ldsetal x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i64 %old
 }
 
 define dso_local void @test_atomic_load_or_i32_noret_seq_cst(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_or_i32_noret_seq_cst:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var32
+; CHECK-NEXT:    add x8, x8, :lo12:var32
+; CHECK-NEXT:    ldsetal w0, w8, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i32_noret_seq_cst:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -5865,18 +9084,29 @@ define dso_local void @test_atomic_load_or_i32_noret_seq_cst(i32 %offset) nounwi
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldset4_acq_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i32_noret_seq_cst:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldset4_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw or ptr @var32, i32 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
 
-; CHECK: ldsetal w0, w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
   ret void
 }
 
 define dso_local void @test_atomic_load_or_i64_noret_seq_cst(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_or_i64_noret_seq_cst:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var64
+; CHECK-NEXT:    add x8, x8, :lo12:var64
+; CHECK-NEXT:    ldsetal x0, x8, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i64_noret_seq_cst:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -5885,18 +9115,30 @@ define dso_local void @test_atomic_load_or_i64_noret_seq_cst(i64 %offset) nounwi
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldset8_acq_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i64_noret_seq_cst:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldset8_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw or ptr @var64, i64 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
 
-; CHECK: ldsetal x0, x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
   ret void
 }
 
 define dso_local i8 @test_atomic_load_sub_i8_acq_rel(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_sub_i8_acq_rel:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    neg w8, w0
+; CHECK-NEXT:    adrp x9, var8
+; CHECK-NEXT:    add x9, x9, :lo12:var8
+; CHECK-NEXT:    ldaddalb w8, w0, [x9]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i8_acq_rel:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -5906,20 +9148,31 @@ define dso_local i8 @test_atomic_load_sub_i8_acq_rel(i8 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd1_acq_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i8_acq_rel:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    neg w0, w0
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var8
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var8
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd1_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
   %old = atomicrmw sub ptr @var8, i8 %offset acq_rel
-; CHECK-NOT: dmb
-; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]]
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
 
-; CHECK: ldaddalb w[[NEG]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
   ret i8 %old
 }
 
 define dso_local i16 @test_atomic_load_sub_i16_acq_rel(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_sub_i16_acq_rel:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    neg w8, w0
+; CHECK-NEXT:    adrp x9, var16
+; CHECK-NEXT:    add x9, x9, :lo12:var16
+; CHECK-NEXT:    ldaddalh w8, w0, [x9]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i16_acq_rel:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -5929,20 +9182,31 @@ define dso_local i16 @test_atomic_load_sub_i16_acq_rel(i16 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd2_acq_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i16_acq_rel:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    neg w0, w0
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var16
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var16
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd2_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
   %old = atomicrmw sub ptr @var16, i16 %offset acq_rel
-; CHECK-NOT: dmb
-; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]]
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
 
-; CHECK: ldaddalh w[[NEG]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
   ret i16 %old
 }
 
 define dso_local i32 @test_atomic_load_sub_i32_acq_rel(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_sub_i32_acq_rel:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    neg w8, w0
+; CHECK-NEXT:    adrp x9, var32
+; CHECK-NEXT:    add x9, x9, :lo12:var32
+; CHECK-NEXT:    ldaddal w8, w0, [x9]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i32_acq_rel:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -5952,20 +9216,31 @@ define dso_local i32 @test_atomic_load_sub_i32_acq_rel(i32 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd4_acq_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i32_acq_rel:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    neg w0, w0
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd4_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
   %old = atomicrmw sub ptr @var32, i32 %offset acq_rel
-; CHECK-NOT: dmb
-; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]]
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
 
-; CHECK: ldaddal w[[NEG]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
   ret i32 %old
 }
 
 define dso_local i64 @test_atomic_load_sub_i64_acq_rel(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_sub_i64_acq_rel:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    neg x8, x0
+; CHECK-NEXT:    adrp x9, var64
+; CHECK-NEXT:    add x9, x9, :lo12:var64
+; CHECK-NEXT:    ldaddal x8, x0, [x9]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i64_acq_rel:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -5975,20 +9250,31 @@ define dso_local i64 @test_atomic_load_sub_i64_acq_rel(i64 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd8_acq_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i64_acq_rel:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    neg x0, x0
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd8_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
   %old = atomicrmw sub ptr @var64, i64 %offset acq_rel
-; CHECK-NOT: dmb
-; CHECK: neg x[[NEG:[0-9]+]], x[[OLD:[0-9]+]]
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
 
-; CHECK: ldaddal x[[NEG]], x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
   ret i64 %old
 }
 
 define dso_local void @test_atomic_load_sub_i32_noret_acq_rel(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_sub_i32_noret_acq_rel:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    neg w8, w0
+; CHECK-NEXT:    adrp x9, var32
+; CHECK-NEXT:    add x9, x9, :lo12:var32
+; CHECK-NEXT:    ldaddal w8, w8, [x9]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i32_noret_acq_rel:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -5998,20 +9284,31 @@ define dso_local void @test_atomic_load_sub_i32_noret_acq_rel(i32 %offset) nounw
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd4_acq_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i32_noret_acq_rel:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    neg w0, w0
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd4_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
   atomicrmw sub ptr @var32, i32 %offset acq_rel
-; CHECK-NOT: dmb
-; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]]
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
 
-; CHECK: ldaddal w[[NEG]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
   ret void
 }
 
 define dso_local void @test_atomic_load_sub_i64_noret_acq_rel(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_sub_i64_noret_acq_rel:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    neg x8, x0
+; CHECK-NEXT:    adrp x9, var64
+; CHECK-NEXT:    add x9, x9, :lo12:var64
+; CHECK-NEXT:    ldaddal x8, x8, [x9]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i64_noret_acq_rel:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -6021,20 +9318,31 @@ define dso_local void @test_atomic_load_sub_i64_noret_acq_rel(i64 %offset) nounw
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd8_acq_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i64_noret_acq_rel:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    neg x0, x0
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd8_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
   atomicrmw sub ptr @var64, i64 %offset acq_rel
-; CHECK-NOT: dmb
-; CHECK: neg x[[NEG:[0-9]+]], x[[OLD:[0-9]+]]
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
 
-; CHECK: ldaddal x[[NEG]], x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
   ret void
 }
 
 define dso_local i8 @test_atomic_load_sub_i8_acquire(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_sub_i8_acquire:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    neg w8, w0
+; CHECK-NEXT:    adrp x9, var8
+; CHECK-NEXT:    add x9, x9, :lo12:var8
+; CHECK-NEXT:    ldaddab w8, w0, [x9]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i8_acquire:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -6044,20 +9352,31 @@ define dso_local i8 @test_atomic_load_sub_i8_acquire(i8 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd1_acq
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i8_acquire:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    neg w0, w0
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var8
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var8
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd1_acq
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
   %old = atomicrmw sub ptr @var8, i8 %offset acquire
-; CHECK-NOT: dmb
-; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]]
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
 
-; CHECK: ldaddab w[[NEG]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
   ret i8 %old
 }
 
 define dso_local i16 @test_atomic_load_sub_i16_acquire(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_sub_i16_acquire:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    neg w8, w0
+; CHECK-NEXT:    adrp x9, var16
+; CHECK-NEXT:    add x9, x9, :lo12:var16
+; CHECK-NEXT:    ldaddah w8, w0, [x9]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i16_acquire:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -6067,20 +9386,31 @@ define dso_local i16 @test_atomic_load_sub_i16_acquire(i16 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd2_acq
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i16_acquire:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    neg w0, w0
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var16
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var16
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd2_acq
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
   %old = atomicrmw sub ptr @var16, i16 %offset acquire
-; CHECK-NOT: dmb
-; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]]
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
 
-; CHECK: ldaddah w[[NEG]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
   ret i16 %old
 }
 
 define dso_local i32 @test_atomic_load_sub_i32_acquire(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_sub_i32_acquire:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    neg w8, w0
+; CHECK-NEXT:    adrp x9, var32
+; CHECK-NEXT:    add x9, x9, :lo12:var32
+; CHECK-NEXT:    ldadda w8, w0, [x9]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i32_acquire:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -6090,20 +9420,31 @@ define dso_local i32 @test_atomic_load_sub_i32_acquire(i32 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd4_acq
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i32_acquire:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    neg w0, w0
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd4_acq
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
   %old = atomicrmw sub ptr @var32, i32 %offset acquire
-; CHECK-NOT: dmb
-; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]]
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
 
-; CHECK: ldadda w[[NEG]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
   ret i32 %old
 }
 
 define dso_local i64 @test_atomic_load_sub_i64_acquire(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_sub_i64_acquire:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    neg x8, x0
+; CHECK-NEXT:    adrp x9, var64
+; CHECK-NEXT:    add x9, x9, :lo12:var64
+; CHECK-NEXT:    ldadda x8, x0, [x9]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i64_acquire:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -6113,20 +9454,31 @@ define dso_local i64 @test_atomic_load_sub_i64_acquire(i64 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd8_acq
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i64_acquire:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    neg x0, x0
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd8_acq
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
   %old = atomicrmw sub ptr @var64, i64 %offset acquire
-; CHECK-NOT: dmb
-; CHECK: neg x[[NEG:[0-9]+]], x[[OLD:[0-9]+]]
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
 
-; CHECK: ldadda x[[NEG]], x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
   ret i64 %old
 }
 
 define dso_local void @test_atomic_load_sub_i32_noret_acquire(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_sub_i32_noret_acquire:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    neg w8, w0
+; CHECK-NEXT:    adrp x9, var32
+; CHECK-NEXT:    add x9, x9, :lo12:var32
+; CHECK-NEXT:    ldadda w8, w8, [x9]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i32_noret_acquire:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -6136,20 +9488,31 @@ define dso_local void @test_atomic_load_sub_i32_noret_acquire(i32 %offset) nounw
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd4_acq
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i32_noret_acquire:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    neg w0, w0
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd4_acq
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
   atomicrmw sub ptr @var32, i32 %offset acquire
-; CHECK-NOT: dmb
-; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]]
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
 
-; CHECK: ldadda w[[NEG]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
   ret void
 }
 
 define dso_local void @test_atomic_load_sub_i64_noret_acquire(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_sub_i64_noret_acquire:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    neg x8, x0
+; CHECK-NEXT:    adrp x9, var64
+; CHECK-NEXT:    add x9, x9, :lo12:var64
+; CHECK-NEXT:    ldadda x8, x8, [x9]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i64_noret_acquire:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -6159,20 +9522,31 @@ define dso_local void @test_atomic_load_sub_i64_noret_acquire(i64 %offset) nounw
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd8_acq
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i64_noret_acquire:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    neg x0, x0
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd8_acq
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
   atomicrmw sub ptr @var64, i64 %offset acquire
-; CHECK-NOT: dmb
-; CHECK: neg x[[NEG:[0-9]+]], x[[OLD:[0-9]+]]
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
 
-; CHECK: ldadda x[[NEG]], x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
   ret void
 }
 
 define dso_local i8 @test_atomic_load_sub_i8_monotonic(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_sub_i8_monotonic:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    neg w8, w0
+; CHECK-NEXT:    adrp x9, var8
+; CHECK-NEXT:    add x9, x9, :lo12:var8
+; CHECK-NEXT:    ldaddb w8, w0, [x9]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i8_monotonic:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -6182,20 +9556,31 @@ define dso_local i8 @test_atomic_load_sub_i8_monotonic(i8 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd1_relax
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i8_monotonic:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    neg w0, w0
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var8
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var8
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd1_relax
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
   %old = atomicrmw sub ptr @var8, i8 %offset monotonic
-; CHECK-NOT: dmb
-; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]]
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
 
-; CHECK: ldaddb w[[NEG]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
   ret i8 %old
 }
 
 define dso_local i16 @test_atomic_load_sub_i16_monotonic(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_sub_i16_monotonic:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    neg w8, w0
+; CHECK-NEXT:    adrp x9, var16
+; CHECK-NEXT:    add x9, x9, :lo12:var16
+; CHECK-NEXT:    ldaddh w8, w0, [x9]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i16_monotonic:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -6205,20 +9590,31 @@ define dso_local i16 @test_atomic_load_sub_i16_monotonic(i16 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd2_relax
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i16_monotonic:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    neg w0, w0
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var16
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var16
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd2_relax
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
   %old = atomicrmw sub ptr @var16, i16 %offset monotonic
-; CHECK-NOT: dmb
-; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]]
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
 
-; CHECK: ldaddh w[[NEG]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
   ret i16 %old
 }
 
 define dso_local i32 @test_atomic_load_sub_i32_monotonic(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_sub_i32_monotonic:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    neg w8, w0
+; CHECK-NEXT:    adrp x9, var32
+; CHECK-NEXT:    add x9, x9, :lo12:var32
+; CHECK-NEXT:    ldadd w8, w0, [x9]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i32_monotonic:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -6228,20 +9624,31 @@ define dso_local i32 @test_atomic_load_sub_i32_monotonic(i32 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd4_relax
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i32_monotonic:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    neg w0, w0
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd4_relax
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
   %old = atomicrmw sub ptr @var32, i32 %offset monotonic
-; CHECK-NOT: dmb
-; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]]
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
 
-; CHECK: ldadd w[[NEG]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
   ret i32 %old
 }
 
 define dso_local i64 @test_atomic_load_sub_i64_monotonic(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_sub_i64_monotonic:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    neg x8, x0
+; CHECK-NEXT:    adrp x9, var64
+; CHECK-NEXT:    add x9, x9, :lo12:var64
+; CHECK-NEXT:    ldadd x8, x0, [x9]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i64_monotonic:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -6251,20 +9658,31 @@ define dso_local i64 @test_atomic_load_sub_i64_monotonic(i64 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd8_relax
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i64_monotonic:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    neg x0, x0
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd8_relax
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
   %old = atomicrmw sub ptr @var64, i64 %offset monotonic
-; CHECK-NOT: dmb
-; CHECK: neg x[[NEG:[0-9]+]], x[[OLD:[0-9]+]]
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
 
-; CHECK: ldadd x[[NEG]], x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
   ret i64 %old
 }
 
 define dso_local void @test_atomic_load_sub_i32_noret_monotonic(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_sub_i32_noret_monotonic:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    neg w8, w0
+; CHECK-NEXT:    adrp x9, var32
+; CHECK-NEXT:    add x9, x9, :lo12:var32
+; CHECK-NEXT:    ldadd w8, w8, [x9]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i32_noret_monotonic:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -6274,20 +9692,31 @@ define dso_local void @test_atomic_load_sub_i32_noret_monotonic(i32 %offset) nou
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd4_relax
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i32_noret_monotonic:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    neg w0, w0
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd4_relax
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
   atomicrmw sub ptr @var32, i32 %offset monotonic
-; CHECK-NOT: dmb
-; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]]
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
 
-; CHECK: ldadd w{{[0-9]+}}, w[[NEW:[1-9][0-9]*]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
   ret void
 }
 
 define dso_local void @test_atomic_load_sub_i64_noret_monotonic(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_sub_i64_noret_monotonic:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    neg x8, x0
+; CHECK-NEXT:    adrp x9, var64
+; CHECK-NEXT:    add x9, x9, :lo12:var64
+; CHECK-NEXT:    ldadd x8, x8, [x9]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i64_noret_monotonic:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -6297,20 +9726,31 @@ define dso_local void @test_atomic_load_sub_i64_noret_monotonic(i64 %offset) nou
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd8_relax
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i64_noret_monotonic:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    neg x0, x0
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd8_relax
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
   atomicrmw sub ptr @var64, i64 %offset monotonic
-; CHECK-NOT: dmb
-; CHECK: neg x[[NEG:[0-9]+]], x[[OLD:[0-9]+]]
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
 
-; CHECK: ldadd x{{[0-9]+}}, x[[NEW:[1-9][0-9]*]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
   ret void
 }
 
 define dso_local i8 @test_atomic_load_sub_i8_release(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_sub_i8_release:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    neg w8, w0
+; CHECK-NEXT:    adrp x9, var8
+; CHECK-NEXT:    add x9, x9, :lo12:var8
+; CHECK-NEXT:    ldaddlb w8, w0, [x9]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i8_release:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -6320,20 +9760,31 @@ define dso_local i8 @test_atomic_load_sub_i8_release(i8 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd1_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i8_release:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    neg w0, w0
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var8
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var8
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd1_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
   %old = atomicrmw sub ptr @var8, i8 %offset release
-; CHECK-NOT: dmb
-; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]]
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
 
-; CHECK: ldaddlb w[[NEG]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
   ret i8 %old
 }
 
 define dso_local i16 @test_atomic_load_sub_i16_release(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_sub_i16_release:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    neg w8, w0
+; CHECK-NEXT:    adrp x9, var16
+; CHECK-NEXT:    add x9, x9, :lo12:var16
+; CHECK-NEXT:    ldaddlh w8, w0, [x9]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i16_release:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -6343,20 +9794,31 @@ define dso_local i16 @test_atomic_load_sub_i16_release(i16 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd2_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i16_release:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    neg w0, w0
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var16
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var16
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd2_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
   %old = atomicrmw sub ptr @var16, i16 %offset release
-; CHECK-NOT: dmb
-; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]]
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
 
-; CHECK: ldaddlh w[[NEG]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
   ret i16 %old
 }
 
 define dso_local i32 @test_atomic_load_sub_i32_release(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_sub_i32_release:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    neg w8, w0
+; CHECK-NEXT:    adrp x9, var32
+; CHECK-NEXT:    add x9, x9, :lo12:var32
+; CHECK-NEXT:    ldaddl w8, w0, [x9]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i32_release:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -6366,20 +9828,31 @@ define dso_local i32 @test_atomic_load_sub_i32_release(i32 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd4_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i32_release:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    neg w0, w0
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd4_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
   %old = atomicrmw sub ptr @var32, i32 %offset release
-; CHECK-NOT: dmb
-; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]]
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
 
-; CHECK: ldaddl w[[NEG]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
   ret i32 %old
 }
 
 define dso_local i64 @test_atomic_load_sub_i64_release(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_sub_i64_release:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    neg x8, x0
+; CHECK-NEXT:    adrp x9, var64
+; CHECK-NEXT:    add x9, x9, :lo12:var64
+; CHECK-NEXT:    ldaddl x8, x0, [x9]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i64_release:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -6389,20 +9862,31 @@ define dso_local i64 @test_atomic_load_sub_i64_release(i64 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd8_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i64_release:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    neg x0, x0
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd8_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
   %old = atomicrmw sub ptr @var64, i64 %offset release
-; CHECK-NOT: dmb
-; CHECK: neg x[[NEG:[0-9]+]], x[[OLD:[0-9]+]]
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
 
-; CHECK: ldaddl x[[NEG]], x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
   ret i64 %old
 }
 
 define dso_local void @test_atomic_load_sub_i32_noret_release(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_sub_i32_noret_release:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    neg w8, w0
+; CHECK-NEXT:    adrp x9, var32
+; CHECK-NEXT:    add x9, x9, :lo12:var32
+; CHECK-NEXT:    ldaddl w8, w8, [x9]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i32_noret_release:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -6412,20 +9896,31 @@ define dso_local void @test_atomic_load_sub_i32_noret_release(i32 %offset) nounw
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd4_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i32_noret_release:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    neg w0, w0
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd4_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
   atomicrmw sub ptr @var32, i32 %offset release
-; CHECK-NOT: dmb
-; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]]
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
 
-; CHECK: ldaddl w{{[0-9]*}}, w[[NEW:[1-9][0-9]*]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
   ret void
 }
 
 define dso_local void @test_atomic_load_sub_i64_noret_release(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_sub_i64_noret_release:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    neg x8, x0
+; CHECK-NEXT:    adrp x9, var64
+; CHECK-NEXT:    add x9, x9, :lo12:var64
+; CHECK-NEXT:    ldaddl x8, x8, [x9]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i64_noret_release:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -6435,20 +9930,31 @@ define dso_local void @test_atomic_load_sub_i64_noret_release(i64 %offset) nounw
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd8_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i64_noret_release:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    neg x0, x0
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd8_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
   atomicrmw sub ptr @var64, i64 %offset release
-; CHECK-NOT: dmb
-; CHECK: neg x[[NEG:[0-9]+]], x[[OLD:[0-9]+]]
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
 
-; CHECK: ldaddl x{{[0-9]*}}, x[[NEW:[1-9][0-9]*]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
   ret void
 }
 
 define dso_local i8 @test_atomic_load_sub_i8_seq_cst(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_sub_i8_seq_cst:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    neg w8, w0
+; CHECK-NEXT:    adrp x9, var8
+; CHECK-NEXT:    add x9, x9, :lo12:var8
+; CHECK-NEXT:    ldaddalb w8, w0, [x9]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i8_seq_cst:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -6458,20 +9964,32 @@ define dso_local i8 @test_atomic_load_sub_i8_seq_cst(i8 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd1_acq_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i8_seq_cst:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    neg w0, w0
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var8
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var8
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd1_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
   %old = atomicrmw sub ptr @var8, i8 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]]
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
 
-; CHECK: ldaddalb w[[NEG]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
   ret i8 %old
 }
 
 define dso_local i16 @test_atomic_load_sub_i16_seq_cst(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_sub_i16_seq_cst:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    neg w8, w0
+; CHECK-NEXT:    adrp x9, var16
+; CHECK-NEXT:    add x9, x9, :lo12:var16
+; CHECK-NEXT:    ldaddalh w8, w0, [x9]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i16_seq_cst:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -6481,20 +9999,32 @@ define dso_local i16 @test_atomic_load_sub_i16_seq_cst(i16 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd2_acq_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i16_seq_cst:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    neg w0, w0
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var16
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var16
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd2_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
   %old = atomicrmw sub ptr @var16, i16 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]]
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
 
-; CHECK: ldaddalh w[[NEG]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
   ret i16 %old
 }
 
 define dso_local i32 @test_atomic_load_sub_i32_seq_cst(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_sub_i32_seq_cst:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    neg w8, w0
+; CHECK-NEXT:    adrp x9, var32
+; CHECK-NEXT:    add x9, x9, :lo12:var32
+; CHECK-NEXT:    ldaddal w8, w0, [x9]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i32_seq_cst:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -6504,20 +10034,32 @@ define dso_local i32 @test_atomic_load_sub_i32_seq_cst(i32 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd4_acq_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i32_seq_cst:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    neg w0, w0
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd4_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
   %old = atomicrmw sub ptr @var32, i32 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]]
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
 
-; CHECK: ldaddal w[[NEG]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
   ret i32 %old
 }
 
 define dso_local i64 @test_atomic_load_sub_i64_seq_cst(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_sub_i64_seq_cst:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    neg x8, x0
+; CHECK-NEXT:    adrp x9, var64
+; CHECK-NEXT:    add x9, x9, :lo12:var64
+; CHECK-NEXT:    ldaddal x8, x0, [x9]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i64_seq_cst:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -6527,20 +10069,32 @@ define dso_local i64 @test_atomic_load_sub_i64_seq_cst(i64 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd8_acq_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i64_seq_cst:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    neg x0, x0
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd8_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
   %old = atomicrmw sub ptr @var64, i64 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: neg x[[NEG:[0-9]+]], x[[OLD:[0-9]+]]
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
 
-; CHECK: ldaddal x[[NEG]], x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
   ret i64 %old
 }
 
 define dso_local void @test_atomic_load_sub_i32_noret_seq_cst(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_sub_i32_noret_seq_cst:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    neg w8, w0
+; CHECK-NEXT:    adrp x9, var32
+; CHECK-NEXT:    add x9, x9, :lo12:var32
+; CHECK-NEXT:    ldaddal w8, w8, [x9]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i32_noret_seq_cst:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -6550,20 +10104,32 @@ define dso_local void @test_atomic_load_sub_i32_noret_seq_cst(i32 %offset) nounw
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd4_acq_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i32_noret_seq_cst:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    neg w0, w0
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd4_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
   atomicrmw sub ptr @var32, i32 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]]
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
 
-; CHECK: ldaddal w[[NEG]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
   ret void
 }
 
 define dso_local void @test_atomic_load_sub_i64_noret_seq_cst(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_sub_i64_noret_seq_cst:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    neg x8, x0
+; CHECK-NEXT:    adrp x9, var64
+; CHECK-NEXT:    add x9, x9, :lo12:var64
+; CHECK-NEXT:    ldaddal x8, x8, [x9]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i64_noret_seq_cst:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -6573,20 +10139,31 @@ define dso_local void @test_atomic_load_sub_i64_noret_seq_cst(i64 %offset) nounw
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd8_acq_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i64_noret_seq_cst:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    neg x0, x0
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldadd8_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
   atomicrmw sub ptr @var64, i64 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: neg x[[NEG:[0-9]+]], x[[OLD:[0-9]+]]
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
 
-; CHECK: ldaddal x[[NEG]], x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
   ret void
 }
 
 define dso_local i8 @test_atomic_load_xchg_i8_acq_rel(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xchg_i8_acq_rel:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var8
+; CHECK-NEXT:    add x8, x8, :lo12:var8
+; CHECK-NEXT:    swpalb w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i8_acq_rel:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -6595,19 +10172,29 @@ define dso_local i8 @test_atomic_load_xchg_i8_acq_rel(i8 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_swp1_acq_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i8_acq_rel:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var8
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var8
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_swp1_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw xchg ptr @var8, i8 %offset acq_rel
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
 
-; CHECK: swpalb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i8 %old
 }
 
 define dso_local i16 @test_atomic_load_xchg_i16_acq_rel(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xchg_i16_acq_rel:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var16
+; CHECK-NEXT:    add x8, x8, :lo12:var16
+; CHECK-NEXT:    swpalh w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i16_acq_rel:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -6616,19 +10203,29 @@ define dso_local i16 @test_atomic_load_xchg_i16_acq_rel(i16 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_swp2_acq_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i16_acq_rel:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var16
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var16
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_swp2_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw xchg ptr @var16, i16 %offset acq_rel
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
 
-; CHECK: swpalh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i16 %old
 }
 
 define dso_local i32 @test_atomic_load_xchg_i32_acq_rel(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xchg_i32_acq_rel:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var32
+; CHECK-NEXT:    add x8, x8, :lo12:var32
+; CHECK-NEXT:    swpal w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i32_acq_rel:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -6637,19 +10234,29 @@ define dso_local i32 @test_atomic_load_xchg_i32_acq_rel(i32 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_swp4_acq_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i32_acq_rel:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_swp4_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw xchg ptr @var32, i32 %offset acq_rel
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
 
-; CHECK: swpal w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i32 %old
 }
 
 define dso_local i64 @test_atomic_load_xchg_i64_acq_rel(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xchg_i64_acq_rel:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var64
+; CHECK-NEXT:    add x8, x8, :lo12:var64
+; CHECK-NEXT:    swpal x0, x0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i64_acq_rel:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -6658,19 +10265,29 @@ define dso_local i64 @test_atomic_load_xchg_i64_acq_rel(i64 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_swp8_acq_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i64_acq_rel:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_swp8_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw xchg ptr @var64, i64 %offset acq_rel
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
 
-; CHECK: swpal x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i64 %old
 }
 
 define dso_local void @test_atomic_load_xchg_i32_noret_acq_rel(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xchg_i32_noret_acq_rel:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var32
+; CHECK-NEXT:    add x8, x8, :lo12:var32
+; CHECK-NEXT:    swpal w0, w8, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i32_noret_acq_rel:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -6679,19 +10296,29 @@ define dso_local void @test_atomic_load_xchg_i32_noret_acq_rel(i32 %offset) noun
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_swp4_acq_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i32_noret_acq_rel:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_swp4_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw xchg ptr @var32, i32 %offset acq_rel
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
 
-; CHECK: swpal w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret void
 }
 
 define dso_local void @test_atomic_load_xchg_i64_noret_acq_rel(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xchg_i64_noret_acq_rel:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var64
+; CHECK-NEXT:    add x8, x8, :lo12:var64
+; CHECK-NEXT:    swpal x0, x8, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i64_noret_acq_rel:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -6700,19 +10327,29 @@ define dso_local void @test_atomic_load_xchg_i64_noret_acq_rel(i64 %offset) noun
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_swp8_acq_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i64_noret_acq_rel:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_swp8_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw xchg ptr @var64, i64 %offset acq_rel
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
 
-; CHECK: swpal x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret void
 }
 
 define dso_local i8 @test_atomic_load_xchg_i8_acquire(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xchg_i8_acquire:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var8
+; CHECK-NEXT:    add x8, x8, :lo12:var8
+; CHECK-NEXT:    swpab w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i8_acquire:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -6721,19 +10358,29 @@ define dso_local i8 @test_atomic_load_xchg_i8_acquire(i8 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_swp1_acq
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i8_acquire:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var8
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var8
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_swp1_acq
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw xchg ptr @var8, i8 %offset acquire
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
 
-; CHECK: swpab w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i8 %old
 }
 
 define dso_local i16 @test_atomic_load_xchg_i16_acquire(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xchg_i16_acquire:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var16
+; CHECK-NEXT:    add x8, x8, :lo12:var16
+; CHECK-NEXT:    swpah w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i16_acquire:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -6742,19 +10389,29 @@ define dso_local i16 @test_atomic_load_xchg_i16_acquire(i16 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_swp2_acq
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i16_acquire:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var16
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var16
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_swp2_acq
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw xchg ptr @var16, i16 %offset acquire
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
 
-; CHECK: swpah w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i16 %old
 }
 
 define dso_local i32 @test_atomic_load_xchg_i32_acquire(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xchg_i32_acquire:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var32
+; CHECK-NEXT:    add x8, x8, :lo12:var32
+; CHECK-NEXT:    swpa w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i32_acquire:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -6763,19 +10420,29 @@ define dso_local i32 @test_atomic_load_xchg_i32_acquire(i32 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_swp4_acq
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i32_acquire:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_swp4_acq
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw xchg ptr @var32, i32 %offset acquire
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
 
-; CHECK: swpa w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i32 %old
 }
 
 define dso_local i64 @test_atomic_load_xchg_i64_acquire(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xchg_i64_acquire:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var64
+; CHECK-NEXT:    add x8, x8, :lo12:var64
+; CHECK-NEXT:    swpa x0, x0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i64_acquire:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -6784,19 +10451,29 @@ define dso_local i64 @test_atomic_load_xchg_i64_acquire(i64 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_swp8_acq
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i64_acquire:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_swp8_acq
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw xchg ptr @var64, i64 %offset acquire
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
 
-; CHECK: swpa x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i64 %old
 }
 
 define dso_local void @test_atomic_load_xchg_i32_noret_acquire(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xchg_i32_noret_acquire:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var32
+; CHECK-NEXT:    add x8, x8, :lo12:var32
+; CHECK-NEXT:    swpa w0, w8, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i32_noret_acquire:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -6805,19 +10482,29 @@ define dso_local void @test_atomic_load_xchg_i32_noret_acquire(i32 %offset) noun
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_swp4_acq
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i32_noret_acquire:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_swp4_acq
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw xchg ptr @var32, i32 %offset acquire
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
 
-; CHECK: swpa w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret void
 }
 
 define dso_local void @test_atomic_load_xchg_i64_noret_acquire(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xchg_i64_noret_acquire:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var64
+; CHECK-NEXT:    add x8, x8, :lo12:var64
+; CHECK-NEXT:    swpa x0, x8, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i64_noret_acquire:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -6826,19 +10513,29 @@ define dso_local void @test_atomic_load_xchg_i64_noret_acquire(i64 %offset) noun
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_swp8_acq
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i64_noret_acquire:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_swp8_acq
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw xchg ptr @var64, i64 %offset acquire
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
 
-; CHECK: swpa x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret void
 }
 
 define dso_local i8 @test_atomic_load_xchg_i8_monotonic(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xchg_i8_monotonic:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var8
+; CHECK-NEXT:    add x8, x8, :lo12:var8
+; CHECK-NEXT:    swpb w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i8_monotonic:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -6847,19 +10544,29 @@ define dso_local i8 @test_atomic_load_xchg_i8_monotonic(i8 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_swp1_relax
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i8_monotonic:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var8
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var8
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_swp1_relax
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw xchg ptr @var8, i8 %offset monotonic
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
 
-; CHECK: swpb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i8 %old
 }
 
 define dso_local i16 @test_atomic_load_xchg_i16_monotonic(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xchg_i16_monotonic:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var16
+; CHECK-NEXT:    add x8, x8, :lo12:var16
+; CHECK-NEXT:    swph w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i16_monotonic:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -6868,19 +10575,29 @@ define dso_local i16 @test_atomic_load_xchg_i16_monotonic(i16 %offset) nounwind
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_swp2_relax
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i16_monotonic:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var16
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var16
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_swp2_relax
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw xchg ptr @var16, i16 %offset monotonic
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
 
-; CHECK: swph w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i16 %old
 }
 
 define dso_local i32 @test_atomic_load_xchg_i32_monotonic(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xchg_i32_monotonic:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var32
+; CHECK-NEXT:    add x8, x8, :lo12:var32
+; CHECK-NEXT:    swp w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i32_monotonic:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -6889,19 +10606,29 @@ define dso_local i32 @test_atomic_load_xchg_i32_monotonic(i32 %offset) nounwind
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_swp4_relax
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i32_monotonic:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_swp4_relax
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw xchg ptr @var32, i32 %offset monotonic
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
 
-; CHECK: swp w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i32 %old
 }
 
 define dso_local i64 @test_atomic_load_xchg_i64_monotonic(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xchg_i64_monotonic:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var64
+; CHECK-NEXT:    add x8, x8, :lo12:var64
+; CHECK-NEXT:    swp x0, x0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i64_monotonic:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -6910,19 +10637,29 @@ define dso_local i64 @test_atomic_load_xchg_i64_monotonic(i64 %offset) nounwind
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_swp8_relax
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i64_monotonic:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_swp8_relax
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw xchg ptr @var64, i64 %offset monotonic
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
 
-; CHECK: swp x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i64 %old
 }
 
 define dso_local void @test_atomic_load_xchg_i32_noret_monotonic(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xchg_i32_noret_monotonic:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var32
+; CHECK-NEXT:    add x8, x8, :lo12:var32
+; CHECK-NEXT:    swp w0, w8, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i32_noret_monotonic:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -6931,19 +10668,29 @@ define dso_local void @test_atomic_load_xchg_i32_noret_monotonic(i32 %offset) no
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_swp4_relax
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i32_noret_monotonic:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_swp4_relax
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw xchg ptr @var32, i32 %offset monotonic
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
 
-; CHECK: swp w[[OLD:[0-9]+]], w[[NEW:[0-9,a-z]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret void
 }
 
 define dso_local void @test_atomic_load_xchg_i64_noret_monotonic(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xchg_i64_noret_monotonic:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var64
+; CHECK-NEXT:    add x8, x8, :lo12:var64
+; CHECK-NEXT:    swp x0, x8, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i64_noret_monotonic:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -6952,19 +10699,29 @@ define dso_local void @test_atomic_load_xchg_i64_noret_monotonic(i64 %offset) no
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_swp8_relax
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i64_noret_monotonic:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_swp8_relax
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw xchg ptr @var64, i64 %offset monotonic
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
 
-; CHECK: swp x[[OLD:[0-9]+]], x[[NEW:[0-9,a-z]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret void
 }
 
 define dso_local i8 @test_atomic_load_xchg_i8_release(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xchg_i8_release:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var8
+; CHECK-NEXT:    add x8, x8, :lo12:var8
+; CHECK-NEXT:    swplb w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i8_release:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -6973,19 +10730,29 @@ define dso_local i8 @test_atomic_load_xchg_i8_release(i8 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_swp1_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i8_release:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var8
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var8
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_swp1_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw xchg ptr @var8, i8 %offset release
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
 
-; CHECK: swplb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i8 %old
 }
 
 define dso_local i16 @test_atomic_load_xchg_i16_release(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xchg_i16_release:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var16
+; CHECK-NEXT:    add x8, x8, :lo12:var16
+; CHECK-NEXT:    swplh w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i16_release:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -6994,19 +10761,29 @@ define dso_local i16 @test_atomic_load_xchg_i16_release(i16 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_swp2_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i16_release:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var16
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var16
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_swp2_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw xchg ptr @var16, i16 %offset release
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
 
-; CHECK: swplh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i16 %old
 }
 
 define dso_local i32 @test_atomic_load_xchg_i32_release(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xchg_i32_release:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var32
+; CHECK-NEXT:    add x8, x8, :lo12:var32
+; CHECK-NEXT:    swpl w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i32_release:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -7015,19 +10792,29 @@ define dso_local i32 @test_atomic_load_xchg_i32_release(i32 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_swp4_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i32_release:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_swp4_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw xchg ptr @var32, i32 %offset release
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
 
-; CHECK: swpl w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i32 %old
 }
 
 define dso_local i64 @test_atomic_load_xchg_i64_release(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xchg_i64_release:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var64
+; CHECK-NEXT:    add x8, x8, :lo12:var64
+; CHECK-NEXT:    swpl x0, x0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i64_release:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -7036,19 +10823,29 @@ define dso_local i64 @test_atomic_load_xchg_i64_release(i64 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_swp8_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i64_release:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_swp8_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw xchg ptr @var64, i64 %offset release
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
 
-; CHECK: swpl x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i64 %old
 }
 
 define dso_local void @test_atomic_load_xchg_i32_noret_release(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xchg_i32_noret_release:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var32
+; CHECK-NEXT:    add x8, x8, :lo12:var32
+; CHECK-NEXT:    swpl w0, w8, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i32_noret_release:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -7057,19 +10854,29 @@ define dso_local void @test_atomic_load_xchg_i32_noret_release(i32 %offset) noun
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_swp4_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i32_noret_release:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_swp4_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw xchg ptr @var32, i32 %offset release
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
 
-; CHECK: swpl w[[OLD:[0-9]+]], w[[NEW:[0-9,a-z]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret void
 }
 
 define dso_local void @test_atomic_load_xchg_i64_noret_release(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xchg_i64_noret_release:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var64
+; CHECK-NEXT:    add x8, x8, :lo12:var64
+; CHECK-NEXT:    swpl x0, x8, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i64_noret_release:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -7078,19 +10885,29 @@ define dso_local void @test_atomic_load_xchg_i64_noret_release(i64 %offset) noun
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_swp8_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i64_noret_release:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_swp8_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw xchg ptr @var64, i64 %offset release
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
 
-; CHECK: swpl x[[OLD:[0-9]+]], x[[NEW:[0-9,a-z]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret void
 }
 
 define dso_local i8 @test_atomic_load_xchg_i8_seq_cst(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xchg_i8_seq_cst:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var8
+; CHECK-NEXT:    add x8, x8, :lo12:var8
+; CHECK-NEXT:    swpalb w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i8_seq_cst:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -7099,19 +10916,30 @@ define dso_local i8 @test_atomic_load_xchg_i8_seq_cst(i8 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_swp1_acq_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i8_seq_cst:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var8
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var8
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_swp1_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw xchg ptr @var8, i8 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
 
-; CHECK: swpalb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i8 %old
 }
 
 define dso_local i16 @test_atomic_load_xchg_i16_seq_cst(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xchg_i16_seq_cst:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var16
+; CHECK-NEXT:    add x8, x8, :lo12:var16
+; CHECK-NEXT:    swpalh w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i16_seq_cst:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -7120,19 +10948,30 @@ define dso_local i16 @test_atomic_load_xchg_i16_seq_cst(i16 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_swp2_acq_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i16_seq_cst:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var16
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var16
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_swp2_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw xchg ptr @var16, i16 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
 
-; CHECK: swpalh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i16 %old
 }
 
 define dso_local i32 @test_atomic_load_xchg_i32_seq_cst(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xchg_i32_seq_cst:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var32
+; CHECK-NEXT:    add x8, x8, :lo12:var32
+; CHECK-NEXT:    swpal w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i32_seq_cst:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -7141,19 +10980,30 @@ define dso_local i32 @test_atomic_load_xchg_i32_seq_cst(i32 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_swp4_acq_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i32_seq_cst:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_swp4_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw xchg ptr @var32, i32 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
 
-; CHECK: swpal w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i32 %old
 }
 
 define dso_local i64 @test_atomic_load_xchg_i64_seq_cst(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xchg_i64_seq_cst:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var64
+; CHECK-NEXT:    add x8, x8, :lo12:var64
+; CHECK-NEXT:    swpal x0, x0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i64_seq_cst:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -7162,19 +11012,30 @@ define dso_local i64 @test_atomic_load_xchg_i64_seq_cst(i64 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_swp8_acq_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i64_seq_cst:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_swp8_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw xchg ptr @var64, i64 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
 
-; CHECK: swpal x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i64 %old
 }
 
 define dso_local void @test_atomic_load_xchg_i32_noret_seq_cst(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xchg_i32_noret_seq_cst:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var32
+; CHECK-NEXT:    add x8, x8, :lo12:var32
+; CHECK-NEXT:    swpal w0, w8, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i32_noret_seq_cst:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -7183,19 +11044,30 @@ define dso_local void @test_atomic_load_xchg_i32_noret_seq_cst(i32 %offset) noun
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_swp4_acq_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i32_noret_seq_cst:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_swp4_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw xchg ptr @var32, i32 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
 
-; CHECK: swpal w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret void
 }
 
 define dso_local void @test_atomic_load_xchg_i64_noret_seq_cst(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xchg_i64_noret_seq_cst:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var64
+; CHECK-NEXT:    add x8, x8, :lo12:var64
+; CHECK-NEXT:    swpal x0, x8, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i64_noret_seq_cst:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -7204,1619 +11076,2602 @@ define dso_local void @test_atomic_load_xchg_i64_noret_seq_cst(i64 %offset) noun
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_swp8_acq_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i64_noret_seq_cst:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_swp8_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw xchg ptr @var64, i64 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
 
-; CHECK: swpal x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret void
 }
 
 define dso_local i8 @test_atomic_load_umax_i8_acq_rel(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umax_i8_acq_rel:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var8
+; CHECK-NEXT:    add x8, x8, :lo12:var8
+; CHECK-NEXT:    ldumaxalb w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i8_acq_rel:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x8, var8
 ; OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var8
 ; OUTLINE-ATOMICS-NEXT:    and w9, w0, #0xff
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB313_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldaxrb w0, [x8]
 ; OUTLINE-ATOMICS-NEXT:    cmp w0, w9
 ; OUTLINE-ATOMICS-NEXT:    csel w10, w0, w9, hi
 ; OUTLINE-ATOMICS-NEXT:    stlxrb w11, w10, [x8]
-; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB313_1
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
 ; OUTLINE-ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i8_acq_rel:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x8, var8
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var8
+; MSVC-OUTLINE-ATOMICS-NEXT:    and w9, w0, #0xff
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB313_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldaxrb w0, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp w0, w9
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel w10, w0, w9, hi
+; MSVC-OUTLINE-ATOMICS-NEXT:    stlxrb w11, w10, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB313_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umax ptr @var8, i8 %offset acq_rel
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
 
-; CHECK: ldumaxalb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i8 %old
 }
 
 define dso_local i16 @test_atomic_load_umax_i16_acq_rel(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umax_i16_acq_rel:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var16
+; CHECK-NEXT:    add x8, x8, :lo12:var16
+; CHECK-NEXT:    ldumaxalh w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i16_acq_rel:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x8, var16
 ; OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var16
 ; OUTLINE-ATOMICS-NEXT:    and w9, w0, #0xffff
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB314_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldaxrh w0, [x8]
 ; OUTLINE-ATOMICS-NEXT:    cmp w0, w9
 ; OUTLINE-ATOMICS-NEXT:    csel w10, w0, w9, hi
 ; OUTLINE-ATOMICS-NEXT:    stlxrh w11, w10, [x8]
-; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB314_1
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
 ; OUTLINE-ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i16_acq_rel:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x8, var16
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var16
+; MSVC-OUTLINE-ATOMICS-NEXT:    and w9, w0, #0xffff
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB314_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldaxrh w0, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp w0, w9
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel w10, w0, w9, hi
+; MSVC-OUTLINE-ATOMICS-NEXT:    stlxrh w11, w10, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB314_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umax ptr @var16, i16 %offset acq_rel
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
 
-; CHECK: ldumaxalh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i16 %old
 }
 
 define dso_local i32 @test_atomic_load_umax_i32_acq_rel(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umax_i32_acq_rel:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var32
+; CHECK-NEXT:    add x8, x8, :lo12:var32
+; CHECK-NEXT:    ldumaxal w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i32_acq_rel:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x9, var32
 ; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var32
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB315_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldaxr w8, [x9]
 ; OUTLINE-ATOMICS-NEXT:    cmp w8, w0
 ; OUTLINE-ATOMICS-NEXT:    csel w10, w8, w0, hi
 ; OUTLINE-ATOMICS-NEXT:    stlxr w11, w10, [x9]
-; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB315_1
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
 ; OUTLINE-ATOMICS-NEXT:    mov w0, w8
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i32_acq_rel:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x9, var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var32
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB315_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldaxr w8, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp w8, w0
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel w10, w8, w0, hi
+; MSVC-OUTLINE-ATOMICS-NEXT:    stlxr w11, w10, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB315_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umax ptr @var32, i32 %offset acq_rel
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
 
-; CHECK: ldumaxal w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i32 %old
 }
 
 define dso_local i64 @test_atomic_load_umax_i64_acq_rel(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umax_i64_acq_rel:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var64
+; CHECK-NEXT:    add x8, x8, :lo12:var64
+; CHECK-NEXT:    ldumaxal x0, x0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i64_acq_rel:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x9, var64
 ; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var64
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB316_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldaxr x8, [x9]
 ; OUTLINE-ATOMICS-NEXT:    cmp x8, x0
 ; OUTLINE-ATOMICS-NEXT:    csel x10, x8, x0, hi
 ; OUTLINE-ATOMICS-NEXT:    stlxr w11, x10, [x9]
-; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB316_1
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
 ; OUTLINE-ATOMICS-NEXT:    mov x0, x8
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i64_acq_rel:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x9, var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var64
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB316_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldaxr x8, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp x8, x0
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel x10, x8, x0, hi
+; MSVC-OUTLINE-ATOMICS-NEXT:    stlxr w11, x10, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB316_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    mov x0, x8
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umax ptr @var64, i64 %offset acq_rel
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
 
-; CHECK: ldumaxal x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i64 %old
 }
 
 define dso_local void @test_atomic_load_umax_i32_noret_acq_rel(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umax_i32_noret_acq_rel:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var32
+; CHECK-NEXT:    add x8, x8, :lo12:var32
+; CHECK-NEXT:    ldumaxal w0, w8, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i32_noret_acq_rel:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x8, var32
 ; OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var32
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB317_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldaxr w9, [x8]
 ; OUTLINE-ATOMICS-NEXT:    cmp w9, w0
 ; OUTLINE-ATOMICS-NEXT:    csel w9, w9, w0, hi
 ; OUTLINE-ATOMICS-NEXT:    stlxr w10, w9, [x8]
-; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB[[LOOPSTART]]
-; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
-; OUTLINE-ATOMICS-NEXT:    ret
+; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB317_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i32_noret_acq_rel:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x8, var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var32
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB317_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldaxr w9, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp w9, w0
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel w9, w9, w0, hi
+; MSVC-OUTLINE-ATOMICS-NEXT:    stlxr w10, w9, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB317_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw umax ptr @var32, i32 %offset acq_rel
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
 
-; CHECK: ldumaxal w0, w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
   ret void
 }
 
 define dso_local void @test_atomic_load_umax_i64_noret_acq_rel(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umax_i64_noret_acq_rel:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var64
+; CHECK-NEXT:    add x8, x8, :lo12:var64
+; CHECK-NEXT:    ldumaxal x0, x8, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i64_noret_acq_rel:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x8, var64
 ; OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var64
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB318_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldaxr x9, [x8]
 ; OUTLINE-ATOMICS-NEXT:    cmp x9, x0
 ; OUTLINE-ATOMICS-NEXT:    csel x9, x9, x0, hi
 ; OUTLINE-ATOMICS-NEXT:    stlxr w10, x9, [x8]
-; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB[[LOOPSTART]]
-; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
-; OUTLINE-ATOMICS-NEXT:    ret
+; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB318_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i64_noret_acq_rel:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x8, var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var64
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB318_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldaxr x9, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp x9, x0
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel x9, x9, x0, hi
+; MSVC-OUTLINE-ATOMICS-NEXT:    stlxr w10, x9, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB318_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw umax ptr @var64, i64 %offset acq_rel
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
 
-; CHECK: ldumaxal x0, x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
   ret void
 }
 
 define dso_local i8 @test_atomic_load_umax_i8_acquire(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umax_i8_acquire:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var8
+; CHECK-NEXT:    add x8, x8, :lo12:var8
+; CHECK-NEXT:    ldumaxab w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i8_acquire:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x8, var8
 ; OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var8
 ; OUTLINE-ATOMICS-NEXT:    and w9, w0, #0xff
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB319_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldaxrb w0, [x8]
 ; OUTLINE-ATOMICS-NEXT:    cmp w0, w9
 ; OUTLINE-ATOMICS-NEXT:    csel w10, w0, w9, hi
 ; OUTLINE-ATOMICS-NEXT:    stxrb w11, w10, [x8]
-; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB319_1
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
 ; OUTLINE-ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i8_acquire:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x8, var8
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var8
+; MSVC-OUTLINE-ATOMICS-NEXT:    and w9, w0, #0xff
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB319_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldaxrb w0, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp w0, w9
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel w10, w0, w9, hi
+; MSVC-OUTLINE-ATOMICS-NEXT:    stxrb w11, w10, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB319_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umax ptr @var8, i8 %offset acquire
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
 
-; CHECK: ldumaxab w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i8 %old
 }
 
 define dso_local i16 @test_atomic_load_umax_i16_acquire(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umax_i16_acquire:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var16
+; CHECK-NEXT:    add x8, x8, :lo12:var16
+; CHECK-NEXT:    ldumaxah w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i16_acquire:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x8, var16
 ; OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var16
 ; OUTLINE-ATOMICS-NEXT:    and w9, w0, #0xffff
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB320_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldaxrh w0, [x8]
 ; OUTLINE-ATOMICS-NEXT:    cmp w0, w9
 ; OUTLINE-ATOMICS-NEXT:    csel w10, w0, w9, hi
 ; OUTLINE-ATOMICS-NEXT:    stxrh w11, w10, [x8]
-; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB320_1
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
 ; OUTLINE-ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i16_acquire:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x8, var16
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var16
+; MSVC-OUTLINE-ATOMICS-NEXT:    and w9, w0, #0xffff
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB320_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldaxrh w0, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp w0, w9
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel w10, w0, w9, hi
+; MSVC-OUTLINE-ATOMICS-NEXT:    stxrh w11, w10, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB320_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umax ptr @var16, i16 %offset acquire
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
 
-; CHECK: ldumaxah w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i16 %old
 }
 
 define dso_local i32 @test_atomic_load_umax_i32_acquire(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umax_i32_acquire:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var32
+; CHECK-NEXT:    add x8, x8, :lo12:var32
+; CHECK-NEXT:    ldumaxa w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i32_acquire:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x9, var32
 ; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var32
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB321_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldaxr w8, [x9]
 ; OUTLINE-ATOMICS-NEXT:    cmp w8, w0
 ; OUTLINE-ATOMICS-NEXT:    csel w10, w8, w0, hi
 ; OUTLINE-ATOMICS-NEXT:    stxr w11, w10, [x9]
-; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB321_1
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
 ; OUTLINE-ATOMICS-NEXT:    mov w0, w8
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i32_acquire:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x9, var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var32
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB321_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldaxr w8, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp w8, w0
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel w10, w8, w0, hi
+; MSVC-OUTLINE-ATOMICS-NEXT:    stxr w11, w10, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB321_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umax ptr @var32, i32 %offset acquire
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
 
-; CHECK: ldumaxa w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i32 %old
 }
 
 define dso_local i64 @test_atomic_load_umax_i64_acquire(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umax_i64_acquire:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var64
+; CHECK-NEXT:    add x8, x8, :lo12:var64
+; CHECK-NEXT:    ldumaxa x0, x0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i64_acquire:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x9, var64
 ; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var64
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB322_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldaxr x8, [x9]
 ; OUTLINE-ATOMICS-NEXT:    cmp x8, x0
 ; OUTLINE-ATOMICS-NEXT:    csel x10, x8, x0, hi
 ; OUTLINE-ATOMICS-NEXT:    stxr w11, x10, [x9]
-; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB322_1
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
 ; OUTLINE-ATOMICS-NEXT:    mov x0, x8
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i64_acquire:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x9, var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var64
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB322_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldaxr x8, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp x8, x0
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel x10, x8, x0, hi
+; MSVC-OUTLINE-ATOMICS-NEXT:    stxr w11, x10, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB322_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    mov x0, x8
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umax ptr @var64, i64 %offset acquire
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
 
-; CHECK: ldumaxa x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i64 %old
 }
 
 define dso_local void @test_atomic_load_umax_i32_noret_acquire(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umax_i32_noret_acquire:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var32
+; CHECK-NEXT:    add x8, x8, :lo12:var32
+; CHECK-NEXT:    ldumaxa w0, w8, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i32_noret_acquire:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x8, var32
 ; OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var32
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB323_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldaxr w9, [x8]
 ; OUTLINE-ATOMICS-NEXT:    cmp w9, w0
 ; OUTLINE-ATOMICS-NEXT:    csel w9, w9, w0, hi
 ; OUTLINE-ATOMICS-NEXT:    stxr w10, w9, [x8]
-; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB[[LOOPSTART]]
-; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
-; OUTLINE-ATOMICS-NEXT:    ret
+; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB323_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i32_noret_acquire:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x8, var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var32
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB323_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldaxr w9, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp w9, w0
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel w9, w9, w0, hi
+; MSVC-OUTLINE-ATOMICS-NEXT:    stxr w10, w9, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB323_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw umax ptr @var32, i32 %offset acquire
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
 
-; CHECK: ldumaxa w0, w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
   ret void
 }
 
 define dso_local void @test_atomic_load_umax_i64_noret_acquire(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umax_i64_noret_acquire:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var64
+; CHECK-NEXT:    add x8, x8, :lo12:var64
+; CHECK-NEXT:    ldumaxa x0, x8, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i64_noret_acquire:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x8, var64
 ; OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var64
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB324_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldaxr x9, [x8]
 ; OUTLINE-ATOMICS-NEXT:    cmp x9, x0
 ; OUTLINE-ATOMICS-NEXT:    csel x9, x9, x0, hi
 ; OUTLINE-ATOMICS-NEXT:    stxr w10, x9, [x8]
-; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB[[LOOPSTART]]
-; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
-; OUTLINE-ATOMICS-NEXT:    ret
+; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB324_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i64_noret_acquire:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x8, var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var64
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB324_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldaxr x9, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp x9, x0
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel x9, x9, x0, hi
+; MSVC-OUTLINE-ATOMICS-NEXT:    stxr w10, x9, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB324_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw umax ptr @var64, i64 %offset acquire
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
 
-; CHECK: ldumaxa x0, x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
   ret void
 }
 
 define dso_local i8 @test_atomic_load_umax_i8_monotonic(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umax_i8_monotonic:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var8
+; CHECK-NEXT:    add x8, x8, :lo12:var8
+; CHECK-NEXT:    ldumaxb w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i8_monotonic:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x8, var8
 ; OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var8
 ; OUTLINE-ATOMICS-NEXT:    and w9, w0, #0xff
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB325_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldxrb w0, [x8]
 ; OUTLINE-ATOMICS-NEXT:    cmp w0, w9
 ; OUTLINE-ATOMICS-NEXT:    csel w10, w0, w9, hi
 ; OUTLINE-ATOMICS-NEXT:    stxrb w11, w10, [x8]
-; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB325_1
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
 ; OUTLINE-ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i8_monotonic:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x8, var8
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var8
+; MSVC-OUTLINE-ATOMICS-NEXT:    and w9, w0, #0xff
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB325_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldxrb w0, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp w0, w9
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel w10, w0, w9, hi
+; MSVC-OUTLINE-ATOMICS-NEXT:    stxrb w11, w10, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB325_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umax ptr @var8, i8 %offset monotonic
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
 
-; CHECK: ldumaxb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i8 %old
 }
 
 define dso_local i16 @test_atomic_load_umax_i16_monotonic(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umax_i16_monotonic:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var16
+; CHECK-NEXT:    add x8, x8, :lo12:var16
+; CHECK-NEXT:    ldumaxh w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i16_monotonic:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x8, var16
 ; OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var16
 ; OUTLINE-ATOMICS-NEXT:    and w9, w0, #0xffff
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB326_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldxrh w0, [x8]
 ; OUTLINE-ATOMICS-NEXT:    cmp w0, w9
 ; OUTLINE-ATOMICS-NEXT:    csel w10, w0, w9, hi
 ; OUTLINE-ATOMICS-NEXT:    stxrh w11, w10, [x8]
-; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB326_1
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
 ; OUTLINE-ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i16_monotonic:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x8, var16
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var16
+; MSVC-OUTLINE-ATOMICS-NEXT:    and w9, w0, #0xffff
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB326_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldxrh w0, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp w0, w9
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel w10, w0, w9, hi
+; MSVC-OUTLINE-ATOMICS-NEXT:    stxrh w11, w10, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB326_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umax ptr @var16, i16 %offset monotonic
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
 
-; CHECK: ldumaxh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i16 %old
 }
 
 define dso_local i32 @test_atomic_load_umax_i32_monotonic(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umax_i32_monotonic:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var32
+; CHECK-NEXT:    add x8, x8, :lo12:var32
+; CHECK-NEXT:    ldumax w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i32_monotonic:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x9, var32
 ; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var32
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB327_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldxr w8, [x9]
 ; OUTLINE-ATOMICS-NEXT:    cmp w8, w0
 ; OUTLINE-ATOMICS-NEXT:    csel w10, w8, w0, hi
 ; OUTLINE-ATOMICS-NEXT:    stxr w11, w10, [x9]
-; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB327_1
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
 ; OUTLINE-ATOMICS-NEXT:    mov w0, w8
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i32_monotonic:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x9, var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var32
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB327_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldxr w8, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp w8, w0
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel w10, w8, w0, hi
+; MSVC-OUTLINE-ATOMICS-NEXT:    stxr w11, w10, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB327_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umax ptr @var32, i32 %offset monotonic
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
 
-; CHECK: ldumax w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i32 %old
 }
 
 define dso_local i64 @test_atomic_load_umax_i64_monotonic(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umax_i64_monotonic:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var64
+; CHECK-NEXT:    add x8, x8, :lo12:var64
+; CHECK-NEXT:    ldumax x0, x0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i64_monotonic:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x9, var64
 ; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var64
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB328_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldxr x8, [x9]
 ; OUTLINE-ATOMICS-NEXT:    cmp x8, x0
 ; OUTLINE-ATOMICS-NEXT:    csel x10, x8, x0, hi
 ; OUTLINE-ATOMICS-NEXT:    stxr w11, x10, [x9]
-; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB328_1
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
 ; OUTLINE-ATOMICS-NEXT:    mov x0, x8
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i64_monotonic:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x9, var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var64
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB328_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldxr x8, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp x8, x0
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel x10, x8, x0, hi
+; MSVC-OUTLINE-ATOMICS-NEXT:    stxr w11, x10, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB328_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    mov x0, x8
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umax ptr @var64, i64 %offset monotonic
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
 
-; CHECK: ldumax x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i64 %old
 }
 
 define dso_local void @test_atomic_load_umax_i32_noret_monotonic(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umax_i32_noret_monotonic:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var32
+; CHECK-NEXT:    add x8, x8, :lo12:var32
+; CHECK-NEXT:    ldumax w0, w8, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i32_noret_monotonic:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x8, var32
 ; OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var32
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB329_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldxr w9, [x8]
 ; OUTLINE-ATOMICS-NEXT:    cmp w9, w0
 ; OUTLINE-ATOMICS-NEXT:    csel w9, w9, w0, hi
 ; OUTLINE-ATOMICS-NEXT:    stxr w10, w9, [x8]
-; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB[[LOOPSTART]]
-; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
-; OUTLINE-ATOMICS-NEXT:    ret
+; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB329_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i32_noret_monotonic:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x8, var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var32
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB329_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldxr w9, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp w9, w0
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel w9, w9, w0, hi
+; MSVC-OUTLINE-ATOMICS-NEXT:    stxr w10, w9, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB329_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw umax ptr @var32, i32 %offset monotonic
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
 
-; CHECK: ldumax w{{[0-9]+}}, w{{[1-9][0-9]*}}, [x[[ADDR]]]
-; CHECK-NOT: dmb
   ret void
 }
 
 define dso_local void @test_atomic_load_umax_i64_noret_monotonic(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umax_i64_noret_monotonic:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var64
+; CHECK-NEXT:    add x8, x8, :lo12:var64
+; CHECK-NEXT:    ldumax x0, x8, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i64_noret_monotonic:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x8, var64
 ; OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var64
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB330_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldxr x9, [x8]
 ; OUTLINE-ATOMICS-NEXT:    cmp x9, x0
 ; OUTLINE-ATOMICS-NEXT:    csel x9, x9, x0, hi
 ; OUTLINE-ATOMICS-NEXT:    stxr w10, x9, [x8]
-; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB[[LOOPSTART]]
-; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
-; OUTLINE-ATOMICS-NEXT:    ret
+; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB330_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i64_noret_monotonic:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x8, var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var64
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB330_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldxr x9, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp x9, x0
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel x9, x9, x0, hi
+; MSVC-OUTLINE-ATOMICS-NEXT:    stxr w10, x9, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB330_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw umax ptr @var64, i64 %offset monotonic
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
 
-; CHECK: ldumax x{{[0-9]+}}, x{{[1-9][0-9]*}}, [x[[ADDR]]]
-; CHECK-NOT: dmb
   ret void
 }
 
 define dso_local i8 @test_atomic_load_umax_i8_release(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umax_i8_release:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var8
+; CHECK-NEXT:    add x8, x8, :lo12:var8
+; CHECK-NEXT:    ldumaxlb w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i8_release:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x8, var8
 ; OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var8
 ; OUTLINE-ATOMICS-NEXT:    and w9, w0, #0xff
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB331_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldxrb w0, [x8]
 ; OUTLINE-ATOMICS-NEXT:    cmp w0, w9
 ; OUTLINE-ATOMICS-NEXT:    csel w10, w0, w9, hi
 ; OUTLINE-ATOMICS-NEXT:    stlxrb w11, w10, [x8]
-; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB331_1
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
 ; OUTLINE-ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i8_release:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x8, var8
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var8
+; MSVC-OUTLINE-ATOMICS-NEXT:    and w9, w0, #0xff
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB331_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldxrb w0, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp w0, w9
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel w10, w0, w9, hi
+; MSVC-OUTLINE-ATOMICS-NEXT:    stlxrb w11, w10, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB331_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umax ptr @var8, i8 %offset release
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
 
-; CHECK: ldumaxlb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i8 %old
 }
 
 define dso_local i16 @test_atomic_load_umax_i16_release(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umax_i16_release:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var16
+; CHECK-NEXT:    add x8, x8, :lo12:var16
+; CHECK-NEXT:    ldumaxlh w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i16_release:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x8, var16
 ; OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var16
 ; OUTLINE-ATOMICS-NEXT:    and w9, w0, #0xffff
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB332_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldxrh w0, [x8]
 ; OUTLINE-ATOMICS-NEXT:    cmp w0, w9
 ; OUTLINE-ATOMICS-NEXT:    csel w10, w0, w9, hi
 ; OUTLINE-ATOMICS-NEXT:    stlxrh w11, w10, [x8]
-; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB332_1
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
 ; OUTLINE-ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i16_release:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x8, var16
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var16
+; MSVC-OUTLINE-ATOMICS-NEXT:    and w9, w0, #0xffff
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB332_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldxrh w0, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp w0, w9
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel w10, w0, w9, hi
+; MSVC-OUTLINE-ATOMICS-NEXT:    stlxrh w11, w10, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB332_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umax ptr @var16, i16 %offset release
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
 
-; CHECK: ldumaxlh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i16 %old
 }
 
 define dso_local i32 @test_atomic_load_umax_i32_release(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umax_i32_release:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var32
+; CHECK-NEXT:    add x8, x8, :lo12:var32
+; CHECK-NEXT:    ldumaxl w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i32_release:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x9, var32
 ; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var32
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB333_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldxr w8, [x9]
 ; OUTLINE-ATOMICS-NEXT:    cmp w8, w0
 ; OUTLINE-ATOMICS-NEXT:    csel w10, w8, w0, hi
 ; OUTLINE-ATOMICS-NEXT:    stlxr w11, w10, [x9]
-; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB333_1
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
 ; OUTLINE-ATOMICS-NEXT:    mov w0, w8
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i32_release:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x9, var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var32
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB333_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldxr w8, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp w8, w0
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel w10, w8, w0, hi
+; MSVC-OUTLINE-ATOMICS-NEXT:    stlxr w11, w10, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB333_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umax ptr @var32, i32 %offset release
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
 
-; CHECK: ldumaxl w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i32 %old
 }
 
 define dso_local i64 @test_atomic_load_umax_i64_release(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umax_i64_release:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var64
+; CHECK-NEXT:    add x8, x8, :lo12:var64
+; CHECK-NEXT:    ldumaxl x0, x0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i64_release:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x9, var64
 ; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var64
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB334_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldxr x8, [x9]
 ; OUTLINE-ATOMICS-NEXT:    cmp x8, x0
 ; OUTLINE-ATOMICS-NEXT:    csel x10, x8, x0, hi
 ; OUTLINE-ATOMICS-NEXT:    stlxr w11, x10, [x9]
-; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB334_1
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
 ; OUTLINE-ATOMICS-NEXT:    mov x0, x8
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i64_release:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x9, var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var64
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB334_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldxr x8, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp x8, x0
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel x10, x8, x0, hi
+; MSVC-OUTLINE-ATOMICS-NEXT:    stlxr w11, x10, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB334_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    mov x0, x8
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umax ptr @var64, i64 %offset release
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
 
-; CHECK: ldumaxl x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i64 %old
 }
 
 define dso_local void @test_atomic_load_umax_i32_noret_release(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umax_i32_noret_release:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var32
+; CHECK-NEXT:    add x8, x8, :lo12:var32
+; CHECK-NEXT:    ldumaxl w0, w8, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i32_noret_release:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x8, var32
 ; OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var32
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB335_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldxr w9, [x8]
 ; OUTLINE-ATOMICS-NEXT:    cmp w9, w0
 ; OUTLINE-ATOMICS-NEXT:    csel w9, w9, w0, hi
 ; OUTLINE-ATOMICS-NEXT:    stlxr w10, w9, [x8]
-; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB[[LOOPSTART]]
-; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
-; OUTLINE-ATOMICS-NEXT:    ret
+; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB335_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i32_noret_release:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x8, var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var32
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB335_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldxr w9, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp w9, w0
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel w9, w9, w0, hi
+; MSVC-OUTLINE-ATOMICS-NEXT:    stlxr w10, w9, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB335_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw umax ptr @var32, i32 %offset release
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
 
-; CHECK: ldumaxl w{{[0-9]+}}, w{{[1-9][0-9]*}}, [x[[ADDR]]]
-; CHECK-NOT: dmb
   ret void
 }
 
 define dso_local void @test_atomic_load_umax_i64_noret_release(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umax_i64_noret_release:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var64
+; CHECK-NEXT:    add x8, x8, :lo12:var64
+; CHECK-NEXT:    ldumaxl x0, x8, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i64_noret_release:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x8, var64
 ; OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var64
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB336_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldxr x9, [x8]
 ; OUTLINE-ATOMICS-NEXT:    cmp x9, x0
 ; OUTLINE-ATOMICS-NEXT:    csel x9, x9, x0, hi
 ; OUTLINE-ATOMICS-NEXT:    stlxr w10, x9, [x8]
-; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB[[LOOPSTART]]
-; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
-; OUTLINE-ATOMICS-NEXT:    ret
+; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB336_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i64_noret_release:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x8, var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var64
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB336_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldxr x9, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp x9, x0
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel x9, x9, x0, hi
+; MSVC-OUTLINE-ATOMICS-NEXT:    stlxr w10, x9, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB336_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw umax ptr @var64, i64 %offset release
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
 
-; CHECK: ldumaxl x{{[0-9]+}}, x{{[1-9][0-9]*}}, [x[[ADDR]]]
-; CHECK-NOT: dmb
   ret void
 }
 
 define dso_local i8 @test_atomic_load_umax_i8_seq_cst(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umax_i8_seq_cst:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var8
+; CHECK-NEXT:    add x8, x8, :lo12:var8
+; CHECK-NEXT:    ldumaxalb w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i8_seq_cst:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x8, var8
 ; OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var8
 ; OUTLINE-ATOMICS-NEXT:    and w9, w0, #0xff
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB337_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldaxrb w0, [x8]
 ; OUTLINE-ATOMICS-NEXT:    cmp w0, w9
 ; OUTLINE-ATOMICS-NEXT:    csel w10, w0, w9, hi
 ; OUTLINE-ATOMICS-NEXT:    stlxrb w11, w10, [x8]
-; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB337_1
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
 ; OUTLINE-ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i8_seq_cst:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x8, var8
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var8
+; MSVC-OUTLINE-ATOMICS-NEXT:    and w9, w0, #0xff
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB337_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldaxrb w0, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp w0, w9
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel w10, w0, w9, hi
+; MSVC-OUTLINE-ATOMICS-NEXT:    stlxrb w11, w10, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB337_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umax ptr @var8, i8 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
 
-; CHECK: ldumaxalb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i8 %old
 }
 
 define dso_local i16 @test_atomic_load_umax_i16_seq_cst(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umax_i16_seq_cst:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var16
+; CHECK-NEXT:    add x8, x8, :lo12:var16
+; CHECK-NEXT:    ldumaxalh w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i16_seq_cst:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x8, var16
 ; OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var16
 ; OUTLINE-ATOMICS-NEXT:    and w9, w0, #0xffff
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB338_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldaxrh w0, [x8]
 ; OUTLINE-ATOMICS-NEXT:    cmp w0, w9
 ; OUTLINE-ATOMICS-NEXT:    csel w10, w0, w9, hi
 ; OUTLINE-ATOMICS-NEXT:    stlxrh w11, w10, [x8]
-; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB338_1
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
 ; OUTLINE-ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i16_seq_cst:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x8, var16
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var16
+; MSVC-OUTLINE-ATOMICS-NEXT:    and w9, w0, #0xffff
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB338_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldaxrh w0, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp w0, w9
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel w10, w0, w9, hi
+; MSVC-OUTLINE-ATOMICS-NEXT:    stlxrh w11, w10, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB338_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umax ptr @var16, i16 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
 
-; CHECK: ldumaxalh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i16 %old
 }
 
 define dso_local i32 @test_atomic_load_umax_i32_seq_cst(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umax_i32_seq_cst:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var32
+; CHECK-NEXT:    add x8, x8, :lo12:var32
+; CHECK-NEXT:    ldumaxal w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i32_seq_cst:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x9, var32
 ; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var32
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB339_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldaxr w8, [x9]
 ; OUTLINE-ATOMICS-NEXT:    cmp w8, w0
 ; OUTLINE-ATOMICS-NEXT:    csel w10, w8, w0, hi
 ; OUTLINE-ATOMICS-NEXT:    stlxr w11, w10, [x9]
-; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB339_1
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
 ; OUTLINE-ATOMICS-NEXT:    mov w0, w8
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i32_seq_cst:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x9, var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var32
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB339_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldaxr w8, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp w8, w0
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel w10, w8, w0, hi
+; MSVC-OUTLINE-ATOMICS-NEXT:    stlxr w11, w10, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB339_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umax ptr @var32, i32 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
 
-; CHECK: ldumaxal w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i32 %old
 }
 
 define dso_local i64 @test_atomic_load_umax_i64_seq_cst(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umax_i64_seq_cst:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var64
+; CHECK-NEXT:    add x8, x8, :lo12:var64
+; CHECK-NEXT:    ldumaxal x0, x0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i64_seq_cst:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x9, var64
 ; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var64
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB340_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldaxr x8, [x9]
 ; OUTLINE-ATOMICS-NEXT:    cmp x8, x0
 ; OUTLINE-ATOMICS-NEXT:    csel x10, x8, x0, hi
 ; OUTLINE-ATOMICS-NEXT:    stlxr w11, x10, [x9]
-; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB340_1
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
 ; OUTLINE-ATOMICS-NEXT:    mov x0, x8
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i64_seq_cst:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    mov x8, x0
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x9, var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var64
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB340_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldaxr x0, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp x0, x8
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel x10, x0, x8, hi
+; MSVC-OUTLINE-ATOMICS-NEXT:    stlxr w11, x10, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB340_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umax ptr @var64, i64 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
 
-; CHECK: ldumaxal x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i64 %old
 }
 
 define dso_local void @test_atomic_load_umax_i32_noret_seq_cst(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umax_i32_noret_seq_cst:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var32
+; CHECK-NEXT:    add x8, x8, :lo12:var32
+; CHECK-NEXT:    ldumaxal w0, w8, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i32_noret_seq_cst:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x8, var32
 ; OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var32
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB341_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldaxr w9, [x8]
 ; OUTLINE-ATOMICS-NEXT:    cmp w9, w0
 ; OUTLINE-ATOMICS-NEXT:    csel w9, w9, w0, hi
 ; OUTLINE-ATOMICS-NEXT:    stlxr w10, w9, [x8]
-; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB[[LOOPSTART]]
-; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
-; OUTLINE-ATOMICS-NEXT:    ret
+; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB341_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i32_noret_seq_cst:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x8, var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var32
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB341_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldaxr w9, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp w9, w0
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel w9, w9, w0, hi
+; MSVC-OUTLINE-ATOMICS-NEXT:    stlxr w10, w9, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB341_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw umax ptr @var32, i32 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
 
-; CHECK: ldumaxal w0, w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
   ret void
 }
 
 define dso_local void @test_atomic_load_umax_i64_noret_seq_cst(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umax_i64_noret_seq_cst:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var64
+; CHECK-NEXT:    add x8, x8, :lo12:var64
+; CHECK-NEXT:    ldumaxal x0, x8, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i64_noret_seq_cst:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x8, var64
 ; OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var64
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB342_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldaxr x9, [x8]
 ; OUTLINE-ATOMICS-NEXT:    cmp x9, x0
 ; OUTLINE-ATOMICS-NEXT:    csel x9, x9, x0, hi
 ; OUTLINE-ATOMICS-NEXT:    stlxr w10, x9, [x8]
-; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB[[LOOPSTART]]
-; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
-; OUTLINE-ATOMICS-NEXT:    ret
+; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB342_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i64_noret_seq_cst:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x8, var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var64
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB342_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldaxr x9, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp x9, x0
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel x9, x9, x0, hi
+; MSVC-OUTLINE-ATOMICS-NEXT:    stlxr w10, x9, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB342_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw umax ptr @var64, i64 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
 
-; CHECK: ldumaxal x0, x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
   ret void
 }
 
 define dso_local i8 @test_atomic_load_umin_i8_acq_rel(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umin_i8_acq_rel:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var8
+; CHECK-NEXT:    add x8, x8, :lo12:var8
+; CHECK-NEXT:    lduminalb w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i8_acq_rel:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x8, var8
 ; OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var8
 ; OUTLINE-ATOMICS-NEXT:    and w9, w0, #0xff
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB343_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldaxrb w0, [x8]
 ; OUTLINE-ATOMICS-NEXT:    cmp w0, w9
 ; OUTLINE-ATOMICS-NEXT:    csel w10, w0, w9, ls
 ; OUTLINE-ATOMICS-NEXT:    stlxrb w11, w10, [x8]
-; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB343_1
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
 ; OUTLINE-ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i8_acq_rel:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x8, var8
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var8
+; MSVC-OUTLINE-ATOMICS-NEXT:    and w9, w0, #0xff
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB343_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldaxrb w0, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp w0, w9
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel w10, w0, w9, ls
+; MSVC-OUTLINE-ATOMICS-NEXT:    stlxrb w11, w10, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB343_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umin ptr @var8, i8 %offset acq_rel
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
 
-; CHECK: lduminalb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i8 %old
 }
 
 define dso_local i16 @test_atomic_load_umin_i16_acq_rel(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umin_i16_acq_rel:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var16
+; CHECK-NEXT:    add x8, x8, :lo12:var16
+; CHECK-NEXT:    lduminalh w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i16_acq_rel:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x8, var16
 ; OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var16
 ; OUTLINE-ATOMICS-NEXT:    and w9, w0, #0xffff
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB344_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldaxrh w0, [x8]
 ; OUTLINE-ATOMICS-NEXT:    cmp w0, w9
 ; OUTLINE-ATOMICS-NEXT:    csel w10, w0, w9, ls
 ; OUTLINE-ATOMICS-NEXT:    stlxrh w11, w10, [x8]
-; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB344_1
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
 ; OUTLINE-ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i16_acq_rel:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x8, var16
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var16
+; MSVC-OUTLINE-ATOMICS-NEXT:    and w9, w0, #0xffff
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB344_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldaxrh w0, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp w0, w9
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel w10, w0, w9, ls
+; MSVC-OUTLINE-ATOMICS-NEXT:    stlxrh w11, w10, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB344_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umin ptr @var16, i16 %offset acq_rel
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
 
-; CHECK: lduminalh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i16 %old
 }
 
 define dso_local i32 @test_atomic_load_umin_i32_acq_rel(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umin_i32_acq_rel:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var32
+; CHECK-NEXT:    add x8, x8, :lo12:var32
+; CHECK-NEXT:    lduminal w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i32_acq_rel:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x9, var32
 ; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var32
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB345_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldaxr w8, [x9]
 ; OUTLINE-ATOMICS-NEXT:    cmp w8, w0
 ; OUTLINE-ATOMICS-NEXT:    csel w10, w8, w0, ls
 ; OUTLINE-ATOMICS-NEXT:    stlxr w11, w10, [x9]
-; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB345_1
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
 ; OUTLINE-ATOMICS-NEXT:    mov w0, w8
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i32_acq_rel:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x9, var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var32
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB345_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldaxr w8, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp w8, w0
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel w10, w8, w0, ls
+; MSVC-OUTLINE-ATOMICS-NEXT:    stlxr w11, w10, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB345_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umin ptr @var32, i32 %offset acq_rel
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
 
-; CHECK: lduminal w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i32 %old
 }
 
 define dso_local i64 @test_atomic_load_umin_i64_acq_rel(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umin_i64_acq_rel:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var64
+; CHECK-NEXT:    add x8, x8, :lo12:var64
+; CHECK-NEXT:    lduminal x0, x0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i64_acq_rel:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x9, var64
 ; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var64
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB346_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldaxr x8, [x9]
 ; OUTLINE-ATOMICS-NEXT:    cmp x8, x0
 ; OUTLINE-ATOMICS-NEXT:    csel x10, x8, x0, ls
 ; OUTLINE-ATOMICS-NEXT:    stlxr w11, x10, [x9]
-; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB346_1
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
 ; OUTLINE-ATOMICS-NEXT:    mov x0, x8
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i64_acq_rel:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x9, var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var64
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB346_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldaxr x8, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp x8, x0
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel x10, x8, x0, ls
+; MSVC-OUTLINE-ATOMICS-NEXT:    stlxr w11, x10, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB346_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    mov x0, x8
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umin ptr @var64, i64 %offset acq_rel
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
 
-; CHECK: lduminal x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i64 %old
 }
 
 define dso_local void @test_atomic_load_umin_i32_noret_acq_rel(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umin_i32_noret_acq_rel:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var32
+; CHECK-NEXT:    add x8, x8, :lo12:var32
+; CHECK-NEXT:    lduminal w0, w8, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i32_noret_acq_rel:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x8, var32
 ; OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var32
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB347_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldaxr w9, [x8]
 ; OUTLINE-ATOMICS-NEXT:    cmp w9, w0
 ; OUTLINE-ATOMICS-NEXT:    csel w9, w9, w0, ls
 ; OUTLINE-ATOMICS-NEXT:    stlxr w10, w9, [x8]
-; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB[[LOOPSTART]]
-; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
-; OUTLINE-ATOMICS-NEXT:    ret
+; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB347_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i32_noret_acq_rel:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x8, var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var32
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB347_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldaxr w9, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp w9, w0
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel w9, w9, w0, ls
+; MSVC-OUTLINE-ATOMICS-NEXT:    stlxr w10, w9, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB347_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw umin ptr @var32, i32 %offset acq_rel
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
 
-; CHECK: lduminal w0, w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
   ret void
 }
 
 define dso_local void @test_atomic_load_umin_i64_noret_acq_rel(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umin_i64_noret_acq_rel:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var64
+; CHECK-NEXT:    add x8, x8, :lo12:var64
+; CHECK-NEXT:    lduminal x0, x8, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i64_noret_acq_rel:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x8, var64
 ; OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var64
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB348_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldaxr x9, [x8]
 ; OUTLINE-ATOMICS-NEXT:    cmp x9, x0
 ; OUTLINE-ATOMICS-NEXT:    csel x9, x9, x0, ls
 ; OUTLINE-ATOMICS-NEXT:    stlxr w10, x9, [x8]
-; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB[[LOOPSTART]]
-; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
-; OUTLINE-ATOMICS-NEXT:    ret
+; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB348_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i64_noret_acq_rel:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x8, var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var64
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB348_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldaxr x9, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp x9, x0
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel x9, x9, x0, ls
+; MSVC-OUTLINE-ATOMICS-NEXT:    stlxr w10, x9, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB348_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw umin ptr @var64, i64 %offset acq_rel
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
 
-; CHECK: lduminal x0, x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
   ret void
 }
 
 define dso_local i8 @test_atomic_load_umin_i8_acquire(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umin_i8_acquire:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var8
+; CHECK-NEXT:    add x8, x8, :lo12:var8
+; CHECK-NEXT:    lduminab w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i8_acquire:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x8, var8
 ; OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var8
 ; OUTLINE-ATOMICS-NEXT:    and w9, w0, #0xff
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB349_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldaxrb w0, [x8]
 ; OUTLINE-ATOMICS-NEXT:    cmp w0, w9
 ; OUTLINE-ATOMICS-NEXT:    csel w10, w0, w9, ls
 ; OUTLINE-ATOMICS-NEXT:    stxrb w11, w10, [x8]
-; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB349_1
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
 ; OUTLINE-ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i8_acquire:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x8, var8
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var8
+; MSVC-OUTLINE-ATOMICS-NEXT:    and w9, w0, #0xff
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB349_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldaxrb w0, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp w0, w9
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel w10, w0, w9, ls
+; MSVC-OUTLINE-ATOMICS-NEXT:    stxrb w11, w10, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB349_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umin ptr @var8, i8 %offset acquire
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
 
-; CHECK: lduminab w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i8 %old
 }
 
 define dso_local i16 @test_atomic_load_umin_i16_acquire(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umin_i16_acquire:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var16
+; CHECK-NEXT:    add x8, x8, :lo12:var16
+; CHECK-NEXT:    lduminah w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i16_acquire:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x8, var16
 ; OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var16
 ; OUTLINE-ATOMICS-NEXT:    and w9, w0, #0xffff
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB350_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldaxrh w0, [x8]
 ; OUTLINE-ATOMICS-NEXT:    cmp w0, w9
 ; OUTLINE-ATOMICS-NEXT:    csel w10, w0, w9, ls
 ; OUTLINE-ATOMICS-NEXT:    stxrh w11, w10, [x8]
-; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB350_1
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
 ; OUTLINE-ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i16_acquire:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x8, var16
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var16
+; MSVC-OUTLINE-ATOMICS-NEXT:    and w9, w0, #0xffff
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB350_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldaxrh w0, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp w0, w9
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel w10, w0, w9, ls
+; MSVC-OUTLINE-ATOMICS-NEXT:    stxrh w11, w10, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB350_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umin ptr @var16, i16 %offset acquire
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
 
-; CHECK: lduminah w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i16 %old
 }
 
 define dso_local i32 @test_atomic_load_umin_i32_acquire(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umin_i32_acquire:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var32
+; CHECK-NEXT:    add x8, x8, :lo12:var32
+; CHECK-NEXT:    ldumina w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i32_acquire:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x9, var32
 ; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var32
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB351_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldaxr w8, [x9]
 ; OUTLINE-ATOMICS-NEXT:    cmp w8, w0
 ; OUTLINE-ATOMICS-NEXT:    csel w10, w8, w0, ls
 ; OUTLINE-ATOMICS-NEXT:    stxr w11, w10, [x9]
-; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB351_1
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
 ; OUTLINE-ATOMICS-NEXT:    mov w0, w8
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i32_acquire:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x9, var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var32
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB351_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldaxr w8, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp w8, w0
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel w10, w8, w0, ls
+; MSVC-OUTLINE-ATOMICS-NEXT:    stxr w11, w10, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB351_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umin ptr @var32, i32 %offset acquire
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
 
-; CHECK: ldumina w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i32 %old
 }
 
 define dso_local i64 @test_atomic_load_umin_i64_acquire(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umin_i64_acquire:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var64
+; CHECK-NEXT:    add x8, x8, :lo12:var64
+; CHECK-NEXT:    ldumina x0, x0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i64_acquire:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x9, var64
 ; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var64
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB352_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldaxr x8, [x9]
 ; OUTLINE-ATOMICS-NEXT:    cmp x8, x0
 ; OUTLINE-ATOMICS-NEXT:    csel x10, x8, x0, ls
 ; OUTLINE-ATOMICS-NEXT:    stxr w11, x10, [x9]
-; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB352_1
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
 ; OUTLINE-ATOMICS-NEXT:    mov x0, x8
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i64_acquire:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x9, var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var64
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB352_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldaxr x8, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp x8, x0
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel x10, x8, x0, ls
+; MSVC-OUTLINE-ATOMICS-NEXT:    stxr w11, x10, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB352_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    mov x0, x8
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umin ptr @var64, i64 %offset acquire
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
 
-; CHECK: ldumina x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i64 %old
 }
 
 define dso_local void @test_atomic_load_umin_i32_noret_acquire(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umin_i32_noret_acquire:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var32
+; CHECK-NEXT:    add x8, x8, :lo12:var32
+; CHECK-NEXT:    ldumina w0, w8, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i32_noret_acquire:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x8, var32
 ; OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var32
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB353_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldaxr w9, [x8]
 ; OUTLINE-ATOMICS-NEXT:    cmp w9, w0
 ; OUTLINE-ATOMICS-NEXT:    csel w9, w9, w0, ls
 ; OUTLINE-ATOMICS-NEXT:    stxr w10, w9, [x8]
-; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB[[LOOPSTART]]
-; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
-; OUTLINE-ATOMICS-NEXT:    ret
+; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB353_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i32_noret_acquire:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x8, var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var32
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB353_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldaxr w9, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp w9, w0
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel w9, w9, w0, ls
+; MSVC-OUTLINE-ATOMICS-NEXT:    stxr w10, w9, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB353_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw umin ptr @var32, i32 %offset acquire
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
 
-; CHECK: ldumina w0, w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
   ret void
 }
 
 define dso_local void @test_atomic_load_umin_i64_noret_acquire(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umin_i64_noret_acquire:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var64
+; CHECK-NEXT:    add x8, x8, :lo12:var64
+; CHECK-NEXT:    ldumina x0, x8, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i64_noret_acquire:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x8, var64
 ; OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var64
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB354_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldaxr x9, [x8]
 ; OUTLINE-ATOMICS-NEXT:    cmp x9, x0
 ; OUTLINE-ATOMICS-NEXT:    csel x9, x9, x0, ls
 ; OUTLINE-ATOMICS-NEXT:    stxr w10, x9, [x8]
-; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB[[LOOPSTART]]
-; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
-; OUTLINE-ATOMICS-NEXT:    ret
+; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB354_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i64_noret_acquire:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x8, var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var64
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB354_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldaxr x9, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp x9, x0
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel x9, x9, x0, ls
+; MSVC-OUTLINE-ATOMICS-NEXT:    stxr w10, x9, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB354_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw umin ptr @var64, i64 %offset acquire
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
 
-; CHECK: ldumina x0, x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
   ret void
 }
 
 define dso_local i8 @test_atomic_load_umin_i8_monotonic(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umin_i8_monotonic:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var8
+; CHECK-NEXT:    add x8, x8, :lo12:var8
+; CHECK-NEXT:    lduminb w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i8_monotonic:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x8, var8
 ; OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var8
 ; OUTLINE-ATOMICS-NEXT:    and w9, w0, #0xff
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB355_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldxrb w0, [x8]
 ; OUTLINE-ATOMICS-NEXT:    cmp w0, w9
 ; OUTLINE-ATOMICS-NEXT:    csel w10, w0, w9, ls
 ; OUTLINE-ATOMICS-NEXT:    stxrb w11, w10, [x8]
-; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB355_1
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
 ; OUTLINE-ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i8_monotonic:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x8, var8
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var8
+; MSVC-OUTLINE-ATOMICS-NEXT:    and w9, w0, #0xff
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB355_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldxrb w0, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp w0, w9
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel w10, w0, w9, ls
+; MSVC-OUTLINE-ATOMICS-NEXT:    stxrb w11, w10, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB355_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umin ptr @var8, i8 %offset monotonic
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
 
-; CHECK: lduminb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i8 %old
 }
 
 define dso_local i16 @test_atomic_load_umin_i16_monotonic(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umin_i16_monotonic:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var16
+; CHECK-NEXT:    add x8, x8, :lo12:var16
+; CHECK-NEXT:    lduminh w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i16_monotonic:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x8, var16
 ; OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var16
 ; OUTLINE-ATOMICS-NEXT:    and w9, w0, #0xffff
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB356_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldxrh w0, [x8]
 ; OUTLINE-ATOMICS-NEXT:    cmp w0, w9
 ; OUTLINE-ATOMICS-NEXT:    csel w10, w0, w9, ls
 ; OUTLINE-ATOMICS-NEXT:    stxrh w11, w10, [x8]
-; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB356_1
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
 ; OUTLINE-ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i16_monotonic:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x8, var16
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var16
+; MSVC-OUTLINE-ATOMICS-NEXT:    and w9, w0, #0xffff
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB356_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldxrh w0, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp w0, w9
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel w10, w0, w9, ls
+; MSVC-OUTLINE-ATOMICS-NEXT:    stxrh w11, w10, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB356_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umin ptr @var16, i16 %offset monotonic
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
 
-; CHECK: lduminh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i16 %old
 }
 
 define dso_local i32 @test_atomic_load_umin_i32_monotonic(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umin_i32_monotonic:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var32
+; CHECK-NEXT:    add x8, x8, :lo12:var32
+; CHECK-NEXT:    ldumin w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i32_monotonic:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x9, var32
 ; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var32
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB357_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldxr w8, [x9]
 ; OUTLINE-ATOMICS-NEXT:    cmp w8, w0
 ; OUTLINE-ATOMICS-NEXT:    csel w10, w8, w0, ls
 ; OUTLINE-ATOMICS-NEXT:    stxr w11, w10, [x9]
-; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB357_1
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
 ; OUTLINE-ATOMICS-NEXT:    mov w0, w8
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i32_monotonic:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x9, var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var32
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB357_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldxr w8, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp w8, w0
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel w10, w8, w0, ls
+; MSVC-OUTLINE-ATOMICS-NEXT:    stxr w11, w10, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB357_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umin ptr @var32, i32 %offset monotonic
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
 
-; CHECK: ldumin w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i32 %old
 }
 
 define dso_local i64 @test_atomic_load_umin_i64_monotonic(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umin_i64_monotonic:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var64
+; CHECK-NEXT:    add x8, x8, :lo12:var64
+; CHECK-NEXT:    ldumin x0, x0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i64_monotonic:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x9, var64
 ; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var64
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB358_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldxr x8, [x9]
 ; OUTLINE-ATOMICS-NEXT:    cmp x8, x0
 ; OUTLINE-ATOMICS-NEXT:    csel x10, x8, x0, ls
 ; OUTLINE-ATOMICS-NEXT:    stxr w11, x10, [x9]
-; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB358_1
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
 ; OUTLINE-ATOMICS-NEXT:    mov x0, x8
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i64_monotonic:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x9, var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var64
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB358_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldxr x8, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp x8, x0
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel x10, x8, x0, ls
+; MSVC-OUTLINE-ATOMICS-NEXT:    stxr w11, x10, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB358_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    mov x0, x8
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umin ptr @var64, i64 %offset monotonic
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
 
-; CHECK: ldumin x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i64 %old
 }
 
 define dso_local void @test_atomic_load_umin_i32_noret_monotonic(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umin_i32_noret_monotonic:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var32
+; CHECK-NEXT:    add x8, x8, :lo12:var32
+; CHECK-NEXT:    ldumin w0, w8, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i32_noret_monotonic:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x8, var32
 ; OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var32
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB359_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldxr w9, [x8]
 ; OUTLINE-ATOMICS-NEXT:    cmp w9, w0
 ; OUTLINE-ATOMICS-NEXT:    csel w9, w9, w0, ls
 ; OUTLINE-ATOMICS-NEXT:    stxr w10, w9, [x8]
-; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB[[LOOPSTART]]
-; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
-; OUTLINE-ATOMICS-NEXT:    ret
+; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB359_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i32_noret_monotonic:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x8, var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var32
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB359_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldxr w9, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp w9, w0
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel w9, w9, w0, ls
+; MSVC-OUTLINE-ATOMICS-NEXT:    stxr w10, w9, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB359_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw umin ptr @var32, i32 %offset monotonic
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
 
-; CHECK: ldumin w{{[0-9]+}}, w{{[1-9][0-9]*}}, [x[[ADDR]]]
-; CHECK-NOT: dmb
   ret void
 }
 
 define dso_local void @test_atomic_load_umin_i64_noret_monotonic(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umin_i64_noret_monotonic:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var64
+; CHECK-NEXT:    add x8, x8, :lo12:var64
+; CHECK-NEXT:    ldumin x0, x8, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i64_noret_monotonic:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x8, var64
 ; OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var64
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB360_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldxr x9, [x8]
 ; OUTLINE-ATOMICS-NEXT:    cmp x9, x0
 ; OUTLINE-ATOMICS-NEXT:    csel x9, x9, x0, ls
 ; OUTLINE-ATOMICS-NEXT:    stxr w10, x9, [x8]
-; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB[[LOOPSTART]]
-; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
-; OUTLINE-ATOMICS-NEXT:    ret
+; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB360_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i64_noret_monotonic:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x8, var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var64
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB360_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldxr x9, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp x9, x0
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel x9, x9, x0, ls
+; MSVC-OUTLINE-ATOMICS-NEXT:    stxr w10, x9, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB360_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw umin ptr @var64, i64 %offset monotonic
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
 
-; CHECK: ldumin x{{[0-9]+}}, x{{[1-9][0-9]*}}, [x[[ADDR]]]
-; CHECK-NOT: dmb
   ret void
 }
 
 define dso_local i8 @test_atomic_load_umin_i8_release(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umin_i8_release:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var8
+; CHECK-NEXT:    add x8, x8, :lo12:var8
+; CHECK-NEXT:    lduminlb w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i8_release:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x8, var8
 ; OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var8
 ; OUTLINE-ATOMICS-NEXT:    and w9, w0, #0xff
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB361_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldxrb w0, [x8]
 ; OUTLINE-ATOMICS-NEXT:    cmp w0, w9
 ; OUTLINE-ATOMICS-NEXT:    csel w10, w0, w9, ls
 ; OUTLINE-ATOMICS-NEXT:    stlxrb w11, w10, [x8]
-; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB361_1
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
 ; OUTLINE-ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i8_release:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x8, var8
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var8
+; MSVC-OUTLINE-ATOMICS-NEXT:    and w9, w0, #0xff
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB361_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldxrb w0, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp w0, w9
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel w10, w0, w9, ls
+; MSVC-OUTLINE-ATOMICS-NEXT:    stlxrb w11, w10, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB361_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umin ptr @var8, i8 %offset release
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
 
-; CHECK: lduminlb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i8 %old
 }
 
 define dso_local i16 @test_atomic_load_umin_i16_release(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umin_i16_release:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var16
+; CHECK-NEXT:    add x8, x8, :lo12:var16
+; CHECK-NEXT:    lduminlh w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i16_release:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x8, var16
 ; OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var16
 ; OUTLINE-ATOMICS-NEXT:    and w9, w0, #0xffff
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB362_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldxrh w0, [x8]
 ; OUTLINE-ATOMICS-NEXT:    cmp w0, w9
 ; OUTLINE-ATOMICS-NEXT:    csel w10, w0, w9, ls
 ; OUTLINE-ATOMICS-NEXT:    stlxrh w11, w10, [x8]
-; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB362_1
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
 ; OUTLINE-ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i16_release:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x8, var16
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var16
+; MSVC-OUTLINE-ATOMICS-NEXT:    and w9, w0, #0xffff
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB362_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldxrh w0, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp w0, w9
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel w10, w0, w9, ls
+; MSVC-OUTLINE-ATOMICS-NEXT:    stlxrh w11, w10, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB362_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umin ptr @var16, i16 %offset release
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
 
-; CHECK: lduminlh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i16 %old
 }
 
 define dso_local i32 @test_atomic_load_umin_i32_release(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umin_i32_release:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var32
+; CHECK-NEXT:    add x8, x8, :lo12:var32
+; CHECK-NEXT:    lduminl w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i32_release:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x9, var32
 ; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var32
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB363_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldxr w8, [x9]
 ; OUTLINE-ATOMICS-NEXT:    cmp w8, w0
 ; OUTLINE-ATOMICS-NEXT:    csel w10, w8, w0, ls
 ; OUTLINE-ATOMICS-NEXT:    stlxr w11, w10, [x9]
-; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB363_1
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
 ; OUTLINE-ATOMICS-NEXT:    mov w0, w8
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i32_release:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x9, var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var32
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB363_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldxr w8, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp w8, w0
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel w10, w8, w0, ls
+; MSVC-OUTLINE-ATOMICS-NEXT:    stlxr w11, w10, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB363_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umin ptr @var32, i32 %offset release
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
 
-; CHECK: lduminl w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i32 %old
 }
 
 define dso_local i64 @test_atomic_load_umin_i64_release(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umin_i64_release:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var64
+; CHECK-NEXT:    add x8, x8, :lo12:var64
+; CHECK-NEXT:    lduminl x0, x0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i64_release:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x9, var64
 ; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var64
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB364_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldxr x8, [x9]
 ; OUTLINE-ATOMICS-NEXT:    cmp x8, x0
 ; OUTLINE-ATOMICS-NEXT:    csel x10, x8, x0, ls
 ; OUTLINE-ATOMICS-NEXT:    stlxr w11, x10, [x9]
-; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB364_1
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
 ; OUTLINE-ATOMICS-NEXT:    mov x0, x8
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i64_release:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x9, var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var64
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB364_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldxr x8, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp x8, x0
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel x10, x8, x0, ls
+; MSVC-OUTLINE-ATOMICS-NEXT:    stlxr w11, x10, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB364_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    mov x0, x8
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umin ptr @var64, i64 %offset release
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
 
-; CHECK: lduminl x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i64 %old
 }
 
 define dso_local void @test_atomic_load_umin_i32_noret_release(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umin_i32_noret_release:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var32
+; CHECK-NEXT:    add x8, x8, :lo12:var32
+; CHECK-NEXT:    lduminl w0, w8, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i32_noret_release:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x8, var32
 ; OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var32
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB365_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldxr w9, [x8]
 ; OUTLINE-ATOMICS-NEXT:    cmp w9, w0
 ; OUTLINE-ATOMICS-NEXT:    csel w9, w9, w0, ls
 ; OUTLINE-ATOMICS-NEXT:    stlxr w10, w9, [x8]
-; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB[[LOOPSTART]]
-; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
-; OUTLINE-ATOMICS-NEXT:    ret
+; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB365_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i32_noret_release:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x8, var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var32
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB365_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldxr w9, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp w9, w0
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel w9, w9, w0, ls
+; MSVC-OUTLINE-ATOMICS-NEXT:    stlxr w10, w9, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB365_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw umin ptr @var32, i32 %offset release
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
 
-; CHECK: lduminl w{{[0-9]+}}, w{{[1-9][0-9]*}}, [x[[ADDR]]]
-; CHECK-NOT: dmb
   ret void
 }
 
 define dso_local void @test_atomic_load_umin_i64_noret_release(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umin_i64_noret_release:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var64
+; CHECK-NEXT:    add x8, x8, :lo12:var64
+; CHECK-NEXT:    lduminl x0, x8, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i64_noret_release:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x8, var64
 ; OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var64
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB366_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldxr x9, [x8]
 ; OUTLINE-ATOMICS-NEXT:    cmp x9, x0
 ; OUTLINE-ATOMICS-NEXT:    csel x9, x9, x0, ls
 ; OUTLINE-ATOMICS-NEXT:    stlxr w10, x9, [x8]
-; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB[[LOOPSTART]]
-; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
-; OUTLINE-ATOMICS-NEXT:    ret
+; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB366_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i64_noret_release:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x8, var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var64
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB366_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldxr x9, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp x9, x0
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel x9, x9, x0, ls
+; MSVC-OUTLINE-ATOMICS-NEXT:    stlxr w10, x9, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB366_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw umin ptr @var64, i64 %offset release
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
 
-; CHECK: lduminl x{{[0-9]+}}, x{{[1-9][0-9]*}}, [x[[ADDR]]]
-; CHECK-NOT: dmb
   ret void
 }
 
 define dso_local i8 @test_atomic_load_umin_i8_seq_cst(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umin_i8_seq_cst:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var8
+; CHECK-NEXT:    add x8, x8, :lo12:var8
+; CHECK-NEXT:    lduminalb w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i8_seq_cst:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x8, var8
 ; OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var8
 ; OUTLINE-ATOMICS-NEXT:    and w9, w0, #0xff
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB367_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldaxrb w0, [x8]
 ; OUTLINE-ATOMICS-NEXT:    cmp w0, w9
 ; OUTLINE-ATOMICS-NEXT:    csel w10, w0, w9, ls
 ; OUTLINE-ATOMICS-NEXT:    stlxrb w11, w10, [x8]
-; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB367_1
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
 ; OUTLINE-ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i8_seq_cst:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x8, var8
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var8
+; MSVC-OUTLINE-ATOMICS-NEXT:    and w9, w0, #0xff
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB367_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldaxrb w0, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp w0, w9
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel w10, w0, w9, ls
+; MSVC-OUTLINE-ATOMICS-NEXT:    stlxrb w11, w10, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB367_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umin ptr @var8, i8 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
 
-; CHECK: lduminalb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i8 %old
 }
 
 define dso_local i16 @test_atomic_load_umin_i16_seq_cst(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umin_i16_seq_cst:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var16
+; CHECK-NEXT:    add x8, x8, :lo12:var16
+; CHECK-NEXT:    lduminalh w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i16_seq_cst:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x8, var16
 ; OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var16
 ; OUTLINE-ATOMICS-NEXT:    and w9, w0, #0xffff
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB368_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldaxrh w0, [x8]
 ; OUTLINE-ATOMICS-NEXT:    cmp w0, w9
 ; OUTLINE-ATOMICS-NEXT:    csel w10, w0, w9, ls
 ; OUTLINE-ATOMICS-NEXT:    stlxrh w11, w10, [x8]
-; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB368_1
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
 ; OUTLINE-ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i16_seq_cst:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x8, var16
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var16
+; MSVC-OUTLINE-ATOMICS-NEXT:    and w9, w0, #0xffff
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB368_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldaxrh w0, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp w0, w9
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel w10, w0, w9, ls
+; MSVC-OUTLINE-ATOMICS-NEXT:    stlxrh w11, w10, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB368_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umin ptr @var16, i16 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
 
-; CHECK: lduminalh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i16 %old
 }
 
 define dso_local i32 @test_atomic_load_umin_i32_seq_cst(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umin_i32_seq_cst:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var32
+; CHECK-NEXT:    add x8, x8, :lo12:var32
+; CHECK-NEXT:    lduminal w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i32_seq_cst:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x9, var32
 ; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var32
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB369_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldaxr w8, [x9]
 ; OUTLINE-ATOMICS-NEXT:    cmp w8, w0
 ; OUTLINE-ATOMICS-NEXT:    csel w10, w8, w0, ls
 ; OUTLINE-ATOMICS-NEXT:    stlxr w11, w10, [x9]
-; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB369_1
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
 ; OUTLINE-ATOMICS-NEXT:    mov w0, w8
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i32_seq_cst:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x9, var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var32
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB369_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldaxr w8, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp w8, w0
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel w10, w8, w0, ls
+; MSVC-OUTLINE-ATOMICS-NEXT:    stlxr w11, w10, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB369_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    mov w0, w8
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umin ptr @var32, i32 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
 
-; CHECK: lduminal w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i32 %old
 }
 
 define dso_local i64 @test_atomic_load_umin_i64_seq_cst(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umin_i64_seq_cst:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var64
+; CHECK-NEXT:    add x8, x8, :lo12:var64
+; CHECK-NEXT:    lduminal x0, x0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i64_seq_cst:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x9, var64
 ; OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var64
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB370_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldaxr x8, [x9]
 ; OUTLINE-ATOMICS-NEXT:    cmp x8, x0
 ; OUTLINE-ATOMICS-NEXT:    csel x10, x8, x0, ls
 ; OUTLINE-ATOMICS-NEXT:    stlxr w11, x10, [x9]
-; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB370_1
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
 ; OUTLINE-ATOMICS-NEXT:    mov x0, x8
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i64_seq_cst:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    mov x8, x0
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x9, var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x9, x9, :lo12:var64
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB370_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldaxr x0, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp x0, x8
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel x10, x0, x8, ls
+; MSVC-OUTLINE-ATOMICS-NEXT:    stlxr w11, x10, [x9]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB370_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umin ptr @var64, i64 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
 
-; CHECK: lduminal x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i64 %old
 }
 
 define dso_local void @test_atomic_load_umin_i32_noret_seq_cst(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umin_i32_noret_seq_cst:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var32
+; CHECK-NEXT:    add x8, x8, :lo12:var32
+; CHECK-NEXT:    lduminal w0, w8, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i32_noret_seq_cst:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x8, var32
 ; OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var32
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB371_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldaxr w9, [x8]
 ; OUTLINE-ATOMICS-NEXT:    cmp w9, w0
 ; OUTLINE-ATOMICS-NEXT:    csel w9, w9, w0, ls
 ; OUTLINE-ATOMICS-NEXT:    stlxr w10, w9, [x8]
-; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB[[LOOPSTART]]
-; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
-; OUTLINE-ATOMICS-NEXT:    ret
+; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB371_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i32_noret_seq_cst:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x8, var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var32
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB371_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldaxr w9, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp w9, w0
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel w9, w9, w0, ls
+; MSVC-OUTLINE-ATOMICS-NEXT:    stlxr w10, w9, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB371_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw umin ptr @var32, i32 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
 
-; CHECK: lduminal w0, w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
   ret void
 }
 
 define dso_local void @test_atomic_load_umin_i64_noret_seq_cst(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umin_i64_noret_seq_cst:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var64
+; CHECK-NEXT:    add x8, x8, :lo12:var64
+; CHECK-NEXT:    lduminal x0, x8, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i64_noret_seq_cst:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    adrp x8, var64
 ; OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var64
-; OUTLINE-ATOMICS-NEXT:  .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:  .LBB372_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT:    ldaxr x9, [x8]
 ; OUTLINE-ATOMICS-NEXT:    cmp x9, x0
 ; OUTLINE-ATOMICS-NEXT:    csel x9, x9, x0, ls
 ; OUTLINE-ATOMICS-NEXT:    stlxr w10, x9, [x8]
-; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB[[LOOPSTART]]
-; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
-; OUTLINE-ATOMICS-NEXT:    ret
+; OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB372_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i64_noret_seq_cst:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x8, var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var64
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB372_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldaxr x9, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cmp x9, x0
+; MSVC-OUTLINE-ATOMICS-NEXT:    csel x9, x9, x0, ls
+; MSVC-OUTLINE-ATOMICS-NEXT:    stlxr w10, x9, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w10, .LBB372_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw umin ptr @var64, i64 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
 
-; CHECK: lduminal x0, x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
   ret void
 }
 
 define dso_local i8 @test_atomic_load_xor_i8_acq_rel(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xor_i8_acq_rel:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var8
+; CHECK-NEXT:    add x8, x8, :lo12:var8
+; CHECK-NEXT:    ldeoralb w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i8_acq_rel:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -8825,19 +13680,29 @@ define dso_local i8 @test_atomic_load_xor_i8_acq_rel(i8 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldeor1_acq_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i8_acq_rel:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var8
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var8
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldeor1_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw xor ptr @var8, i8 %offset acq_rel
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
 
-; CHECK: ldeoralb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i8 %old
 }
 
 define dso_local i16 @test_atomic_load_xor_i16_acq_rel(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xor_i16_acq_rel:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var16
+; CHECK-NEXT:    add x8, x8, :lo12:var16
+; CHECK-NEXT:    ldeoralh w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i16_acq_rel:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -8846,19 +13711,29 @@ define dso_local i16 @test_atomic_load_xor_i16_acq_rel(i16 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldeor2_acq_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i16_acq_rel:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var16
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var16
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldeor2_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw xor ptr @var16, i16 %offset acq_rel
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
 
-; CHECK: ldeoralh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i16 %old
 }
 
 define dso_local i32 @test_atomic_load_xor_i32_acq_rel(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xor_i32_acq_rel:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var32
+; CHECK-NEXT:    add x8, x8, :lo12:var32
+; CHECK-NEXT:    ldeoral w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i32_acq_rel:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -8867,19 +13742,29 @@ define dso_local i32 @test_atomic_load_xor_i32_acq_rel(i32 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldeor4_acq_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i32_acq_rel:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldeor4_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw xor ptr @var32, i32 %offset acq_rel
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
 
-; CHECK: ldeoral w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i32 %old
 }
 
 define dso_local i64 @test_atomic_load_xor_i64_acq_rel(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xor_i64_acq_rel:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var64
+; CHECK-NEXT:    add x8, x8, :lo12:var64
+; CHECK-NEXT:    ldeoral x0, x0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i64_acq_rel:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -8888,19 +13773,29 @@ define dso_local i64 @test_atomic_load_xor_i64_acq_rel(i64 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldeor8_acq_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i64_acq_rel:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldeor8_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw xor ptr @var64, i64 %offset acq_rel
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
 
-; CHECK: ldeoral x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i64 %old
 }
 
 define dso_local void @test_atomic_load_xor_i32_noret_acq_rel(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xor_i32_noret_acq_rel:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var32
+; CHECK-NEXT:    add x8, x8, :lo12:var32
+; CHECK-NEXT:    ldeoral w0, w8, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i32_noret_acq_rel:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -8909,18 +13804,28 @@ define dso_local void @test_atomic_load_xor_i32_noret_acq_rel(i32 %offset) nounw
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldeor4_acq_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i32_noret_acq_rel:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldeor4_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw xor ptr @var32, i32 %offset acq_rel
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
 
-; CHECK: ldeoral w0, w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
   ret void
 }
 
 define dso_local void @test_atomic_load_xor_i64_noret_acq_rel(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xor_i64_noret_acq_rel:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var64
+; CHECK-NEXT:    add x8, x8, :lo12:var64
+; CHECK-NEXT:    ldeoral x0, x8, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i64_noret_acq_rel:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -8929,18 +13834,28 @@ define dso_local void @test_atomic_load_xor_i64_noret_acq_rel(i64 %offset) nounw
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldeor8_acq_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i64_noret_acq_rel:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldeor8_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw xor ptr @var64, i64 %offset acq_rel
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
 
-; CHECK: ldeoral x0, x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
   ret void
 }
 
 define dso_local i8 @test_atomic_load_xor_i8_acquire(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xor_i8_acquire:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var8
+; CHECK-NEXT:    add x8, x8, :lo12:var8
+; CHECK-NEXT:    ldeorab w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i8_acquire:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -8949,19 +13864,29 @@ define dso_local i8 @test_atomic_load_xor_i8_acquire(i8 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldeor1_acq
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i8_acquire:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var8
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var8
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldeor1_acq
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw xor ptr @var8, i8 %offset acquire
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
 
-; CHECK: ldeorab w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i8 %old
 }
 
 define dso_local i16 @test_atomic_load_xor_i16_acquire(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xor_i16_acquire:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var16
+; CHECK-NEXT:    add x8, x8, :lo12:var16
+; CHECK-NEXT:    ldeorah w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i16_acquire:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -8970,19 +13895,29 @@ define dso_local i16 @test_atomic_load_xor_i16_acquire(i16 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldeor2_acq
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i16_acquire:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var16
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var16
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldeor2_acq
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw xor ptr @var16, i16 %offset acquire
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
 
-; CHECK: ldeorah w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i16 %old
 }
 
 define dso_local i32 @test_atomic_load_xor_i32_acquire(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xor_i32_acquire:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var32
+; CHECK-NEXT:    add x8, x8, :lo12:var32
+; CHECK-NEXT:    ldeora w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i32_acquire:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -8991,19 +13926,29 @@ define dso_local i32 @test_atomic_load_xor_i32_acquire(i32 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldeor4_acq
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i32_acquire:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldeor4_acq
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw xor ptr @var32, i32 %offset acquire
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
 
-; CHECK: ldeora w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i32 %old
 }
 
 define dso_local i64 @test_atomic_load_xor_i64_acquire(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xor_i64_acquire:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var64
+; CHECK-NEXT:    add x8, x8, :lo12:var64
+; CHECK-NEXT:    ldeora x0, x0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i64_acquire:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -9012,19 +13957,29 @@ define dso_local i64 @test_atomic_load_xor_i64_acquire(i64 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldeor8_acq
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i64_acquire:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldeor8_acq
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw xor ptr @var64, i64 %offset acquire
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
 
-; CHECK: ldeora x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i64 %old
 }
 
 define dso_local void @test_atomic_load_xor_i32_noret_acquire(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xor_i32_noret_acquire:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var32
+; CHECK-NEXT:    add x8, x8, :lo12:var32
+; CHECK-NEXT:    ldeora w0, w8, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i32_noret_acquire:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -9033,18 +13988,28 @@ define dso_local void @test_atomic_load_xor_i32_noret_acquire(i32 %offset) nounw
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldeor4_acq
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i32_noret_acquire:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldeor4_acq
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw xor ptr @var32, i32 %offset acquire
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
 
-; CHECK: ldeora w0, w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
   ret void
 }
 
 define dso_local void @test_atomic_load_xor_i64_noret_acquire(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xor_i64_noret_acquire:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var64
+; CHECK-NEXT:    add x8, x8, :lo12:var64
+; CHECK-NEXT:    ldeora x0, x8, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i64_noret_acquire:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -9053,18 +14018,28 @@ define dso_local void @test_atomic_load_xor_i64_noret_acquire(i64 %offset) nounw
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldeor8_acq
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i64_noret_acquire:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldeor8_acq
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw xor ptr @var64, i64 %offset acquire
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
 
-; CHECK: ldeora x0, x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
   ret void
 }
 
 define dso_local i8 @test_atomic_load_xor_i8_monotonic(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xor_i8_monotonic:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var8
+; CHECK-NEXT:    add x8, x8, :lo12:var8
+; CHECK-NEXT:    ldeorb w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i8_monotonic:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -9073,19 +14048,29 @@ define dso_local i8 @test_atomic_load_xor_i8_monotonic(i8 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldeor1_relax
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i8_monotonic:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var8
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var8
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldeor1_relax
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw xor ptr @var8, i8 %offset monotonic
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
 
-; CHECK: ldeorb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i8 %old
 }
 
 define dso_local i16 @test_atomic_load_xor_i16_monotonic(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xor_i16_monotonic:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var16
+; CHECK-NEXT:    add x8, x8, :lo12:var16
+; CHECK-NEXT:    ldeorh w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i16_monotonic:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -9094,19 +14079,29 @@ define dso_local i16 @test_atomic_load_xor_i16_monotonic(i16 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldeor2_relax
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i16_monotonic:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var16
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var16
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldeor2_relax
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw xor ptr @var16, i16 %offset monotonic
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
 
-; CHECK: ldeorh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i16 %old
 }
 
 define dso_local i32 @test_atomic_load_xor_i32_monotonic(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xor_i32_monotonic:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var32
+; CHECK-NEXT:    add x8, x8, :lo12:var32
+; CHECK-NEXT:    ldeor w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i32_monotonic:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -9115,19 +14110,29 @@ define dso_local i32 @test_atomic_load_xor_i32_monotonic(i32 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldeor4_relax
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i32_monotonic:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldeor4_relax
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw xor ptr @var32, i32 %offset monotonic
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
 
-; CHECK: ldeor w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i32 %old
 }
 
 define dso_local i64 @test_atomic_load_xor_i64_monotonic(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xor_i64_monotonic:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var64
+; CHECK-NEXT:    add x8, x8, :lo12:var64
+; CHECK-NEXT:    ldeor x0, x0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i64_monotonic:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -9136,19 +14141,29 @@ define dso_local i64 @test_atomic_load_xor_i64_monotonic(i64 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldeor8_relax
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i64_monotonic:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldeor8_relax
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw xor ptr @var64, i64 %offset monotonic
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
 
-; CHECK: ldeor x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i64 %old
 }
 
 define dso_local void @test_atomic_load_xor_i32_noret_monotonic(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xor_i32_noret_monotonic:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var32
+; CHECK-NEXT:    add x8, x8, :lo12:var32
+; CHECK-NEXT:    ldeor w0, w8, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i32_noret_monotonic:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -9157,18 +14172,28 @@ define dso_local void @test_atomic_load_xor_i32_noret_monotonic(i32 %offset) nou
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldeor4_relax
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i32_noret_monotonic:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldeor4_relax
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw xor ptr @var32, i32 %offset monotonic
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
 
-; CHECK: ldeor w{{[0-9]+}}, w{{[1-9][0-9]*}}, [x[[ADDR]]]
-; CHECK-NOT: dmb
   ret void
 }
 
 define dso_local void @test_atomic_load_xor_i64_noret_monotonic(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xor_i64_noret_monotonic:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var64
+; CHECK-NEXT:    add x8, x8, :lo12:var64
+; CHECK-NEXT:    ldeor x0, x8, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i64_noret_monotonic:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -9177,18 +14202,28 @@ define dso_local void @test_atomic_load_xor_i64_noret_monotonic(i64 %offset) nou
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldeor8_relax
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i64_noret_monotonic:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldeor8_relax
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw xor ptr @var64, i64 %offset monotonic
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
 
-; CHECK: ldeor x{{[0-9]+}}, x{{[1-9][0-9]*}}, [x[[ADDR]]]
-; CHECK-NOT: dmb
   ret void
 }
 
 define dso_local i8 @test_atomic_load_xor_i8_release(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xor_i8_release:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var8
+; CHECK-NEXT:    add x8, x8, :lo12:var8
+; CHECK-NEXT:    ldeorlb w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i8_release:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -9197,19 +14232,29 @@ define dso_local i8 @test_atomic_load_xor_i8_release(i8 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldeor1_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i8_release:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var8
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var8
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldeor1_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw xor ptr @var8, i8 %offset release
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
 
-; CHECK: ldeorlb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i8 %old
 }
 
 define dso_local i16 @test_atomic_load_xor_i16_release(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xor_i16_release:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var16
+; CHECK-NEXT:    add x8, x8, :lo12:var16
+; CHECK-NEXT:    ldeorlh w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i16_release:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -9218,19 +14263,29 @@ define dso_local i16 @test_atomic_load_xor_i16_release(i16 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldeor2_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i16_release:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var16
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var16
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldeor2_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw xor ptr @var16, i16 %offset release
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
 
-; CHECK: ldeorlh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i16 %old
 }
 
 define dso_local i32 @test_atomic_load_xor_i32_release(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xor_i32_release:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var32
+; CHECK-NEXT:    add x8, x8, :lo12:var32
+; CHECK-NEXT:    ldeorl w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i32_release:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -9239,19 +14294,29 @@ define dso_local i32 @test_atomic_load_xor_i32_release(i32 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldeor4_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i32_release:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldeor4_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw xor ptr @var32, i32 %offset release
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
 
-; CHECK: ldeorl w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i32 %old
 }
 
 define dso_local i64 @test_atomic_load_xor_i64_release(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xor_i64_release:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var64
+; CHECK-NEXT:    add x8, x8, :lo12:var64
+; CHECK-NEXT:    ldeorl x0, x0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i64_release:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -9260,19 +14325,29 @@ define dso_local i64 @test_atomic_load_xor_i64_release(i64 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldeor8_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i64_release:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldeor8_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw xor ptr @var64, i64 %offset release
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
 
-; CHECK: ldeorl x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i64 %old
 }
 
 define dso_local void @test_atomic_load_xor_i32_noret_release(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xor_i32_noret_release:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var32
+; CHECK-NEXT:    add x8, x8, :lo12:var32
+; CHECK-NEXT:    ldeorl w0, w8, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i32_noret_release:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -9281,18 +14356,28 @@ define dso_local void @test_atomic_load_xor_i32_noret_release(i32 %offset) nounw
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldeor4_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i32_noret_release:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldeor4_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw xor ptr @var32, i32 %offset release
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
 
-; CHECK: ldeorl w{{[0-9]+}}, w{{[1-9][0-9]*}}, [x[[ADDR]]]
-; CHECK-NOT: dmb
   ret void
 }
 
 define dso_local void @test_atomic_load_xor_i64_noret_release(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xor_i64_noret_release:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var64
+; CHECK-NEXT:    add x8, x8, :lo12:var64
+; CHECK-NEXT:    ldeorl x0, x8, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i64_noret_release:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -9301,18 +14386,28 @@ define dso_local void @test_atomic_load_xor_i64_noret_release(i64 %offset) nounw
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldeor8_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i64_noret_release:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldeor8_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw xor ptr @var64, i64 %offset release
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
 
-; CHECK: ldeorl x{{[0-9]+}}, x{{[1-9][0-9]*}}, [x[[ADDR]]]
-; CHECK-NOT: dmb
   ret void
 }
 
 define dso_local i8 @test_atomic_load_xor_i8_seq_cst(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xor_i8_seq_cst:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var8
+; CHECK-NEXT:    add x8, x8, :lo12:var8
+; CHECK-NEXT:    ldeoralb w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i8_seq_cst:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -9321,19 +14416,30 @@ define dso_local i8 @test_atomic_load_xor_i8_seq_cst(i8 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldeor1_acq_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i8_seq_cst:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var8
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var8
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldeor1_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw xor ptr @var8, i8 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
 
-; CHECK: ldeoralb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i8 %old
 }
 
 define dso_local i16 @test_atomic_load_xor_i16_seq_cst(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xor_i16_seq_cst:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var16
+; CHECK-NEXT:    add x8, x8, :lo12:var16
+; CHECK-NEXT:    ldeoralh w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i16_seq_cst:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -9342,19 +14448,30 @@ define dso_local i16 @test_atomic_load_xor_i16_seq_cst(i16 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldeor2_acq_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i16_seq_cst:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var16
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var16
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldeor2_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw xor ptr @var16, i16 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
 
-; CHECK: ldeoralh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i16 %old
 }
 
 define dso_local i32 @test_atomic_load_xor_i32_seq_cst(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xor_i32_seq_cst:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var32
+; CHECK-NEXT:    add x8, x8, :lo12:var32
+; CHECK-NEXT:    ldeoral w0, w0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i32_seq_cst:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -9363,19 +14480,30 @@ define dso_local i32 @test_atomic_load_xor_i32_seq_cst(i32 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldeor4_acq_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i32_seq_cst:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldeor4_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw xor ptr @var32, i32 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
 
-; CHECK: ldeoral w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i32 %old
 }
 
 define dso_local i64 @test_atomic_load_xor_i64_seq_cst(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xor_i64_seq_cst:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var64
+; CHECK-NEXT:    add x8, x8, :lo12:var64
+; CHECK-NEXT:    ldeoral x0, x0, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i64_seq_cst:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -9384,19 +14512,30 @@ define dso_local i64 @test_atomic_load_xor_i64_seq_cst(i64 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldeor8_acq_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i64_seq_cst:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldeor8_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw xor ptr @var64, i64 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
 
-; CHECK: ldeoral x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 
    ret i64 %old
 }
 
 define dso_local void @test_atomic_load_xor_i32_noret_seq_cst(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xor_i32_noret_seq_cst:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var32
+; CHECK-NEXT:    add x8, x8, :lo12:var32
+; CHECK-NEXT:    ldeoral w0, w8, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i32_noret_seq_cst:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -9405,18 +14544,29 @@ define dso_local void @test_atomic_load_xor_i32_noret_seq_cst(i32 %offset) nounw
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldeor4_acq_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i32_noret_seq_cst:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldeor4_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw xor ptr @var32, i32 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
 
-; CHECK: ldeoral w0, w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
   ret void
 }
 
 define dso_local void @test_atomic_load_xor_i64_noret_seq_cst(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xor_i64_noret_seq_cst:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, var64
+; CHECK-NEXT:    add x8, x8, :lo12:var64
+; CHECK-NEXT:    ldeoral x0, x8, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i64_noret_seq_cst:
 ; OUTLINE-ATOMICS:       // %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -9425,23 +14575,55 @@ define dso_local void @test_atomic_load_xor_i64_noret_seq_cst(i64 %offset) nounw
 ; OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldeor8_acq_rel
 ; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i64_noret_seq_cst:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x1, var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; MSVC-OUTLINE-ATOMICS-NEXT:    bl __aarch64_ldeor8_acq_rel
+; MSVC-OUTLINE-ATOMICS-NEXT:    dmb ish
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
    atomicrmw xor ptr @var64, i64 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
 
-; CHECK: ldeoral x0, x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
   ret void
 }
 
 define dso_local i128 @test_atomic_load_i128() nounwind {
 ; CHECK-LABEL: test_atomic_load_i128:
-; CHECK: casp
-
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov x0, xzr
+; CHECK-NEXT:    mov x1, xzr
+; CHECK-NEXT:    adrp x8, var128
+; CHECK-NEXT:    add x8, x8, :lo12:var128
+; CHECK-NEXT:    casp x0, x1, x0, x1, [x8]
+; CHECK-NEXT:    ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_i128:
-; OUTLINE-ATOMICS: ldxp
-; OUTLINE-ATOMICS: stxp
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    adrp x8, var128
+; OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var128
+; OUTLINE-ATOMICS-NEXT:  .LBB403_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT:    ldxp x0, x1, [x8]
+; OUTLINE-ATOMICS-NEXT:    stxp w9, x0, x1, [x8]
+; OUTLINE-ATOMICS-NEXT:    cbnz w9, .LBB403_1
+; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    ret
+;
+; MSVC-OUTLINE-ATOMICS-LABEL: test_atomic_load_i128:
+; MSVC-OUTLINE-ATOMICS:       // %bb.0:
+; MSVC-OUTLINE-ATOMICS-NEXT:    adrp x8, var128
+; MSVC-OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var128
+; MSVC-OUTLINE-ATOMICS-NEXT:  .LBB403_1: // %atomicrmw.start
+; MSVC-OUTLINE-ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC-OUTLINE-ATOMICS-NEXT:    ldxp x0, x1, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    stxp w9, x0, x1, [x8]
+; MSVC-OUTLINE-ATOMICS-NEXT:    cbnz w9, .LBB403_1
+; MSVC-OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC-OUTLINE-ATOMICS-NEXT:    ret
+
    %pair = load atomic i128, ptr @var128 monotonic, align 16
    ret i128 %pair
 }
diff --git a/llvm/test/CodeGen/AArch64/atomic-ops-msvc.ll b/llvm/test/CodeGen/AArch64/atomic-ops-msvc.ll
deleted file mode 100644
index bf78429da52f3..0000000000000
--- a/llvm/test/CodeGen/AArch64/atomic-ops-msvc.ll
+++ /dev/null
@@ -1,1019 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=aarch64-windows-pc-msvc -disable-post-ra -verify-machineinstrs < %s | FileCheck %s
-
-@var8 = dso_local global i8 0
-@var16 = dso_local global i16 0
-@var32 = dso_local global i32 0
-@var64 = dso_local global i64 0
-
-define dso_local i8 @test_atomic_load_add_i8(i8 %offset) nounwind {
-; CHECK-LABEL: test_atomic_load_add_i8:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    adrp x9, var8
-; CHECK-NEXT:    add x9, x9, :lo12:var8
-; CHECK-NEXT:  .LBB0_1: // %atomicrmw.start
-; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    ldaxrb w8, [x9]
-; CHECK-NEXT:    add w10, w8, w0
-; CHECK-NEXT:    stlxrb w11, w10, [x9]
-; CHECK-NEXT:    cbnz w11, .LBB0_1
-; CHECK-NEXT:  // %bb.2: // %atomicrmw.end
-; CHECK-NEXT:    mov w0, w8
-; CHECK-NEXT:    dmb ish
-; CHECK-NEXT:    ret
-   %old = atomicrmw add ptr @var8, i8 %offset seq_cst
-   ret i8 %old
-}
-
-define dso_local i16 @test_atomic_load_add_i16(i16 %offset) nounwind {
-; CHECK-LABEL: test_atomic_load_add_i16:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    adrp x9, var16
-; CHECK-NEXT:    add x9, x9, :lo12:var16
-; CHECK-NEXT:  .LBB1_1: // %atomicrmw.start
-; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    ldaxrh w8, [x9]
-; CHECK-NEXT:    add w10, w8, w0
-; CHECK-NEXT:    stxrh w11, w10, [x9]
-; CHECK-NEXT:    cbnz w11, .LBB1_1
-; CHECK-NEXT:  // %bb.2: // %atomicrmw.end
-; CHECK-NEXT:    mov w0, w8
-; CHECK-NEXT:    ret
-   %old = atomicrmw add ptr @var16, i16 %offset acquire
-   ret i16 %old
-}
-
-define dso_local i32 @test_atomic_load_add_i32(i32 %offset) nounwind {
-; CHECK-LABEL: test_atomic_load_add_i32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    adrp x9, var32
-; CHECK-NEXT:    add x9, x9, :lo12:var32
-; CHECK-NEXT:  .LBB2_1: // %atomicrmw.start
-; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    ldxr w8, [x9]
-; CHECK-NEXT:    add w10, w8, w0
-; CHECK-NEXT:    stlxr w11, w10, [x9]
-; CHECK-NEXT:    cbnz w11, .LBB2_1
-; CHECK-NEXT:  // %bb.2: // %atomicrmw.end
-; CHECK-NEXT:    mov w0, w8
-; CHECK-NEXT:    ret
-   %old = atomicrmw add ptr @var32, i32 %offset release
-   ret i32 %old
-}
-
-define dso_local i64 @test_atomic_load_add_i64(i64 %offset) nounwind {
-; CHECK-LABEL: test_atomic_load_add_i64:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    adrp x9, var64
-; CHECK-NEXT:    add x9, x9, :lo12:var64
-; CHECK-NEXT:  .LBB3_1: // %atomicrmw.start
-; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    ldxr x8, [x9]
-; CHECK-NEXT:    add x10, x8, x0
-; CHECK-NEXT:    stxr w11, x10, [x9]
-; CHECK-NEXT:    cbnz w11, .LBB3_1
-; CHECK-NEXT:  // %bb.2: // %atomicrmw.end
-; CHECK-NEXT:    mov x0, x8
-; CHECK-NEXT:    ret
-   %old = atomicrmw add ptr @var64, i64 %offset monotonic
-   ret i64 %old
-}
-
-define dso_local i8 @test_atomic_load_sub_i8(i8 %offset) nounwind {
-; CHECK-LABEL: test_atomic_load_sub_i8:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    adrp x9, var8
-; CHECK-NEXT:    add x9, x9, :lo12:var8
-; CHECK-NEXT:  .LBB4_1: // %atomicrmw.start
-; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    ldxrb w8, [x9]
-; CHECK-NEXT:    sub w10, w8, w0
-; CHECK-NEXT:    stxrb w11, w10, [x9]
-; CHECK-NEXT:    cbnz w11, .LBB4_1
-; CHECK-NEXT:  // %bb.2: // %atomicrmw.end
-; CHECK-NEXT:    mov w0, w8
-; CHECK-NEXT:    ret
-   %old = atomicrmw sub ptr @var8, i8 %offset monotonic
-   ret i8 %old
-}
-
-define dso_local i16 @test_atomic_load_sub_i16(i16 %offset) nounwind {
-; CHECK-LABEL: test_atomic_load_sub_i16:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    adrp x9, var16
-; CHECK-NEXT:    add x9, x9, :lo12:var16
-; CHECK-NEXT:  .LBB5_1: // %atomicrmw.start
-; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    ldxrh w8, [x9]
-; CHECK-NEXT:    sub w10, w8, w0
-; CHECK-NEXT:    stlxrh w11, w10, [x9]
-; CHECK-NEXT:    cbnz w11, .LBB5_1
-; CHECK-NEXT:  // %bb.2: // %atomicrmw.end
-; CHECK-NEXT:    mov w0, w8
-; CHECK-NEXT:    ret
-   %old = atomicrmw sub ptr @var16, i16 %offset release
-   ret i16 %old
-}
-
-define dso_local i32 @test_atomic_load_sub_i32(i32 %offset) nounwind {
-; CHECK-LABEL: test_atomic_load_sub_i32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    adrp x9, var32
-; CHECK-NEXT:    add x9, x9, :lo12:var32
-; CHECK-NEXT:  .LBB6_1: // %atomicrmw.start
-; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    ldaxr w8, [x9]
-; CHECK-NEXT:    sub w10, w8, w0
-; CHECK-NEXT:    stxr w11, w10, [x9]
-; CHECK-NEXT:    cbnz w11, .LBB6_1
-; CHECK-NEXT:  // %bb.2: // %atomicrmw.end
-; CHECK-NEXT:    mov w0, w8
-; CHECK-NEXT:    ret
-   %old = atomicrmw sub ptr @var32, i32 %offset acquire
-   ret i32 %old
-}
-
-define dso_local i64 @test_atomic_load_sub_i64(i64 %offset) nounwind {
-; CHECK-LABEL: test_atomic_load_sub_i64:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov x8, x0
-; CHECK-NEXT:    adrp x9, var64
-; CHECK-NEXT:    add x9, x9, :lo12:var64
-; CHECK-NEXT:  .LBB7_1: // %atomicrmw.start
-; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    ldaxr x0, [x9]
-; CHECK-NEXT:    sub x10, x0, x8
-; CHECK-NEXT:    stlxr w11, x10, [x9]
-; CHECK-NEXT:    cbnz w11, .LBB7_1
-; CHECK-NEXT:  // %bb.2: // %atomicrmw.end
-; CHECK-NEXT:    dmb ish
-; CHECK-NEXT:    ret
-   %old = atomicrmw sub ptr @var64, i64 %offset seq_cst
-   ret i64 %old
-}
-
-define dso_local i8 @test_atomic_load_and_i8(i8 %offset) nounwind {
-; CHECK-LABEL: test_atomic_load_and_i8:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    adrp x9, var8
-; CHECK-NEXT:    add x9, x9, :lo12:var8
-; CHECK-NEXT:  .LBB8_1: // %atomicrmw.start
-; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    ldxrb w8, [x9]
-; CHECK-NEXT:    and w10, w8, w0
-; CHECK-NEXT:    stlxrb w11, w10, [x9]
-; CHECK-NEXT:    cbnz w11, .LBB8_1
-; CHECK-NEXT:  // %bb.2: // %atomicrmw.end
-; CHECK-NEXT:    mov w0, w8
-; CHECK-NEXT:    ret
-   %old = atomicrmw and ptr @var8, i8 %offset release
-   ret i8 %old
-}
-
-define dso_local i16 @test_atomic_load_and_i16(i16 %offset) nounwind {
-; CHECK-LABEL: test_atomic_load_and_i16:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    adrp x9, var16
-; CHECK-NEXT:    add x9, x9, :lo12:var16
-; CHECK-NEXT:  .LBB9_1: // %atomicrmw.start
-; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    ldxrh w8, [x9]
-; CHECK-NEXT:    and w10, w8, w0
-; CHECK-NEXT:    stxrh w11, w10, [x9]
-; CHECK-NEXT:    cbnz w11, .LBB9_1
-; CHECK-NEXT:  // %bb.2: // %atomicrmw.end
-; CHECK-NEXT:    mov w0, w8
-; CHECK-NEXT:    ret
-   %old = atomicrmw and ptr @var16, i16 %offset monotonic
-   ret i16 %old
-}
-
-define dso_local i32 @test_atomic_load_and_i32(i32 %offset) nounwind {
-; CHECK-LABEL: test_atomic_load_and_i32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    adrp x9, var32
-; CHECK-NEXT:    add x9, x9, :lo12:var32
-; CHECK-NEXT:  .LBB10_1: // %atomicrmw.start
-; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    ldaxr w8, [x9]
-; CHECK-NEXT:    and w10, w8, w0
-; CHECK-NEXT:    stlxr w11, w10, [x9]
-; CHECK-NEXT:    cbnz w11, .LBB10_1
-; CHECK-NEXT:  // %bb.2: // %atomicrmw.end
-; CHECK-NEXT:    mov w0, w8
-; CHECK-NEXT:    dmb ish
-; CHECK-NEXT:    ret
-   %old = atomicrmw and ptr @var32, i32 %offset seq_cst
-   ret i32 %old
-}
-
-define dso_local i64 @test_atomic_load_and_i64(i64 %offset) nounwind {
-; CHECK-LABEL: test_atomic_load_and_i64:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    adrp x9, var64
-; CHECK-NEXT:    add x9, x9, :lo12:var64
-; CHECK-NEXT:  .LBB11_1: // %atomicrmw.start
-; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    ldaxr x8, [x9]
-; CHECK-NEXT:    and x10, x8, x0
-; CHECK-NEXT:    stxr w11, x10, [x9]
-; CHECK-NEXT:    cbnz w11, .LBB11_1
-; CHECK-NEXT:  // %bb.2: // %atomicrmw.end
-; CHECK-NEXT:    mov x0, x8
-; CHECK-NEXT:    ret
-   %old = atomicrmw and ptr @var64, i64 %offset acquire
-   ret i64 %old
-}
-
-define dso_local i8 @test_atomic_load_or_i8(i8 %offset) nounwind {
-; CHECK-LABEL: test_atomic_load_or_i8:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    adrp x9, var8
-; CHECK-NEXT:    add x9, x9, :lo12:var8
-; CHECK-NEXT:  .LBB12_1: // %atomicrmw.start
-; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    ldaxrb w8, [x9]
-; CHECK-NEXT:    orr w10, w8, w0
-; CHECK-NEXT:    stlxrb w11, w10, [x9]
-; CHECK-NEXT:    cbnz w11, .LBB12_1
-; CHECK-NEXT:  // %bb.2: // %atomicrmw.end
-; CHECK-NEXT:    mov w0, w8
-; CHECK-NEXT:    dmb ish
-; CHECK-NEXT:    ret
-   %old = atomicrmw or ptr @var8, i8 %offset seq_cst
-   ret i8 %old
-}
-
-define dso_local i16 @test_atomic_load_or_i16(i16 %offset) nounwind {
-; CHECK-LABEL: test_atomic_load_or_i16:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    adrp x9, var16
-; CHECK-NEXT:    add x9, x9, :lo12:var16
-; CHECK-NEXT:  .LBB13_1: // %atomicrmw.start
-; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    ldxrh w8, [x9]
-; CHECK-NEXT:    orr w10, w8, w0
-; CHECK-NEXT:    stxrh w11, w10, [x9]
-; CHECK-NEXT:    cbnz w11, .LBB13_1
-; CHECK-NEXT:  // %bb.2: // %atomicrmw.end
-; CHECK-NEXT:    mov w0, w8
-; CHECK-NEXT:    ret
-   %old = atomicrmw or ptr @var16, i16 %offset monotonic
-   ret i16 %old
-}
-
-define dso_local i32 @test_atomic_load_or_i32(i32 %offset) nounwind {
-; CHECK-LABEL: test_atomic_load_or_i32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    adrp x9, var32
-; CHECK-NEXT:    add x9, x9, :lo12:var32
-; CHECK-NEXT:  .LBB14_1: // %atomicrmw.start
-; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    ldaxr w8, [x9]
-; CHECK-NEXT:    orr w10, w8, w0
-; CHECK-NEXT:    stxr w11, w10, [x9]
-; CHECK-NEXT:    cbnz w11, .LBB14_1
-; CHECK-NEXT:  // %bb.2: // %atomicrmw.end
-; CHECK-NEXT:    mov w0, w8
-; CHECK-NEXT:    ret
-   %old = atomicrmw or ptr @var32, i32 %offset acquire
-   ret i32 %old
-}
-
-define dso_local i64 @test_atomic_load_or_i64(i64 %offset) nounwind {
-; CHECK-LABEL: test_atomic_load_or_i64:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    adrp x9, var64
-; CHECK-NEXT:    add x9, x9, :lo12:var64
-; CHECK-NEXT:  .LBB15_1: // %atomicrmw.start
-; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    ldxr x8, [x9]
-; CHECK-NEXT:    orr x10, x8, x0
-; CHECK-NEXT:    stlxr w11, x10, [x9]
-; CHECK-NEXT:    cbnz w11, .LBB15_1
-; CHECK-NEXT:  // %bb.2: // %atomicrmw.end
-; CHECK-NEXT:    mov x0, x8
-; CHECK-NEXT:    ret
-   %old = atomicrmw or ptr @var64, i64 %offset release
-   ret i64 %old
-}
-
-define dso_local i8 @test_atomic_load_xor_i8(i8 %offset) nounwind {
-; CHECK-LABEL: test_atomic_load_xor_i8:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    adrp x9, var8
-; CHECK-NEXT:    add x9, x9, :lo12:var8
-; CHECK-NEXT:  .LBB16_1: // %atomicrmw.start
-; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    ldaxrb w8, [x9]
-; CHECK-NEXT:    eor w10, w8, w0
-; CHECK-NEXT:    stxrb w11, w10, [x9]
-; CHECK-NEXT:    cbnz w11, .LBB16_1
-; CHECK-NEXT:  // %bb.2: // %atomicrmw.end
-; CHECK-NEXT:    mov w0, w8
-; CHECK-NEXT:    ret
-   %old = atomicrmw xor ptr @var8, i8 %offset acquire
-   ret i8 %old
-}
-
-define dso_local i16 @test_atomic_load_xor_i16(i16 %offset) nounwind {
-; CHECK-LABEL: test_atomic_load_xor_i16:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    adrp x9, var16
-; CHECK-NEXT:    add x9, x9, :lo12:var16
-; CHECK-NEXT:  .LBB17_1: // %atomicrmw.start
-; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    ldxrh w8, [x9]
-; CHECK-NEXT:    eor w10, w8, w0
-; CHECK-NEXT:    stlxrh w11, w10, [x9]
-; CHECK-NEXT:    cbnz w11, .LBB17_1
-; CHECK-NEXT:  // %bb.2: // %atomicrmw.end
-; CHECK-NEXT:    mov w0, w8
-; CHECK-NEXT:    ret
-   %old = atomicrmw xor ptr @var16, i16 %offset release
-   ret i16 %old
-}
-
-define dso_local i32 @test_atomic_load_xor_i32(i32 %offset) nounwind {
-; CHECK-LABEL: test_atomic_load_xor_i32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    adrp x9, var32
-; CHECK-NEXT:    add x9, x9, :lo12:var32
-; CHECK-NEXT:  .LBB18_1: // %atomicrmw.start
-; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    ldaxr w8, [x9]
-; CHECK-NEXT:    eor w10, w8, w0
-; CHECK-NEXT:    stlxr w11, w10, [x9]
-; CHECK-NEXT:    cbnz w11, .LBB18_1
-; CHECK-NEXT:  // %bb.2: // %atomicrmw.end
-; CHECK-NEXT:    mov w0, w8
-; CHECK-NEXT:    dmb ish
-; CHECK-NEXT:    ret
-   %old = atomicrmw xor ptr @var32, i32 %offset seq_cst
-   ret i32 %old
-}
-
-define dso_local i64 @test_atomic_load_xor_i64(i64 %offset) nounwind {
-; CHECK-LABEL: test_atomic_load_xor_i64:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    adrp x9, var64
-; CHECK-NEXT:    add x9, x9, :lo12:var64
-; CHECK-NEXT:  .LBB19_1: // %atomicrmw.start
-; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    ldxr x8, [x9]
-; CHECK-NEXT:    eor x10, x8, x0
-; CHECK-NEXT:    stxr w11, x10, [x9]
-; CHECK-NEXT:    cbnz w11, .LBB19_1
-; CHECK-NEXT:  // %bb.2: // %atomicrmw.end
-; CHECK-NEXT:    mov x0, x8
-; CHECK-NEXT:    ret
-   %old = atomicrmw xor ptr @var64, i64 %offset monotonic
-   ret i64 %old
-}
-
-define dso_local i8 @test_atomic_load_xchg_i8(i8 %offset) nounwind {
-; CHECK-LABEL: test_atomic_load_xchg_i8:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    adrp x9, var8
-; CHECK-NEXT:    add x9, x9, :lo12:var8
-; CHECK-NEXT:  .LBB20_1: // %atomicrmw.start
-; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    ldxrb w8, [x9]
-; CHECK-NEXT:    stxrb w10, w0, [x9]
-; CHECK-NEXT:    cbnz w10, .LBB20_1
-; CHECK-NEXT:  // %bb.2: // %atomicrmw.end
-; CHECK-NEXT:    mov w0, w8
-; CHECK-NEXT:    ret
-   %old = atomicrmw xchg ptr @var8, i8 %offset monotonic
-   ret i8 %old
-}
-
-define dso_local i16 @test_atomic_load_xchg_i16(i16 %offset) nounwind {
-; CHECK-LABEL: test_atomic_load_xchg_i16:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    adrp x9, var16
-; CHECK-NEXT:    add x9, x9, :lo12:var16
-; CHECK-NEXT:  .LBB21_1: // %atomicrmw.start
-; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    ldaxrh w0, [x9]
-; CHECK-NEXT:    stlxrh w10, w8, [x9]
-; CHECK-NEXT:    cbnz w10, .LBB21_1
-; CHECK-NEXT:  // %bb.2: // %atomicrmw.end
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
-; CHECK-NEXT:    dmb ish
-; CHECK-NEXT:    ret
-   %old = atomicrmw xchg ptr @var16, i16 %offset seq_cst
-   ret i16 %old
-}
-
-define dso_local i32 @test_atomic_load_xchg_i32(i32 %offset) nounwind {
-; CHECK-LABEL: test_atomic_load_xchg_i32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    adrp x9, var32
-; CHECK-NEXT:    add x9, x9, :lo12:var32
-; CHECK-NEXT:  .LBB22_1: // %atomicrmw.start
-; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    ldxr w0, [x9]
-; CHECK-NEXT:    stlxr w10, w8, [x9]
-; CHECK-NEXT:    cbnz w10, .LBB22_1
-; CHECK-NEXT:  // %bb.2: // %atomicrmw.end
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
-; CHECK-NEXT:    ret
-   %old = atomicrmw xchg ptr @var32, i32 %offset release
-   ret i32 %old
-}
-
-define dso_local i64 @test_atomic_load_xchg_i64(i64 %offset) nounwind {
-; CHECK-LABEL: test_atomic_load_xchg_i64:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    adrp x9, var64
-; CHECK-NEXT:    add x9, x9, :lo12:var64
-; CHECK-NEXT:  .LBB23_1: // %atomicrmw.start
-; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    ldaxr x8, [x9]
-; CHECK-NEXT:    stxr w10, x0, [x9]
-; CHECK-NEXT:    cbnz w10, .LBB23_1
-; CHECK-NEXT:  // %bb.2: // %atomicrmw.end
-; CHECK-NEXT:    mov x0, x8
-; CHECK-NEXT:    ret
-   %old = atomicrmw xchg ptr @var64, i64 %offset acquire
-   ret i64 %old
-}
-
-
-define dso_local i8 @test_atomic_load_min_i8(i8 %offset) nounwind {
-; CHECK-LABEL: test_atomic_load_min_i8:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    adrp x9, var8
-; CHECK-NEXT:    add x9, x9, :lo12:var8
-; CHECK-NEXT:  .LBB24_1: // %atomicrmw.start
-; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    ldaxrb w10, [x9]
-; CHECK-NEXT:    sxtb w8, w10
-; CHECK-NEXT:    cmp w8, w0, sxtb
-; CHECK-NEXT:    csel w10, w10, w0, le
-; CHECK-NEXT:    stxrb w11, w10, [x9]
-; CHECK-NEXT:    cbnz w11, .LBB24_1
-; CHECK-NEXT:  // %bb.2: // %atomicrmw.end
-; CHECK-NEXT:    mov w0, w8
-; CHECK-NEXT:    ret
-   %old = atomicrmw min ptr @var8, i8 %offset acquire
-   ret i8 %old
-}
-
-define dso_local i16 @test_atomic_load_min_i16(i16 %offset) nounwind {
-; CHECK-LABEL: test_atomic_load_min_i16:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    adrp x9, var16
-; CHECK-NEXT:    add x9, x9, :lo12:var16
-; CHECK-NEXT:  .LBB25_1: // %atomicrmw.start
-; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    ldxrh w10, [x9]
-; CHECK-NEXT:    sxth w8, w10
-; CHECK-NEXT:    cmp w8, w0, sxth
-; CHECK-NEXT:    csel w10, w10, w0, le
-; CHECK-NEXT:    stlxrh w11, w10, [x9]
-; CHECK-NEXT:    cbnz w11, .LBB25_1
-; CHECK-NEXT:  // %bb.2: // %atomicrmw.end
-; CHECK-NEXT:    mov w0, w8
-; CHECK-NEXT:    ret
-   %old = atomicrmw min ptr @var16, i16 %offset release
-   ret i16 %old
-}
-
-define dso_local i32 @test_atomic_load_min_i32(i32 %offset) nounwind {
-; CHECK-LABEL: test_atomic_load_min_i32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    adrp x9, var32
-; CHECK-NEXT:    add x9, x9, :lo12:var32
-; CHECK-NEXT:  .LBB26_1: // %atomicrmw.start
-; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    ldxr w8, [x9]
-; CHECK-NEXT:    cmp w8, w0
-; CHECK-NEXT:    csel w10, w8, w0, le
-; CHECK-NEXT:    stxr w11, w10, [x9]
-; CHECK-NEXT:    cbnz w11, .LBB26_1
-; CHECK-NEXT:  // %bb.2: // %atomicrmw.end
-; CHECK-NEXT:    mov w0, w8
-; CHECK-NEXT:    ret
-   %old = atomicrmw min ptr @var32, i32 %offset monotonic
-   ret i32 %old
-}
-
-define dso_local i64 @test_atomic_load_min_i64(i64 %offset) nounwind {
-; CHECK-LABEL: test_atomic_load_min_i64:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov x8, x0
-; CHECK-NEXT:    adrp x9, var64
-; CHECK-NEXT:    add x9, x9, :lo12:var64
-; CHECK-NEXT:  .LBB27_1: // %atomicrmw.start
-; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    ldaxr x0, [x9]
-; CHECK-NEXT:    cmp x0, x8
-; CHECK-NEXT:    csel x10, x0, x8, le
-; CHECK-NEXT:    stlxr w11, x10, [x9]
-; CHECK-NEXT:    cbnz w11, .LBB27_1
-; CHECK-NEXT:  // %bb.2: // %atomicrmw.end
-; CHECK-NEXT:    dmb ish
-; CHECK-NEXT:    ret
-   %old = atomicrmw min ptr @var64, i64 %offset seq_cst
-   ret i64 %old
-}
-
-define dso_local i8 @test_atomic_load_max_i8(i8 %offset) nounwind {
-; CHECK-LABEL: test_atomic_load_max_i8:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    adrp x9, var8
-; CHECK-NEXT:    add x9, x9, :lo12:var8
-; CHECK-NEXT:  .LBB28_1: // %atomicrmw.start
-; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    ldaxrb w10, [x9]
-; CHECK-NEXT:    sxtb w8, w10
-; CHECK-NEXT:    cmp w8, w0, sxtb
-; CHECK-NEXT:    csel w10, w10, w0, gt
-; CHECK-NEXT:    stlxrb w11, w10, [x9]
-; CHECK-NEXT:    cbnz w11, .LBB28_1
-; CHECK-NEXT:  // %bb.2: // %atomicrmw.end
-; CHECK-NEXT:    mov w0, w8
-; CHECK-NEXT:    dmb ish
-; CHECK-NEXT:    ret
-   %old = atomicrmw max ptr @var8, i8 %offset seq_cst
-   ret i8 %old
-}
-
-define dso_local i16 @test_atomic_load_max_i16(i16 %offset) nounwind {
-; CHECK-LABEL: test_atomic_load_max_i16:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    adrp x9, var16
-; CHECK-NEXT:    add x9, x9, :lo12:var16
-; CHECK-NEXT:  .LBB29_1: // %atomicrmw.start
-; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    ldaxrh w10, [x9]
-; CHECK-NEXT:    sxth w8, w10
-; CHECK-NEXT:    cmp w8, w0, sxth
-; CHECK-NEXT:    csel w10, w10, w0, gt
-; CHECK-NEXT:    stxrh w11, w10, [x9]
-; CHECK-NEXT:    cbnz w11, .LBB29_1
-; CHECK-NEXT:  // %bb.2: // %atomicrmw.end
-; CHECK-NEXT:    mov w0, w8
-; CHECK-NEXT:    ret
-   %old = atomicrmw max ptr @var16, i16 %offset acquire
-   ret i16 %old
-}
-
-define dso_local i32 @test_atomic_load_max_i32(i32 %offset) nounwind {
-; CHECK-LABEL: test_atomic_load_max_i32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    adrp x9, var32
-; CHECK-NEXT:    add x9, x9, :lo12:var32
-; CHECK-NEXT:  .LBB30_1: // %atomicrmw.start
-; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    ldxr w8, [x9]
-; CHECK-NEXT:    cmp w8, w0
-; CHECK-NEXT:    csel w10, w8, w0, gt
-; CHECK-NEXT:    stlxr w11, w10, [x9]
-; CHECK-NEXT:    cbnz w11, .LBB30_1
-; CHECK-NEXT:  // %bb.2: // %atomicrmw.end
-; CHECK-NEXT:    mov w0, w8
-; CHECK-NEXT:    ret
-   %old = atomicrmw max ptr @var32, i32 %offset release
-   ret i32 %old
-}
-
-define dso_local i64 @test_atomic_load_max_i64(i64 %offset) nounwind {
-; CHECK-LABEL: test_atomic_load_max_i64:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    adrp x9, var64
-; CHECK-NEXT:    add x9, x9, :lo12:var64
-; CHECK-NEXT:  .LBB31_1: // %atomicrmw.start
-; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    ldxr x8, [x9]
-; CHECK-NEXT:    cmp x8, x0
-; CHECK-NEXT:    csel x10, x8, x0, gt
-; CHECK-NEXT:    stxr w11, x10, [x9]
-; CHECK-NEXT:    cbnz w11, .LBB31_1
-; CHECK-NEXT:  // %bb.2: // %atomicrmw.end
-; CHECK-NEXT:    mov x0, x8
-; CHECK-NEXT:    ret
-   %old = atomicrmw max ptr @var64, i64 %offset monotonic
-   ret i64 %old
-}
-
-define dso_local i8 @test_atomic_load_umin_i8(i8 %offset) nounwind {
-; CHECK-LABEL: test_atomic_load_umin_i8:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    adrp x8, var8
-; CHECK-NEXT:    add x8, x8, :lo12:var8
-; CHECK-NEXT:    and w9, w0, #0xff
-; CHECK-NEXT:  .LBB32_1: // %atomicrmw.start
-; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    ldxrb w0, [x8]
-; CHECK-NEXT:    cmp w0, w9
-; CHECK-NEXT:    csel w10, w0, w9, ls
-; CHECK-NEXT:    stxrb w11, w10, [x8]
-; CHECK-NEXT:    cbnz w11, .LBB32_1
-; CHECK-NEXT:  // %bb.2: // %atomicrmw.end
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
-; CHECK-NEXT:    ret
-   %old = atomicrmw umin ptr @var8, i8 %offset monotonic
-   ret i8 %old
-}
-
-define dso_local i16 @test_atomic_load_umin_i16(i16 %offset) nounwind {
-; CHECK-LABEL: test_atomic_load_umin_i16:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    adrp x8, var16
-; CHECK-NEXT:    add x8, x8, :lo12:var16
-; CHECK-NEXT:    and w9, w0, #0xffff
-; CHECK-NEXT:  .LBB33_1: // %atomicrmw.start
-; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    ldaxrh w0, [x8]
-; CHECK-NEXT:    cmp w0, w9
-; CHECK-NEXT:    csel w10, w0, w9, ls
-; CHECK-NEXT:    stxrh w11, w10, [x8]
-; CHECK-NEXT:    cbnz w11, .LBB33_1
-; CHECK-NEXT:  // %bb.2: // %atomicrmw.end
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
-; CHECK-NEXT:    ret
-   %old = atomicrmw umin ptr @var16, i16 %offset acquire
-   ret i16 %old
-}
-
-define dso_local i32 @test_atomic_load_umin_i32(i32 %offset) nounwind {
-; CHECK-LABEL: test_atomic_load_umin_i32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    adrp x9, var32
-; CHECK-NEXT:    add x9, x9, :lo12:var32
-; CHECK-NEXT:  .LBB34_1: // %atomicrmw.start
-; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    ldaxr w8, [x9]
-; CHECK-NEXT:    cmp w8, w0
-; CHECK-NEXT:    csel w10, w8, w0, ls
-; CHECK-NEXT:    stlxr w11, w10, [x9]
-; CHECK-NEXT:    cbnz w11, .LBB34_1
-; CHECK-NEXT:  // %bb.2: // %atomicrmw.end
-; CHECK-NEXT:    mov w0, w8
-; CHECK-NEXT:    dmb ish
-; CHECK-NEXT:    ret
-   %old = atomicrmw umin ptr @var32, i32 %offset seq_cst
-   ret i32 %old
-}
-
-define dso_local i64 @test_atomic_load_umin_i64(i64 %offset) nounwind {
-; CHECK-LABEL: test_atomic_load_umin_i64:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    adrp x9, var64
-; CHECK-NEXT:    add x9, x9, :lo12:var64
-; CHECK-NEXT:  .LBB35_1: // %atomicrmw.start
-; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    ldaxr x8, [x9]
-; CHECK-NEXT:    cmp x8, x0
-; CHECK-NEXT:    csel x10, x8, x0, ls
-; CHECK-NEXT:    stlxr w11, x10, [x9]
-; CHECK-NEXT:    cbnz w11, .LBB35_1
-; CHECK-NEXT:  // %bb.2: // %atomicrmw.end
-; CHECK-NEXT:    mov x0, x8
-; CHECK-NEXT:    ret
-   %old = atomicrmw umin ptr @var64, i64 %offset acq_rel
-   ret i64 %old
-}
-
-define dso_local i8 @test_atomic_load_umax_i8(i8 %offset) nounwind {
-; CHECK-LABEL: test_atomic_load_umax_i8:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    adrp x8, var8
-; CHECK-NEXT:    add x8, x8, :lo12:var8
-; CHECK-NEXT:    and w9, w0, #0xff
-; CHECK-NEXT:  .LBB36_1: // %atomicrmw.start
-; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    ldaxrb w0, [x8]
-; CHECK-NEXT:    cmp w0, w9
-; CHECK-NEXT:    csel w10, w0, w9, hi
-; CHECK-NEXT:    stlxrb w11, w10, [x8]
-; CHECK-NEXT:    cbnz w11, .LBB36_1
-; CHECK-NEXT:  // %bb.2: // %atomicrmw.end
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
-; CHECK-NEXT:    ret
-   %old = atomicrmw umax ptr @var8, i8 %offset acq_rel
-   ret i8 %old
-}
-
-define dso_local i16 @test_atomic_load_umax_i16(i16 %offset) nounwind {
-; CHECK-LABEL: test_atomic_load_umax_i16:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    adrp x8, var16
-; CHECK-NEXT:    add x8, x8, :lo12:var16
-; CHECK-NEXT:    and w9, w0, #0xffff
-; CHECK-NEXT:  .LBB37_1: // %atomicrmw.start
-; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    ldxrh w0, [x8]
-; CHECK-NEXT:    cmp w0, w9
-; CHECK-NEXT:    csel w10, w0, w9, hi
-; CHECK-NEXT:    stxrh w11, w10, [x8]
-; CHECK-NEXT:    cbnz w11, .LBB37_1
-; CHECK-NEXT:  // %bb.2: // %atomicrmw.end
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
-; CHECK-NEXT:    ret
-   %old = atomicrmw umax ptr @var16, i16 %offset monotonic
-   ret i16 %old
-}
-
-define dso_local i32 @test_atomic_load_umax_i32(i32 %offset) nounwind {
-; CHECK-LABEL: test_atomic_load_umax_i32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    adrp x9, var32
-; CHECK-NEXT:    add x9, x9, :lo12:var32
-; CHECK-NEXT:  .LBB38_1: // %atomicrmw.start
-; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    ldaxr w8, [x9]
-; CHECK-NEXT:    cmp w8, w0
-; CHECK-NEXT:    csel w10, w8, w0, hi
-; CHECK-NEXT:    stlxr w11, w10, [x9]
-; CHECK-NEXT:    cbnz w11, .LBB38_1
-; CHECK-NEXT:  // %bb.2: // %atomicrmw.end
-; CHECK-NEXT:    mov w0, w8
-; CHECK-NEXT:    dmb ish
-; CHECK-NEXT:    ret
-   %old = atomicrmw umax ptr @var32, i32 %offset seq_cst
-   ret i32 %old
-}
-
-define dso_local i64 @test_atomic_load_umax_i64(i64 %offset) nounwind {
-; CHECK-LABEL: test_atomic_load_umax_i64:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    adrp x9, var64
-; CHECK-NEXT:    add x9, x9, :lo12:var64
-; CHECK-NEXT:  .LBB39_1: // %atomicrmw.start
-; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    ldxr x8, [x9]
-; CHECK-NEXT:    cmp x8, x0
-; CHECK-NEXT:    csel x10, x8, x0, hi
-; CHECK-NEXT:    stlxr w11, x10, [x9]
-; CHECK-NEXT:    cbnz w11, .LBB39_1
-; CHECK-NEXT:  // %bb.2: // %atomicrmw.end
-; CHECK-NEXT:    mov x0, x8
-; CHECK-NEXT:    ret
-   %old = atomicrmw umax ptr @var64, i64 %offset release
-   ret i64 %old
-}
-
-define dso_local i8 @test_atomic_cmpxchg_i8(i8 %wanted, i8 %new) nounwind {
-; CHECK-LABEL: test_atomic_cmpxchg_i8:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    and w8, w0, #0xff
-; CHECK-NEXT:    adrp x9, var8
-; CHECK-NEXT:    add x9, x9, :lo12:var8
-; CHECK-NEXT:  .LBB40_1: // %cmpxchg.start
-; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    ldaxrb w0, [x9]
-; CHECK-NEXT:    cmp w0, w8
-; CHECK-NEXT:    b.ne .LBB40_4
-; CHECK-NEXT:  // %bb.2: // %cmpxchg.trystore
-; CHECK-NEXT:    // in Loop: Header=BB40_1 Depth=1
-; CHECK-NEXT:    stxrb w10, w1, [x9]
-; CHECK-NEXT:    cbnz w10, .LBB40_1
-; CHECK-NEXT:  // %bb.3: // %cmpxchg.end
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
-; CHECK-NEXT:    ret
-; CHECK-NEXT:  .LBB40_4: // %cmpxchg.nostore
-; CHECK-NEXT:    clrex
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
-; CHECK-NEXT:    ret
-   %pair = cmpxchg ptr @var8, i8 %wanted, i8 %new acquire acquire
-   %old = extractvalue { i8, i1 } %pair, 0
-   ret i8 %old
-}
-
-define dso_local i16 @test_atomic_cmpxchg_i16(i16 %wanted, i16 %new) nounwind {
-; CHECK-LABEL: test_atomic_cmpxchg_i16:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    and w8, w0, #0xffff
-; CHECK-NEXT:    adrp x9, var16
-; CHECK-NEXT:    add x9, x9, :lo12:var16
-; CHECK-NEXT:  .LBB41_1: // %cmpxchg.start
-; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    ldaxrh w0, [x9]
-; CHECK-NEXT:    cmp w0, w8
-; CHECK-NEXT:    b.ne .LBB41_4
-; CHECK-NEXT:  // %bb.2: // %cmpxchg.trystore
-; CHECK-NEXT:    // in Loop: Header=BB41_1 Depth=1
-; CHECK-NEXT:    stlxrh w10, w1, [x9]
-; CHECK-NEXT:    cbnz w10, .LBB41_1
-; CHECK-NEXT:  // %bb.3: // %cmpxchg.success
-; CHECK-NEXT:    dmb ish
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
-; CHECK-NEXT:    ret
-; CHECK-NEXT:  .LBB41_4: // %cmpxchg.nostore
-; CHECK-NEXT:    clrex
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
-; CHECK-NEXT:    ret
-   %pair = cmpxchg ptr @var16, i16 %wanted, i16 %new seq_cst seq_cst
-   %old = extractvalue { i16, i1 } %pair, 0
-   ret i16 %old
-}
-
-define dso_local i32 @test_atomic_cmpxchg_i32(i32 %wanted, i32 %new) nounwind {
-; CHECK-LABEL: test_atomic_cmpxchg_i32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    adrp x9, var32
-; CHECK-NEXT:    add x9, x9, :lo12:var32
-; CHECK-NEXT:  .LBB42_1: // %cmpxchg.start
-; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    ldxr w0, [x9]
-; CHECK-NEXT:    cmp w0, w8
-; CHECK-NEXT:    b.ne .LBB42_4
-; CHECK-NEXT:  // %bb.2: // %cmpxchg.trystore
-; CHECK-NEXT:    // in Loop: Header=BB42_1 Depth=1
-; CHECK-NEXT:    stlxr w10, w1, [x9]
-; CHECK-NEXT:    cbnz w10, .LBB42_1
-; CHECK-NEXT:  // %bb.3: // %cmpxchg.end
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
-; CHECK-NEXT:    ret
-; CHECK-NEXT:  .LBB42_4: // %cmpxchg.nostore
-; CHECK-NEXT:    clrex
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
-; CHECK-NEXT:    ret
-   %pair = cmpxchg ptr @var32, i32 %wanted, i32 %new release monotonic
-   %old = extractvalue { i32, i1 } %pair, 0
-   ret i32 %old
-}
-
-define dso_local void @test_atomic_cmpxchg_i64(i64 %wanted, i64 %new) nounwind {
-; CHECK-LABEL: test_atomic_cmpxchg_i64:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    adrp x9, var64
-; CHECK-NEXT:    add x9, x9, :lo12:var64
-; CHECK-NEXT:  .LBB43_1: // %cmpxchg.start
-; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    ldxr x8, [x9]
-; CHECK-NEXT:    cmp x8, x0
-; CHECK-NEXT:    b.ne .LBB43_4
-; CHECK-NEXT:  // %bb.2: // %cmpxchg.trystore
-; CHECK-NEXT:    // in Loop: Header=BB43_1 Depth=1
-; CHECK-NEXT:    stxr w10, x1, [x9]
-; CHECK-NEXT:    cbnz w10, .LBB43_1
-; CHECK-NEXT:  .LBB43_3: // %cmpxchg.end
-; CHECK-NEXT:    adrp x9, var64
-; CHECK-NEXT:    str x8, [x9, :lo12:var64]
-; CHECK-NEXT:    ret
-; CHECK-NEXT:  .LBB43_4: // %cmpxchg.nostore
-; CHECK-NEXT:    clrex
-; CHECK-NEXT:    b .LBB43_3
-   %pair = cmpxchg ptr @var64, i64 %wanted, i64 %new monotonic monotonic
-   %old = extractvalue { i64, i1 } %pair, 0
-   store i64 %old, ptr @var64
-   ret void
-}
-
-define dso_local i8 @test_atomic_load_monotonic_i8() nounwind {
-; CHECK-LABEL: test_atomic_load_monotonic_i8:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    adrp x8, var8
-; CHECK-NEXT:    ldrb w0, [x8, :lo12:var8]
-; CHECK-NEXT:    ret
-  %val = load atomic i8, ptr @var8 monotonic, align 1
-  ret i8 %val
-}
-
-define dso_local i8 @test_atomic_load_monotonic_regoff_i8(i64 %base, i64 %off) nounwind {
-; CHECK-LABEL: test_atomic_load_monotonic_regoff_i8:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    ldrb w0, [x0, x1]
-; CHECK-NEXT:    ret
-  %addr_int = add i64 %base, %off
-  %addr = inttoptr i64 %addr_int to ptr
-  %val = load atomic i8, ptr %addr monotonic, align 1
-  ret i8 %val
-}
-
-define dso_local i8 @test_atomic_load_acquire_i8() nounwind {
-; CHECK-LABEL: test_atomic_load_acquire_i8:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    adrp x8, var8
-; CHECK-NEXT:    add x8, x8, :lo12:var8
-; CHECK-NEXT:    ldarb w0, [x8]
-; CHECK-NEXT:    ret
-  %val = load atomic i8, ptr @var8 acquire, align 1
-  ret i8 %val
-}
-
-define dso_local i8 @test_atomic_load_seq_cst_i8() nounwind {
-; CHECK-LABEL: test_atomic_load_seq_cst_i8:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    adrp x8, var8
-; CHECK-NEXT:    add x8, x8, :lo12:var8
-; CHECK-NEXT:    ldarb w0, [x8]
-; CHECK-NEXT:    ret
-  %val = load atomic i8, ptr @var8 seq_cst, align 1
-  ret i8 %val
-}
-
-define dso_local i16 @test_atomic_load_monotonic_i16() nounwind {
-; CHECK-LABEL: test_atomic_load_monotonic_i16:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    adrp x8, var16
-; CHECK-NEXT:    ldrh w0, [x8, :lo12:var16]
-; CHECK-NEXT:    ret
-  %val = load atomic i16, ptr @var16 monotonic, align 2
-  ret i16 %val
-}
-
-define dso_local i32 @test_atomic_load_monotonic_regoff_i32(i64 %base, i64 %off) nounwind {
-; CHECK-LABEL: test_atomic_load_monotonic_regoff_i32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    ldr w0, [x0, x1]
-; CHECK-NEXT:    ret
-  %addr_int = add i64 %base, %off
-  %addr = inttoptr i64 %addr_int to ptr
-  %val = load atomic i32, ptr %addr monotonic, align 4
-  ret i32 %val
-}
-
-define dso_local i64 @test_atomic_load_seq_cst_i64() nounwind {
-; CHECK-LABEL: test_atomic_load_seq_cst_i64:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    adrp x8, var64
-; CHECK-NEXT:    add x8, x8, :lo12:var64
-; CHECK-NEXT:    ldar x0, [x8]
-; CHECK-NEXT:    ret
-  %val = load atomic i64, ptr @var64 seq_cst, align 8
-  ret i64 %val
-}
-
-define dso_local void @test_atomic_store_monotonic_i8(i8 %val) nounwind {
-; CHECK-LABEL: test_atomic_store_monotonic_i8:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    adrp x8, var8
-; CHECK-NEXT:    strb w0, [x8, :lo12:var8]
-; CHECK-NEXT:    ret
-  store atomic i8 %val, ptr @var8 monotonic, align 1
-  ret void
-}
-
-define dso_local void @test_atomic_store_monotonic_regoff_i8(i64 %base, i64 %off, i8 %val) nounwind {
-; CHECK-LABEL: test_atomic_store_monotonic_regoff_i8:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    strb w2, [x0, x1]
-; CHECK-NEXT:    ret
-  %addr_int = add i64 %base, %off
-  %addr = inttoptr i64 %addr_int to ptr
-  store atomic i8 %val, ptr %addr monotonic, align 1
-  ret void
-}
-define dso_local void @test_atomic_store_release_i8(i8 %val) nounwind {
-; CHECK-LABEL: test_atomic_store_release_i8:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    adrp x8, var8
-; CHECK-NEXT:    add x8, x8, :lo12:var8
-; CHECK-NEXT:    stlrb w0, [x8]
-; CHECK-NEXT:    ret
-  store atomic i8 %val, ptr @var8 release, align 1
-  ret void
-}
-
-define dso_local void @test_atomic_store_seq_cst_i8(i8 %val) nounwind {
-; CHECK-LABEL: test_atomic_store_seq_cst_i8:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    adrp x8, var8
-; CHECK-NEXT:    add x8, x8, :lo12:var8
-; CHECK-NEXT:    stlrb w0, [x8]
-; CHECK-NEXT:    dmb ish
-; CHECK-NEXT:    ret
-  store atomic i8 %val, ptr @var8 seq_cst, align 1
-  ret void
-}
-
-define dso_local void @test_atomic_store_monotonic_i16(i16 %val) nounwind {
-; CHECK-LABEL: test_atomic_store_monotonic_i16:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    adrp x8, var16
-; CHECK-NEXT:    strh w0, [x8, :lo12:var16]
-; CHECK-NEXT:    ret
-  store atomic i16 %val, ptr @var16 monotonic, align 2
-  ret void
-}
-
-define dso_local void @test_atomic_store_monotonic_regoff_i32(i64 %base, i64 %off, i32 %val) nounwind {
-; CHECK-LABEL: test_atomic_store_monotonic_regoff_i32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    str w2, [x0, x1]
-; CHECK-NEXT:    ret
-  %addr_int = add i64 %base, %off
-  %addr = inttoptr i64 %addr_int to ptr
-  store atomic i32 %val, ptr %addr monotonic, align 4
-  ret void
-}
-
-define dso_local void @test_atomic_store_release_i64(i64 %val) nounwind {
-; CHECK-LABEL: test_atomic_store_release_i64:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    adrp x8, var64
-; CHECK-NEXT:    add x8, x8, :lo12:var64
-; CHECK-NEXT:    stlr x0, [x8]
-; CHECK-NEXT:    ret
-  store atomic i64 %val, ptr @var64 release, align 8
-  ret void
-}
diff --git a/llvm/test/CodeGen/AArch64/atomic-ops.ll b/llvm/test/CodeGen/AArch64/atomic-ops.ll
index 1c2edd39e268d..0e1ee41e40139 100644
--- a/llvm/test/CodeGen/AArch64/atomic-ops.ll
+++ b/llvm/test/CodeGen/AArch64/atomic-ops.ll
@@ -1,6 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=aarch64-none-linux-gnu -disable-post-ra -verify-machineinstrs -aarch64-enable-sink-fold=true < %s | FileCheck %s --check-prefixes=CHECK,INLINE_ATOMICS
 ; RUN: llc -mtriple=aarch64-none-linux-gnu -disable-post-ra -verify-machineinstrs -mattr=+outline-atomics -aarch64-enable-sink-fold=true < %s | FileCheck %s --check-prefixes=CHECK,OUTLINE_ATOMICS
+; RUN: llc -mtriple=aarch64-windows-pc-msvc -disable-post-ra -verify-machineinstrs -aarch64-enable-sink-fold=true < %s | FileCheck %s --check-prefixes=CHECK,MSVC_INLINE_ATOMICS
+; RUN: llc -mtriple=aarch64-windows-pc-msvc -disable-post-ra -verify-machineinstrs -mattr=+outline-atomics -aarch64-enable-sink-fold=true < %s | FileCheck %s --check-prefixes=CHECK,MSVC_OUTLINE_ATOMICS
 
 @var8 = dso_local global i8 0
 @var16 = dso_local global i16 0
@@ -30,6 +32,31 @@ define dso_local i8 @test_atomic_load_add_i8(i8 %offset) nounwind {
 ; OUTLINE_ATOMICS-NEXT:    bl __aarch64_ldadd1_acq_rel
 ; OUTLINE_ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE_ATOMICS-NEXT:    ret
+;
+; MSVC_INLINE_ATOMICS-LABEL: test_atomic_load_add_i8:
+; MSVC_INLINE_ATOMICS:       // %bb.0:
+; MSVC_INLINE_ATOMICS-NEXT:    adrp x9, var8
+; MSVC_INLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var8
+; MSVC_INLINE_ATOMICS-NEXT:  .LBB0_1: // %atomicrmw.start
+; MSVC_INLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC_INLINE_ATOMICS-NEXT:    ldaxrb w8, [x9]
+; MSVC_INLINE_ATOMICS-NEXT:    add w10, w8, w0
+; MSVC_INLINE_ATOMICS-NEXT:    stlxrb w11, w10, [x9]
+; MSVC_INLINE_ATOMICS-NEXT:    cbnz w11, .LBB0_1
+; MSVC_INLINE_ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC_INLINE_ATOMICS-NEXT:    mov w0, w8
+; MSVC_INLINE_ATOMICS-NEXT:    dmb ish
+; MSVC_INLINE_ATOMICS-NEXT:    ret
+;
+; MSVC_OUTLINE_ATOMICS-LABEL: test_atomic_load_add_i8:
+; MSVC_OUTLINE_ATOMICS:       // %bb.0:
+; MSVC_OUTLINE_ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC_OUTLINE_ATOMICS-NEXT:    adrp x1, var8
+; MSVC_OUTLINE_ATOMICS-NEXT:    add x1, x1, :lo12:var8
+; MSVC_OUTLINE_ATOMICS-NEXT:    bl __aarch64_ldadd1_acq_rel
+; MSVC_OUTLINE_ATOMICS-NEXT:    dmb ish
+; MSVC_OUTLINE_ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC_OUTLINE_ATOMICS-NEXT:    ret
    %old = atomicrmw add ptr @var8, i8 %offset seq_cst
    ret i8 %old
 }
@@ -57,6 +84,29 @@ define dso_local i16 @test_atomic_load_add_i16(i16 %offset) nounwind {
 ; OUTLINE_ATOMICS-NEXT:    bl __aarch64_ldadd2_acq
 ; OUTLINE_ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE_ATOMICS-NEXT:    ret
+;
+; MSVC_INLINE_ATOMICS-LABEL: test_atomic_load_add_i16:
+; MSVC_INLINE_ATOMICS:       // %bb.0:
+; MSVC_INLINE_ATOMICS-NEXT:    adrp x9, var16
+; MSVC_INLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var16
+; MSVC_INLINE_ATOMICS-NEXT:  .LBB1_1: // %atomicrmw.start
+; MSVC_INLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC_INLINE_ATOMICS-NEXT:    ldaxrh w8, [x9]
+; MSVC_INLINE_ATOMICS-NEXT:    add w10, w8, w0
+; MSVC_INLINE_ATOMICS-NEXT:    stxrh w11, w10, [x9]
+; MSVC_INLINE_ATOMICS-NEXT:    cbnz w11, .LBB1_1
+; MSVC_INLINE_ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC_INLINE_ATOMICS-NEXT:    mov w0, w8
+; MSVC_INLINE_ATOMICS-NEXT:    ret
+;
+; MSVC_OUTLINE_ATOMICS-LABEL: test_atomic_load_add_i16:
+; MSVC_OUTLINE_ATOMICS:       // %bb.0:
+; MSVC_OUTLINE_ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC_OUTLINE_ATOMICS-NEXT:    adrp x1, var16
+; MSVC_OUTLINE_ATOMICS-NEXT:    add x1, x1, :lo12:var16
+; MSVC_OUTLINE_ATOMICS-NEXT:    bl __aarch64_ldadd2_acq
+; MSVC_OUTLINE_ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC_OUTLINE_ATOMICS-NEXT:    ret
    %old = atomicrmw add ptr @var16, i16 %offset acquire
    ret i16 %old
 }
@@ -84,6 +134,29 @@ define dso_local i32 @test_atomic_load_add_i32(i32 %offset) nounwind {
 ; OUTLINE_ATOMICS-NEXT:    bl __aarch64_ldadd4_rel
 ; OUTLINE_ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE_ATOMICS-NEXT:    ret
+;
+; MSVC_INLINE_ATOMICS-LABEL: test_atomic_load_add_i32:
+; MSVC_INLINE_ATOMICS:       // %bb.0:
+; MSVC_INLINE_ATOMICS-NEXT:    adrp x9, var32
+; MSVC_INLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var32
+; MSVC_INLINE_ATOMICS-NEXT:  .LBB2_1: // %atomicrmw.start
+; MSVC_INLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC_INLINE_ATOMICS-NEXT:    ldxr w8, [x9]
+; MSVC_INLINE_ATOMICS-NEXT:    add w10, w8, w0
+; MSVC_INLINE_ATOMICS-NEXT:    stlxr w11, w10, [x9]
+; MSVC_INLINE_ATOMICS-NEXT:    cbnz w11, .LBB2_1
+; MSVC_INLINE_ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC_INLINE_ATOMICS-NEXT:    mov w0, w8
+; MSVC_INLINE_ATOMICS-NEXT:    ret
+;
+; MSVC_OUTLINE_ATOMICS-LABEL: test_atomic_load_add_i32:
+; MSVC_OUTLINE_ATOMICS:       // %bb.0:
+; MSVC_OUTLINE_ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC_OUTLINE_ATOMICS-NEXT:    adrp x1, var32
+; MSVC_OUTLINE_ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; MSVC_OUTLINE_ATOMICS-NEXT:    bl __aarch64_ldadd4_rel
+; MSVC_OUTLINE_ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC_OUTLINE_ATOMICS-NEXT:    ret
    %old = atomicrmw add ptr @var32, i32 %offset release
    ret i32 %old
 }
@@ -111,6 +184,29 @@ define dso_local i64 @test_atomic_load_add_i64(i64 %offset) nounwind {
 ; OUTLINE_ATOMICS-NEXT:    bl __aarch64_ldadd8_relax
 ; OUTLINE_ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE_ATOMICS-NEXT:    ret
+;
+; MSVC_INLINE_ATOMICS-LABEL: test_atomic_load_add_i64:
+; MSVC_INLINE_ATOMICS:       // %bb.0:
+; MSVC_INLINE_ATOMICS-NEXT:    adrp x9, var64
+; MSVC_INLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var64
+; MSVC_INLINE_ATOMICS-NEXT:  .LBB3_1: // %atomicrmw.start
+; MSVC_INLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC_INLINE_ATOMICS-NEXT:    ldxr x8, [x9]
+; MSVC_INLINE_ATOMICS-NEXT:    add x10, x8, x0
+; MSVC_INLINE_ATOMICS-NEXT:    stxr w11, x10, [x9]
+; MSVC_INLINE_ATOMICS-NEXT:    cbnz w11, .LBB3_1
+; MSVC_INLINE_ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC_INLINE_ATOMICS-NEXT:    mov x0, x8
+; MSVC_INLINE_ATOMICS-NEXT:    ret
+;
+; MSVC_OUTLINE_ATOMICS-LABEL: test_atomic_load_add_i64:
+; MSVC_OUTLINE_ATOMICS:       // %bb.0:
+; MSVC_OUTLINE_ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC_OUTLINE_ATOMICS-NEXT:    adrp x1, var64
+; MSVC_OUTLINE_ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; MSVC_OUTLINE_ATOMICS-NEXT:    bl __aarch64_ldadd8_relax
+; MSVC_OUTLINE_ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC_OUTLINE_ATOMICS-NEXT:    ret
    %old = atomicrmw add ptr @var64, i64 %offset monotonic
    ret i64 %old
 }
@@ -139,6 +235,30 @@ define dso_local i8 @test_atomic_load_sub_i8(i8 %offset) nounwind {
 ; OUTLINE_ATOMICS-NEXT:    bl __aarch64_ldadd1_relax
 ; OUTLINE_ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE_ATOMICS-NEXT:    ret
+;
+; MSVC_INLINE_ATOMICS-LABEL: test_atomic_load_sub_i8:
+; MSVC_INLINE_ATOMICS:       // %bb.0:
+; MSVC_INLINE_ATOMICS-NEXT:    adrp x9, var8
+; MSVC_INLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var8
+; MSVC_INLINE_ATOMICS-NEXT:  .LBB4_1: // %atomicrmw.start
+; MSVC_INLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC_INLINE_ATOMICS-NEXT:    ldxrb w8, [x9]
+; MSVC_INLINE_ATOMICS-NEXT:    sub w10, w8, w0
+; MSVC_INLINE_ATOMICS-NEXT:    stxrb w11, w10, [x9]
+; MSVC_INLINE_ATOMICS-NEXT:    cbnz w11, .LBB4_1
+; MSVC_INLINE_ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC_INLINE_ATOMICS-NEXT:    mov w0, w8
+; MSVC_INLINE_ATOMICS-NEXT:    ret
+;
+; MSVC_OUTLINE_ATOMICS-LABEL: test_atomic_load_sub_i8:
+; MSVC_OUTLINE_ATOMICS:       // %bb.0:
+; MSVC_OUTLINE_ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC_OUTLINE_ATOMICS-NEXT:    neg w0, w0
+; MSVC_OUTLINE_ATOMICS-NEXT:    adrp x1, var8
+; MSVC_OUTLINE_ATOMICS-NEXT:    add x1, x1, :lo12:var8
+; MSVC_OUTLINE_ATOMICS-NEXT:    bl __aarch64_ldadd1_relax
+; MSVC_OUTLINE_ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC_OUTLINE_ATOMICS-NEXT:    ret
    %old = atomicrmw sub ptr @var8, i8 %offset monotonic
    ret i8 %old
 }
@@ -167,6 +287,30 @@ define dso_local i16 @test_atomic_load_sub_i16(i16 %offset) nounwind {
 ; OUTLINE_ATOMICS-NEXT:    bl __aarch64_ldadd2_rel
 ; OUTLINE_ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE_ATOMICS-NEXT:    ret
+;
+; MSVC_INLINE_ATOMICS-LABEL: test_atomic_load_sub_i16:
+; MSVC_INLINE_ATOMICS:       // %bb.0:
+; MSVC_INLINE_ATOMICS-NEXT:    adrp x9, var16
+; MSVC_INLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var16
+; MSVC_INLINE_ATOMICS-NEXT:  .LBB5_1: // %atomicrmw.start
+; MSVC_INLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC_INLINE_ATOMICS-NEXT:    ldxrh w8, [x9]
+; MSVC_INLINE_ATOMICS-NEXT:    sub w10, w8, w0
+; MSVC_INLINE_ATOMICS-NEXT:    stlxrh w11, w10, [x9]
+; MSVC_INLINE_ATOMICS-NEXT:    cbnz w11, .LBB5_1
+; MSVC_INLINE_ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC_INLINE_ATOMICS-NEXT:    mov w0, w8
+; MSVC_INLINE_ATOMICS-NEXT:    ret
+;
+; MSVC_OUTLINE_ATOMICS-LABEL: test_atomic_load_sub_i16:
+; MSVC_OUTLINE_ATOMICS:       // %bb.0:
+; MSVC_OUTLINE_ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC_OUTLINE_ATOMICS-NEXT:    neg w0, w0
+; MSVC_OUTLINE_ATOMICS-NEXT:    adrp x1, var16
+; MSVC_OUTLINE_ATOMICS-NEXT:    add x1, x1, :lo12:var16
+; MSVC_OUTLINE_ATOMICS-NEXT:    bl __aarch64_ldadd2_rel
+; MSVC_OUTLINE_ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC_OUTLINE_ATOMICS-NEXT:    ret
    %old = atomicrmw sub ptr @var16, i16 %offset release
    ret i16 %old
 }
@@ -195,6 +339,30 @@ define dso_local i32 @test_atomic_load_sub_i32(i32 %offset) nounwind {
 ; OUTLINE_ATOMICS-NEXT:    bl __aarch64_ldadd4_acq
 ; OUTLINE_ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE_ATOMICS-NEXT:    ret
+;
+; MSVC_INLINE_ATOMICS-LABEL: test_atomic_load_sub_i32:
+; MSVC_INLINE_ATOMICS:       // %bb.0:
+; MSVC_INLINE_ATOMICS-NEXT:    adrp x9, var32
+; MSVC_INLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var32
+; MSVC_INLINE_ATOMICS-NEXT:  .LBB6_1: // %atomicrmw.start
+; MSVC_INLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC_INLINE_ATOMICS-NEXT:    ldaxr w8, [x9]
+; MSVC_INLINE_ATOMICS-NEXT:    sub w10, w8, w0
+; MSVC_INLINE_ATOMICS-NEXT:    stxr w11, w10, [x9]
+; MSVC_INLINE_ATOMICS-NEXT:    cbnz w11, .LBB6_1
+; MSVC_INLINE_ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC_INLINE_ATOMICS-NEXT:    mov w0, w8
+; MSVC_INLINE_ATOMICS-NEXT:    ret
+;
+; MSVC_OUTLINE_ATOMICS-LABEL: test_atomic_load_sub_i32:
+; MSVC_OUTLINE_ATOMICS:       // %bb.0:
+; MSVC_OUTLINE_ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC_OUTLINE_ATOMICS-NEXT:    neg w0, w0
+; MSVC_OUTLINE_ATOMICS-NEXT:    adrp x1, var32
+; MSVC_OUTLINE_ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; MSVC_OUTLINE_ATOMICS-NEXT:    bl __aarch64_ldadd4_acq
+; MSVC_OUTLINE_ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC_OUTLINE_ATOMICS-NEXT:    ret
    %old = atomicrmw sub ptr @var32, i32 %offset acquire
    ret i32 %old
 }
@@ -223,6 +391,32 @@ define dso_local i64 @test_atomic_load_sub_i64(i64 %offset) nounwind {
 ; OUTLINE_ATOMICS-NEXT:    bl __aarch64_ldadd8_acq_rel
 ; OUTLINE_ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE_ATOMICS-NEXT:    ret
+;
+; MSVC_INLINE_ATOMICS-LABEL: test_atomic_load_sub_i64:
+; MSVC_INLINE_ATOMICS:       // %bb.0:
+; MSVC_INLINE_ATOMICS-NEXT:    mov x8, x0
+; MSVC_INLINE_ATOMICS-NEXT:    adrp x9, var64
+; MSVC_INLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var64
+; MSVC_INLINE_ATOMICS-NEXT:  .LBB7_1: // %atomicrmw.start
+; MSVC_INLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC_INLINE_ATOMICS-NEXT:    ldaxr x0, [x9]
+; MSVC_INLINE_ATOMICS-NEXT:    sub x10, x0, x8
+; MSVC_INLINE_ATOMICS-NEXT:    stlxr w11, x10, [x9]
+; MSVC_INLINE_ATOMICS-NEXT:    cbnz w11, .LBB7_1
+; MSVC_INLINE_ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC_INLINE_ATOMICS-NEXT:    dmb ish
+; MSVC_INLINE_ATOMICS-NEXT:    ret
+;
+; MSVC_OUTLINE_ATOMICS-LABEL: test_atomic_load_sub_i64:
+; MSVC_OUTLINE_ATOMICS:       // %bb.0:
+; MSVC_OUTLINE_ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC_OUTLINE_ATOMICS-NEXT:    neg x0, x0
+; MSVC_OUTLINE_ATOMICS-NEXT:    adrp x1, var64
+; MSVC_OUTLINE_ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; MSVC_OUTLINE_ATOMICS-NEXT:    bl __aarch64_ldadd8_acq_rel
+; MSVC_OUTLINE_ATOMICS-NEXT:    dmb ish
+; MSVC_OUTLINE_ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC_OUTLINE_ATOMICS-NEXT:    ret
    %old = atomicrmw sub ptr @var64, i64 %offset seq_cst
    ret i64 %old
 }
@@ -251,6 +445,30 @@ define dso_local i8 @test_atomic_load_and_i8(i8 %offset) nounwind {
 ; OUTLINE_ATOMICS-NEXT:    bl __aarch64_ldclr1_rel
 ; OUTLINE_ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE_ATOMICS-NEXT:    ret
+;
+; MSVC_INLINE_ATOMICS-LABEL: test_atomic_load_and_i8:
+; MSVC_INLINE_ATOMICS:       // %bb.0:
+; MSVC_INLINE_ATOMICS-NEXT:    adrp x9, var8
+; MSVC_INLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var8
+; MSVC_INLINE_ATOMICS-NEXT:  .LBB8_1: // %atomicrmw.start
+; MSVC_INLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC_INLINE_ATOMICS-NEXT:    ldxrb w8, [x9]
+; MSVC_INLINE_ATOMICS-NEXT:    and w10, w8, w0
+; MSVC_INLINE_ATOMICS-NEXT:    stlxrb w11, w10, [x9]
+; MSVC_INLINE_ATOMICS-NEXT:    cbnz w11, .LBB8_1
+; MSVC_INLINE_ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC_INLINE_ATOMICS-NEXT:    mov w0, w8
+; MSVC_INLINE_ATOMICS-NEXT:    ret
+;
+; MSVC_OUTLINE_ATOMICS-LABEL: test_atomic_load_and_i8:
+; MSVC_OUTLINE_ATOMICS:       // %bb.0:
+; MSVC_OUTLINE_ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC_OUTLINE_ATOMICS-NEXT:    adrp x1, var8
+; MSVC_OUTLINE_ATOMICS-NEXT:    add x1, x1, :lo12:var8
+; MSVC_OUTLINE_ATOMICS-NEXT:    mvn w0, w0
+; MSVC_OUTLINE_ATOMICS-NEXT:    bl __aarch64_ldclr1_rel
+; MSVC_OUTLINE_ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC_OUTLINE_ATOMICS-NEXT:    ret
    %old = atomicrmw and ptr @var8, i8 %offset release
    ret i8 %old
 }
@@ -279,6 +497,30 @@ define dso_local i16 @test_atomic_load_and_i16(i16 %offset) nounwind {
 ; OUTLINE_ATOMICS-NEXT:    bl __aarch64_ldclr2_relax
 ; OUTLINE_ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE_ATOMICS-NEXT:    ret
+;
+; MSVC_INLINE_ATOMICS-LABEL: test_atomic_load_and_i16:
+; MSVC_INLINE_ATOMICS:       // %bb.0:
+; MSVC_INLINE_ATOMICS-NEXT:    adrp x9, var16
+; MSVC_INLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var16
+; MSVC_INLINE_ATOMICS-NEXT:  .LBB9_1: // %atomicrmw.start
+; MSVC_INLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC_INLINE_ATOMICS-NEXT:    ldxrh w8, [x9]
+; MSVC_INLINE_ATOMICS-NEXT:    and w10, w8, w0
+; MSVC_INLINE_ATOMICS-NEXT:    stxrh w11, w10, [x9]
+; MSVC_INLINE_ATOMICS-NEXT:    cbnz w11, .LBB9_1
+; MSVC_INLINE_ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC_INLINE_ATOMICS-NEXT:    mov w0, w8
+; MSVC_INLINE_ATOMICS-NEXT:    ret
+;
+; MSVC_OUTLINE_ATOMICS-LABEL: test_atomic_load_and_i16:
+; MSVC_OUTLINE_ATOMICS:       // %bb.0:
+; MSVC_OUTLINE_ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC_OUTLINE_ATOMICS-NEXT:    adrp x1, var16
+; MSVC_OUTLINE_ATOMICS-NEXT:    add x1, x1, :lo12:var16
+; MSVC_OUTLINE_ATOMICS-NEXT:    mvn w0, w0
+; MSVC_OUTLINE_ATOMICS-NEXT:    bl __aarch64_ldclr2_relax
+; MSVC_OUTLINE_ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC_OUTLINE_ATOMICS-NEXT:    ret
    %old = atomicrmw and ptr @var16, i16 %offset monotonic
    ret i16 %old
 }
@@ -307,6 +549,32 @@ define dso_local i32 @test_atomic_load_and_i32(i32 %offset) nounwind {
 ; OUTLINE_ATOMICS-NEXT:    bl __aarch64_ldclr4_acq_rel
 ; OUTLINE_ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE_ATOMICS-NEXT:    ret
+;
+; MSVC_INLINE_ATOMICS-LABEL: test_atomic_load_and_i32:
+; MSVC_INLINE_ATOMICS:       // %bb.0:
+; MSVC_INLINE_ATOMICS-NEXT:    adrp x9, var32
+; MSVC_INLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var32
+; MSVC_INLINE_ATOMICS-NEXT:  .LBB10_1: // %atomicrmw.start
+; MSVC_INLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC_INLINE_ATOMICS-NEXT:    ldaxr w8, [x9]
+; MSVC_INLINE_ATOMICS-NEXT:    and w10, w8, w0
+; MSVC_INLINE_ATOMICS-NEXT:    stlxr w11, w10, [x9]
+; MSVC_INLINE_ATOMICS-NEXT:    cbnz w11, .LBB10_1
+; MSVC_INLINE_ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC_INLINE_ATOMICS-NEXT:    mov w0, w8
+; MSVC_INLINE_ATOMICS-NEXT:    dmb ish
+; MSVC_INLINE_ATOMICS-NEXT:    ret
+;
+; MSVC_OUTLINE_ATOMICS-LABEL: test_atomic_load_and_i32:
+; MSVC_OUTLINE_ATOMICS:       // %bb.0:
+; MSVC_OUTLINE_ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC_OUTLINE_ATOMICS-NEXT:    adrp x1, var32
+; MSVC_OUTLINE_ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; MSVC_OUTLINE_ATOMICS-NEXT:    mvn w0, w0
+; MSVC_OUTLINE_ATOMICS-NEXT:    bl __aarch64_ldclr4_acq_rel
+; MSVC_OUTLINE_ATOMICS-NEXT:    dmb ish
+; MSVC_OUTLINE_ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC_OUTLINE_ATOMICS-NEXT:    ret
    %old = atomicrmw and ptr @var32, i32 %offset seq_cst
    ret i32 %old
 }
@@ -335,6 +603,30 @@ define dso_local i64 @test_atomic_load_and_i64(i64 %offset) nounwind {
 ; OUTLINE_ATOMICS-NEXT:    bl __aarch64_ldclr8_acq
 ; OUTLINE_ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE_ATOMICS-NEXT:    ret
+;
+; MSVC_INLINE_ATOMICS-LABEL: test_atomic_load_and_i64:
+; MSVC_INLINE_ATOMICS:       // %bb.0:
+; MSVC_INLINE_ATOMICS-NEXT:    adrp x9, var64
+; MSVC_INLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var64
+; MSVC_INLINE_ATOMICS-NEXT:  .LBB11_1: // %atomicrmw.start
+; MSVC_INLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC_INLINE_ATOMICS-NEXT:    ldaxr x8, [x9]
+; MSVC_INLINE_ATOMICS-NEXT:    and x10, x8, x0
+; MSVC_INLINE_ATOMICS-NEXT:    stxr w11, x10, [x9]
+; MSVC_INLINE_ATOMICS-NEXT:    cbnz w11, .LBB11_1
+; MSVC_INLINE_ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC_INLINE_ATOMICS-NEXT:    mov x0, x8
+; MSVC_INLINE_ATOMICS-NEXT:    ret
+;
+; MSVC_OUTLINE_ATOMICS-LABEL: test_atomic_load_and_i64:
+; MSVC_OUTLINE_ATOMICS:       // %bb.0:
+; MSVC_OUTLINE_ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC_OUTLINE_ATOMICS-NEXT:    adrp x1, var64
+; MSVC_OUTLINE_ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; MSVC_OUTLINE_ATOMICS-NEXT:    mvn x0, x0
+; MSVC_OUTLINE_ATOMICS-NEXT:    bl __aarch64_ldclr8_acq
+; MSVC_OUTLINE_ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC_OUTLINE_ATOMICS-NEXT:    ret
    %old = atomicrmw and ptr @var64, i64 %offset acquire
    ret i64 %old
 }
@@ -362,6 +654,31 @@ define dso_local i8 @test_atomic_load_or_i8(i8 %offset) nounwind {
 ; OUTLINE_ATOMICS-NEXT:    bl __aarch64_ldset1_acq_rel
 ; OUTLINE_ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE_ATOMICS-NEXT:    ret
+;
+; MSVC_INLINE_ATOMICS-LABEL: test_atomic_load_or_i8:
+; MSVC_INLINE_ATOMICS:       // %bb.0:
+; MSVC_INLINE_ATOMICS-NEXT:    adrp x9, var8
+; MSVC_INLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var8
+; MSVC_INLINE_ATOMICS-NEXT:  .LBB12_1: // %atomicrmw.start
+; MSVC_INLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC_INLINE_ATOMICS-NEXT:    ldaxrb w8, [x9]
+; MSVC_INLINE_ATOMICS-NEXT:    orr w10, w8, w0
+; MSVC_INLINE_ATOMICS-NEXT:    stlxrb w11, w10, [x9]
+; MSVC_INLINE_ATOMICS-NEXT:    cbnz w11, .LBB12_1
+; MSVC_INLINE_ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC_INLINE_ATOMICS-NEXT:    mov w0, w8
+; MSVC_INLINE_ATOMICS-NEXT:    dmb ish
+; MSVC_INLINE_ATOMICS-NEXT:    ret
+;
+; MSVC_OUTLINE_ATOMICS-LABEL: test_atomic_load_or_i8:
+; MSVC_OUTLINE_ATOMICS:       // %bb.0:
+; MSVC_OUTLINE_ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC_OUTLINE_ATOMICS-NEXT:    adrp x1, var8
+; MSVC_OUTLINE_ATOMICS-NEXT:    add x1, x1, :lo12:var8
+; MSVC_OUTLINE_ATOMICS-NEXT:    bl __aarch64_ldset1_acq_rel
+; MSVC_OUTLINE_ATOMICS-NEXT:    dmb ish
+; MSVC_OUTLINE_ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC_OUTLINE_ATOMICS-NEXT:    ret
    %old = atomicrmw or ptr @var8, i8 %offset seq_cst
    ret i8 %old
 }
@@ -389,6 +706,29 @@ define dso_local i16 @test_atomic_load_or_i16(i16 %offset) nounwind {
 ; OUTLINE_ATOMICS-NEXT:    bl __aarch64_ldset2_relax
 ; OUTLINE_ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE_ATOMICS-NEXT:    ret
+;
+; MSVC_INLINE_ATOMICS-LABEL: test_atomic_load_or_i16:
+; MSVC_INLINE_ATOMICS:       // %bb.0:
+; MSVC_INLINE_ATOMICS-NEXT:    adrp x9, var16
+; MSVC_INLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var16
+; MSVC_INLINE_ATOMICS-NEXT:  .LBB13_1: // %atomicrmw.start
+; MSVC_INLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC_INLINE_ATOMICS-NEXT:    ldxrh w8, [x9]
+; MSVC_INLINE_ATOMICS-NEXT:    orr w10, w8, w0
+; MSVC_INLINE_ATOMICS-NEXT:    stxrh w11, w10, [x9]
+; MSVC_INLINE_ATOMICS-NEXT:    cbnz w11, .LBB13_1
+; MSVC_INLINE_ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC_INLINE_ATOMICS-NEXT:    mov w0, w8
+; MSVC_INLINE_ATOMICS-NEXT:    ret
+;
+; MSVC_OUTLINE_ATOMICS-LABEL: test_atomic_load_or_i16:
+; MSVC_OUTLINE_ATOMICS:       // %bb.0:
+; MSVC_OUTLINE_ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC_OUTLINE_ATOMICS-NEXT:    adrp x1, var16
+; MSVC_OUTLINE_ATOMICS-NEXT:    add x1, x1, :lo12:var16
+; MSVC_OUTLINE_ATOMICS-NEXT:    bl __aarch64_ldset2_relax
+; MSVC_OUTLINE_ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC_OUTLINE_ATOMICS-NEXT:    ret
    %old = atomicrmw or ptr @var16, i16 %offset monotonic
    ret i16 %old
 }
@@ -416,6 +756,29 @@ define dso_local i32 @test_atomic_load_or_i32(i32 %offset) nounwind {
 ; OUTLINE_ATOMICS-NEXT:    bl __aarch64_ldset4_acq
 ; OUTLINE_ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE_ATOMICS-NEXT:    ret
+;
+; MSVC_INLINE_ATOMICS-LABEL: test_atomic_load_or_i32:
+; MSVC_INLINE_ATOMICS:       // %bb.0:
+; MSVC_INLINE_ATOMICS-NEXT:    adrp x9, var32
+; MSVC_INLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var32
+; MSVC_INLINE_ATOMICS-NEXT:  .LBB14_1: // %atomicrmw.start
+; MSVC_INLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC_INLINE_ATOMICS-NEXT:    ldaxr w8, [x9]
+; MSVC_INLINE_ATOMICS-NEXT:    orr w10, w8, w0
+; MSVC_INLINE_ATOMICS-NEXT:    stxr w11, w10, [x9]
+; MSVC_INLINE_ATOMICS-NEXT:    cbnz w11, .LBB14_1
+; MSVC_INLINE_ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC_INLINE_ATOMICS-NEXT:    mov w0, w8
+; MSVC_INLINE_ATOMICS-NEXT:    ret
+;
+; MSVC_OUTLINE_ATOMICS-LABEL: test_atomic_load_or_i32:
+; MSVC_OUTLINE_ATOMICS:       // %bb.0:
+; MSVC_OUTLINE_ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC_OUTLINE_ATOMICS-NEXT:    adrp x1, var32
+; MSVC_OUTLINE_ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; MSVC_OUTLINE_ATOMICS-NEXT:    bl __aarch64_ldset4_acq
+; MSVC_OUTLINE_ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC_OUTLINE_ATOMICS-NEXT:    ret
    %old = atomicrmw or ptr @var32, i32 %offset acquire
    ret i32 %old
 }
@@ -443,6 +806,29 @@ define dso_local i64 @test_atomic_load_or_i64(i64 %offset) nounwind {
 ; OUTLINE_ATOMICS-NEXT:    bl __aarch64_ldset8_rel
 ; OUTLINE_ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE_ATOMICS-NEXT:    ret
+;
+; MSVC_INLINE_ATOMICS-LABEL: test_atomic_load_or_i64:
+; MSVC_INLINE_ATOMICS:       // %bb.0:
+; MSVC_INLINE_ATOMICS-NEXT:    adrp x9, var64
+; MSVC_INLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var64
+; MSVC_INLINE_ATOMICS-NEXT:  .LBB15_1: // %atomicrmw.start
+; MSVC_INLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC_INLINE_ATOMICS-NEXT:    ldxr x8, [x9]
+; MSVC_INLINE_ATOMICS-NEXT:    orr x10, x8, x0
+; MSVC_INLINE_ATOMICS-NEXT:    stlxr w11, x10, [x9]
+; MSVC_INLINE_ATOMICS-NEXT:    cbnz w11, .LBB15_1
+; MSVC_INLINE_ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC_INLINE_ATOMICS-NEXT:    mov x0, x8
+; MSVC_INLINE_ATOMICS-NEXT:    ret
+;
+; MSVC_OUTLINE_ATOMICS-LABEL: test_atomic_load_or_i64:
+; MSVC_OUTLINE_ATOMICS:       // %bb.0:
+; MSVC_OUTLINE_ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC_OUTLINE_ATOMICS-NEXT:    adrp x1, var64
+; MSVC_OUTLINE_ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; MSVC_OUTLINE_ATOMICS-NEXT:    bl __aarch64_ldset8_rel
+; MSVC_OUTLINE_ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC_OUTLINE_ATOMICS-NEXT:    ret
    %old = atomicrmw or ptr @var64, i64 %offset release
    ret i64 %old
 }
@@ -470,6 +856,29 @@ define dso_local i8 @test_atomic_load_xor_i8(i8 %offset) nounwind {
 ; OUTLINE_ATOMICS-NEXT:    bl __aarch64_ldeor1_acq
 ; OUTLINE_ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE_ATOMICS-NEXT:    ret
+;
+; MSVC_INLINE_ATOMICS-LABEL: test_atomic_load_xor_i8:
+; MSVC_INLINE_ATOMICS:       // %bb.0:
+; MSVC_INLINE_ATOMICS-NEXT:    adrp x9, var8
+; MSVC_INLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var8
+; MSVC_INLINE_ATOMICS-NEXT:  .LBB16_1: // %atomicrmw.start
+; MSVC_INLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC_INLINE_ATOMICS-NEXT:    ldaxrb w8, [x9]
+; MSVC_INLINE_ATOMICS-NEXT:    eor w10, w8, w0
+; MSVC_INLINE_ATOMICS-NEXT:    stxrb w11, w10, [x9]
+; MSVC_INLINE_ATOMICS-NEXT:    cbnz w11, .LBB16_1
+; MSVC_INLINE_ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC_INLINE_ATOMICS-NEXT:    mov w0, w8
+; MSVC_INLINE_ATOMICS-NEXT:    ret
+;
+; MSVC_OUTLINE_ATOMICS-LABEL: test_atomic_load_xor_i8:
+; MSVC_OUTLINE_ATOMICS:       // %bb.0:
+; MSVC_OUTLINE_ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC_OUTLINE_ATOMICS-NEXT:    adrp x1, var8
+; MSVC_OUTLINE_ATOMICS-NEXT:    add x1, x1, :lo12:var8
+; MSVC_OUTLINE_ATOMICS-NEXT:    bl __aarch64_ldeor1_acq
+; MSVC_OUTLINE_ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC_OUTLINE_ATOMICS-NEXT:    ret
    %old = atomicrmw xor ptr @var8, i8 %offset acquire
    ret i8 %old
 }
@@ -497,6 +906,29 @@ define dso_local i16 @test_atomic_load_xor_i16(i16 %offset) nounwind {
 ; OUTLINE_ATOMICS-NEXT:    bl __aarch64_ldeor2_rel
 ; OUTLINE_ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE_ATOMICS-NEXT:    ret
+;
+; MSVC_INLINE_ATOMICS-LABEL: test_atomic_load_xor_i16:
+; MSVC_INLINE_ATOMICS:       // %bb.0:
+; MSVC_INLINE_ATOMICS-NEXT:    adrp x9, var16
+; MSVC_INLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var16
+; MSVC_INLINE_ATOMICS-NEXT:  .LBB17_1: // %atomicrmw.start
+; MSVC_INLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC_INLINE_ATOMICS-NEXT:    ldxrh w8, [x9]
+; MSVC_INLINE_ATOMICS-NEXT:    eor w10, w8, w0
+; MSVC_INLINE_ATOMICS-NEXT:    stlxrh w11, w10, [x9]
+; MSVC_INLINE_ATOMICS-NEXT:    cbnz w11, .LBB17_1
+; MSVC_INLINE_ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC_INLINE_ATOMICS-NEXT:    mov w0, w8
+; MSVC_INLINE_ATOMICS-NEXT:    ret
+;
+; MSVC_OUTLINE_ATOMICS-LABEL: test_atomic_load_xor_i16:
+; MSVC_OUTLINE_ATOMICS:       // %bb.0:
+; MSVC_OUTLINE_ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC_OUTLINE_ATOMICS-NEXT:    adrp x1, var16
+; MSVC_OUTLINE_ATOMICS-NEXT:    add x1, x1, :lo12:var16
+; MSVC_OUTLINE_ATOMICS-NEXT:    bl __aarch64_ldeor2_rel
+; MSVC_OUTLINE_ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC_OUTLINE_ATOMICS-NEXT:    ret
    %old = atomicrmw xor ptr @var16, i16 %offset release
    ret i16 %old
 }
@@ -524,6 +956,31 @@ define dso_local i32 @test_atomic_load_xor_i32(i32 %offset) nounwind {
 ; OUTLINE_ATOMICS-NEXT:    bl __aarch64_ldeor4_acq_rel
 ; OUTLINE_ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE_ATOMICS-NEXT:    ret
+;
+; MSVC_INLINE_ATOMICS-LABEL: test_atomic_load_xor_i32:
+; MSVC_INLINE_ATOMICS:       // %bb.0:
+; MSVC_INLINE_ATOMICS-NEXT:    adrp x9, var32
+; MSVC_INLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var32
+; MSVC_INLINE_ATOMICS-NEXT:  .LBB18_1: // %atomicrmw.start
+; MSVC_INLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC_INLINE_ATOMICS-NEXT:    ldaxr w8, [x9]
+; MSVC_INLINE_ATOMICS-NEXT:    eor w10, w8, w0
+; MSVC_INLINE_ATOMICS-NEXT:    stlxr w11, w10, [x9]
+; MSVC_INLINE_ATOMICS-NEXT:    cbnz w11, .LBB18_1
+; MSVC_INLINE_ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC_INLINE_ATOMICS-NEXT:    mov w0, w8
+; MSVC_INLINE_ATOMICS-NEXT:    dmb ish
+; MSVC_INLINE_ATOMICS-NEXT:    ret
+;
+; MSVC_OUTLINE_ATOMICS-LABEL: test_atomic_load_xor_i32:
+; MSVC_OUTLINE_ATOMICS:       // %bb.0:
+; MSVC_OUTLINE_ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC_OUTLINE_ATOMICS-NEXT:    adrp x1, var32
+; MSVC_OUTLINE_ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; MSVC_OUTLINE_ATOMICS-NEXT:    bl __aarch64_ldeor4_acq_rel
+; MSVC_OUTLINE_ATOMICS-NEXT:    dmb ish
+; MSVC_OUTLINE_ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC_OUTLINE_ATOMICS-NEXT:    ret
    %old = atomicrmw xor ptr @var32, i32 %offset seq_cst
    ret i32 %old
 }
@@ -551,6 +1008,29 @@ define dso_local i64 @test_atomic_load_xor_i64(i64 %offset) nounwind {
 ; OUTLINE_ATOMICS-NEXT:    bl __aarch64_ldeor8_relax
 ; OUTLINE_ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE_ATOMICS-NEXT:    ret
+;
+; MSVC_INLINE_ATOMICS-LABEL: test_atomic_load_xor_i64:
+; MSVC_INLINE_ATOMICS:       // %bb.0:
+; MSVC_INLINE_ATOMICS-NEXT:    adrp x9, var64
+; MSVC_INLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var64
+; MSVC_INLINE_ATOMICS-NEXT:  .LBB19_1: // %atomicrmw.start
+; MSVC_INLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC_INLINE_ATOMICS-NEXT:    ldxr x8, [x9]
+; MSVC_INLINE_ATOMICS-NEXT:    eor x10, x8, x0
+; MSVC_INLINE_ATOMICS-NEXT:    stxr w11, x10, [x9]
+; MSVC_INLINE_ATOMICS-NEXT:    cbnz w11, .LBB19_1
+; MSVC_INLINE_ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC_INLINE_ATOMICS-NEXT:    mov x0, x8
+; MSVC_INLINE_ATOMICS-NEXT:    ret
+;
+; MSVC_OUTLINE_ATOMICS-LABEL: test_atomic_load_xor_i64:
+; MSVC_OUTLINE_ATOMICS:       // %bb.0:
+; MSVC_OUTLINE_ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC_OUTLINE_ATOMICS-NEXT:    adrp x1, var64
+; MSVC_OUTLINE_ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; MSVC_OUTLINE_ATOMICS-NEXT:    bl __aarch64_ldeor8_relax
+; MSVC_OUTLINE_ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC_OUTLINE_ATOMICS-NEXT:    ret
    %old = atomicrmw xor ptr @var64, i64 %offset monotonic
    ret i64 %old
 }
@@ -577,6 +1057,28 @@ define dso_local i8 @test_atomic_load_xchg_i8(i8 %offset) nounwind {
 ; OUTLINE_ATOMICS-NEXT:    bl __aarch64_swp1_relax
 ; OUTLINE_ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE_ATOMICS-NEXT:    ret
+;
+; MSVC_INLINE_ATOMICS-LABEL: test_atomic_load_xchg_i8:
+; MSVC_INLINE_ATOMICS:       // %bb.0:
+; MSVC_INLINE_ATOMICS-NEXT:    adrp x9, var8
+; MSVC_INLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var8
+; MSVC_INLINE_ATOMICS-NEXT:  .LBB20_1: // %atomicrmw.start
+; MSVC_INLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC_INLINE_ATOMICS-NEXT:    ldxrb w8, [x9]
+; MSVC_INLINE_ATOMICS-NEXT:    stxrb w10, w0, [x9]
+; MSVC_INLINE_ATOMICS-NEXT:    cbnz w10, .LBB20_1
+; MSVC_INLINE_ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC_INLINE_ATOMICS-NEXT:    mov w0, w8
+; MSVC_INLINE_ATOMICS-NEXT:    ret
+;
+; MSVC_OUTLINE_ATOMICS-LABEL: test_atomic_load_xchg_i8:
+; MSVC_OUTLINE_ATOMICS:       // %bb.0:
+; MSVC_OUTLINE_ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC_OUTLINE_ATOMICS-NEXT:    adrp x1, var8
+; MSVC_OUTLINE_ATOMICS-NEXT:    add x1, x1, :lo12:var8
+; MSVC_OUTLINE_ATOMICS-NEXT:    bl __aarch64_swp1_relax
+; MSVC_OUTLINE_ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC_OUTLINE_ATOMICS-NEXT:    ret
    %old = atomicrmw xchg ptr @var8, i8 %offset monotonic
    ret i8 %old
 }
@@ -603,6 +1105,31 @@ define dso_local i16 @test_atomic_load_xchg_i16(i16 %offset) nounwind {
 ; OUTLINE_ATOMICS-NEXT:    bl __aarch64_swp2_acq_rel
 ; OUTLINE_ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE_ATOMICS-NEXT:    ret
+;
+; MSVC_INLINE_ATOMICS-LABEL: test_atomic_load_xchg_i16:
+; MSVC_INLINE_ATOMICS:       // %bb.0:
+; MSVC_INLINE_ATOMICS-NEXT:    mov w8, w0
+; MSVC_INLINE_ATOMICS-NEXT:    adrp x9, var16
+; MSVC_INLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var16
+; MSVC_INLINE_ATOMICS-NEXT:  .LBB21_1: // %atomicrmw.start
+; MSVC_INLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC_INLINE_ATOMICS-NEXT:    ldaxrh w0, [x9]
+; MSVC_INLINE_ATOMICS-NEXT:    stlxrh w10, w8, [x9]
+; MSVC_INLINE_ATOMICS-NEXT:    cbnz w10, .LBB21_1
+; MSVC_INLINE_ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC_INLINE_ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; MSVC_INLINE_ATOMICS-NEXT:    dmb ish
+; MSVC_INLINE_ATOMICS-NEXT:    ret
+;
+; MSVC_OUTLINE_ATOMICS-LABEL: test_atomic_load_xchg_i16:
+; MSVC_OUTLINE_ATOMICS:       // %bb.0:
+; MSVC_OUTLINE_ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC_OUTLINE_ATOMICS-NEXT:    adrp x1, var16
+; MSVC_OUTLINE_ATOMICS-NEXT:    add x1, x1, :lo12:var16
+; MSVC_OUTLINE_ATOMICS-NEXT:    bl __aarch64_swp2_acq_rel
+; MSVC_OUTLINE_ATOMICS-NEXT:    dmb ish
+; MSVC_OUTLINE_ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC_OUTLINE_ATOMICS-NEXT:    ret
    %old = atomicrmw xchg ptr @var16, i16 %offset seq_cst
    ret i16 %old
 }
@@ -630,6 +1157,29 @@ define dso_local i32 @test_atomic_load_xchg_i32(i32 %offset) nounwind {
 ; OUTLINE_ATOMICS-NEXT:    bl __aarch64_swp4_rel
 ; OUTLINE_ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE_ATOMICS-NEXT:    ret
+;
+; MSVC_INLINE_ATOMICS-LABEL: test_atomic_load_xchg_i32:
+; MSVC_INLINE_ATOMICS:       // %bb.0:
+; MSVC_INLINE_ATOMICS-NEXT:    mov w8, w0
+; MSVC_INLINE_ATOMICS-NEXT:    adrp x9, var32
+; MSVC_INLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var32
+; MSVC_INLINE_ATOMICS-NEXT:  .LBB22_1: // %atomicrmw.start
+; MSVC_INLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC_INLINE_ATOMICS-NEXT:    ldxr w0, [x9]
+; MSVC_INLINE_ATOMICS-NEXT:    stlxr w10, w8, [x9]
+; MSVC_INLINE_ATOMICS-NEXT:    cbnz w10, .LBB22_1
+; MSVC_INLINE_ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC_INLINE_ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; MSVC_INLINE_ATOMICS-NEXT:    ret
+;
+; MSVC_OUTLINE_ATOMICS-LABEL: test_atomic_load_xchg_i32:
+; MSVC_OUTLINE_ATOMICS:       // %bb.0:
+; MSVC_OUTLINE_ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC_OUTLINE_ATOMICS-NEXT:    adrp x1, var32
+; MSVC_OUTLINE_ATOMICS-NEXT:    add x1, x1, :lo12:var32
+; MSVC_OUTLINE_ATOMICS-NEXT:    bl __aarch64_swp4_rel
+; MSVC_OUTLINE_ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC_OUTLINE_ATOMICS-NEXT:    ret
    %old = atomicrmw xchg ptr @var32, i32 %offset release
    ret i32 %old
 }
@@ -656,6 +1206,28 @@ define dso_local i64 @test_atomic_load_xchg_i64(i64 %offset) nounwind {
 ; OUTLINE_ATOMICS-NEXT:    bl __aarch64_swp8_acq
 ; OUTLINE_ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE_ATOMICS-NEXT:    ret
+;
+; MSVC_INLINE_ATOMICS-LABEL: test_atomic_load_xchg_i64:
+; MSVC_INLINE_ATOMICS:       // %bb.0:
+; MSVC_INLINE_ATOMICS-NEXT:    adrp x9, var64
+; MSVC_INLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var64
+; MSVC_INLINE_ATOMICS-NEXT:  .LBB23_1: // %atomicrmw.start
+; MSVC_INLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC_INLINE_ATOMICS-NEXT:    ldaxr x8, [x9]
+; MSVC_INLINE_ATOMICS-NEXT:    stxr w10, x0, [x9]
+; MSVC_INLINE_ATOMICS-NEXT:    cbnz w10, .LBB23_1
+; MSVC_INLINE_ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC_INLINE_ATOMICS-NEXT:    mov x0, x8
+; MSVC_INLINE_ATOMICS-NEXT:    ret
+;
+; MSVC_OUTLINE_ATOMICS-LABEL: test_atomic_load_xchg_i64:
+; MSVC_OUTLINE_ATOMICS:       // %bb.0:
+; MSVC_OUTLINE_ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC_OUTLINE_ATOMICS-NEXT:    adrp x1, var64
+; MSVC_OUTLINE_ATOMICS-NEXT:    add x1, x1, :lo12:var64
+; MSVC_OUTLINE_ATOMICS-NEXT:    bl __aarch64_swp8_acq
+; MSVC_OUTLINE_ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC_OUTLINE_ATOMICS-NEXT:    ret
    %old = atomicrmw xchg ptr @var64, i64 %offset acquire
    ret i64 %old
 }
@@ -721,40 +1293,137 @@ define dso_local i32 @test_atomic_load_min_i32(i32 %offset) nounwind {
 }
 
 define dso_local i64 @test_atomic_load_min_i64(i64 %offset) nounwind {
-; CHECK-LABEL: test_atomic_load_min_i64:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    adrp x9, var64
-; CHECK-NEXT:    add x9, x9, :lo12:var64
-; CHECK-NEXT:  .LBB27_1: // %atomicrmw.start
-; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    ldaxr x8, [x9]
-; CHECK-NEXT:    cmp x8, x0
-; CHECK-NEXT:    csel x10, x8, x0, le
-; CHECK-NEXT:    stlxr w11, x10, [x9]
-; CHECK-NEXT:    cbnz w11, .LBB27_1
-; CHECK-NEXT:  // %bb.2: // %atomicrmw.end
-; CHECK-NEXT:    mov x0, x8
-; CHECK-NEXT:    ret
+; INLINE_ATOMICS-LABEL: test_atomic_load_min_i64:
+; INLINE_ATOMICS:       // %bb.0:
+; INLINE_ATOMICS-NEXT:    adrp x9, var64
+; INLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var64
+; INLINE_ATOMICS-NEXT:  .LBB27_1: // %atomicrmw.start
+; INLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; INLINE_ATOMICS-NEXT:    ldaxr x8, [x9]
+; INLINE_ATOMICS-NEXT:    cmp x8, x0
+; INLINE_ATOMICS-NEXT:    csel x10, x8, x0, le
+; INLINE_ATOMICS-NEXT:    stlxr w11, x10, [x9]
+; INLINE_ATOMICS-NEXT:    cbnz w11, .LBB27_1
+; INLINE_ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; INLINE_ATOMICS-NEXT:    mov x0, x8
+; INLINE_ATOMICS-NEXT:    ret
+;
+; OUTLINE_ATOMICS-LABEL: test_atomic_load_min_i64:
+; OUTLINE_ATOMICS:       // %bb.0:
+; OUTLINE_ATOMICS-NEXT:    adrp x9, var64
+; OUTLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var64
+; OUTLINE_ATOMICS-NEXT:  .LBB27_1: // %atomicrmw.start
+; OUTLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE_ATOMICS-NEXT:    ldaxr x8, [x9]
+; OUTLINE_ATOMICS-NEXT:    cmp x8, x0
+; OUTLINE_ATOMICS-NEXT:    csel x10, x8, x0, le
+; OUTLINE_ATOMICS-NEXT:    stlxr w11, x10, [x9]
+; OUTLINE_ATOMICS-NEXT:    cbnz w11, .LBB27_1
+; OUTLINE_ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE_ATOMICS-NEXT:    mov x0, x8
+; OUTLINE_ATOMICS-NEXT:    ret
+;
+; MSVC_INLINE_ATOMICS-LABEL: test_atomic_load_min_i64:
+; MSVC_INLINE_ATOMICS:       // %bb.0:
+; MSVC_INLINE_ATOMICS-NEXT:    mov x8, x0
+; MSVC_INLINE_ATOMICS-NEXT:    adrp x9, var64
+; MSVC_INLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var64
+; MSVC_INLINE_ATOMICS-NEXT:  .LBB27_1: // %atomicrmw.start
+; MSVC_INLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC_INLINE_ATOMICS-NEXT:    ldaxr x0, [x9]
+; MSVC_INLINE_ATOMICS-NEXT:    cmp x0, x8
+; MSVC_INLINE_ATOMICS-NEXT:    csel x10, x0, x8, le
+; MSVC_INLINE_ATOMICS-NEXT:    stlxr w11, x10, [x9]
+; MSVC_INLINE_ATOMICS-NEXT:    cbnz w11, .LBB27_1
+; MSVC_INLINE_ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC_INLINE_ATOMICS-NEXT:    dmb ish
+; MSVC_INLINE_ATOMICS-NEXT:    ret
+;
+; MSVC_OUTLINE_ATOMICS-LABEL: test_atomic_load_min_i64:
+; MSVC_OUTLINE_ATOMICS:       // %bb.0:
+; MSVC_OUTLINE_ATOMICS-NEXT:    mov x8, x0
+; MSVC_OUTLINE_ATOMICS-NEXT:    adrp x9, var64
+; MSVC_OUTLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var64
+; MSVC_OUTLINE_ATOMICS-NEXT:  .LBB27_1: // %atomicrmw.start
+; MSVC_OUTLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC_OUTLINE_ATOMICS-NEXT:    ldaxr x0, [x9]
+; MSVC_OUTLINE_ATOMICS-NEXT:    cmp x0, x8
+; MSVC_OUTLINE_ATOMICS-NEXT:    csel x10, x0, x8, le
+; MSVC_OUTLINE_ATOMICS-NEXT:    stlxr w11, x10, [x9]
+; MSVC_OUTLINE_ATOMICS-NEXT:    cbnz w11, .LBB27_1
+; MSVC_OUTLINE_ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC_OUTLINE_ATOMICS-NEXT:    dmb ish
+; MSVC_OUTLINE_ATOMICS-NEXT:    ret
    %old = atomicrmw min ptr @var64, i64 %offset seq_cst
    ret i64 %old
 }
 
 define dso_local i8 @test_atomic_load_max_i8(i8 %offset) nounwind {
-; CHECK-LABEL: test_atomic_load_max_i8:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    adrp x9, var8
-; CHECK-NEXT:    add x9, x9, :lo12:var8
-; CHECK-NEXT:  .LBB28_1: // %atomicrmw.start
-; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    ldaxrb w10, [x9]
-; CHECK-NEXT:    sxtb w8, w10
-; CHECK-NEXT:    cmp w8, w0, sxtb
-; CHECK-NEXT:    csel w10, w10, w0, gt
-; CHECK-NEXT:    stlxrb w11, w10, [x9]
-; CHECK-NEXT:    cbnz w11, .LBB28_1
-; CHECK-NEXT:  // %bb.2: // %atomicrmw.end
-; CHECK-NEXT:    mov w0, w8
-; CHECK-NEXT:    ret
+; INLINE_ATOMICS-LABEL: test_atomic_load_max_i8:
+; INLINE_ATOMICS:       // %bb.0:
+; INLINE_ATOMICS-NEXT:    adrp x9, var8
+; INLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var8
+; INLINE_ATOMICS-NEXT:  .LBB28_1: // %atomicrmw.start
+; INLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; INLINE_ATOMICS-NEXT:    ldaxrb w10, [x9]
+; INLINE_ATOMICS-NEXT:    sxtb w8, w10
+; INLINE_ATOMICS-NEXT:    cmp w8, w0, sxtb
+; INLINE_ATOMICS-NEXT:    csel w10, w10, w0, gt
+; INLINE_ATOMICS-NEXT:    stlxrb w11, w10, [x9]
+; INLINE_ATOMICS-NEXT:    cbnz w11, .LBB28_1
+; INLINE_ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; INLINE_ATOMICS-NEXT:    mov w0, w8
+; INLINE_ATOMICS-NEXT:    ret
+;
+; OUTLINE_ATOMICS-LABEL: test_atomic_load_max_i8:
+; OUTLINE_ATOMICS:       // %bb.0:
+; OUTLINE_ATOMICS-NEXT:    adrp x9, var8
+; OUTLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var8
+; OUTLINE_ATOMICS-NEXT:  .LBB28_1: // %atomicrmw.start
+; OUTLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE_ATOMICS-NEXT:    ldaxrb w10, [x9]
+; OUTLINE_ATOMICS-NEXT:    sxtb w8, w10
+; OUTLINE_ATOMICS-NEXT:    cmp w8, w0, sxtb
+; OUTLINE_ATOMICS-NEXT:    csel w10, w10, w0, gt
+; OUTLINE_ATOMICS-NEXT:    stlxrb w11, w10, [x9]
+; OUTLINE_ATOMICS-NEXT:    cbnz w11, .LBB28_1
+; OUTLINE_ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE_ATOMICS-NEXT:    mov w0, w8
+; OUTLINE_ATOMICS-NEXT:    ret
+;
+; MSVC_INLINE_ATOMICS-LABEL: test_atomic_load_max_i8:
+; MSVC_INLINE_ATOMICS:       // %bb.0:
+; MSVC_INLINE_ATOMICS-NEXT:    adrp x9, var8
+; MSVC_INLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var8
+; MSVC_INLINE_ATOMICS-NEXT:  .LBB28_1: // %atomicrmw.start
+; MSVC_INLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC_INLINE_ATOMICS-NEXT:    ldaxrb w10, [x9]
+; MSVC_INLINE_ATOMICS-NEXT:    sxtb w8, w10
+; MSVC_INLINE_ATOMICS-NEXT:    cmp w8, w0, sxtb
+; MSVC_INLINE_ATOMICS-NEXT:    csel w10, w10, w0, gt
+; MSVC_INLINE_ATOMICS-NEXT:    stlxrb w11, w10, [x9]
+; MSVC_INLINE_ATOMICS-NEXT:    cbnz w11, .LBB28_1
+; MSVC_INLINE_ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC_INLINE_ATOMICS-NEXT:    mov w0, w8
+; MSVC_INLINE_ATOMICS-NEXT:    dmb ish
+; MSVC_INLINE_ATOMICS-NEXT:    ret
+;
+; MSVC_OUTLINE_ATOMICS-LABEL: test_atomic_load_max_i8:
+; MSVC_OUTLINE_ATOMICS:       // %bb.0:
+; MSVC_OUTLINE_ATOMICS-NEXT:    adrp x9, var8
+; MSVC_OUTLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var8
+; MSVC_OUTLINE_ATOMICS-NEXT:  .LBB28_1: // %atomicrmw.start
+; MSVC_OUTLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC_OUTLINE_ATOMICS-NEXT:    ldaxrb w10, [x9]
+; MSVC_OUTLINE_ATOMICS-NEXT:    sxtb w8, w10
+; MSVC_OUTLINE_ATOMICS-NEXT:    cmp w8, w0, sxtb
+; MSVC_OUTLINE_ATOMICS-NEXT:    csel w10, w10, w0, gt
+; MSVC_OUTLINE_ATOMICS-NEXT:    stlxrb w11, w10, [x9]
+; MSVC_OUTLINE_ATOMICS-NEXT:    cbnz w11, .LBB28_1
+; MSVC_OUTLINE_ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC_OUTLINE_ATOMICS-NEXT:    mov w0, w8
+; MSVC_OUTLINE_ATOMICS-NEXT:    dmb ish
+; MSVC_OUTLINE_ATOMICS-NEXT:    ret
    %old = atomicrmw max ptr @var8, i8 %offset seq_cst
    ret i8 %old
 }
@@ -858,20 +1527,67 @@ define dso_local i16 @test_atomic_load_umin_i16(i16 %offset) nounwind {
 }
 
 define dso_local i32 @test_atomic_load_umin_i32(i32 %offset) nounwind {
-; CHECK-LABEL: test_atomic_load_umin_i32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    adrp x9, var32
-; CHECK-NEXT:    add x9, x9, :lo12:var32
-; CHECK-NEXT:  .LBB34_1: // %atomicrmw.start
-; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    ldaxr w8, [x9]
-; CHECK-NEXT:    cmp w8, w0
-; CHECK-NEXT:    csel w10, w8, w0, ls
-; CHECK-NEXT:    stlxr w11, w10, [x9]
-; CHECK-NEXT:    cbnz w11, .LBB34_1
-; CHECK-NEXT:  // %bb.2: // %atomicrmw.end
-; CHECK-NEXT:    mov w0, w8
-; CHECK-NEXT:    ret
+; INLINE_ATOMICS-LABEL: test_atomic_load_umin_i32:
+; INLINE_ATOMICS:       // %bb.0:
+; INLINE_ATOMICS-NEXT:    adrp x9, var32
+; INLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var32
+; INLINE_ATOMICS-NEXT:  .LBB34_1: // %atomicrmw.start
+; INLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; INLINE_ATOMICS-NEXT:    ldaxr w8, [x9]
+; INLINE_ATOMICS-NEXT:    cmp w8, w0
+; INLINE_ATOMICS-NEXT:    csel w10, w8, w0, ls
+; INLINE_ATOMICS-NEXT:    stlxr w11, w10, [x9]
+; INLINE_ATOMICS-NEXT:    cbnz w11, .LBB34_1
+; INLINE_ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; INLINE_ATOMICS-NEXT:    mov w0, w8
+; INLINE_ATOMICS-NEXT:    ret
+;
+; OUTLINE_ATOMICS-LABEL: test_atomic_load_umin_i32:
+; OUTLINE_ATOMICS:       // %bb.0:
+; OUTLINE_ATOMICS-NEXT:    adrp x9, var32
+; OUTLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var32
+; OUTLINE_ATOMICS-NEXT:  .LBB34_1: // %atomicrmw.start
+; OUTLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE_ATOMICS-NEXT:    ldaxr w8, [x9]
+; OUTLINE_ATOMICS-NEXT:    cmp w8, w0
+; OUTLINE_ATOMICS-NEXT:    csel w10, w8, w0, ls
+; OUTLINE_ATOMICS-NEXT:    stlxr w11, w10, [x9]
+; OUTLINE_ATOMICS-NEXT:    cbnz w11, .LBB34_1
+; OUTLINE_ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE_ATOMICS-NEXT:    mov w0, w8
+; OUTLINE_ATOMICS-NEXT:    ret
+;
+; MSVC_INLINE_ATOMICS-LABEL: test_atomic_load_umin_i32:
+; MSVC_INLINE_ATOMICS:       // %bb.0:
+; MSVC_INLINE_ATOMICS-NEXT:    adrp x9, var32
+; MSVC_INLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var32
+; MSVC_INLINE_ATOMICS-NEXT:  .LBB34_1: // %atomicrmw.start
+; MSVC_INLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC_INLINE_ATOMICS-NEXT:    ldaxr w8, [x9]
+; MSVC_INLINE_ATOMICS-NEXT:    cmp w8, w0
+; MSVC_INLINE_ATOMICS-NEXT:    csel w10, w8, w0, ls
+; MSVC_INLINE_ATOMICS-NEXT:    stlxr w11, w10, [x9]
+; MSVC_INLINE_ATOMICS-NEXT:    cbnz w11, .LBB34_1
+; MSVC_INLINE_ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC_INLINE_ATOMICS-NEXT:    mov w0, w8
+; MSVC_INLINE_ATOMICS-NEXT:    dmb ish
+; MSVC_INLINE_ATOMICS-NEXT:    ret
+;
+; MSVC_OUTLINE_ATOMICS-LABEL: test_atomic_load_umin_i32:
+; MSVC_OUTLINE_ATOMICS:       // %bb.0:
+; MSVC_OUTLINE_ATOMICS-NEXT:    adrp x9, var32
+; MSVC_OUTLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var32
+; MSVC_OUTLINE_ATOMICS-NEXT:  .LBB34_1: // %atomicrmw.start
+; MSVC_OUTLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC_OUTLINE_ATOMICS-NEXT:    ldaxr w8, [x9]
+; MSVC_OUTLINE_ATOMICS-NEXT:    cmp w8, w0
+; MSVC_OUTLINE_ATOMICS-NEXT:    csel w10, w8, w0, ls
+; MSVC_OUTLINE_ATOMICS-NEXT:    stlxr w11, w10, [x9]
+; MSVC_OUTLINE_ATOMICS-NEXT:    cbnz w11, .LBB34_1
+; MSVC_OUTLINE_ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC_OUTLINE_ATOMICS-NEXT:    mov w0, w8
+; MSVC_OUTLINE_ATOMICS-NEXT:    dmb ish
+; MSVC_OUTLINE_ATOMICS-NEXT:    ret
    %old = atomicrmw umin ptr @var32, i32 %offset seq_cst
    ret i32 %old
 }
@@ -936,20 +1652,67 @@ define dso_local i16 @test_atomic_load_umax_i16(i16 %offset) nounwind {
 }
 
 define dso_local i32 @test_atomic_load_umax_i32(i32 %offset) nounwind {
-; CHECK-LABEL: test_atomic_load_umax_i32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    adrp x9, var32
-; CHECK-NEXT:    add x9, x9, :lo12:var32
-; CHECK-NEXT:  .LBB38_1: // %atomicrmw.start
-; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    ldaxr w8, [x9]
-; CHECK-NEXT:    cmp w8, w0
-; CHECK-NEXT:    csel w10, w8, w0, hi
-; CHECK-NEXT:    stlxr w11, w10, [x9]
-; CHECK-NEXT:    cbnz w11, .LBB38_1
-; CHECK-NEXT:  // %bb.2: // %atomicrmw.end
-; CHECK-NEXT:    mov w0, w8
-; CHECK-NEXT:    ret
+; INLINE_ATOMICS-LABEL: test_atomic_load_umax_i32:
+; INLINE_ATOMICS:       // %bb.0:
+; INLINE_ATOMICS-NEXT:    adrp x9, var32
+; INLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var32
+; INLINE_ATOMICS-NEXT:  .LBB38_1: // %atomicrmw.start
+; INLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; INLINE_ATOMICS-NEXT:    ldaxr w8, [x9]
+; INLINE_ATOMICS-NEXT:    cmp w8, w0
+; INLINE_ATOMICS-NEXT:    csel w10, w8, w0, hi
+; INLINE_ATOMICS-NEXT:    stlxr w11, w10, [x9]
+; INLINE_ATOMICS-NEXT:    cbnz w11, .LBB38_1
+; INLINE_ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; INLINE_ATOMICS-NEXT:    mov w0, w8
+; INLINE_ATOMICS-NEXT:    ret
+;
+; OUTLINE_ATOMICS-LABEL: test_atomic_load_umax_i32:
+; OUTLINE_ATOMICS:       // %bb.0:
+; OUTLINE_ATOMICS-NEXT:    adrp x9, var32
+; OUTLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var32
+; OUTLINE_ATOMICS-NEXT:  .LBB38_1: // %atomicrmw.start
+; OUTLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE_ATOMICS-NEXT:    ldaxr w8, [x9]
+; OUTLINE_ATOMICS-NEXT:    cmp w8, w0
+; OUTLINE_ATOMICS-NEXT:    csel w10, w8, w0, hi
+; OUTLINE_ATOMICS-NEXT:    stlxr w11, w10, [x9]
+; OUTLINE_ATOMICS-NEXT:    cbnz w11, .LBB38_1
+; OUTLINE_ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE_ATOMICS-NEXT:    mov w0, w8
+; OUTLINE_ATOMICS-NEXT:    ret
+;
+; MSVC_INLINE_ATOMICS-LABEL: test_atomic_load_umax_i32:
+; MSVC_INLINE_ATOMICS:       // %bb.0:
+; MSVC_INLINE_ATOMICS-NEXT:    adrp x9, var32
+; MSVC_INLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var32
+; MSVC_INLINE_ATOMICS-NEXT:  .LBB38_1: // %atomicrmw.start
+; MSVC_INLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC_INLINE_ATOMICS-NEXT:    ldaxr w8, [x9]
+; MSVC_INLINE_ATOMICS-NEXT:    cmp w8, w0
+; MSVC_INLINE_ATOMICS-NEXT:    csel w10, w8, w0, hi
+; MSVC_INLINE_ATOMICS-NEXT:    stlxr w11, w10, [x9]
+; MSVC_INLINE_ATOMICS-NEXT:    cbnz w11, .LBB38_1
+; MSVC_INLINE_ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC_INLINE_ATOMICS-NEXT:    mov w0, w8
+; MSVC_INLINE_ATOMICS-NEXT:    dmb ish
+; MSVC_INLINE_ATOMICS-NEXT:    ret
+;
+; MSVC_OUTLINE_ATOMICS-LABEL: test_atomic_load_umax_i32:
+; MSVC_OUTLINE_ATOMICS:       // %bb.0:
+; MSVC_OUTLINE_ATOMICS-NEXT:    adrp x9, var32
+; MSVC_OUTLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var32
+; MSVC_OUTLINE_ATOMICS-NEXT:  .LBB38_1: // %atomicrmw.start
+; MSVC_OUTLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC_OUTLINE_ATOMICS-NEXT:    ldaxr w8, [x9]
+; MSVC_OUTLINE_ATOMICS-NEXT:    cmp w8, w0
+; MSVC_OUTLINE_ATOMICS-NEXT:    csel w10, w8, w0, hi
+; MSVC_OUTLINE_ATOMICS-NEXT:    stlxr w11, w10, [x9]
+; MSVC_OUTLINE_ATOMICS-NEXT:    cbnz w11, .LBB38_1
+; MSVC_OUTLINE_ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC_OUTLINE_ATOMICS-NEXT:    mov w0, w8
+; MSVC_OUTLINE_ATOMICS-NEXT:    dmb ish
+; MSVC_OUTLINE_ATOMICS-NEXT:    ret
    %old = atomicrmw umax ptr @var32, i32 %offset seq_cst
    ret i32 %old
 }
@@ -1012,20 +1775,67 @@ define dso_local i16 @test_atomic_load_uinc_wrap_i16(i16 %offset) nounwind {
 }
 
 define dso_local i32 @test_atomic_load_uinc_wrap_i32(i32 %offset) nounwind {
-; CHECK-LABEL: test_atomic_load_uinc_wrap_i32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    adrp x9, var32
-; CHECK-NEXT:    add x9, x9, :lo12:var32
-; CHECK-NEXT:  .LBB42_1: // %atomicrmw.start
-; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    ldaxr w8, [x9]
-; CHECK-NEXT:    cmp w8, w0
-; CHECK-NEXT:    csinc w10, wzr, w8, hs
-; CHECK-NEXT:    stlxr w11, w10, [x9]
-; CHECK-NEXT:    cbnz w11, .LBB42_1
-; CHECK-NEXT:  // %bb.2: // %atomicrmw.end
-; CHECK-NEXT:    mov w0, w8
-; CHECK-NEXT:    ret
+; INLINE_ATOMICS-LABEL: test_atomic_load_uinc_wrap_i32:
+; INLINE_ATOMICS:       // %bb.0:
+; INLINE_ATOMICS-NEXT:    adrp x9, var32
+; INLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var32
+; INLINE_ATOMICS-NEXT:  .LBB42_1: // %atomicrmw.start
+; INLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; INLINE_ATOMICS-NEXT:    ldaxr w8, [x9]
+; INLINE_ATOMICS-NEXT:    cmp w8, w0
+; INLINE_ATOMICS-NEXT:    csinc w10, wzr, w8, hs
+; INLINE_ATOMICS-NEXT:    stlxr w11, w10, [x9]
+; INLINE_ATOMICS-NEXT:    cbnz w11, .LBB42_1
+; INLINE_ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; INLINE_ATOMICS-NEXT:    mov w0, w8
+; INLINE_ATOMICS-NEXT:    ret
+;
+; OUTLINE_ATOMICS-LABEL: test_atomic_load_uinc_wrap_i32:
+; OUTLINE_ATOMICS:       // %bb.0:
+; OUTLINE_ATOMICS-NEXT:    adrp x9, var32
+; OUTLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var32
+; OUTLINE_ATOMICS-NEXT:  .LBB42_1: // %atomicrmw.start
+; OUTLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE_ATOMICS-NEXT:    ldaxr w8, [x9]
+; OUTLINE_ATOMICS-NEXT:    cmp w8, w0
+; OUTLINE_ATOMICS-NEXT:    csinc w10, wzr, w8, hs
+; OUTLINE_ATOMICS-NEXT:    stlxr w11, w10, [x9]
+; OUTLINE_ATOMICS-NEXT:    cbnz w11, .LBB42_1
+; OUTLINE_ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE_ATOMICS-NEXT:    mov w0, w8
+; OUTLINE_ATOMICS-NEXT:    ret
+;
+; MSVC_INLINE_ATOMICS-LABEL: test_atomic_load_uinc_wrap_i32:
+; MSVC_INLINE_ATOMICS:       // %bb.0:
+; MSVC_INLINE_ATOMICS-NEXT:    adrp x9, var32
+; MSVC_INLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var32
+; MSVC_INLINE_ATOMICS-NEXT:  .LBB42_1: // %atomicrmw.start
+; MSVC_INLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC_INLINE_ATOMICS-NEXT:    ldaxr w8, [x9]
+; MSVC_INLINE_ATOMICS-NEXT:    cmp w8, w0
+; MSVC_INLINE_ATOMICS-NEXT:    csinc w10, wzr, w8, hs
+; MSVC_INLINE_ATOMICS-NEXT:    stlxr w11, w10, [x9]
+; MSVC_INLINE_ATOMICS-NEXT:    cbnz w11, .LBB42_1
+; MSVC_INLINE_ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC_INLINE_ATOMICS-NEXT:    mov w0, w8
+; MSVC_INLINE_ATOMICS-NEXT:    dmb ish
+; MSVC_INLINE_ATOMICS-NEXT:    ret
+;
+; MSVC_OUTLINE_ATOMICS-LABEL: test_atomic_load_uinc_wrap_i32:
+; MSVC_OUTLINE_ATOMICS:       // %bb.0:
+; MSVC_OUTLINE_ATOMICS-NEXT:    adrp x9, var32
+; MSVC_OUTLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var32
+; MSVC_OUTLINE_ATOMICS-NEXT:  .LBB42_1: // %atomicrmw.start
+; MSVC_OUTLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC_OUTLINE_ATOMICS-NEXT:    ldaxr w8, [x9]
+; MSVC_OUTLINE_ATOMICS-NEXT:    cmp w8, w0
+; MSVC_OUTLINE_ATOMICS-NEXT:    csinc w10, wzr, w8, hs
+; MSVC_OUTLINE_ATOMICS-NEXT:    stlxr w11, w10, [x9]
+; MSVC_OUTLINE_ATOMICS-NEXT:    cbnz w11, .LBB42_1
+; MSVC_OUTLINE_ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC_OUTLINE_ATOMICS-NEXT:    mov w0, w8
+; MSVC_OUTLINE_ATOMICS-NEXT:    dmb ish
+; MSVC_OUTLINE_ATOMICS-NEXT:    ret
    %old = atomicrmw uinc_wrap ptr @var32, i32 %offset seq_cst
    ret i32 %old
 }
@@ -1092,22 +1902,75 @@ define dso_local i16 @test_atomic_load_udec_wrap_i16(i16 %offset) nounwind {
 }
 
 define dso_local i32 @test_atomic_load_udec_wrap_i32(i32 %offset) nounwind {
-; CHECK-LABEL: test_atomic_load_udec_wrap_i32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    adrp x9, var32
-; CHECK-NEXT:    add x9, x9, :lo12:var32
-; CHECK-NEXT:  .LBB46_1: // %atomicrmw.start
-; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    ldaxr w8, [x9]
-; CHECK-NEXT:    cmp w8, w0
-; CHECK-NEXT:    sub w10, w8, #1
-; CHECK-NEXT:    ccmp w8, #0, #4, ls
-; CHECK-NEXT:    csel w10, w0, w10, eq
-; CHECK-NEXT:    stlxr w11, w10, [x9]
-; CHECK-NEXT:    cbnz w11, .LBB46_1
-; CHECK-NEXT:  // %bb.2: // %atomicrmw.end
-; CHECK-NEXT:    mov w0, w8
-; CHECK-NEXT:    ret
+; INLINE_ATOMICS-LABEL: test_atomic_load_udec_wrap_i32:
+; INLINE_ATOMICS:       // %bb.0:
+; INLINE_ATOMICS-NEXT:    adrp x9, var32
+; INLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var32
+; INLINE_ATOMICS-NEXT:  .LBB46_1: // %atomicrmw.start
+; INLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; INLINE_ATOMICS-NEXT:    ldaxr w8, [x9]
+; INLINE_ATOMICS-NEXT:    cmp w8, w0
+; INLINE_ATOMICS-NEXT:    sub w10, w8, #1
+; INLINE_ATOMICS-NEXT:    ccmp w8, #0, #4, ls
+; INLINE_ATOMICS-NEXT:    csel w10, w0, w10, eq
+; INLINE_ATOMICS-NEXT:    stlxr w11, w10, [x9]
+; INLINE_ATOMICS-NEXT:    cbnz w11, .LBB46_1
+; INLINE_ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; INLINE_ATOMICS-NEXT:    mov w0, w8
+; INLINE_ATOMICS-NEXT:    ret
+;
+; OUTLINE_ATOMICS-LABEL: test_atomic_load_udec_wrap_i32:
+; OUTLINE_ATOMICS:       // %bb.0:
+; OUTLINE_ATOMICS-NEXT:    adrp x9, var32
+; OUTLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var32
+; OUTLINE_ATOMICS-NEXT:  .LBB46_1: // %atomicrmw.start
+; OUTLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE_ATOMICS-NEXT:    ldaxr w8, [x9]
+; OUTLINE_ATOMICS-NEXT:    cmp w8, w0
+; OUTLINE_ATOMICS-NEXT:    sub w10, w8, #1
+; OUTLINE_ATOMICS-NEXT:    ccmp w8, #0, #4, ls
+; OUTLINE_ATOMICS-NEXT:    csel w10, w0, w10, eq
+; OUTLINE_ATOMICS-NEXT:    stlxr w11, w10, [x9]
+; OUTLINE_ATOMICS-NEXT:    cbnz w11, .LBB46_1
+; OUTLINE_ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE_ATOMICS-NEXT:    mov w0, w8
+; OUTLINE_ATOMICS-NEXT:    ret
+;
+; MSVC_INLINE_ATOMICS-LABEL: test_atomic_load_udec_wrap_i32:
+; MSVC_INLINE_ATOMICS:       // %bb.0:
+; MSVC_INLINE_ATOMICS-NEXT:    adrp x9, var32
+; MSVC_INLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var32
+; MSVC_INLINE_ATOMICS-NEXT:  .LBB46_1: // %atomicrmw.start
+; MSVC_INLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC_INLINE_ATOMICS-NEXT:    ldaxr w8, [x9]
+; MSVC_INLINE_ATOMICS-NEXT:    cmp w8, w0
+; MSVC_INLINE_ATOMICS-NEXT:    sub w10, w8, #1
+; MSVC_INLINE_ATOMICS-NEXT:    ccmp w8, #0, #4, ls
+; MSVC_INLINE_ATOMICS-NEXT:    csel w10, w0, w10, eq
+; MSVC_INLINE_ATOMICS-NEXT:    stlxr w11, w10, [x9]
+; MSVC_INLINE_ATOMICS-NEXT:    cbnz w11, .LBB46_1
+; MSVC_INLINE_ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC_INLINE_ATOMICS-NEXT:    mov w0, w8
+; MSVC_INLINE_ATOMICS-NEXT:    dmb ish
+; MSVC_INLINE_ATOMICS-NEXT:    ret
+;
+; MSVC_OUTLINE_ATOMICS-LABEL: test_atomic_load_udec_wrap_i32:
+; MSVC_OUTLINE_ATOMICS:       // %bb.0:
+; MSVC_OUTLINE_ATOMICS-NEXT:    adrp x9, var32
+; MSVC_OUTLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var32
+; MSVC_OUTLINE_ATOMICS-NEXT:  .LBB46_1: // %atomicrmw.start
+; MSVC_OUTLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC_OUTLINE_ATOMICS-NEXT:    ldaxr w8, [x9]
+; MSVC_OUTLINE_ATOMICS-NEXT:    cmp w8, w0
+; MSVC_OUTLINE_ATOMICS-NEXT:    sub w10, w8, #1
+; MSVC_OUTLINE_ATOMICS-NEXT:    ccmp w8, #0, #4, ls
+; MSVC_OUTLINE_ATOMICS-NEXT:    csel w10, w0, w10, eq
+; MSVC_OUTLINE_ATOMICS-NEXT:    stlxr w11, w10, [x9]
+; MSVC_OUTLINE_ATOMICS-NEXT:    cbnz w11, .LBB46_1
+; MSVC_OUTLINE_ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC_OUTLINE_ATOMICS-NEXT:    mov w0, w8
+; MSVC_OUTLINE_ATOMICS-NEXT:    dmb ish
+; MSVC_OUTLINE_ATOMICS-NEXT:    ret
    %old = atomicrmw udec_wrap ptr @var32, i32 %offset seq_cst
    ret i32 %old
 }
@@ -1174,20 +2037,67 @@ define dso_local i16 @test_atomic_load_usub_cond_i16(i16 %offset) nounwind {
 }
 
 define dso_local i32 @test_atomic_load_usub_cond_i32(i32 %offset) nounwind {
-; CHECK-LABEL: test_atomic_load_usub_cond_i32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    adrp x9, var32
-; CHECK-NEXT:    add x9, x9, :lo12:var32
-; CHECK-NEXT:  .LBB50_1: // %atomicrmw.start
-; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    ldaxr w8, [x9]
-; CHECK-NEXT:    subs w10, w8, w0
-; CHECK-NEXT:    csel w10, w10, w8, hs
-; CHECK-NEXT:    stlxr w11, w10, [x9]
-; CHECK-NEXT:    cbnz w11, .LBB50_1
-; CHECK-NEXT:  // %bb.2: // %atomicrmw.end
-; CHECK-NEXT:    mov w0, w8
-; CHECK-NEXT:    ret
+; INLINE_ATOMICS-LABEL: test_atomic_load_usub_cond_i32:
+; INLINE_ATOMICS:       // %bb.0:
+; INLINE_ATOMICS-NEXT:    adrp x9, var32
+; INLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var32
+; INLINE_ATOMICS-NEXT:  .LBB50_1: // %atomicrmw.start
+; INLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; INLINE_ATOMICS-NEXT:    ldaxr w8, [x9]
+; INLINE_ATOMICS-NEXT:    subs w10, w8, w0
+; INLINE_ATOMICS-NEXT:    csel w10, w10, w8, hs
+; INLINE_ATOMICS-NEXT:    stlxr w11, w10, [x9]
+; INLINE_ATOMICS-NEXT:    cbnz w11, .LBB50_1
+; INLINE_ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; INLINE_ATOMICS-NEXT:    mov w0, w8
+; INLINE_ATOMICS-NEXT:    ret
+;
+; OUTLINE_ATOMICS-LABEL: test_atomic_load_usub_cond_i32:
+; OUTLINE_ATOMICS:       // %bb.0:
+; OUTLINE_ATOMICS-NEXT:    adrp x9, var32
+; OUTLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var32
+; OUTLINE_ATOMICS-NEXT:  .LBB50_1: // %atomicrmw.start
+; OUTLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE_ATOMICS-NEXT:    ldaxr w8, [x9]
+; OUTLINE_ATOMICS-NEXT:    subs w10, w8, w0
+; OUTLINE_ATOMICS-NEXT:    csel w10, w10, w8, hs
+; OUTLINE_ATOMICS-NEXT:    stlxr w11, w10, [x9]
+; OUTLINE_ATOMICS-NEXT:    cbnz w11, .LBB50_1
+; OUTLINE_ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE_ATOMICS-NEXT:    mov w0, w8
+; OUTLINE_ATOMICS-NEXT:    ret
+;
+; MSVC_INLINE_ATOMICS-LABEL: test_atomic_load_usub_cond_i32:
+; MSVC_INLINE_ATOMICS:       // %bb.0:
+; MSVC_INLINE_ATOMICS-NEXT:    adrp x9, var32
+; MSVC_INLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var32
+; MSVC_INLINE_ATOMICS-NEXT:  .LBB50_1: // %atomicrmw.start
+; MSVC_INLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC_INLINE_ATOMICS-NEXT:    ldaxr w8, [x9]
+; MSVC_INLINE_ATOMICS-NEXT:    subs w10, w8, w0
+; MSVC_INLINE_ATOMICS-NEXT:    csel w10, w10, w8, hs
+; MSVC_INLINE_ATOMICS-NEXT:    stlxr w11, w10, [x9]
+; MSVC_INLINE_ATOMICS-NEXT:    cbnz w11, .LBB50_1
+; MSVC_INLINE_ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC_INLINE_ATOMICS-NEXT:    mov w0, w8
+; MSVC_INLINE_ATOMICS-NEXT:    dmb ish
+; MSVC_INLINE_ATOMICS-NEXT:    ret
+;
+; MSVC_OUTLINE_ATOMICS-LABEL: test_atomic_load_usub_cond_i32:
+; MSVC_OUTLINE_ATOMICS:       // %bb.0:
+; MSVC_OUTLINE_ATOMICS-NEXT:    adrp x9, var32
+; MSVC_OUTLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var32
+; MSVC_OUTLINE_ATOMICS-NEXT:  .LBB50_1: // %atomicrmw.start
+; MSVC_OUTLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC_OUTLINE_ATOMICS-NEXT:    ldaxr w8, [x9]
+; MSVC_OUTLINE_ATOMICS-NEXT:    subs w10, w8, w0
+; MSVC_OUTLINE_ATOMICS-NEXT:    csel w10, w10, w8, hs
+; MSVC_OUTLINE_ATOMICS-NEXT:    stlxr w11, w10, [x9]
+; MSVC_OUTLINE_ATOMICS-NEXT:    cbnz w11, .LBB50_1
+; MSVC_OUTLINE_ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC_OUTLINE_ATOMICS-NEXT:    mov w0, w8
+; MSVC_OUTLINE_ATOMICS-NEXT:    dmb ish
+; MSVC_OUTLINE_ATOMICS-NEXT:    ret
    %old = atomicrmw usub_cond ptr @var32, i32 %offset seq_cst
    ret i32 %old
 }
@@ -1250,20 +2160,67 @@ define dso_local i16 @test_atomic_load_usub_sat_i16(i16 %offset) nounwind {
 }
 
 define dso_local i32 @test_atomic_load_usub_sat_i32(i32 %offset) nounwind {
-; CHECK-LABEL: test_atomic_load_usub_sat_i32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    adrp x9, var32
-; CHECK-NEXT:    add x9, x9, :lo12:var32
-; CHECK-NEXT:  .LBB54_1: // %atomicrmw.start
-; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    ldaxr w8, [x9]
-; CHECK-NEXT:    subs w10, w8, w0
-; CHECK-NEXT:    csel w10, wzr, w10, lo
-; CHECK-NEXT:    stlxr w11, w10, [x9]
-; CHECK-NEXT:    cbnz w11, .LBB54_1
-; CHECK-NEXT:  // %bb.2: // %atomicrmw.end
-; CHECK-NEXT:    mov w0, w8
-; CHECK-NEXT:    ret
+; INLINE_ATOMICS-LABEL: test_atomic_load_usub_sat_i32:
+; INLINE_ATOMICS:       // %bb.0:
+; INLINE_ATOMICS-NEXT:    adrp x9, var32
+; INLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var32
+; INLINE_ATOMICS-NEXT:  .LBB54_1: // %atomicrmw.start
+; INLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; INLINE_ATOMICS-NEXT:    ldaxr w8, [x9]
+; INLINE_ATOMICS-NEXT:    subs w10, w8, w0
+; INLINE_ATOMICS-NEXT:    csel w10, wzr, w10, lo
+; INLINE_ATOMICS-NEXT:    stlxr w11, w10, [x9]
+; INLINE_ATOMICS-NEXT:    cbnz w11, .LBB54_1
+; INLINE_ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; INLINE_ATOMICS-NEXT:    mov w0, w8
+; INLINE_ATOMICS-NEXT:    ret
+;
+; OUTLINE_ATOMICS-LABEL: test_atomic_load_usub_sat_i32:
+; OUTLINE_ATOMICS:       // %bb.0:
+; OUTLINE_ATOMICS-NEXT:    adrp x9, var32
+; OUTLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var32
+; OUTLINE_ATOMICS-NEXT:  .LBB54_1: // %atomicrmw.start
+; OUTLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE_ATOMICS-NEXT:    ldaxr w8, [x9]
+; OUTLINE_ATOMICS-NEXT:    subs w10, w8, w0
+; OUTLINE_ATOMICS-NEXT:    csel w10, wzr, w10, lo
+; OUTLINE_ATOMICS-NEXT:    stlxr w11, w10, [x9]
+; OUTLINE_ATOMICS-NEXT:    cbnz w11, .LBB54_1
+; OUTLINE_ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE_ATOMICS-NEXT:    mov w0, w8
+; OUTLINE_ATOMICS-NEXT:    ret
+;
+; MSVC_INLINE_ATOMICS-LABEL: test_atomic_load_usub_sat_i32:
+; MSVC_INLINE_ATOMICS:       // %bb.0:
+; MSVC_INLINE_ATOMICS-NEXT:    adrp x9, var32
+; MSVC_INLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var32
+; MSVC_INLINE_ATOMICS-NEXT:  .LBB54_1: // %atomicrmw.start
+; MSVC_INLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC_INLINE_ATOMICS-NEXT:    ldaxr w8, [x9]
+; MSVC_INLINE_ATOMICS-NEXT:    subs w10, w8, w0
+; MSVC_INLINE_ATOMICS-NEXT:    csel w10, wzr, w10, lo
+; MSVC_INLINE_ATOMICS-NEXT:    stlxr w11, w10, [x9]
+; MSVC_INLINE_ATOMICS-NEXT:    cbnz w11, .LBB54_1
+; MSVC_INLINE_ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC_INLINE_ATOMICS-NEXT:    mov w0, w8
+; MSVC_INLINE_ATOMICS-NEXT:    dmb ish
+; MSVC_INLINE_ATOMICS-NEXT:    ret
+;
+; MSVC_OUTLINE_ATOMICS-LABEL: test_atomic_load_usub_sat_i32:
+; MSVC_OUTLINE_ATOMICS:       // %bb.0:
+; MSVC_OUTLINE_ATOMICS-NEXT:    adrp x9, var32
+; MSVC_OUTLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var32
+; MSVC_OUTLINE_ATOMICS-NEXT:  .LBB54_1: // %atomicrmw.start
+; MSVC_OUTLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC_OUTLINE_ATOMICS-NEXT:    ldaxr w8, [x9]
+; MSVC_OUTLINE_ATOMICS-NEXT:    subs w10, w8, w0
+; MSVC_OUTLINE_ATOMICS-NEXT:    csel w10, wzr, w10, lo
+; MSVC_OUTLINE_ATOMICS-NEXT:    stlxr w11, w10, [x9]
+; MSVC_OUTLINE_ATOMICS-NEXT:    cbnz w11, .LBB54_1
+; MSVC_OUTLINE_ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; MSVC_OUTLINE_ATOMICS-NEXT:    mov w0, w8
+; MSVC_OUTLINE_ATOMICS-NEXT:    dmb ish
+; MSVC_OUTLINE_ATOMICS-NEXT:    ret
    %old = atomicrmw usub_sat ptr @var32, i32 %offset seq_cst
    ret i32 %old
 }
@@ -1318,6 +2275,37 @@ define dso_local i8 @test_atomic_cmpxchg_i8(i8 %wanted, i8 %new) nounwind {
 ; OUTLINE_ATOMICS-NEXT:    bl __aarch64_cas1_acq
 ; OUTLINE_ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE_ATOMICS-NEXT:    ret
+;
+; MSVC_INLINE_ATOMICS-LABEL: test_atomic_cmpxchg_i8:
+; MSVC_INLINE_ATOMICS:       // %bb.0:
+; MSVC_INLINE_ATOMICS-NEXT:    and w8, w0, #0xff
+; MSVC_INLINE_ATOMICS-NEXT:    adrp x9, var8
+; MSVC_INLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var8
+; MSVC_INLINE_ATOMICS-NEXT:  .LBB56_1: // %cmpxchg.start
+; MSVC_INLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC_INLINE_ATOMICS-NEXT:    ldaxrb w0, [x9]
+; MSVC_INLINE_ATOMICS-NEXT:    cmp w0, w8
+; MSVC_INLINE_ATOMICS-NEXT:    b.ne .LBB56_4
+; MSVC_INLINE_ATOMICS-NEXT:  // %bb.2: // %cmpxchg.trystore
+; MSVC_INLINE_ATOMICS-NEXT:    // in Loop: Header=BB56_1 Depth=1
+; MSVC_INLINE_ATOMICS-NEXT:    stxrb w10, w1, [x9]
+; MSVC_INLINE_ATOMICS-NEXT:    cbnz w10, .LBB56_1
+; MSVC_INLINE_ATOMICS-NEXT:  // %bb.3: // %cmpxchg.end
+; MSVC_INLINE_ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; MSVC_INLINE_ATOMICS-NEXT:    ret
+; MSVC_INLINE_ATOMICS-NEXT:  .LBB56_4: // %cmpxchg.nostore
+; MSVC_INLINE_ATOMICS-NEXT:    clrex
+; MSVC_INLINE_ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; MSVC_INLINE_ATOMICS-NEXT:    ret
+;
+; MSVC_OUTLINE_ATOMICS-LABEL: test_atomic_cmpxchg_i8:
+; MSVC_OUTLINE_ATOMICS:       // %bb.0:
+; MSVC_OUTLINE_ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC_OUTLINE_ATOMICS-NEXT:    adrp x2, var8
+; MSVC_OUTLINE_ATOMICS-NEXT:    add x2, x2, :lo12:var8
+; MSVC_OUTLINE_ATOMICS-NEXT:    bl __aarch64_cas1_acq
+; MSVC_OUTLINE_ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC_OUTLINE_ATOMICS-NEXT:    ret
    %pair = cmpxchg ptr @var8, i8 %wanted, i8 %new acquire acquire
    %old = extractvalue { i8, i1 } %pair, 0
    ret i8 %old
@@ -1354,6 +2342,39 @@ define dso_local i16 @test_atomic_cmpxchg_i16(i16 %wanted, i16 %new) nounwind {
 ; OUTLINE_ATOMICS-NEXT:    bl __aarch64_cas2_acq_rel
 ; OUTLINE_ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE_ATOMICS-NEXT:    ret
+;
+; MSVC_INLINE_ATOMICS-LABEL: test_atomic_cmpxchg_i16:
+; MSVC_INLINE_ATOMICS:       // %bb.0:
+; MSVC_INLINE_ATOMICS-NEXT:    and w8, w0, #0xffff
+; MSVC_INLINE_ATOMICS-NEXT:    adrp x9, var16
+; MSVC_INLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var16
+; MSVC_INLINE_ATOMICS-NEXT:  .LBB57_1: // %cmpxchg.start
+; MSVC_INLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC_INLINE_ATOMICS-NEXT:    ldaxrh w0, [x9]
+; MSVC_INLINE_ATOMICS-NEXT:    cmp w0, w8
+; MSVC_INLINE_ATOMICS-NEXT:    b.ne .LBB57_4
+; MSVC_INLINE_ATOMICS-NEXT:  // %bb.2: // %cmpxchg.trystore
+; MSVC_INLINE_ATOMICS-NEXT:    // in Loop: Header=BB57_1 Depth=1
+; MSVC_INLINE_ATOMICS-NEXT:    stlxrh w10, w1, [x9]
+; MSVC_INLINE_ATOMICS-NEXT:    cbnz w10, .LBB57_1
+; MSVC_INLINE_ATOMICS-NEXT:  // %bb.3: // %cmpxchg.success
+; MSVC_INLINE_ATOMICS-NEXT:    dmb ish
+; MSVC_INLINE_ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; MSVC_INLINE_ATOMICS-NEXT:    ret
+; MSVC_INLINE_ATOMICS-NEXT:  .LBB57_4: // %cmpxchg.nostore
+; MSVC_INLINE_ATOMICS-NEXT:    clrex
+; MSVC_INLINE_ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; MSVC_INLINE_ATOMICS-NEXT:    ret
+;
+; MSVC_OUTLINE_ATOMICS-LABEL: test_atomic_cmpxchg_i16:
+; MSVC_OUTLINE_ATOMICS:       // %bb.0:
+; MSVC_OUTLINE_ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC_OUTLINE_ATOMICS-NEXT:    adrp x2, var16
+; MSVC_OUTLINE_ATOMICS-NEXT:    add x2, x2, :lo12:var16
+; MSVC_OUTLINE_ATOMICS-NEXT:    bl __aarch64_cas2_acq_rel
+; MSVC_OUTLINE_ATOMICS-NEXT:    dmb ish
+; MSVC_OUTLINE_ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC_OUTLINE_ATOMICS-NEXT:    ret
    %pair = cmpxchg ptr @var16, i16 %wanted, i16 %new seq_cst seq_cst
    %old = extractvalue { i16, i1 } %pair, 0
    ret i16 %old
@@ -1390,6 +2411,37 @@ define dso_local i32 @test_atomic_cmpxchg_i32(i32 %wanted, i32 %new) nounwind {
 ; OUTLINE_ATOMICS-NEXT:    bl __aarch64_cas4_rel
 ; OUTLINE_ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE_ATOMICS-NEXT:    ret
+;
+; MSVC_INLINE_ATOMICS-LABEL: test_atomic_cmpxchg_i32:
+; MSVC_INLINE_ATOMICS:       // %bb.0:
+; MSVC_INLINE_ATOMICS-NEXT:    mov w8, w0
+; MSVC_INLINE_ATOMICS-NEXT:    adrp x9, var32
+; MSVC_INLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var32
+; MSVC_INLINE_ATOMICS-NEXT:  .LBB58_1: // %cmpxchg.start
+; MSVC_INLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC_INLINE_ATOMICS-NEXT:    ldxr w0, [x9]
+; MSVC_INLINE_ATOMICS-NEXT:    cmp w0, w8
+; MSVC_INLINE_ATOMICS-NEXT:    b.ne .LBB58_4
+; MSVC_INLINE_ATOMICS-NEXT:  // %bb.2: // %cmpxchg.trystore
+; MSVC_INLINE_ATOMICS-NEXT:    // in Loop: Header=BB58_1 Depth=1
+; MSVC_INLINE_ATOMICS-NEXT:    stlxr w10, w1, [x9]
+; MSVC_INLINE_ATOMICS-NEXT:    cbnz w10, .LBB58_1
+; MSVC_INLINE_ATOMICS-NEXT:  // %bb.3: // %cmpxchg.end
+; MSVC_INLINE_ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; MSVC_INLINE_ATOMICS-NEXT:    ret
+; MSVC_INLINE_ATOMICS-NEXT:  .LBB58_4: // %cmpxchg.nostore
+; MSVC_INLINE_ATOMICS-NEXT:    clrex
+; MSVC_INLINE_ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; MSVC_INLINE_ATOMICS-NEXT:    ret
+;
+; MSVC_OUTLINE_ATOMICS-LABEL: test_atomic_cmpxchg_i32:
+; MSVC_OUTLINE_ATOMICS:       // %bb.0:
+; MSVC_OUTLINE_ATOMICS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; MSVC_OUTLINE_ATOMICS-NEXT:    adrp x2, var32
+; MSVC_OUTLINE_ATOMICS-NEXT:    add x2, x2, :lo12:var32
+; MSVC_OUTLINE_ATOMICS-NEXT:    bl __aarch64_cas4_rel
+; MSVC_OUTLINE_ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; MSVC_OUTLINE_ATOMICS-NEXT:    ret
    %pair = cmpxchg ptr @var32, i32 %wanted, i32 %new release monotonic
    %old = extractvalue { i32, i1 } %pair, 0
    ret i32 %old
@@ -1427,6 +2479,38 @@ define dso_local void @test_atomic_cmpxchg_i64(i64 %wanted, i64 %new) nounwind {
 ; OUTLINE_ATOMICS-NEXT:    str x0, [x19]
 ; OUTLINE_ATOMICS-NEXT:    ldp x30, x19, [sp], #16 // 16-byte Folded Reload
 ; OUTLINE_ATOMICS-NEXT:    ret
+;
+; MSVC_INLINE_ATOMICS-LABEL: test_atomic_cmpxchg_i64:
+; MSVC_INLINE_ATOMICS:       // %bb.0:
+; MSVC_INLINE_ATOMICS-NEXT:    adrp x9, var64
+; MSVC_INLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var64
+; MSVC_INLINE_ATOMICS-NEXT:  .LBB59_1: // %cmpxchg.start
+; MSVC_INLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; MSVC_INLINE_ATOMICS-NEXT:    ldxr x8, [x9]
+; MSVC_INLINE_ATOMICS-NEXT:    cmp x8, x0
+; MSVC_INLINE_ATOMICS-NEXT:    b.ne .LBB59_4
+; MSVC_INLINE_ATOMICS-NEXT:  // %bb.2: // %cmpxchg.trystore
+; MSVC_INLINE_ATOMICS-NEXT:    // in Loop: Header=BB59_1 Depth=1
+; MSVC_INLINE_ATOMICS-NEXT:    stxr w10, x1, [x9]
+; MSVC_INLINE_ATOMICS-NEXT:    cbnz w10, .LBB59_1
+; MSVC_INLINE_ATOMICS-NEXT:  .LBB59_3: // %cmpxchg.end
+; MSVC_INLINE_ATOMICS-NEXT:    adrp x9, var64
+; MSVC_INLINE_ATOMICS-NEXT:    str x8, [x9, :lo12:var64]
+; MSVC_INLINE_ATOMICS-NEXT:    ret
+; MSVC_INLINE_ATOMICS-NEXT:  .LBB59_4: // %cmpxchg.nostore
+; MSVC_INLINE_ATOMICS-NEXT:    clrex
+; MSVC_INLINE_ATOMICS-NEXT:    b .LBB59_3
+;
+; MSVC_OUTLINE_ATOMICS-LABEL: test_atomic_cmpxchg_i64:
+; MSVC_OUTLINE_ATOMICS:       // %bb.0:
+; MSVC_OUTLINE_ATOMICS-NEXT:    stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
+; MSVC_OUTLINE_ATOMICS-NEXT:    adrp x19, var64
+; MSVC_OUTLINE_ATOMICS-NEXT:    add x19, x19, :lo12:var64
+; MSVC_OUTLINE_ATOMICS-NEXT:    mov x2, x19
+; MSVC_OUTLINE_ATOMICS-NEXT:    bl __aarch64_cas8_relax
+; MSVC_OUTLINE_ATOMICS-NEXT:    str x0, [x19]
+; MSVC_OUTLINE_ATOMICS-NEXT:    ldp x30, x19, [sp], #16 // 16-byte Folded Reload
+; MSVC_OUTLINE_ATOMICS-NEXT:    ret
    %pair = cmpxchg ptr @var64, i64 %wanted, i64 %new monotonic monotonic
    %old = extractvalue { i64, i1 } %pair, 0
    store i64 %old, ptr @var64
@@ -1540,12 +2624,35 @@ define dso_local void @test_atomic_store_release_i8(i8 %val) nounwind {
 }
 
 define dso_local void @test_atomic_store_seq_cst_i8(i8 %val) nounwind {
-; CHECK-LABEL: test_atomic_store_seq_cst_i8:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    adrp x8, var8
-; CHECK-NEXT:    add x8, x8, :lo12:var8
-; CHECK-NEXT:    stlrb w0, [x8]
-; CHECK-NEXT:    ret
+; INLINE_ATOMICS-LABEL: test_atomic_store_seq_cst_i8:
+; INLINE_ATOMICS:       // %bb.0:
+; INLINE_ATOMICS-NEXT:    adrp x8, var8
+; INLINE_ATOMICS-NEXT:    add x8, x8, :lo12:var8
+; INLINE_ATOMICS-NEXT:    stlrb w0, [x8]
+; INLINE_ATOMICS-NEXT:    ret
+;
+; OUTLINE_ATOMICS-LABEL: test_atomic_store_seq_cst_i8:
+; OUTLINE_ATOMICS:       // %bb.0:
+; OUTLINE_ATOMICS-NEXT:    adrp x8, var8
+; OUTLINE_ATOMICS-NEXT:    add x8, x8, :lo12:var8
+; OUTLINE_ATOMICS-NEXT:    stlrb w0, [x8]
+; OUTLINE_ATOMICS-NEXT:    ret
+;
+; MSVC_INLINE_ATOMICS-LABEL: test_atomic_store_seq_cst_i8:
+; MSVC_INLINE_ATOMICS:       // %bb.0:
+; MSVC_INLINE_ATOMICS-NEXT:    adrp x8, var8
+; MSVC_INLINE_ATOMICS-NEXT:    add x8, x8, :lo12:var8
+; MSVC_INLINE_ATOMICS-NEXT:    stlrb w0, [x8]
+; MSVC_INLINE_ATOMICS-NEXT:    dmb ish
+; MSVC_INLINE_ATOMICS-NEXT:    ret
+;
+; MSVC_OUTLINE_ATOMICS-LABEL: test_atomic_store_seq_cst_i8:
+; MSVC_OUTLINE_ATOMICS:       // %bb.0:
+; MSVC_OUTLINE_ATOMICS-NEXT:    adrp x8, var8
+; MSVC_OUTLINE_ATOMICS-NEXT:    add x8, x8, :lo12:var8
+; MSVC_OUTLINE_ATOMICS-NEXT:    stlrb w0, [x8]
+; MSVC_OUTLINE_ATOMICS-NEXT:    dmb ish
+; MSVC_OUTLINE_ATOMICS-NEXT:    ret
   store atomic i8 %val, ptr @var8 seq_cst, align 1
   ret void
 }

>From ec246cfbd41163d1359e2f71d90105377c3631a8 Mon Sep 17 00:00:00 2001
From: Usman Nadeem <mnadeem at qti.qualcomm.com>
Date: Wed, 26 Nov 2025 00:07:17 -0800
Subject: [PATCH 2/2] Simplify checks.

Change-Id: Iea7753ccc9fe18c8fb70ab0dc4a1117cd619f49f
---
 llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 11 ++---------
 1 file changed, 2 insertions(+), 9 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index a99413fe03431..0afc770451b66 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -29471,15 +29471,8 @@ bool AArch64TargetLowering::storeNeedsSeqCstTrailingFence(
   if (RMW && RMW->getOrdering() != AtomicOrdering::SequentiallyConsistent)
     return false;
 
-  // We do not need a fence only if we have LSE and are not expanding.
-  TargetLoweringBase::AtomicExpansionKind ExpandKind =
-      CAS ? shouldExpandAtomicCmpXchgInIR(CAS) : shouldExpandAtomicRMWInIR(RMW);
-  if (ExpandKind == AtomicExpansionKind::None && Subtarget->hasLSE())
-    return false;
-  if (RMW && ExpandKind == AtomicExpansionKind::CmpXChg && Subtarget->hasLSE())
-    return false;
-
-  return true;
+  // We do not need a fence if we have LSE atomics.
+  return !Subtarget->hasLSE();
 }
 
 // Loads and stores less than 128-bits are already atomic; ones above that



More information about the llvm-commits mailing list